# danielecook
# 4/13/2017 - 8:05 PM
#
# Compare strains to see where they differ.

library(tidyverse)
library(bigrquery)


#' Count, per megabase bin, the sites at which the given strains differ
#'
#' Builds a BigQuery query against `[andersen-lab:WI.20170312]`, echoes it via
#' `message()`, and runs it with `query_exec()` under the `andersen-lab`
#' project. A site counts as "different" when, among the requested strains,
#' both homozygous genotypes (`0/0` and `1/1`) are observed.
#'
#' @param ... Strain names, given as individual strings and/or character
#'   vectors; all arguments are flattened with `unlist()`.
#' @return The result of `query_exec()` for the query, which selects the
#'   columns `CHROM`, `POS_BIN` (position in Mb, rounded up) and `COUNT`
#'   (number of differing sites in that bin).
variant_strains <- function(...) {
  strains <- unlist(list(...))

  # Validate before splicing into SQL: an empty list would yield `IN()`,
  # NA would silently become the literal 'NA', and a quote or backslash
  # would break out of the string literal (SQL injection).
  if (length(strains) == 0L) {
    stop("At least one strain name must be supplied.", call. = FALSE)
  }
  strains <- as.character(strains)
  if (anyNA(strains) || !all(nzchar(strains))) {
    stop("Strain names must be non-missing, non-empty strings.", call. = FALSE)
  }
  if (any(grepl("['\\\\]", strains))) {
    stop(
      "Strain names must not contain single quotes or backslashes.",
      call. = FALSE
    )
  }

  strain_list <- paste0("'", strains, "'", collapse = ",")
  message(strain_list)

  # The quality filters (FILTER / FT) are applied in the inner query, because
  # the outer query only sees the columns the subquery projects (CHROM, POS,
  # DIFF). DIFF is the number of distinct homozygous genotypes seen at a site;
  # a nested SUM(COUNT(...)) is not needed (and aggregates cannot be nested).
  # NOTE(review): assumes FILTER and FT are columns of the WI.20170312 table,
  # as the original query implied -- confirm against the table schema.
  sql <- sprintf("
  SELECT
    CHROM,
    CEIL(POS/1000000) AS POS_BIN,
    COUNT(DIFF) AS COUNT
  FROM
      (SELECT
        CHROM,
        POS,
        COUNT(DISTINCT GT) AS DIFF
      FROM
        [andersen-lab:WI.20170312]
      WHERE
        SAMPLE IN(%s)
      AND
        GT IN('0/0','1/1')
      AND
        FILTER = 'PASS'
      AND
        FT = 'PASS'
      GROUP BY CHROM, POS
      )
  WHERE
    DIFF >= 2
  GROUP BY
    CHROM,
    POS_BIN
  ORDER BY
    CHROM,
    POS_BIN
  ", strain_list)
  message(sql)

  # NOTE(review): query_exec() is deprecated in recent bigrquery releases;
  # it is kept here so the (bracketed, legacy-style) table reference keeps
  # being interpreted the same way -- consider migrating to bq_project_query().
  query_exec(sql, project = "andersen-lab")
}

# Fetch the per-megabase counts of sites at which the two strains differ.
ff <- variant_strains("ED3046", "ED3049")

# Bar chart of differing-site counts along each chromosome, one panel per
# chromosome; rows whose CHROM is "MtDNA" are left out of the plot.
ggplot(
  dplyr::filter(ff, CHROM != "MtDNA"),
  aes(x = POS_BIN, y = COUNT)
) +
  geom_col() +
  facet_grid(. ~ CHROM, scales = "free_x") +
  labs(x = "Position (Mb)", y = "Count")