# Compare strains to see where they differ
library(tidyverse)
library(bigrquery)
#' Count, per 1 Mb bin, the sites at which a set of strains differ
#'
#' Builds a BigQuery legacy-SQL query against `[andersen-lab:WI.20170312]`
#' restricted to the requested samples and to genotypes `0/0` / `1/1`,
#' keeps positions whose `DIFF` value is at least 2, and counts those
#' positions per chromosome and per `CEIL(POS/1000000)` bin. The strain list
#' and the final SQL are printed before the query is submitted.
#'
#' @param ... Strain (sample) names, given as separate strings and/or as
#'   character vectors / lists; they are flattened with `unlist()`. At least
#'   two distinct names are required, because a single sample can never
#'   satisfy `DIFF >= 2`.
#' @return The downloaded query result (a data frame / tibble) with the
#'   columns `CHROM`, `POS_BIN` and `COUNT`.
variant_strains <- function(...) {
  strains <- as.character(unlist(list(...), use.names = FALSE))

  # Fail before any (billable) query is submitted when the input cannot
  # produce a meaningful comparison.
  if (anyNA(strains) || any(!nzchar(strains))) {
    stop("strain names must be non-missing, non-empty strings", call. = FALSE)
  }
  if (length(unique(strains)) < 2L) {
    stop("at least two distinct strain names are required", call. = FALSE)
  }

  # The names are spliced into the SQL text as quoted literals, so a quote or
  # backslash inside a name would break out of (or corrupt) the literal.
  has_quote <- grepl("'", strains, fixed = TRUE)
  has_backslash <- grepl("\\", strains, fixed = TRUE)
  if (any(has_quote | has_backslash)) {
    stop(
      "strain names must not contain a single quote or a backslash",
      call. = FALSE
    )
  }

  strain_list <- paste0("'", strains, "'", collapse = ",")
  print(strain_list)

  # NOTE(review): the outer WHERE references FILTER and FT, which the inner
  # SELECT does not project, and DIFF is defined as SUM(COUNT(DISTINCT GT)).
  # Confirm against the table schema that this query runs as intended; the
  # SQL text is deliberately left unchanged here.
  sql <- sprintf("
SELECT
CHROM,
CEIL(POS/1000000) AS POS_BIN,
COUNT(DIFF) AS COUNT
FROM
(SELECT
CHROM,
POS,
SUM(COUNT(DISTINCT GT)) AS DIFF
FROM
[andersen-lab:WI.20170312]
WHERE
SAMPLE IN(%s)
AND
GT IN('0/0','1/1')
GROUP BY CHROM, POS
ORDER BY CHROM, POS
)
WHERE
DIFF >= 2
AND
FILTER == 'PASS'
AND
FT == 'PASS'
GROUP BY
CHROM,
POS_BIN
ORDER BY
CHROM,
POS_BIN
", strain_list)
  print(sql)

  # query_exec() belongs to bigrquery's deprecated pre-1.0 interface; the
  # bq_* functions replace it. The bracketed table reference is legacy-SQL
  # syntax, so legacy SQL has to be requested explicitly.
  result_table <- bq_project_query("andersen-lab", sql, use_legacy_sql = TRUE)
  bq_table_download(result_table)
}
# Fetch the per-megabase counts of differing sites for the two strains.
ff <- variant_strains("ED3046", "ED3049")

# Plot the counts as bars, one panel per chromosome, leaving out the
# mitochondrial genome; each panel gets its own x range.
ggplot(
  dplyr::filter(ff, CHROM != "MtDNA"),
  aes(x = POS_BIN, y = COUNT)
) +
  geom_bar(stat = "identity") +
  facet_grid(. ~ CHROM, scales = "free_x") +
  labs(x = "Position (Mb)", y = "Count")