# genomewalker
# 3/9/2016 - 8:35 AM
#
# BGC boxplot
#
# BGC boxplot

library(tidyr)
library(dplyr)
library(ggplot2)
library(scales)
# Directory holding the per-sample BGC abundance tables of both studies.
bgc_abundance_dir <- "~/ownCloud/OSD_analysis/repo/BGCs/abundances"

# Load one per-sample BGC abundance table and summarise it per sample and
# BGC class.
#
# Args:
#   path:       path to a gzip-compressed, comma-separated table with a header
#               row; it must provide the columns `label`, `bgc_class` and
#               `abun`.
#   study_name: value written to the `study` column of the result.
#
# Returns:
#   An ungrouped tibble with one row per (label, bgc_class) pair and the
#   columns label, bgc_class, n, abun1, freq, abun.freq and study, where
#     n         = number of input rows for the pair,
#     abun1     = sum of `abun` for the pair,
#     freq      = n divided by the total n of the same label,
#     abun.freq = abun1 divided by the total abun1 of the same label.
summarise_bgc_abundance <- function(path, study_name) {
  read.table(gzfile(path), header = TRUE, sep = ",") %>%
    as_tibble() %>%
    select(label, bgc_class, abun) %>%
    group_by(label, bgc_class) %>%
    summarise(n = n(), abun1 = sum(abun)) %>%
    # summarise() drops only the last grouping variable, so the rows are still
    # grouped by `label` here and both fractions are computed within a label.
    mutate(freq = n / sum(n), abun.freq = abun1 / sum(abun1)) %>%
    # Drop the leftover `label` grouping so later verbs see a plain tibble.
    ungroup() %>%
    mutate(study = study_name)
}

osd2014_bgc_abun <- summarise_bgc_abundance(
  file.path(bgc_abundance_dir, "osd2014_bgc_abundance_sample.csv.gz"),
  "OSD"
)

tara_bgc_abun <- summarise_bgc_abundance(
  file.path(bgc_abundance_dir, "TARA_bgc_abundance_sample.csv.gz"),
  "TARA"
)

# Stack both studies into one long table; `study` tells the rows apart.
all_bgc_abun <- bind_rows(osd2014_bgc_abun, tara_bgc_abun)

# Box plot of `abun.freq` per study, one facet per BGC class.
# Individual rows are drawn as jittered points underneath semi-transparent
# boxes; every facet gets its own y range and the y axis is labelled in percent.
# NOTE(review): the boxplot layer also draws its own outlier points, so
# outlying rows appear twice (once jittered, once as outlier marks).
ggplot(all_bgc_abun, aes(x = study, y = abun.freq)) +
  geom_jitter() +
  # The box width is a fixed setting, not a data mapping, so it is passed as
  # an argument instead of inside aes().
  geom_boxplot(aes(fill = study), width = 0.5, alpha = 0.6) +
  # Spell out `scales`; `scale` only worked through partial argument matching.
  facet_wrap(~bgc_class, scales = "free_y") +
  scale_y_continuous(labels = percent) +
  theme_bw()