nievergeltlab
9/18/2017 - 9:07 PM

Compare two sets of p-values

Say you generate 2 sets of p-values using 2 forms of analysis. This plots their -log10 values against each other and calculates the Pearson correlation (r) between them.

# Step 1: reduce each result file to two columns, "SNP p-value".
# Assumes both inputs start with a single header row - TODO confirm for your files.
# Header rows and missing p-values are dropped by testing the fields themselves:
# a line-wide `grep -v NA` / `grep -v P` would also discard every SNP whose ID
# happens to contain "NA" or "P".
zcat daner_pts_mrsc_mix_am-qc.hg19.ch.fl.gz | awk 'NR > 1 && $11 != "NA" {print $2, $11}' > daner_pts_mrsc_mix_am-qc_short2
awk 'NR > 1 {print $2, $12}' mrsc_gemma_pcs > mrsc_gemma_pcs_short2

# The *_short2 files above are written uncompressed, so no `gzip -d` step is needed.

# Step 2: join the two sets on SNP ID. join needs both inputs sorted on the join
# field under the same collation, hence LC_ALL=C on join and on both sorts.
# Output columns: SNP, p-value from mrsc_gemma_pcs, p-value from the daner file.
LC_ALL=C join <(LC_ALL=C sort -k1b,1 mrsc_gemma_pcs_short2) <(LC_ALL=C sort -k1b,1 daner_pts_mrsc_mix_am-qc_short2) > files_joined

# Step 3: order by the first p-value column only (-k2,2 stops the key at field 2).
sort -g -k2,2 files_joined > files_joined2

# Headers were already removed in step 1, so the sorted table is the final one.
cp files_joined2 files_joined3


# Start an R session; the remaining commands are R code.
R

library(data.table)

# Read the joined table: SNP ID followed by one p-value per analysis.
# files_joined3 is written without a header row, so state that explicitly
# instead of relying on fread's header auto-detection.
dm <- fread("files_joined3", header = FALSE, data.table = FALSE)
names(dm) <- c("SNP", "P1", "P2")

# Compare the two analyses on the -log10 scale; compute the transform once
# and reuse it for both the plot and the correlation test.
log_p1 <- -log10(dm$P1)
log_p2 <- -log10(dm$P2)

# A missing p-value gives NA and a p-value of exactly 0 (numeric underflow)
# gives Inf; Inf makes the correlation undefined. Drop those pairs and say
# how many were removed rather than failing silently.
usable <- is.finite(log_p1) & is.finite(log_p2)
if (!all(usable)) {
  message("Dropping ", sum(!usable), " SNP(s) with non-finite -log10(p)")
}
log_p1 <- log_p1[usable]
log_p2 <- log_p2[usable]

# Scatter plot of the two sets of -log10 p-values, saved to a PNG file.
png("results_correlation.png")
plot(log_p1, log_p2, xlab = "-log10(P1)", ylab = "-log10(P2)")
dev.off()

# Pearson correlation (cor.test's default method) between the two sets.
cor.test(log_p1, log_p2)