nievergeltlab
5/1/2017 - 7:03 PM

Filter QUANTO outputs into power plots Plot the detectable OR (at a set power and alpha) for a given sample size and MAF

Filter QUANTO outputs into power plots

Plot the detectable OR (at a set power and alpha) for a given sample size and MAF

setwd('C:/Users/adam/Desktop/roycepower')
library(plyr)

dat0 <- read.csv('me_power.csv', header=T)

dat <- dat0

#Fill in the blanks
for (i in 1:dim(dat)[1])
{
 if(is.na(dat[i,]$Frequency))
 {
  dat[i,]$Frequency <- dat[i-1,]$Frequency
 }
 if(is.na(dat[i,]$RG))
 {
  dat[i,]$RG <- dat[i-1,]$RG
 }
}

#Identify all locations with approximately 80 percent or more power
pwrfind2 <- function(x)
{
 pwrlocs <- x$Gene - .8 > -0.02

 #If there is no powered location, return null
 if(sum(pwrlocs) == 0)
 {
  return(NA)
 }

 minloc <- which.min(abs(x[pwrlocs,]$Gene - .8)) 
 return(x[pwrlocs,]$RG[minloc])
 
}


#Identify, for a given MAF and N, which RG gives 80 percent power
 dmaxpower <- ddply(dat, ~ Frequency + N,pwrfind2)
 names(dmaxpower)[3] <- "RG"
 
 write.csv(dmaxpower,'power_mainanalysis.csv')


#Subset the data to different mafs

m05 <- subset(dmaxpower, Frequency == 0.05 )
m1 <- subset(dmaxpower, Frequency == 0.1)
m2 <- subset(dmaxpower, Frequency == 0.2)


pdf ("power_curve.pdf")
	plot(m05$N, m05$RG, xlim=c(5000,80000), ylim=c(1.01,1.45),type="l",col="green",xlab="",ylab="", cex.axis=1.45,lwd=3,xaxt='n',yaxt='n',las=1,pch=17,bty='l')
	lines(m1$N,m1$RG, type="l",col="blue",lwd=3,pch=18)
	lines(m2$N,m2$RG, type="l",col="purple",lwd=3,pch=15)
    
	axis(1,at=seq(5000,85000,by=5000),labels=c("",10,"","20","",30,"",40,"",50,"",60,"",70,"","80","") , cex.axis=1.45) 

     
	axis(2,at=c(1,1.05,1.1,1.15,1.2,1.25,1.3,1.35,1.4,1.45,1.5,1.55,1.6,1.65),labels=c(1,"",1.1,"",1.2,"",1.3,"",1.4,"",1.5,"",1.6,""),cex.axis=1.45)

	points(m05$N, m05$RG,col="green",pch=16,cex=1.3)
	points(m1$N, m1$RG,col="blue",pch=18,cex=1.3)
	points(m2$N, m2$RG,col="purple",pch=15,cex=1.3)
dev.off()