Make ricopili phenotypes. Investigators give their own subject IDs, which usually need to be linked to the ricopili IDs by modifying the ricopili assigned names.
setwd('C:/Users/adam/Desktop/starrs_cdrisc')
dat <- read.csv('CD_RISC_v2.csv',header=T,stringsAsFactors=F,na.strings=c("NA","#N/A"))
dat <- subset(dat,visit <= 3)
library(plyr)
cdr_max <- ddply(dat, ~studyid,colwise(.fun=max,'CDR_SUM',na.rm=T))
cdr_max <- cdr_max[-which(cdr_max$CDR_SUM == -Inf),]
cdr_postdep_max <- ddply(subset(dat, visit != 0), ~studyid,colwise(.fun=max,'CDR_SUM',na.rm=T))
#cdr_postdep_max <- cdr_postdep_max[-which(cdr_postdep_max$CDR_SUM == -Inf),]
#Viable transformation?
library(MASS)
boxcox(CDR_SUM ~ 1, data=cdr_max)
#square is best, is still fucked because it tops out, but oh well..
hist(cdr_max$CDR_SUM^2)
summary(lm(CDR_SUM ~ 1, data=cdr_max))
dat2 <- merge(cdr_postdep_max, cdr_max,by="studyid",all=TRUE,suffixes=c("_postdep_max","_studymax"))
names(dat2)[1] <- "IIDx"
unlist_split <- function(x, ...)
{
toret <- unlist(strsplit(x, ...) )
return(t(toret))
}
fam <- read.table('pts_mrsc_mix_am-qc.fam',header=F,stringsAsFactors=F)
names(fam) <- c("FID","IID","M","F","G","P")
fam$IIDx <- t(sapply(fam$FID,unlist_split,split="[/*]"))[,2]
dexp <- merge(dat2,fam,by="IIDx",all.y=TRUE)
dexp2 <- subset(dexp, select=c(FID,IID,CDR_SUM_postdep_max, CDR_SUM_studymax))
dexp2$CDR_SUM_postdep_max_sq <- dexp2$CDR_SUM_postdep_max^2
dexp2$CDR_SUM_studymax_sq <- dexp2$CDR_SUM_studymax^2
write.table(dexp2,'mrs_cdr.pheno',row.names=F,quote=F)