nievergeltlab
8/28/2017 - 6:46 PM

Make ricopili phenotypes. Investigators give their own subject IDs, which usually need to be linked to the ricopili IDs by modifying the ric

Make ricopili phenotypes. Investigators give their own subject IDs, which usually need to be linked to the ricopili IDs by modifying the ricopili assigned names.

setwd('C:/Users/adam/Desktop/starrs_cdrisc')
dat <- read.csv('CD_RISC_v2.csv',header=T,stringsAsFactors=F,na.strings=c("NA","#N/A"))

dat <- subset(dat,visit <= 3)

library(plyr)
cdr_max <- ddply(dat, ~studyid,colwise(.fun=max,'CDR_SUM',na.rm=T))

cdr_max <- cdr_max[-which(cdr_max$CDR_SUM == -Inf),]

cdr_postdep_max <- ddply(subset(dat, visit != 0), ~studyid,colwise(.fun=max,'CDR_SUM',na.rm=T))
#cdr_postdep_max <- cdr_postdep_max[-which(cdr_postdep_max$CDR_SUM == -Inf),]


#Viable transformation?
library(MASS)
boxcox(CDR_SUM ~ 1, data=cdr_max)

#square is best, is still fucked because it tops out, but oh well..
hist(cdr_max$CDR_SUM^2)

summary(lm(CDR_SUM ~ 1, data=cdr_max))



dat2 <- merge(cdr_postdep_max, cdr_max,by="studyid",all=TRUE,suffixes=c("_postdep_max","_studymax"))
names(dat2)[1] <- "IIDx"
unlist_split <- function(x, ...)
{
    toret <- unlist(strsplit(x, ...) )
    return(t(toret))
}

fam <- read.table('pts_mrsc_mix_am-qc.fam',header=F,stringsAsFactors=F)
names(fam) <- c("FID","IID","M","F","G","P")
 fam$IIDx <- t(sapply(fam$FID,unlist_split,split="[/*]"))[,2]
 

dexp <- merge(dat2,fam,by="IIDx",all.y=TRUE)

dexp2 <- subset(dexp, select=c(FID,IID,CDR_SUM_postdep_max, CDR_SUM_studymax))
dexp2$CDR_SUM_postdep_max_sq <- dexp2$CDR_SUM_postdep_max^2
dexp2$CDR_SUM_studymax_sq <- dexp2$CDR_SUM_studymax^2

write.table(dexp2,'mrs_cdr.pheno',row.names=F,quote=F)