nievergeltlab
4/3/2017 - 4:37 PM

## Implementation of formula converting beta from a linear regression onto the log odds scale EFFICIENT COMPUTATION WITH A LINEAR MIXED MODEL

Implementation of formula converting beta from a linear regression onto the log odds scale

EFFICIENT COMPUTATION WITH A LINEAR MIXED MODEL ON LARGE-SCALE DATA SETSWITH APPLICATIONS TO GENETIC STUDIES Author(s): Matti Pirinen, Peter Donnelly and Chris C. A. Spencer

``````
##quick version just based on prevalence, see https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5237383/
awk -v phi=\$phi '{if(NR == 1) print \$0,"beta_logscale","se_logscale"; else print \$0,\$8/(phi*(1-phi)),\$9/(phi*(1-phi))}' "\$study"_gemma_"\$outname_append" > "\$study"_gemma_"\$outname_append".logscale2
``````
``````#Reformat GEMMA outputs to be on the log scale. Filter files down to meta-analysis ready files. Meta analyse, make QQ and Manhattan plots, do forest plots, LDSC

#Set working directoy=ry
workingdir=\$(pwd)

awk '{if(NR==1) print "rs","SNP", "ngt", "Info"; else print \$1,\$18,\$9,\$37}' Metadata_allchr.txt | LC_ALL=C sort -k1,1b > Metadata_allchr_qimr.txt.sorted

LC_ALL=C sort -k2,2b <(zcat PGC_PTSD_allchr_lmm_with5PCs.assoc.txt.gz) > PGC_PTSD_allchr_lmm_with5PCs.assoc.txt.sorted
LC_ALL=C sort -k2,2b <(zcat PGC_PTSD_allchrfemale_lmm_with5PCs.assoc.txt.gz) > PGC_PTSD_allchrfemale_lmm_with5PCs.assoc.txt.sorted
LC_ALL=C sort -k2,2b <(zcat PGC_PTSD_allchrmale_lmm_with5PCs.assoc.txt.gz) > PGC_PTSD_allchrmale_lmm_with5PCs.assoc.txt.sorted

LC_ALL=C join -1 1 -2 2 Metadata_allchr_qimr.txt.sorted PGC_PTSD_allchr_lmm_with5PCs.assoc.txt.sorted | cut -d " " -f 2- | awk '{if (\$1 != ".") print}' | sort -g -k 11 >  qimr_all_rp_v1_oct6_2016.fixed
LC_ALL=C join -1 1 -2 2 Metadata_allchr_qimr.txt.sorted  PGC_PTSD_allchrfemale_lmm_with5PCs.assoc.txt.sorted | cut -d " " -f 2- | awk '{if (\$1 != ".") print}' | sort -g -k 11 > qimr_females_rp_v1_oct6_2016.fixed
LC_ALL=C join -1 1 -2 2 Metadata_allchr_qimr.txt.sorted  PGC_PTSD_allchrmale_lmm_with5PCs.assoc.txt.sorted | cut -d " " -f 2- | awk '{if (\$1 != ".") print}' | sort -g -k 11  > qimr_males_rp_v1_oct6_2016.fixed

#Calculate prevalence
# 325 cases (69% female), 1797 controls (48% female) with density and concordance within families as per the table.

# 325/(1797+325) = .153 overall prevalence
# 325*.69/(1797*.48+325*.69) = .206 fem prevalence
# 325*.31/(1797*.52+325*.31) = 0.097 male prevalence

# 224 cases 863 controls for women
# 101 cases 934 controls for men

ls | grep qimr_* > study_files.txt

#Convert all data into the log scale

file=qimr_all_rp_v1_oct6_2016.fixed
phi=.153
qsub -lwalltime=00:25:00 beta_to_logscale.pbs -e errandout/ -o errandout -d \$workingdir -F "-s beta_to_logscale.rscript -i \$file -v \$phi -b beta -m af -p p_wald -o qimr_all.logscale"

file=qimr_females_rp_v1_oct6_2016.fixed
phi=.206
qsub -lwalltime=00:25:00 beta_to_logscale.pbs -e errandout/ -o errandout -d \$workingdir -F "-s beta_to_logscale.rscript -i \$file -v \$phi -b beta -m af -p p_wald -o qimr_fem.logscale"

file=qimr_males_rp_v1_oct6_2016.fixed
phi=.098
qsub -lwalltime=00:25:00 beta_to_logscale.pbs -e errandout/ -o errandout -d \$workingdir -F "-s beta_to_logscale.rscript -i \$file -v \$phi -b beta -m af -p p_wald -o qimr_mal.logscale"

columns are currently:
snp chr       ps      n_miss  allele1 allele0 af      beta    se      l_remle l_mle   p_wald  p_lrt   p_score beta_logit se_logit

FRQ_A_0 FRQ_U_#samplesizehere#

#Set AF in cases and controls to same value -- not exactly right but to hell with it, it gives me what I want in the end - the overall maf

echo "CHR         SNP          BP  A1  A2   FRQ_A_325 FRQ_U_1797  INFO      OR      SE       P   ngt" > header1.txt

cat header1.txt <( awk '{print \$2,\$1, \$3 ,\$5 ,\$6 ,\$6,\$7, 0, 1,\$15,\$16,\$12, 1}' qimr_all.logscale) > pts_qimr_mix_nm.logscale.results

``````
``````#!/bin/bash

while getopts s:i:v:b:m:p:o: option
do
case "\${option}"
in
s) scriptname=\${OPTARG};;
i) infile=\${OPTARG};;
v) phi=\${OPTARG};;
b) beta_colname=\${OPTARG};;
m) maf_colname=\${OPTARG};;
p) p_colname=\${OPTARG};;
o) outfile=\${OPTARG};;
esac
done

echo scriptname : \$scriptname
echo infile : \$infile
echo prevalence: \$phi
echo beta column name : \$beta_colname
echo maf column name  : \$maf_colname
echo p column name : \$p_colname
echo outfile : \$outfile

Rscript \$scriptname \$infile \$phi \$beta_colname \$maf_colname \$p_colname \$outfile

``````
``````args <- commandArgs(trailingOnly = TRUE)
infile <- args[1]
phi <- args[2]
beta_colname <- args[3]
maf_colname <- args[4]
p_colname <- args[5]
outfile <- args[6]

print(c(infile,phi,beta_colname,maf_colname,p_colname,outfile))

phi <- as.numeric(phi)

new_beta_se <- function(x ,phi=0.153)
{
#Input should be a vector consisting of beta, p, maf. phi should be a value for the proportion of cases
B=x[1]
P=x[2]
theta=x[3]

B2=B/(phi*(1 - phi) + .5*(1-2*phi)*(1-2*theta)*B - (0.084 + 0.9*phi*(1 - 2*phi)*theta*(1-theta))/(phi*(1-phi))*B^2)
Zsq=qchisq(P,1,lower.tail=F)
se=sqrt(B2^2/Zsq)
return(c(B2,se))
}

library(data.table)

newbetas <- t(apply(dat[,c(beta_colname,p_colname,maf_colname)],1,new_beta_se,phi=phi))

colnames(newbetas) <- c("BETA_LOGIT","SE_LOGIT")
dat_exp <- cbind(dat,newbetas)
#gz1 <- gzfile(paste(outfile,'.gz',sep=''),'w')
write.table(dat_exp,outfile, row.names=F,quote=F)
#close(gz1)``````
``````B=-0.01540708 #Beta from linear model
phi= .1 #Proportion of cases in data
theta=.4 #Minor allele Frequency
P= 0.4119744 #pvalue of data

#Calculate the beta value on the log odds scale
B2=B/(phi*(1 - phi) + .5*(1-2*phi)*(1-2*theta)*B - (0.084 + 0.9*phi*(1 - 2*phi)*theta*(1-theta))/(phi*(1-phi))*B^2)

#The associated odds ratio
exp(B2)

#For meta-analysis, still need to know SE of beta. Since p-value is known, SE can be derived e.g.
Zsq=qchisq(P,1,lower.tail=F)
se=sqrt(B2^2/Zsq)
``````
``````R

new_beta_se <- function(x ,phi=0.153)
{
#Input should be a vector consisting of beta, p, maf. phi should be a value for the proportion of cases
B=x[1]
P=x[2]
theta=x[3]

B2=B/(phi*(1 - phi) + .5*(1-2*phi)*(1-2*theta)*B - (0.084 + 0.9*phi*(1 - 2*phi)*theta*(1-theta))/(phi*(1-phi))*B^2)
Zsq=qchisq(P,1,lower.tail=F)
se=sqrt(B2^2/Zsq)
print(B2)
print(se)
return(c(B2,se))
}