[LD Score Regression]
GWAS=/hp_shares/mgandal/datasets/GWAS/BP.PGC.2012/BD.PGC.2012.sumstats.gz
GWAS=/hp_shares/mgandal/datasets/GWAS/SCZ.Clozuk.2018/SCZ.Clozuk.2018.sumstats.gz
GWAS=/hp_shares/mgandal/datasets/GWAS/ASD.iPSYCH+PGC.2017/ASD.iPSYCHPGC.2018.sumstats.gz
GWAS=/hp_shares/mgandal/datasets/GWAS/ASD.PGC.2017/ASD.PGC.2017.sumstats.gz
GWAS=/hp_shares/mgandal/datasets/GWAS/IBD.2015/IBD.2015.sumstats.gz
GWAS=/hp_shares/mgandal/datasets/GWAS/Neuroticism.SSGAP.2016/Neuroticism.SSGAP.2016.sumstats.gz
GWAS=/hp_shares/mgandal/datasets/GWAS/EduYears.SSGAC.2016/EduYears.SSGAC.2016.sumstats.gz
GWAS=/hp_shares/mgandal/datasets/GWAS/SubjWellBeing.SSGAC.2016/SubjWellBeing.SSGAC.2016.sumstats.gz
screen
qrsh -l h_rt=04:00:00,h_vmem=64G
annotDir=/hp_shares/mgandal/datasets/LDscore/1000G_Phase3/makeCustomAnnotation/output/
outdir=/hp_shares/mgandal/datasets/LDscore/PsychEncode/results/
cd $annotDir
GWAS=/hp_shares/mgandal/datasets/GWAS/EduYears.SSGAC.2016/EduYears.SSGAC.2016.sumstats.gz
GWAS_name=`basename $GWAS .sumstats.gz`
for annot in ${annotDir}/*iso*.1.l2.ldscore.gz; do
set=`basename $annot .1.l2.ldscore.gz`
if [ ! -e ${outdir}/${GWAS_name}_${set}.results ]
then
python ~/bin/ldsc/ldsc.py \
--h2 $GWAS \
--out ${outdir}/${GWAS_name}_${set} \
--ref-ld-chr ${annotDir}/${set}.,${annotDir}/baseline. \
--w-ld-chr /hp_shares/mgandal/datasets/LDscore/1000G_Phase3/weights_hm3_no_hla/weights. \
--overlap-annot \
--frqfile-chr /hp_shares/mgandal/datasets/LDscore/1000G_Phase3/1000G_Phase3_frq/1000G.EUR.QC. \
--print-coefficients &
fi
done
setdir=/hp_shares/mgandal/datasets/LDscore/1000G_Phase3/makeCustomAnnotation/output/
for set in output/*.1.annot; do
set=`basename $set .1.annot`
for chr in {1..22}; do
if [ ! -e ${setdir}/${set}.${chr}.l2.ldscore.gz ]
then
qsub -b y -cwd -e ./log -o ./log -V -N sLDSC -l h_vmem=8G,h_rt=4:00:00 /hp_shares/mgandal/datasets/LDscore/1000G_Phase3/makeCustomAnnotation/qsubWrapper.sh $set $chr
fi
done
done
#===========#===========#===========#===========#===========#===========#===========
#!/bin/bash
#qsubWrapper.sh
setdir=/hp_shares/mgandal/datasets/LDscore/1000G_Phase3/makeCustomAnnotation/output/
set=$1
chr=$2
echo $set, $chr
python ~/bin/ldsc/ldsc.py \
--l2 \
--bfile /hp_shares/mgandal/datasets/LDscore/1000G_Phase3/1000G_EUR_Phase3_plink/1000G.EUR.QC.${chr} \
--ld-wind-cm 1 \
--annot /hp_shares/mgandal/datasets/LDscore/1000G_Phase3/makeCustomAnnotation/output/${set}.${chr}.annot \
--out ${setdir}/${set}.${chr} \
--print-snps /hp_shares/mgandal/datasets/LDscore/1000G_Phase3/hapmap3_snps/hm.${chr}.snp
#!/share/apps/R-3.3.1/bin/Rscript
#Usage: Rscript CreateNewAnnotation_02.R $CHR_NUM $GENE_ANNOTATION $SET_ANNOTATION
#SET_ANNOTATION FORMAT: TSV ensembl_gene_id set_name
options(stringsAsFactors = F)
args <- commandArgs(trailingOnly = TRUE)
##args=list("1", "annotation.gene.gencodeV19.csv", "customAnnotation.txt")
print(args)
OFFSET = 10000
outdir="./output/"
chr = args[[1]]
geneAnno_file = args[[2]]
setAnno_file = args[[3]]
geneAnnotation = read.csv(geneAnno_file, head=T)
setAnnotation = read.table(setAnno_file, head=T)
geneAnnotation = geneAnnotation[match(setAnnotation[,1], geneAnnotation$gene_id),]
setAnnotation = cbind(setAnnotation, geneAnnotation)
setAnnotation$chr = as.numeric(gsub("chr", "", setAnnotation$chr))
template = read.table(gzfile(paste0("/hp_shares/mgandal/datasets/LDscore/1000G_Phase3/1000G_Phase3_cell_type_groups/cell_type_group.1.", chr, ".annot.gz")), head=T)[,1:4]
print(paste("Loading",chr,"there are", nrow(template), "SNPs"))
for(set in unique(setAnnotation[,2])) {
print(paste("Working on chr", chr, ": ",set))
setGenes = intersect(setAnnotation[,1][setAnnotation[,2] == set], geneAnnotation$gene_id[geneAnnotation$chr == paste0("chr",chr)])
binaryVec = rep(0, times=nrow(template))
for(g in setGenes) {
idx = match(g, geneAnnotation$gene_id)
snp_match = which(( template$BP > geneAnnotation$start[idx] - OFFSET) & (template$BP < geneAnnotation$stop[idx] + OFFSET))
if(length(snp_match) > 0) {
binaryVec[snp_match] = 1
}
}
annotOut = cbind(template, binaryVec)
colnames(annotOut)[5] = set
print(paste0("Found ", sum(binaryVec), " SNPs in the set"))
write.table(annotOut, file = paste0(outdir, set, ".", chr, ".annot"), quote = F, row.names = F)
}
print('done')
#!/bin/bash
# https://github.com/bulik/ldsc/wiki/LD-Score-Estimation-Tutorial
# Download the baseline model and weights
wget https://data.broadinstitute.org/alkesgroup/LDSCORE/1000G_Phase3_baseline_ldscores.tgz
wget https://data.broadinstitute.org/alkesgroup/LDSCORE/weights_hm3_no_hla.tgz
wget https://data.broadinstitute.org/alkesgroup/LDSCORE/1000G_Phase3_plinkfiles.tgz
wget https://data.broadinstitute.org/alkesgroup/LDSCORE/hapmap3_snps.tgz
wget https://data.broadinstitute.org/alkesgroup/LDSCORE/1000G_Phase3_frq.tgz
tar -xzvf 1000G_Phase3_plinkfiles.tgz
tar -xvzf 1000G_Phase3_baseline_ldscores.tgz
tar -xvzf weights_hm3_no_hla.tgz
tar -xzvf 1000G_Phase3_frq.tgz
tar -xzvf hapmap3_snps.tgz
geneAnnotation=annotation.gene.gencodeV19.csv
setAnnotation=customAnnotation.txt
screen
qrsh -l h_vmem=16G -pe shared 4
for i in {1..22}; do
echo $i
/share/apps/R-3.3.1/bin/Rscript CreateNewAnnotation_02.R $i $geneAnnotation $setAnnotation
done
for set in *_*.1.annot; do
set=`basename $set .1.annot`
for i in {1..22}; do
echo $set, $i
python ~/bin/ldsc/ldsc.py \
--l2 \
--bfile /hp_shares/mgandal/datasets/LDscore/1000G_Phase3/1000G_EUR_Phase3_plink/1000G.EUR.QC.$i \
--ld-wind-cm 1 \
--annot /hp_shares/mgandal/datasets/LDscore/1000G_Phase3/makeCustomAnnotation/output/${set}.${i}.annot \
--out ${set}.${i} \
--print-snps /hp_shares/mgandal/datasets/LDscore/1000G_Phase3/hapmap3_snps/hm.${i}.snp
done
done