nievergeltlab
11/30/2018 - 7:24 PM

Prepare data for HRC imputation

Reformat PLINK data to VCF so it can be submitted for imputation


# Will Rayner provides a great toolbox to prepare data: HRC or 1000G Pre-imputation Checks.

# The main steps for HRC are:
# Download tool and sites

#wget http://www.well.ox.ac.uk/~wrayner/tools/HRC-1000G-check-bim-v4.2.7.zip
#wget ftp://ngs.sanger.ac.uk/production/hrc/HRC.r1-1/HRC.r1-1.GRCh37.wgs.mac5.sites.tab.gz

# Convert ped/map to bed

# Create a frequency file

# Compute allele frequencies for the dataset. Output goes to "${pfile}_freq.frq",
# which the HRC check script below takes as its -f input.
# Fail fast if either variable is unset or empty: otherwise the shell would try
# to run "--freq" as a command, or PLINK would be handed an empty file prefix.
: "${plink_location:?plink_location must be set to the PLINK executable}"
: "${pfile:?pfile must be set to the PLINK bed/bim/fam file prefix}"
# Quoted so that a PLINK path containing spaces is still a single word.
"$plink_location" --freq --bfile "$pfile" --out "${pfile}_freq"

# Execute script

# Run the HRC/1000G check script on the .bim file and the frequency file,
# using the HRC r1-1 GRCh37 sites table as the reference (-r).
hrc_dir=/mnt/sdb/genetics/tiff1/hrc_impute
perl "${hrc_dir}/HRC-1000G-check-bim.pl" \
  -b "${pfile}.bim" \
  -f "${pfile}_freq.frq" \
  -r "${hrc_dir}/HRC.r1-1.GRCh37.wgs.mac5.sites.tab" \
  -h
# Alternative: submit the check as a cluster job instead.
#qsub -lwalltime=02:00:00 HRC-1000gcheck.sh

# Before running: set the plink location in the script (presumably
# Run-plink.sh) to the real plink2 location, otherwise plink1 will run.
sh Run-plink.sh


# Make VCFs: export each per-chromosome "-updated-chrN" PLINK fileset to an
# uncompressed VCF under temporary_files/.
# -p keeps a re-run from erroring when the directory already exists.
mkdir -p temporary_files
# Convert every chromosome (1-22 plus 23). The sorting steps further down read
# temporary_files/eaco_chr1..22.vcf as well as eaco_chr23.vcf, so restricting
# this loop to 23 alone leaves them without input on a fresh run.
for chr in {1..23}; do
  "$plink_location" \
    --bfile "${pfile}-updated-chr${chr}" \
    --set-hh-missing \
    --recode vcf \
    --out "temporary_files/eaco_chr${chr}"
done

# Sort one VCF and write it out bgzip-compressed.
#   $1 - input VCF path
#   $2 - output .vcf.gz path
sort_and_bgzip() {
  vcf-sort < "$1" \
    | /mnt/sdb/genetics/tiff1/hrc_impute/tabix-master/bgzip -c > "$2"
}

# Chromosomes 1-22 only; chromosome 23 is handled separately.
for chr in {1..22}; do
  sort_and_bgzip "temporary_files/eaco_chr${chr}.vcf" "eaco_chr${chr}.vcf.gz"
done

#Special handling for chr 23...

# Rewrite the chromosome column of a VCF stream from "23" to "X".
# Reads VCF text on stdin and writes it to stdout.
# Header lines are recognised by their leading '#', not by a fixed line count,
# so the rename works whatever the header length. Fields are split and joined
# on tabs only, so any spaces inside a field are preserved.
# NOTE(review): a "##contig=<ID=23,...>" header line, if present, is left
# unchanged - confirm downstream tools accept that.
rename_chr23_to_x() {
  awk 'BEGIN { FS = OFS = "\t" }
       !/^#/ && $1 == "23" { $1 = "X" }
       { print }'
}

chr=23
vcf-sort < "temporary_files/eaco_chr${chr}.vcf" \
  | rename_chr23_to_x \
  | /mnt/sdb/genetics/tiff1/hrc_impute/tabix-master/bgzip -c > "eaco_chr${chr}.vcf.gz"