# Reformat PLINK data to VCF (one bgzip-compressed VCF per chromosome)
# Will Rayner provides a great toolbox to prepare data: HRC or 1000G Pre-imputation Checks.
# The main steps for HRC are:
# Download tool and sites
#wget http://www.well.ox.ac.uk/~wrayner/tools/HRC-1000G-check-bim-v4.2.7.zip
#wget ftp://ngs.sanger.ac.uk/production/hrc/HRC.r1-1/HRC.r1-1.GRCh37.wgs.mac5.sites.tab.gz
# Convert ped/map to bed (no command is included here; e.g. plink --file <prefix> --make-bed --out <prefix>)
# Create a frequency file
# Write the allele-frequency report that the check script reads
# ("${pfile}_freq.frq"). The ${var:?} expansions abort the script with a clear
# message when plink_location or pfile has not been set, instead of running a
# malformed command.
"${plink_location:?plink_location must point to the plink binary}" \
  --freq \
  --bfile "${pfile:?pfile must be the PLINK bed/bim/fam prefix}" \
  --out "${pfile}_freq" \
  || exit 1

# Execute the check script against the HRC site list.
#   -b  bim file of the dataset
#   -f  frequency file produced above
#   -r  reference panel site list (uncompressed .tab)
#   -h  flag passed for the HRC reference, per the tool's usage text
# Stop here on failure so a stale Run-plink.sh is never executed.
perl /mnt/sdb/genetics/tiff1/hrc_impute/HRC-1000G-check-bim.pl \
  -b "${pfile}.bim" \
  -f "${pfile}_freq.frq" \
  -r /mnt/sdb/genetics/tiff1/hrc_impute/HRC.r1-1.GRCh37.wgs.mac5.sites.tab \
  -h \
  || exit 1

# Alternative: submit the check as a cluster job instead of running it inline.
#qsub -lwalltime=02:00:00 HRC-1000gcheck.sh

# Before running: edit the plink path inside Run-plink.sh so it points at the
# real plink2 binary, otherwise plink1 will run.
# NOTE(review): Run-plink.sh is expected to have been written to the current
# directory by the check script above - confirm.
sh Run-plink.sh || exit 1
# Make VCFs: one uncompressed VCF per chromosome under temporary_files/.
# -p keeps a re-run from failing when the directory already exists.
mkdir -p temporary_files || exit 1

# Convert every chromosome (1-22 and 23). The sort/compress steps further down
# read temporary_files/eaco_chr1.vcf ... eaco_chr23.vcf, so the loop must not be
# limited to chromosome 23 only.
# --set-hh-missing is applied to all chromosomes, as in the original command.
# NOTE(review): the "${pfile}-updated-chr<N>" filesets are presumably produced
# by Run-plink.sh - confirm.
for chr in {1..23}; do
  "${plink_location:?plink_location must point to the plink binary}" \
    --bfile "${pfile:?pfile must be the PLINK bed/bim/fam prefix}-updated-chr${chr}" \
    --set-hh-missing \
    --recode vcf \
    --out "temporary_files/eaco_chr${chr}" \
    || exit 1
done
# Chromosomes 1-22: sort each intermediate VCF and write it bgzip-compressed
# into the current directory as eaco_chr<N>.vcf.gz.
for (( chr = 1; chr <= 22; chr++ )); do
  sorted_input="temporary_files/eaco_chr${chr}.vcf"
  compressed_output="eaco_chr${chr}.vcf.gz"
  vcf-sort < "$sorted_input" \
    | /mnt/sdb/genetics/tiff1/hrc_impute/tabix-master/bgzip -c > "$compressed_output"
done
# Special handling for chr 23: its records carry the chromosome code 23
# (PLINK's numeric code for X) and are renamed to "X" while the file is sorted
# and compressed.

#######################################
# Rename chromosome 23 to X in a VCF stream.
# Header lines are recognised by their leading '#', not by line count, so the
# filter works whatever the header length. Fields are split and joined on tabs
# only, so values containing spaces pass through unchanged. A
# "##contig=<ID=23..." header line is renamed too, keeping header and records
# consistent.
# Arguments: none
# Inputs:    VCF text on stdin
# Outputs:   rewritten VCF text on stdout
#######################################
rename_chr23_to_x() {
  awk '
    BEGIN { FS = OFS = "\t" }
    /^##contig=<ID=23[,>]/ { sub(/ID=23/, "ID=X") }
    !/^#/ && $1 == "23" { $1 = "X" }
    { print }
  '
}

chr=23
# pipefail: report a failure of vcf-sort or the rename filter in the exit
# status, instead of only that of the final bgzip stage.
set -o pipefail
vcf-sort < "temporary_files/eaco_chr${chr}.vcf" \
  | rename_chr23_to_x \
  | /mnt/sdb/genetics/tiff1/hrc_impute/tabix-master/bgzip -c > "eaco_chr${chr}.vcf.gz"