danielecook
3/17/2015 - 9:38 PM

Generate primers from VCF

Generate primers from VCF

function vcf_primers {
	bcftools view -H --min-ac 4 -m2 -M2 ${1} | gshuf | head -n ${2} | cat <(bcftools view -h ${1}) - > ${1/.vcf/}.primers.vcf
	IFS=$'\n'
	bcftools query -f '%CHROM\_%POS\t%REF\t%ALT[\t%SAMPLE=%TGT]\n' ${1/.vcf/}.primers.vcf > ${1/.vcf/}.samples.vcf
	# Output var position info, etc.
	cat ${1/.vcf/}.primers.vcf | while read L
	do
	    C=`echo $L | cut  -f 1`
	    P=`echo $L | cut  -f 2`
	    S=$(( $P - 1000))
	    E=$(( $P + 1000))
	    echo "PRIMER_SEQUENCE_ID=${C}_${P}"
	    echo "SEQUENCE_TARGET=990,20"
	    echo "PRIMER_PRODUCT_SIZE_RANGE=400-450"
	    echo -n "SEQUENCE_TEMPLATE="
	    samtools faidx \
	        ~/Documents/git/pyPipeline/genomes/WS245/c_elegans.PRJNA13758.WS245.genomic.fa ${C}:${S}-${E} |\
	        grep -v ">" | tr -d "\n" | tr "atgcn" "ATGCN" 
	    echo
	    echo "=" 
	done | primer3_core -default_version=1 | awk ' BEGIN { SET=0; FS="\t"; print "SET\tID\tPRIMER_LEFT\tPRIMER_RIGHT\tPOS_LEFT\tPOS_RIGHT\tTM_LEFT\tTM_RIGHT\tGC_LEFT\tGC_RIGHT\tSEQUENCE_TEMPLATE"}
	$1 ~ "PRIMER_SEQUENCE_ID*" { ID=gensub("PRIMER_SEQUENCE_ID=","",$1) } 
	$1 ~ "PRIMER_LEFT_[0-9]+=" { POS_LEFT=gensub("PRIMER_LEFT_[0-9]+=","",$1) }
	$1 ~ "PRIMER_RIGHT_[0-9]+=" { POS_RIGHT=gensub("PRIMER_RIGHT_[0-9]+=","",$1) }
	$1 ~ "PRIMER_LEFT.*TM" { LEFT_TM=gensub("PRIMER_LEFT.*TM=","",$1) }
	$1 ~ "PRIMER_RIGHT.*TM" { RIGHT_TM=gensub("PRIMER_RIGHT.*TM=","",$1) }
	$1 ~ "PRIMER_LEFT.*GC_PERCENT" { LEFT_GC=gensub("PRIMER_LEFT.*GC_PERCENT=","",$1) }
	$1 ~ "PRIMER_RIGHT.*GC_PERCENT" { RIGHT_GC=gensub("PRIMER_RIGHT.*GC_PERCENT=","",$1) }
	$1 ~ "SEQUENCE_TEMPLATE" { SEQUENCE_TEMPLATE=gensub("SEQUENCE_TEMPLATE=","",$1) }
	$1 ~ "PRIMER_LEFT.*SEQUENCE" { LEFT_SEQ=gensub("PRIMER_LEFT.*SEQUENCE=","",$1) }
	$1 ~ "PRIMER_RIGHT.*SEQUENCE=" { RIGHT_SEQ=gensub("PRIMER_RIGHT.*SEQUENCE=","",$1) }
	$1 == "=" { SET=SET+1;
	 print SET,ID, LEFT_SEQ, RIGHT_SEQ, POS_LEFT, POS_RIGHT, LEFT_TM, RIGHT_TM, LEFT_GC, RIGHT_GC, SEQUENCE_TEMPLATE }
	' | uniq | tr ' ' '\t' > ${1/.vcf/}.primers.txt
}

echo 'ACTTTCACAAGACCAACGGG' | blastn -task blastn-short  -db ~/Documents/git/pyPipeline/genomes/WS245/WS245.blast