Download rsids for annotation
#!/usr/bin/bash
# @author = Daniel E. Cook
# @date = 2017-10-09
# This script downloads rs identifiers
# The function 'annotate_rsid' can be used to annotate a strat file with rsid.
# Usage:
# zcat chr1.strat.frq.gz | annotate_rside - | bgzip > out.bed.gz
curl -s http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/snp150.txt.gz | \
gunzip -c | \
awk -v OFS='\t' '{ print $2, $3, $4, $5, $10 }' | \
sed 's/chr//g' | \
bgzip -c > snps150.txt.bed.gz
tabix -p snps150.txt.bed.gz
function annotate_rsid() {
awk -F $'\t' '''
{
chrom_pos = $1 ":" $2 "-" $2;
command = "tabix /var/www/genetic_reference/snps150.txt.bed.gz " chrom_pos
if (command != command_last) {
command | getline result;
}
close(command);
split(result, rs, "\t");
chrom = rs[1];
pos = rs[3];
rs_id = rs[4];
ref_alt = $5 "/" $6;
alt_ref = $6 "/" $5;
test_ref_alt = rs[5];
if ($1 == chrom && $2 == pos) {
out = 1;
if (ref_alt == test_ref_alt) {
ref = $5;
alt = $6;
out = 0;
} else if (alt_ref == test_ref_alt) {
ref = $6;
alt = $5;
out = 0;
}
if (out == 0) {
print $1 "\t" $2 "\t" rs_id "\t" $4 "\t" ref "\t" alt "\t" $7 "\t" $8 "\t" $9 "\t" $10;
} else {
print $1 "\t" $2 "\t\t" $4 "\t" $5 "\t" $6 "\t" $7 "\t" $8 "\t" $9 "\t" $10;
}
}
command_last = command;
}
''' $1
}