danielecook
10/10/2017 - 1:45 AM

Download rsids for annotation

Download rsids for annotation

#!/usr/bin/bash
# @author = Daniel E. Cook
# @date = 2017-10-09
# This script downloads rs identifiers
# The function 'annotate_rsid' can be used to annotate a strat file with rsid.
# Usage:
# zcat chr1.strat.frq.gz | annotate_rside - | bgzip > out.bed.gz

curl -s http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/snp150.txt.gz | \
gunzip -c | \
awk -v OFS='\t' '{ print $2, $3, $4, $5, $10 }' | \
sed 's/chr//g' | \
bgzip -c > snps150.txt.bed.gz
tabix -p snps150.txt.bed.gz

function annotate_rsid() {
    awk -F $'\t' '''
    {
        chrom_pos = $1 ":" $2 "-" $2;
        command = "tabix /var/www/genetic_reference/snps150.txt.bed.gz " chrom_pos
        if (command != command_last) {
            command | getline result;
        }
        close(command);
        split(result, rs, "\t");
        chrom = rs[1];
        pos = rs[3];
        rs_id = rs[4];
        ref_alt = $5 "/" $6;
        alt_ref = $6 "/" $5;
        test_ref_alt = rs[5];

        if ($1 == chrom && $2 == pos) {
            out = 1;
            if (ref_alt == test_ref_alt) {
                ref = $5;
                alt = $6;
                out = 0;
            } else if (alt_ref == test_ref_alt) {
                ref = $6;
                alt = $5;
                out = 0;
            }
            if (out == 0) {
                print $1 "\t" $2 "\t" rs_id "\t" $4 "\t" ref "\t" alt "\t" $7 "\t" $8 "\t" $9 "\t" $10;
            } else {
                print $1 "\t" $2 "\t\t" $4 "\t" $5 "\t" $6 "\t" $7 "\t" $8 "\t" $9 "\t" $10;
            }
        }
        command_last = command;
    }
    ''' $1
}