Tabea-K
1/19/2016 - 3:32 PM

Prints the coordinates of all 5'UTR exons from a refGene file. Based on https://www.biostars.org/p/10907/#10910

Prints the coordinates of all 5'UTR exons from a refGene file. Based on https://www.biostars.org/p/10907/#10910

# Prints the 5'UTR part of every exon in a refGene table, one tab-separated
# line per exon:
#   chrom, start, end, <name>_<exon index>, exon count, strand
#
# Input is read from stdin as tab-separated rows. Fields used:
#   $2 name, $3 chrom, $4 strand, $7 cdsStart, $8 cdsEnd,
#   $10 exonStarts, $11 exonEnds (comma-separated, normally with a trailing
#   comma).
# The exon index is the position in exonStarts for both strands. Exons are
# assumed to be listed in ascending coordinate order.
awk '
BEGIN { OFS = "\t"; FS = "\t" }

# cdsStart == cdsEnd means the row has no coding region, so there is no UTR
# to report. Without this guard every exon of such a "+" row would be printed.
$7 == $8 { next }

{
    strand = $4
    if (strand != "+" && strand != "-") next

    # "+ 0" forces numeric (not string) comparison of coordinates below.
    cds_start = $7 + 0
    cds_end = $8 + 0

    # split() empties its target array first, so no explicit delete is needed,
    # and its return value avoids length(array), which not every awk supports.
    n_exons = split($10, exon_starts, ",")
    split($11, exon_ends, ",")

    # A trailing comma yields one empty last element; it is not an exon.
    if (n_exons > 0 && exon_starts[n_exons] == "") n_exons--

    for (i = 1; i <= n_exons; i++) {
        # Skip empty list entries only: a start of 0 is a valid coordinate.
        if (exon_starts[i] == "") continue

        s = exon_starts[i] + 0
        e = exon_ends[i] + 0

        if (strand == "+") {
            # UTR lies before cdsStart. ">=" keeps an exon that begins exactly
            # at cdsStart from being emitted as a zero-length record.
            if (s >= cds_start) break
            if (e > cds_start) e = cds_start
        } else {
            # UTR lies after cdsEnd. "<=" keeps an exon that ends exactly at
            # cdsEnd from being emitted as a zero-length record.
            if (e <= cds_end) continue
            if (s < cds_end) s = cds_end
        }
        print $3, s, e, $2 "_" i, n_exons, strand
    }
}'