genomewalker
2/28/2017 - 8:19 AM

eggNOG mapping

eggNOG mapping

#!/bin/bash
# eggNOG mapping: filters a hit table, runs the OG-mapping helpers and writes
# a per-gene annotation table.
#
# Usage: <script> NAME FILE OUTDIR
#   NAME    label used as the prefix of every output file (NAME-eggNOG.*)
#   FILE    input hit table; its 12th whitespace-separated column is used as
#           the filter score
#   OUTDIR  directory that receives all output files

# Directory holding the helper scripts (find_best_hit.pl, og_mapping.py) and
# the eggNOG reference tables (eggNOG.len, eggNOG.OG).
MC_SRC="/bioinf/home/afernand/mocat2-lite"
MC_MOD=${MC_SRC}

# Without all three arguments the output paths would collapse to names such
# as "/-eggNOG.filtered", so refuse to start instead.
if [ "$#" -lt 3 ]; then
  printf 'Usage: %s NAME FILE OUTDIR\n' "${0##*/}" >&2
  exit 2
fi

NAME2=${1}
FILE=${2}
CWD=${3}

# Keep only the records whose 12th column is at least 60 (presumably the bit
# score of a BLAST-style tabular file -- confirm against the producer of FILE).
# A pattern without an action prints the whole matching record.
# Abort if the filter fails so later steps never run on a partial file.
mawk '$12 >= 60' "${FILE}" > "${CWD}/${NAME2}-eggNOG.filtered" || {
  printf 'eggNOG score filter failed for %s\n' "${FILE}" >&2
  exit 1
}

# --- OG mapping --------------------------------------------------------------
# Runs find_best_hit.pl and og_mapping.py on the filtered hits, drops the rows
# containing " NONE ", and starts the annotation table with its header line.
# On return FILE points at the cleaned OG table and FILE2 at the annotation
# table; both are consumed by the code that follows.
#
# Every step is checked: a failure stops the script here instead of letting
# the following stage run with FILE2 unset and FILE still naming the raw input.
og_step_failed() {
  printf 'eggNOG OG mapping failed: %s\n' "$1" >&2
  exit 1
}

echo "OG mapping..."
BRH=$MC_SRC/find_best_hit.pl
OG=$MC_SRC/og_mapping.py
LEN=$MC_MOD/eggNOG.len
OGFILE=$MC_MOD/eggNOG.OG

perl "$BRH" -i "$CWD/$NAME2-eggNOG.filtered" -f NCBI \
  > "$CWD/$NAME2-eggNOG.besthit" \
  || og_step_failed "find_best_hit.pl"

# stderr of the mapper is kept next to its output for later inspection.
python "$OG" -b "$CWD/$NAME2-eggNOG.filtered" -t "$CWD/$NAME2-eggNOG.besthit" \
  -g "$OGFILE" -p "$LEN" -f NCBI \
  2> "$CWD/$NAME2-eggNOG.OG.err" > "$CWD/$NAME2-eggNOG.OG.pre" \
  || og_step_failed "og_mapping.py"

# grep exits 1 when it selects no lines (empty input, or every row contained
# " NONE "). That is a valid empty result, so only status 2 and above is
# treated as an error.
# NOTE(review): the pattern is delimited by spaces; confirm that this matches
# the separator og_mapping.py actually writes.
LC_ALL=C grep -E -v ' NONE ' "$CWD/$NAME2-eggNOG.OG.pre" > "$CWD/$NAME2-eggNOG.OG"
[ "$?" -le 1 ] || og_step_failed "filtering NONE rows"

rm -- "$CWD/$NAME2-eggNOG.OG.pre" || og_step_failed "removing intermediate file"

FILE=$CWD/$NAME2-eggNOG.OG
FILE2=$CWD/$NAME2-eggNOG.annotation
printf '#gene\teggNOG_OG\n' > "$FILE2" || og_step_failed "writing annotation header"
cut -f 1,3 $FILE | grep -v '^#' | perl -ane '
chomp(@F);
foreach $i (1 .. scalar @F-1){
$h{$F[0]}{$i}{$F[$i]}=1
};
END{
foreach $g (keys %h) {
print "$g";
foreach $i (1 .. scalar @F-1) {
print "\t";
foreach $j (keys %{$h{$g}{$i}}){
print "$j|";
}
}
print "\n"
}
}' | sed 's/|$//' | sed 's/\t\t/\tNA\t/' | sed 's/\t\t/\tNA\t/' | sed 's/\t\t/\tNA\t/' | sed 's/|\t/\t/' | sort -k 1,1 --parallel 16 -S25% >> $FILE2