genomewalker
3/9/2016 - 10:39 AM

OSD 16S-KO networks

OSD 16S-KO networks

# KO files at /bioinf/projects/osd/main/2014/06/analysis-results/KEGG/READS
# Build one long table from every gzipped KEGG result found below the current
# directory. Each output row holds the sample name (the file name with its
# ".kegg_20140317.gz" suffix removed) followed by columns 2 and 4 of the
# comma-separated input, tab-delimited.
# Paths are handed over NUL-delimited and every expansion is quoted, so file
# names containing whitespace or backslashes are processed intact.
find . -name "*kegg*gz" -print0 |
  while IFS= read -r -d '' kegg_file; do
    sample=$(basename "${kegg_file}" .kegg_20140317.gz)
    zcat "${kegg_file}" |
      awk -v O="${sample}" 'BEGIN { FS = ","; OFS = "\t" } { print O, $2, $4 }'
  done > osd2014_reads_kegg_20140317.tsv

# Retrieve all KOs named in osd2014_reads_ko_list.txt: parallel runs
# ./getKO.sh once per input line, up to 128 jobs at a time.
# The list is redirected straight into parallel; a bare `cat` with no file
# operand would only wait for input on the terminal.
# NOTE(review): the list file name is taken from the follow-up check that
# compares this list against the downloaded entries - confirm it is the
# intended input.
~/opt/parallel/bin/parallel -j 128 --progress ./getKO.sh < osd2014_reads_ko_list.txt

# Some will fail
# List the KOs that still need downloading: identifiers present in
# osd2014_reads_ko_list.txt for which no file under ko_files/ carries a
# matching ENTRY line. The result is written to `repeat`.
# - Only lines whose first field is exactly ENTRY are read, so other text that
#   merely contains that word cannot contribute bogus identifiers.
# - comm -23 keeps the lines found only in the first input. A KO that is listed
#   twice and failed is therefore still reported, and entries that exist only
#   on the download side are not queued for another attempt.
comm -23 \
  <(sort -u osd2014_reads_ko_list.txt) \
  <(awk '$1 == "ENTRY" { print $2 }' ko_files/*txt | sort -u) \
  > repeat

# Repeat the download and the check above until no further KOs can be retrieved.
# 105 KOs are deprecated in the current KEGG version (09.03.2016).
#!/bin/bash
# getKO.sh - fetch one KEGG entry and save it as <KO>.txt in the current
# directory.
# Usage: getKO.sh KO_ID
# Stops with a usage message when no identifier is given; otherwise the request
# would go to the bare /get/ URL and the response would land in ".txt".
KO=${1:?"usage: getKO.sh KO_ID"}
# -f makes curl exit non-zero and emit no body on an HTTP error, so an error
# page is never stored as if it were an entry; the file is then left empty.
curl -sf "http://rest.kegg.jp/get/${KO}" > "${KO}.txt"
# We get all mappings with the KEGG API
# List every pathway, keep the first tab-separated column of each line and
# request the KOs linked to that identifier; all responses are concatenated
# into ko2kegg.
# Identifiers are read line by line instead of being word-split out of a
# command substitution, and the URL is quoted, so each one is used verbatim.
curl -S http://rest.kegg.jp/list/pathway |
  cut -f1 |
  while IFS= read -r pathway_id; do
    # An empty line would turn into a request for the bare /link/ko/ URL.
    [[ -n "${pathway_id}" ]] || continue
    # stdin is /dev/null so the inner curl can never consume the identifier list.
    curl -S "http://rest.kegg.jp/link/ko/${pathway_id}" < /dev/null
  done > ko2kegg
# Remove a leading "path:" from column 1 and a leading "ko:" from column 2 of
# every non-empty line of ko2kegg, then write the two columns, separated by a
# tab, to ko2kegg_final.
awk '
  BEGIN { FS = "\t"; OFS = "\t" }
  $0 != "" {
    pathway = $1
    ko = $2
    sub(/^path:/, "", pathway)
    sub(/^ko:/, "", ko)
    print pathway, ko
  }
' ko2kegg > ko2kegg_final

# We split all KOs for each OSD sample
# Field 1 of each line of the decompressed table names the output file
# ("<field 1>.txt"); fields 2 and 3 are written to that file, separated by a tab.
gzcat osd2014_reads_kegg_20140317_fixed.tsv.gz |
  mawk '{ out_file = $1 ".txt"; print $2 "\t" $3 > out_file }'

# We run MinPath
# One MinPath1.2.py run per OSD*.txt file under ../KEGG_minpath/, four at a
# time, with ko2kegg_final as the -map file. The .report and .details files are
# written next to each input because {} is the full path printed by find.
# - The search is limited to regular files named OSD*.txt, so .report/.details
#   files left by an earlier run are not fed back in as inputs.
# - find already prints each path with its ../KEGG_minpath/ prefix, so {} is
#   given to -any as is instead of prefixing the directory a second time.
find ../KEGG_minpath/ -type f -name "OSD*.txt" |
  parallel -j 4 MinPath1.2.py -any {} -map ../KEGG_minpath/ko2kegg_final -report {}.report -details {}.details

# And parse the results
# For every *report file in the current directory, append one row per report
# line to osd2014_minpath_ko.txt: the sample name (file name without its
# ".txt.report" suffix) followed by fields 14, 12 and 10 of that line,
# tab-separated.
# NOTE(review): the output is appended (>>), so running this again adds the
# same rows a second time.
for i in *report; do
  # When nothing matches, the pattern stays literal; skip it rather than hand
  # awk a file that does not exist.
  [[ -e "${i}" ]] || continue
  NAM=$(basename "${i}" .txt.report)
  awk -v L="${NAM}" '{ print L "\t" $14 "\t" $12 "\t" $10 }' "${i}"
done >> osd2014_minpath_ko.txt

# Get map definitions
# Download the pathway list and, on each line that contains a ':', keep only
# what follows the first one (lines without ':' pass through unchanged); the
# result is saved to map_names.txt.
curl -S http://rest.kegg.jp/list/pathway \
  | cut -d ':' -f 2- \
  > map_names.txt