# UNKNOWN analyses
# Origin of the sequences (amino-acid sequences are used throughout)
# GOS: ftp://ftp.imicrobe.us/projects/26
# GOS sample-to-short-name mapping: CAM_PROJ_GOS.smpl.txt
# TARA: Shini's assemblies, with my own gene prediction using Prodigal
# OSD: my own assemblies and gene predictions with Prodigal
# Prepare GOS data
# Predict genes on the GOS reads with Prodigal in metagenomic mode (-p meta).
# Outputs: protein translations (-a), nucleotide sequences of the predicted
# genes (-d) and gene coordinates in GFF format (-f gff, written to -o).
# -q runs Prodigal quietly.
prodigal -q \
  -p meta \
  -i CAM_PROJ_GOS.read.fa \
  -a GOS.aa.fasta \
  -d GOS.nt.fasta \
  -f gff \
  -o GOS.gff
# Rename the predicted gene names using the GOS sample file.
# Script from https://gist.github.com/genomewalker/e06c8f270e3324f011fd#file-rename-orf-bysample-gos-awk
# -f is required: without it mawk treats its first argument as the program
# text itself instead of loading the program from that file.
# NOTE(review): the gist anchor suggests the script is named
# rename-orf-bysample-gos.awk; confirm the local file name used below.
# NOTE(review): nothing is redirected here, yet the later concatenation step
# reads GOS.orfs.aa.fasta; confirm whether the script writes that file itself
# or whether its stdout should be captured into it.
mawk -f file-rename-orf-bysample-gos.wk GOS.aa.fasta CAM_PROJ_GOS.smpl.txt
# Merge the translated ORFs of the three datasets (TARA, OSD2014 and GOS)
# into a single FASTA file, in that order.
cat \
  TARA.orfs.aa.fasta \
  OSD.orfs.aa.fasta \
  GOS.orfs.aa.fasta \
  > TARA_OSD2014_GOS.orf.aa.fasta
# Search the combined ORFs for Pfam domains using UProC and Pfam 27.
# -t 8 runs eight threads, -p prints the individual predictions, and
# -z writes gzip-compressed output to the named file.
# Positional arguments: database directory, model directory, input FASTA.
# NOTE(review): the output name says TARA_OSD_GOS while the input says
# TARA_OSD2014_GOS; left unchanged because later steps may read this name.
uproc-prot \
  -t 8 \
  -p \
  -z TARA_OSD_GOS.pfam27.txt.gz \
  /local/biodb/uproc/pfam27 \
  /local/biodb/uproc/model \
  TARA_OSD2014_GOS.orf.aa.fasta
# Get IDs for the hits