mschecht
1/7/2019 - 3:47 PM

extract_sequences

Fastest way to extract sequences from a FASTA file

# Requires seqkit to be installed.
# `seqkit fx2tab` converts the FASTA file to tabular format; the rows are then
# sorted on their first field and written to allORFs.sorted.tsv.
seqkit fx2tab allORFs.fasta \
  | sort --key=1,1 --parallel=32 --buffer-size=20% \
  > allORFs.sorted.tsv

# Keep the rows of the tabular FASTA whose header matches an entry in
# toextract.txt, then convert those rows back to standard FASTA.
# An entry matches when it equals either the whole first column or that column
# up to its first space. Comparing whole strings (rather than searching the
# entire line with `grep -w -F`) stops `seq1` from also pulling in `seq1.2` or
# any record that merely mentions `seq1` elsewhere on the line.
LC_ALL=C awk -F'\t' '
  # First file: load the wanted headers/IDs, skipping blank lines.
  FILENAME == ARGV[1] { if ($0 != "") wanted[$0]; next }
  # Second file: test column 1 both in full and cut at the first space.
  {
    id = $1
    sub(/ .*/, "", id)
    if (($1 in wanted) || (id in wanted)) print
  }
' toextract.txt allORFs.sorted.tsv | seqkit tab2fx > toextract.fasta