swuecho
12/16/2014 - 3:53 AM

fasta_0_parser.pl

use v5.20;
use DDP;

# Sample input: two FASTA records, one nucleotide and one protein. The
# second header also carries a free-text comment after the id.
my $fasta = <<'END';
>hello
GCTATATAAGC
>world prot
TATAKEKEKELKL
END

# Grammar for a whole FASTA document.
#
#   TOP       one or more records; the top-level pattern is anchored to
#             the whole input so unparsable text fails the match instead
#             of being silently skipped
#   record    a ">" header line (id plus optional comment) and a sequence
#   id        run of characters up to the first whitespace or hyphen
#   comment   remainder of the header line, leading whitespace included
#   sequence  classified as dna, rna or aa, tried in that order
#
# Every sequence alternative has to reach the next ">" or the end of the
# input. Without that check the first alternative wins as soon as it
# matches a prefix, so a protein that starts with nucleotide letters
# (TATAKEKEKELKL) was reported as the DNA "TATAK" and the rest was lost.
#
# The sequence classes contain "\n", so multi-line sequences are accepted
# and the captured text keeps its line breaks.
my $parser = do {
    use Regexp::Grammars;
    qr/
    \A <TOP> \z
    <nocontext:>
    <token: TOP>  <[record]>+
    <token: record> <.start=(\>)><id><comment>?\n<sequence>
    <token: id>  [^\-\s\n]+
    <token: comment> \s[^\n]+
    <token: sequence> <dna> (?= \> | \z ) | <rna> (?= \> | \z ) | <aa> (?= \> | \z )
    <token: dna> [ACGTRYKMSWBDHVNX\-\n]+
    <token: rna> [ACGURYKMSWBDHVNX\-\n]+
    <token: aa> [A-Z\*\-\n]+
    /;
};

# On a successful match Regexp::Grammars leaves the parse tree in %/.
if ($fasta =~ $parser) {
    p %/;
}
else {
    # Report unparsable input instead of exiting silently.
    die "Input is not valid FASTA\n";
}

__END__
{
    TOP   {
        record   [
            [0] {
                id         "hello",
                sequence   {
                    dna   "GCTATATAAGC
"
                }
            },
            [1] {
                comment    " prot",
                id         "world",
                sequence   {
                    aa   "TATAKEKEKELKL
"
                }
            }
        ]
    }
}