seq_mass.pl

9/4/2012 - 1:50 AM

#!/usr/bin/perl

use strict;
use warnings;

# the specification is  https://dl.dropbox.com/u/40857893/cv/spTag_database_creation.docx
# the input file is https://dl.dropbox.com/u/40857893/cv/Input.fasta
#use Bio::SeqIO; fail to install the bioperl, so analysis the seq manually
my %mass_table=(
A => 71.03711,
C => 156.10111,
D => 114.04293,
E => 115.02694,
F => 103.00919,
G => 129.04259,
H => 128.05858,
I => 57.02146,
K => 137.05891,
L => 113.08406,
M => 113.08406,
N => 128.09496,
P => 131.04049,
Q => 147.06841,
R => 97.05276,
S => 87.03203,
T => 101.04768,
V => 186.07931,
W => 163.06333,
Y => 99.06841,
);

#digest seq->peps
sub digest {
	my $seq = shift;
	$seq =~ s/R/R;/g;
	$seq =~ s/K/K;/g;
	split(/;/,$seq);
}

#mass pep->mass
sub mass {
	my $pep = shift;
	my $mass_pep = 0;
	foreach my $amino (split //, $pep) {
		$mass_pep = $mass_pep + $mass_table{$amino};
	}
	return $mass_pep;
}

##################################################
# seqs process begins here.
# "input.fasta" should be the file to be processed
##################################################
open INPUT, "<", "input.fasta" or die $!;
open OUT1, ">", "outfile1.txt" or die $!;

my @mass_freq = ();

local $/ = '>';

while( <INPUT> ) {
    chomp;
    my ($desc,$seq) = split("\n",$_,2);
	if ($seq){
	$seq =~ s[\n][]g;
		my @peps = digest($seq);
		foreach my $pep (sort {length($a) <=> length($b)} @peps)
		{
			my $mass_temp = mass($pep);
			if ($mass_temp >= 400 && $mass_temp <= 6000){
			push(@mass_freq, $mass_temp);
			printf OUT1 ">Mass= %.2f | %s\n\n %s \n\n", $mass_temp,$desc,$pep;
			}
		}
	}
}
close INPUT;
close OUT1;

open  OUT2, ">", "outfile2.txt" or die $!;
# print freq table;
my %freq;
foreach my $mas (@mass_freq) {
	$freq{$mas}++;
}

print OUT2 "Mass Frequency\n";
foreach my $mas (sort {$a <=> $b} keys %freq) {
    printf OUT2 "%.2f %d\n",$mas,$freq{$mas};
}
close OUT2;
Cacher is the code snippet organizer for pro developers

We empower you and your team to get more done, faster

seq_mass.pl