lh3
8/26/2016 - 10:36 PM

3732-to-labels.pl

#!/usr/bin/perl

use strict;
use warnings;
use Getopt::Std;

# ReCount download link: http://duffel.rail.bio/recount/SRP012682/SRP012682.tsv

my %opts = (c=>30);
getopts('mc:e', \%opts);

die("Usage: gtex2feat.pl [-me] [-c $opts{c}] <SRP012682.tsv>\n") if @ARGV == 0 && -t STDIN;

my (@a, %cnt, %extract);
while (<>) {
	next if $. == 1;
	chomp;
	my @t = split("\t");
	next if $t[8] eq 'TRUE';
	my $t = $t[25];
	$t =~ tr/ /_/;
	my $g = $t[18];
	my $male = ($g =~ /_male_/)? 1 : 0;
	my $female = 1 - $male;
	my $e = $t[30];
	$e =~ tr/ /_/;
	$extract{$e} = 1;
	if (defined $opts{m}) {
		if ($t =~ /Cerebell/) {
			$t = "Cerebellum";
		} elsif ($t =~ /Brain/) {
			$t = "Brain";
		}
	}
	$cnt{$t} = 0 unless defined($cnt{$t});
	++$cnt{$t};
	push(@a, [$t[3], $t, $male, $female, $e]) if ($. > 1 && $t ne "");
}

my $id = 0;
my %tissues;
for my $t (sort keys(%cnt)) {
	$tissues{$t} = $id++ if $cnt{$t} >= $opts{c};
	warn("Warning: dropped tissue '$t'\n") if $cnt{$t} < $opts{c};
}

if (defined $opts{e}) {
	$id = 0;
	for my $e (sort keys(%extract)) {
		$extract{$e} = $id++;
	}
	print(join("\t", "#sample", "male", "female", sort(keys(%extract)), sort(keys(%tissues))), "\n");
} else {
	print(join("\t", "#sample", "male", "female", sort(keys(%tissues))), "\n");
}

for my $x (@a) {
	next unless defined($tissues{$x->[1]});
	my (@b, @e);
	for (my $i = 0; $i < scalar(keys(%tissues)); ++$i) {
		$b[$i] = 0;
	}
	$b[$tissues{$x->[1]}] = 1;
	if (defined $opts{e}) {
		for (my $i = 0; $i < scalar(keys(%extract)); ++$i) {
			$e[$i] = 0;
		}
		$e[$extract{$x->[4]}] = 1;
		print(join("\t", $x->[0], $x->[2], $x->[3], @e, @b), "\n");
	} else {
		print(join("\t", $x->[0], $x->[2], $x->[3], @b), "\n");
	}
}
#!/usr/bin/perl

use strict;
use warnings;

die("Usage: gtex-eval.pl <truth.y52.snd> <predict.y52.snd>\n") if @ARGV < 2;

open(TRUTH, $ARGV[0]=~/\.gz$/? "gzip -dc $ARGV[0] |" : $ARGV[0]) || die;
open(PREDICT, $ARGV[1]=~/\.gz$/? "gzip -dc $ARGV[1] |" : $ARGV[1]) || die;

my $lineno = 0;
my @tissues;
my @g = (0, 0);
my @ti = (0, 0);
while (1) {
	my $t = <TRUTH>;
	my $p = <PREDICT>;
	last unless $t && $p;
	chomp($t);
	chomp($p);
	++$lineno;
	my @t = split("\t", $t);
	my @p = split("\t", $p);
	if ($lineno == 1) {
		@tissues = @t;
	} else {
		my $start;
		if (@t == 53 || @t == 38) {
			my $tg = $t[1] > $t[2]? 0 : 1;
			my $pg = $p[1] > $p[2]? 0 : 1;
			if ($tg == $pg) {
				++$g[0];
			} else {
				++$g[1];
				print("GE\t$t[0]\t$t[1]\t$t[2]\n");
			}
			$start = 3;
		} elsif (@t == 51 || @t == 36 || @t == 40) {
			$start = 1;
		} else {
			die;
		}
		my ($max, $tt, $pt) = (-1, -1, -1);
		for (my $i = $start; $i < @t; ++$i) {
			($max, $tt) = ($t[$i], $i) if $max < $t[$i];
		}
		$max = -1;
		for (my $i = $start; $i < @p; ++$i) {
			($max, $pt) = ($p[$i], $i) if $max < $p[$i];
		}
		printf("TT\t$t[0]\t%.3f\t$tissues[$tt]\n", $p[$tt]);
		if ($pt == $tt) {
			++$ti[0];
		} else {
			++$ti[1];
			printf("TE\t$t[0]\t%.3f\t%.3f\t$tissues[$tt]\t$tissues[$pt]\n", $p[$tt], $p[$pt]);
		}
	}
}

close(PREDICT);
close(TRUTH);

printf("GA\t%.2f%%\n", 100 * $g[0] / ($g[0] + $g[1])) if @tissues == 53 || @tissues == 38;
printf("TA\t%.2f%%\n", 100 * $ti[0] / ($ti[0] + $ti[1]));
#!/usr/bin/perl

use strict;
use warnings;
use Getopt::Std;

my %opts = (t=>'ct', m=>50);
getopts('t:m:3', \%opts);

my $type;
if ($opts{t} eq 'p') { # body part
	$type = 0;
} elsif ($opts{t} eq 'ct') { # cell type
	$type = 1;
} elsif ($opts{t} eq 'd') { # disease status
	$type = 2;
} elsif ($opts{t} eq 'cl') { # cell line
	$type = 3;
}

my (@a, %c);
while (<>) {
	chomp;
	next if $. == 1;
	tr/ /_/;
	my @t = split("\t");
	my $key;
	if ($type == 0) {
		$key = $t[6];
		$key = undef if $key =~ /_(and|or)_/; # skip mixed tissues
	} elsif ($type == 2) {
		$key = $t[8];
	} elsif ($type == 1) {
		my $a = $t[3];
		my $b = $t[6];
		my $c = $t[5];
		$key = $c eq '__'? "$b:$a" : "$c:$a";
		$key =~ s/__/\*/g;
		$key = undef if $key eq '*:*' || $key =~ /_(and|or)_/;
	} elsif ($type == 3) {
		my $a = $t[4];
		my $b = $t[6];
		$b = '*' if $b eq '__';
		$key = "$b:$a";
		$key = undef if $a eq '__';
	}
	if ($key && $key ne '__' && $key ne '-') {
		push(@a, [$t[0], $key]);
		$c{$key} = 0 unless defined $c{$key};
		++$c{$key};
	}
}

my $id = 0;
my %h;
for my $key (sort keys %c) {
	$h{$key} = $id++ if $c{$key} >= $opts{m};
}
unless (defined $opts{3}) {
	print join("\t", '#sample', sort(keys %h)), "\n";
}

for my $x (@a) {
	next unless defined $h{$x->[1]};
	if (defined $opts{3}) {
		print join("\t", $x->[0], $h{$x->[1]}, $x->[1]), "\n";
	} else {
		my $n = scalar(keys %h);
		my @b;
		for (my $i = 0; $i < $n; ++$i) {
			$b[$i] = 0;
		}
		$b[$h{$x->[1]}] = 1;
		print join("\t", $x->[0], @b), "\n";
	}
}