lh3
7/30/2015 - 4:36 PM

sum-decoy.js

var max_leftover = 0.1, max_diff = 0.05, min_mapq = 20;

var file = arguments.length == 0? new File() : new File(arguments[0]);
var buf = new Bytes();

var re = /(\d+)([MIDSH])/g;
var decoy = {};
while (file.readline(buf) >= 0) {
	var m, line = buf.toString();
	if ((m = /^@SQ.*\tSN:(\S+).*\tLN:(\d+)/.exec(line)) != null) {
		if (/_decoy/.test(m[1])) decoy[m[1]] = {len:parseInt(m[2]), reg:[]};
	} else if (line.charAt(0) == '@') {
	} else {
		var t = line.split("\t");
		if (decoy[t[2]] == null) continue;
		if (parseInt(t[4]) < min_mapq) continue;
		if (parseInt(t[1])&4) continue;
		if ((m = /\tNM:i:(\d+)/.exec(line)) == null) continue;
		var NM = parseInt(m[1]);
		var start = parseInt(t[3]) - 1, end, n_M = 0, n_I = 0, n_D = 0, nn_I = 0, nn_D = 0, clip = [0, 0];
		while ((m = re.exec(t[5])) != null) {
			var l = parseInt(m[1]);
			if (m[2] == 'M') n_M += l;
			else if (m[2] == 'I') n_I += l, ++nn_I;
			else if (m[2] == 'D') n_D += l, ++nn_D;
			else if (m[2] == 'S' || m[2] == 'H') clip[n_M == 0? 0 : 1] += l;
		}
		end = start + n_M + n_D;
		var diff = NM - n_I - n_D + nn_I + nn_D;
		var hang_l = clip[0] < start? clip[0] : start;
		var hang_r = clip[1] < decoy[t[2]].len - end? clip[1] : decoy[t[2]].len - end;
		var leftover = hang_l + hang_r;
//		print(start, end, NM, leftover);
		if (leftover / (n_M + n_I + leftover) > max_leftover) continue;
		if (diff / (n_M + n_I + n_D) > max_diff) continue;
		decoy[t[2]].reg.push([start, end]);
	}
}

buf.destroy();
file.close();

var keys = [];
for (var x in decoy) keys.push(x);
keys.sort();
for (var k = 0; k < keys.length; ++k) {
	var x = keys[k], t = decoy[x];
	t.reg.sort(function(a,b){return a-b;});
	var start = 0, end = 0, len = 0;
	for (var i = 0; i < t.reg.length; ++i) {
		if (t.reg[i][0] > end) {
			len += end - start;
			start = t.reg[i][0], end = t.reg[i][1];
		} else end = end > t.reg[i][1]? end : t.reg[i][1];
	}
	len += end - start;
	print(x, t.len, len);
}