s4553711
9/23/2013 - 6:53 AM

FastaMD5.java

d89517b400226d3b56e753972a7cad67	chr17_ctg5_hap1	1680828
641e4338fa8d52a5b781bd2a2c08d3c3	chr3	198022430
fa24f81b680df26bcfb6d69b784fbe36	chr4_ctg9_hap1	590426
fe71bc63420d666884f37a3ad79f3317	chr6_apd_hap1	4622290
18c17e1641ef04873b15f40f6c8659a4	chr6_cox_hap2	4795371
2a3c677c426a10e137883ae1ffb8da3f	chr6_dbb_hap3	4610396
9d51d4152174461cd6715c7ddc588dc8	chr6_mann_hap4	4683263
efed415dd8742349cb7aaca054675b9a	chr6_mcf_hap5	4833398
094d037050cad692b57ea12c4fef790f	chr6_qbl_hap6	4611984
3b6d666200e72bcc036bf88a4d7e0749	chr6_ssto_hap7	4928567
d2ed829b8a1628d16cbeee88e88e39eb	chrM	16571
1e86411d73e6f00a10590f976be01623	chrY	59373566


fdfd811849cc2fadebc929bb925902e5	3 dna:chromosome chromosome:GRCh37:3:1:198022430:1	198022430
c68f52674c9fb33aef52dcf399755519	MT gi|251831106|ref|NC_012920.1| Homo sapiens mitochondrion, complete genome	16569
1fa3474750af0948bdf97d5a0ee52e51	Y dna:chromosome chromosome:GRCh37:Y:2649521:59034049:1	59373566
1b22b98cdeb4a9304cb5d48026a85128	1 dna:chromosome chromosome:GRCh37:1:1:249250621:1	chr1
988c28e000e84c26d552359af1ea2e1d	10 dna:chromosome chromosome:GRCh37:10:1:135534747:1	chr10
98c59049a2df285c76ffb1c6db8f8b96	11 dna:chromosome chromosome:GRCh37:11:1:135006516:1	chr11
06cbf126247d89664a4faebad130fe9c	GL000202.1 dna:supercontig supercontig::GL000202.1:1:40103:1	chr11_gl000202_random
51851ac0e1a115847ad36449b0015864	12 dna:chromosome chromosome:GRCh37:12:1:133851895:1	chr12
283f8d7892baa81b510a015719ca7b0b	13 dna:chromosome chromosome:GRCh37:13:1:115169878:1	chr13
98f3cae32b2a2e9524bc19813927542e	14 dna:chromosome chromosome:GRCh37:14:1:107349540:1	chr14
e5645a794a8238215b2cd77acb95a078	15 dna:chromosome chromosome:GRCh37:15:1:102531392:1	chr15
fc9b1a7b42b97a864f56b348b06095e6	16 dna:chromosome chromosome:GRCh37:16:1:90354753:1	chr16
351f64d4f4f9ddd45b35336ad97aa6de	17 dna:chromosome chromosome:GRCh37:17:1:81195210:1	chr17
96358c325fe0e70bee73436e8bb14dbd	GL000203.1 dna:supercontig supercontig::GL000203.1:1:37498:1	chr17_gl000203_random
efc49c871536fa8d79cb0a06fa739722	GL000204.1 dna:supercontig supercontig::GL000204.1:1:81310:1	chr17_gl000204_random
d22441398d99caf673e9afb9a1908ec5	GL000205.1 dna:supercontig supercontig::GL000205.1:1:174588:1	chr17_gl000205_random
43f69e423533e948bfae5ce1d45bd3f1	GL000206.1 dna:supercontig supercontig::GL000206.1:1:41001:1	chr17_gl000206_random
b15d4b2d29dde9d3e4f93d1d0f2cbc9c	18 dna:chromosome chromosome:GRCh37:18:1:78077248:1	chr18
f3814841f1939d3ca19072d9e89f3fd7	GL000207.1 dna:supercontig supercontig::GL000207.1:1:4262:1	chr18_gl000207_random
1aacd71f30db8e561810913e0b72636d	19 dna:chromosome chromosome:GRCh37:19:1:59128983:1	chr19
aa81be49bf3fe63a79bdc6a6f279abf6	GL000208.1 dna:supercontig supercontig::GL000208.1:1:92689:1	chr19_gl000208_random
f40598e2a5a6b26e84a3775e0d1e2c81	GL000209.1 dna:supercontig supercontig::GL000209.1:1:159169:1	chr19_gl000209_random
d75b436f50a8214ee9c2a51d30b2c2cc	GL000191.1 dna:supercontig supercontig::GL000191.1:1:106433:1	chr1_gl000191_random
325ba9e808f669dfeee210fdd7b470ac	GL000192.1 dna:supercontig supercontig::GL000192.1:1:547496:1	chr1_gl000192_random
a0d9851da00400dec1098a9255ac712e	2 dna:chromosome chromosome:GRCh37:2:1:243199373:1	chr2
0dec9660ec1efaaf33281c0d5ea2560f	20 dna:chromosome chromosome:GRCh37:20:1:63025520:1	chr20
2979a6085bfe28e3ad6f552f361ed74d	21 dna:chromosome chromosome:GRCh37:21:1:48129895:1	chr21
851106a74238044126131ce2a8e5847c	GL000210.1 dna:supercontig supercontig::GL000210.1:1:27682:1	chr21_gl000210_random
a718acaa6135fdca8357d5bfe94211dd	22 dna:chromosome chromosome:GRCh37:22:1:51304566:1	chr22
23dccd106897542ad87d2765d28a19a1	4 dna:chromosome chromosome:GRCh37:4:1:191154276:1	chr4
dbb6e8ece0b5de29da56601613007c2a	GL000193.1 dna:supercontig supercontig::GL000193.1:1:189789:1	chr4_gl000193_random
6ac8f815bf8e845bb3031b73f812c012	GL000194.1 dna:supercontig supercontig::GL000194.1:1:191469:1	chr4_gl000194_random
0740173db9ffd264d728f32784845cd7	5 dna:chromosome chromosome:GRCh37:5:1:180915260:1	chr5
1d3a93a248d92a729ee764823acbbc6b	6 dna:chromosome chromosome:GRCh37:6:1:171115067:1	chr6
618366e953d6aaad97dbe4777c29375e	7 dna:chromosome chromosome:GRCh37:7:1:159138663:1	chr7
5d9ec007868d517e73543b005ba48535	GL000195.1 dna:supercontig supercontig::GL000195.1:1:182896:1	chr7_gl000195_random
96f514a9929e410c6651697bded59aec	8 dna:chromosome chromosome:GRCh37:8:1:146364022:1	chr8
d92206d1bb4c3b4019c43c0875c06dc0	GL000196.1 dna:supercontig supercontig::GL000196.1:1:38914:1	chr8_gl000196_random
6f5efdd36643a9b8c8ccad6f2f1edc7b	GL000197.1 dna:supercontig supercontig::GL000197.1:1:37175:1	chr8_gl000197_random
3e273117f15e0a400f01055d9f393768	9 dna:chromosome chromosome:GRCh37:9:1:141213431:1	chr9
868e7784040da90d900d2d1b667a1383	GL000198.1 dna:supercontig supercontig::GL000198.1:1:90085:1	chr9_gl000198_random
569af3b73522fab4b40995ae4944e78e	GL000199.1 dna:supercontig supercontig::GL000199.1:1:169874:1	chr9_gl000199_random
75e4c8d17cd4addf3917d1703cacaf25	GL000200.1 dna:supercontig supercontig::GL000200.1:1:187035:1	chr9_gl000200_random
dfb7e7ec60ffdcb85cb359ea28454ee9	GL000201.1 dna:supercontig supercontig::GL000201.1:1:36148:1	chr9_gl000201_random
7daaa45c66b288847b9b32b964e623d3	GL000211.1 dna:supercontig supercontig::GL000211.1:1:166566:1	chrUn_gl000211
563531689f3dbd691331fd6c5730a88b	GL000212.1 dna:supercontig supercontig::GL000212.1:1:186858:1	chrUn_gl000212
9d424fdcc98866650b58f004080a992a	GL000213.1 dna:supercontig supercontig::GL000213.1:1:164239:1	chrUn_gl000213
46c2032c37f2ed899eb41c0473319a69	GL000214.1 dna:supercontig supercontig::GL000214.1:1:137718:1	chrUn_gl000214
5eb3b418480ae67a997957c909375a73	GL000215.1 dna:supercontig supercontig::GL000215.1:1:172545:1	chrUn_gl000215
642a232d91c486ac339263820aef7fe0	GL000216.1 dna:supercontig supercontig::GL000216.1:1:172294:1	chrUn_gl000216
6d243e18dea1945fb7f2517615b8f52e	GL000217.1 dna:supercontig supercontig::GL000217.1:1:172149:1	chrUn_gl000217
1d708b54644c26c7e01c2dad5426d38c	GL000218.1 dna:supercontig supercontig::GL000218.1:1:161147:1	chrUn_gl000218
f977edd13bac459cb2ed4a5457dba1b3	GL000219.1 dna:supercontig supercontig::GL000219.1:1:179198:1	chrUn_gl000219
fc35de963c57bf7648429e6454f1c9db	GL000220.1 dna:supercontig supercontig::GL000220.1:1:161802:1	chrUn_gl000220
3238fb74ea87ae857f9c7508d315babb	GL000221.1 dna:supercontig supercontig::GL000221.1:1:155397:1	chrUn_gl000221
6fe9abac455169f50470f5a6b01d0f59	GL000222.1 dna:supercontig supercontig::GL000222.1:1:186861:1	chrUn_gl000222
399dfa03bf32022ab52a846f7ca35b30	GL000223.1 dna:supercontig supercontig::GL000223.1:1:180455:1	chrUn_gl000223
d5b2fc04f6b41b212a4198a07f450e20	GL000224.1 dna:supercontig supercontig::GL000224.1:1:179693:1	chrUn_gl000224
63945c3e6962f28ffd469719a747e73c	GL000225.1 dna:supercontig supercontig::GL000225.1:1:211173:1	chrUn_gl000225
1c1b2cd1fccbc0a99b6a447fa24d1504	GL000226.1 dna:supercontig supercontig::GL000226.1:1:15008:1	chrUn_gl000226
a4aead23f8053f2655e468bcc6ecdceb	GL000227.1 dna:supercontig supercontig::GL000227.1:1:128374:1	chrUn_gl000227
c5a17c97e2c1a0b6a9cc5a6b064b714f	GL000228.1 dna:supercontig supercontig::GL000228.1:1:129120:1	chrUn_gl000228
d0f40ec87de311d8e715b52e4c7062e1	GL000229.1 dna:supercontig supercontig::GL000229.1:1:19913:1	chrUn_gl000229
b4eb71ee878d3706246b7c1dbef69299	GL000230.1 dna:supercontig supercontig::GL000230.1:1:43691:1	chrUn_gl000230
ba8882ce3a1efa2080e5d29b956568a4	GL000231.1 dna:supercontig supercontig::GL000231.1:1:27386:1	chrUn_gl000231
3e06b6741061ad93a8587531307057d8	GL000232.1 dna:supercontig supercontig::GL000232.1:1:40652:1	chrUn_gl000232
7fed60298a8d62ff808b74b6ce820001	GL000233.1 dna:supercontig supercontig::GL000233.1:1:45941:1	chrUn_gl000233
93f998536b61a56fd0ff47322a911d4b	GL000234.1 dna:supercontig supercontig::GL000234.1:1:40531:1	chrUn_gl000234
118a25ca210cfbcdfb6c2ebb249f9680	GL000235.1 dna:supercontig supercontig::GL000235.1:1:34474:1	chrUn_gl000235
fdcd739913efa1fdc64b6c0cd7016779	GL000236.1 dna:supercontig supercontig::GL000236.1:1:41934:1	chrUn_gl000236
e0c82e7751df73f4f6d0ed30cdc853c0	GL000237.1 dna:supercontig supercontig::GL000237.1:1:45867:1	chrUn_gl000237
131b1efc3270cc838686b54e7c34b17b	GL000238.1 dna:supercontig supercontig::GL000238.1:1:39939:1	chrUn_gl000238
99795f15702caec4fa1c4e15f8a29c07	GL000239.1 dna:supercontig supercontig::GL000239.1:1:33824:1	chrUn_gl000239
445a86173da9f237d7bcf41c6cb8cc62	GL000240.1 dna:supercontig supercontig::GL000240.1:1:41933:1	chrUn_gl000240
ef4258cdc5a45c206cea8fc3e1d858cf	GL000241.1 dna:supercontig supercontig::GL000241.1:1:42152:1	chrUn_gl000241
2f8694fc47576bc81b5fe9e7de0ba49e	GL000242.1 dna:supercontig supercontig::GL000242.1:1:43523:1	chrUn_gl000242
cc34279a7e353136741c9fce79bc4396	GL000243.1 dna:supercontig supercontig::GL000243.1:1:43341:1	chrUn_gl000243
0996b4475f353ca98bacb756ac479140	GL000244.1 dna:supercontig supercontig::GL000244.1:1:39929:1	chrUn_gl000244
89bc61960f37d94abf0df2d481ada0ec	GL000245.1 dna:supercontig supercontig::GL000245.1:1:36651:1	chrUn_gl000245
e4afcd31912af9d9c2546acf1cb23af2	GL000246.1 dna:supercontig supercontig::GL000246.1:1:38154:1	chrUn_gl000246
7de00226bb7df1c57276ca6baabafd15	GL000247.1 dna:supercontig supercontig::GL000247.1:1:36422:1	chrUn_gl000247
5a8e43bec9be36c7b49c84d585107776	GL000248.1 dna:supercontig supercontig::GL000248.1:1:39786:1	chrUn_gl000248
1d78abec37c15fe29a275eb08d5af236	GL000249.1 dna:supercontig supercontig::GL000249.1:1:38502:1	chrUn_gl000249
7e0e2e580297b7764e31dbc80c2540dd	X dna:chromosome chromosome:GRCh37:X:1:155270560:1	chrX
$ curl -s  "ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/human_g1k_v37.fasta.gz"  | gunzip -c  | java FastaMD5 > a.txt

$ curl -s "http://hgdownload.cse.ucsc.edu/goldenPath/hg19/bigZips/chromFa.tar.gz" | gunzip -c | tar Oxvf - 2> /dev/null | java FastaMD5 > b.txt

##join
$ join -t $'\t' -1 2 -2 2 <(sort -t $'\t' -k2,2 a.txt) <(sort -t $'\t' -k2,2 b.txt) | cut -d $'\t' -f 1,2,4 | sort -t $'\t' -k3,3

#unjoinable
$ join -t $'\t' -1 2 -2 2 -v 1 -v 2 <(sort -t $'\t' -k2,2 a.txt) <(sort -t $'\t' -k2,2 b.txt) | sort -t $'\t' -k2,2
import java.io.*;
import java.security.MessageDigest;

/**
 * Computes one MD5 checksum per FASTA record read from standard input.
 *
 * <p>For every record the program writes a single line to standard output:
 * <pre>
 *   header &lt;TAB&gt; md5-hex &lt;TAB&gt; residue-count
 * </pre>
 * where {@code header} is everything that follows {@code '>'} up to the end of
 * that line. The digest is taken over the sequence bytes after removing
 * whitespace (space, tab, CR, LF) and folding ASCII letters to upper case, so
 * line wrapping, line-ending style and soft-masking do not affect the result.
 */
public class FastaMD5 {
    /** Size, in bytes, of the buffers placed around standard input and output. */
    private static final int BUFFER_SIZE = 1 << 16;

    /** Lower-case hexadecimal digits used to render the digest. */
    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    /**
     * Entry point: digests the FASTA stream on {@code System.in} and reports to
     * {@code System.out}.
     *
     * @param args ignored
     * @throws IOException if reading fails, or if sequence data appears before
     *                     the first {@code '>'} header
     * @throws Exception   if the MD5 algorithm is not available
     */
    public static void main(String[] args) throws Exception {
        InputStream in = new BufferedInputStream(System.in, BUFFER_SIZE);
        // Buffer the output: the report is produced byte by byte, and pushing
        // every byte straight through System.out is needlessly slow.
        PrintStream out = new PrintStream(new BufferedOutputStream(System.out, BUFFER_SIZE), false);
        try {
            digestRecords(in, out);
        } finally {
            // Make sure whatever was produced reaches the real stdout, even on error.
            out.flush();
        }
    }

    /**
     * Streams FASTA records from {@code in} and writes one report line per
     * record to {@code out}.
     */
    private static void digestRecords(InputStream in, PrintStream out)
            throws IOException, java.security.NoSuchAlgorithmException {
        // digest() resets the instance, so a single MessageDigest serves all records.
        MessageDigest md5 = MessageDigest.getInstance("MD5");
        boolean inRecord = false;
        // long, not int: a sequence longer than 2^31-1 residues must not wrap around.
        long length = 0L;

        for (;;) {
            int c = in.read();

            if (c == -1 || c == '>') {
                // End of input or start of the next record: close the current one.
                if (inRecord) {
                    out.print(toHex(md5.digest()));
                    out.print('\t');
                    out.println(length);
                    inRecord = false;
                }
                if (c == -1) {
                    return;
                }
                // Echo the header line as raw bytes. CR is dropped so that CRLF
                // input does not leak a '\r' into the name column.
                while ((c = in.read()) != -1 && c != '\n') {
                    if (c != '\r') {
                        out.write(c);
                    }
                }
                out.write('\t');
                inRecord = true;
                length = 0L;
                continue;
            }

            if (c == '\n' || c == '\r' || c == ' ' || c == '\t') {
                continue; // whitespace is not part of the sequence
            }

            if (!inRecord) {
                throw new IOException(
                        "Malformed FASTA: sequence data found before the first '>' header");
            }

            // Fold ASCII letters only. The input is bytes, not Unicode code points;
            // Character.toUpperCase would map e.g. 0xFF to U+0178 and corrupt the byte.
            if (c >= 'a' && c <= 'z') {
                c -= 'a' - 'A';
            }
            md5.update((byte) c);
            ++length;
        }
    }

    /** Renders {@code bytes} as a lower-case hexadecimal string, two digits per byte. */
    private static String toHex(byte[] bytes) {
        char[] chars = new char[bytes.length * 2];
        for (int i = 0; i < bytes.length; i++) {
            int v = bytes[i] & 0xff;
            chars[2 * i] = HEX_DIGITS[v >>> 4];
            chars[2 * i + 1] = HEX_DIGITS[v & 0x0f];
        }
        return new String(chars);
    }
}