bindiego
9/10/2014 - 6:44 AM

plain text processing

plain text processing

import sys
import csv
import re

# Reduce a tab-separated file to "<field 1>+++++<field 4...>" records.
#
# The first input line is treated as a header and dropped.  For every other
# line the 2nd and 3rd fields are discarded; the 1st field and every field
# from the 4th onward are joined with '+++++'.

_DEFAULT_INPUT = 'lcshtopics-synonymlinkage.tsv'
_DEFAULT_OUTPUT = 'out'
_JOINER = '+++++'

# A run of consecutive tabs counts as ONE separator, so empty columns are
# collapsed rather than preserved.  Compiled once instead of per line.
_FIELD_SEP = re.compile(r'\t+')


def convert_line(line):
    """Return the output record for one input line.

    The record is the first field followed by every field from the fourth
    onward, joined with '+++++', and always terminated by a single newline.

    line -- one raw input line, with or without its trailing newline.
    """
    # Drop the line terminator up front.  Previously it only survived when it
    # happened to sit on a kept field, so rows with 2 or 3 fields were written
    # without a newline and ran into the following record.
    if line.endswith('\n'):
        line = line[:-1]
    fields = _FIELD_SEP.split(line)
    # re.split always yields at least one element, so fields[:1] is never
    # empty; fields[3:] is simply empty for short rows.
    return _JOINER.join(fields[:1] + fields[3:]) + '\n'


def convert_lines(lines):
    """Yield converted records for an iterable of lines, skipping the header.

    lines -- any iterable of strings (e.g. an open text file); its first
             item is discarded.
    """
    rows = iter(lines)
    next(rows, None)  # header row; the default keeps empty input from raising
    for row in rows:
        yield convert_line(row)


def convert_file(in_path=_DEFAULT_INPUT, out_path=_DEFAULT_OUTPUT):
    """Convert the file at in_path and write the result to out_path.

    Raises whatever open() raises (IOError/OSError) if either file cannot be
    opened.
    """
    # The input is opened first so a missing input does not create/truncate
    # the output.  'with' closes both handles even if the second open() or
    # the conversion fails.
    with open(in_path, 'r') as filein:
        with open(out_path, 'w') as fileout:
            fileout.writelines(convert_lines(filein))


if __name__ == '__main__':
    # Optional overrides: script.py [input [output]]
    convert_file(*sys.argv[1:3])