ca5
4/8/2014 - 1:27 AM

cdb

cdb

$ cat ../tool/grep-tsv-by-cdb.py 
#!/usr/bin/env python
'''
If a TSV record contains a word that is registered in the cdb,
print the first column of that record.
'''

import sys
import cdb

# Usage: grep-tsv-by-cdb.py CDB_FILE < input.tsv
# Reads tab-separated records from stdin and prints the first column of
# every record in which at least one field is a key of the cdb file.
argv = sys.argv
argv.pop(0)  # drop the script name

# The next argument is the path of the cdb file to look words up in.
cdb_reader = cdb.init(argv.pop(0))

for line in sys.stdin:
    # Strip the line terminator first; otherwise the last field keeps its
    # trailing newline and can never match a key stored in the cdb.
    fields = line.rstrip('\r\n').split('\t')
    # any() stops at the first hit, so a record with several matching
    # fields is reported once rather than once per matching field.
    if any(cdb_reader.get(word) is not None for word in fields):
        print(fields[0])
#!/usr/bin/env python
import sys
import cdb

# Command line: <script> CDB_FILE KEY [KEY ...]
argv = sys.argv
del argv[0]  # discard the script name

# First remaining argument is the cdb file to open.
db_path = argv.pop(0)
cdb_reader = cdb.init(db_path)

# Everything still left on the command line is treated as a key to look up.
for key in argv:
    value = cdb_reader.get(key)
    print("key:%s, value:%s" % (key, value))
#!/usr/bin/env python
import sys
import cdb

# The cdb file to dump is given as the first command-line argument.
cdb_reader = cdb.init(sys.argv[1])

# Emit each key together with its value as one tab-separated line.
for key in cdb_reader.keys():
    record = (key, cdb_reader.get(key))
    print("\t".join(record))
#!/usr/bin/python 
'''
needs python-cdb
'''
import cdb
import sys


def mkcdb(out_file):
    """Build a cdb file at ``out_file`` from tab-separated lines on stdin.

    For every input line, trailing whitespace is stripped and the line is
    split on tabs. The key is the second field when the line has more than
    one field, otherwise the first field. Every key is stored with the
    value "1".

    ``out_file + '.tmp'`` is passed as the second argument to
    ``cdb.cdbmake`` (presumably the temporary file the database is built
    in -- confirm against the python-cdb documentation).
    """
    maker = cdb.cdbmake(out_file, out_file + '.tmp')
    try:
        for line in sys.stdin:
            fields = line.rstrip().split("\t")
            key = fields[1] if len(fields) > 1 else fields[0]
            maker.add(key, "1")
    except IOError:
        # Only I/O failures are treated as best-effort here. A bare except
        # would also swallow KeyboardInterrupt and programming errors and
        # report them as a stdin problem.
        print("open stdin error")
    # Finalize with whatever records were added, as before.
    maker.finish()

def main():
    """Command-line entry point: build the cdb named by the first argument.

    When the output file argument is missing, a usage line is written to
    stderr and the process exits with status 1.
    """
    argvs = sys.argv
    if len(argvs) < 2:
        sys.stderr.write("usage: %s output_file\n" % (__file__))
        # Stop here: falling through would index argvs[1] and raise
        # IndexError right after printing the usage line.
        sys.exit(1)
    mkcdb(argvs[1])


if __name__ == "__main__":
    main()