cdb
$ cat ../tool/grep-tsv-by-cdb.py
#!/usr/bin/env python
'''
If any word in a TSV record is registered in the cdb,
print the first column of that record.
'''
import sys
import cdb
def matching_first_columns(lines, lookup):
    '''
    Yield the first column of every TSV line that has a registered field.

    lines  -- iterable of raw text lines; a trailing line terminator is
              allowed and is removed before the line is split on tabs.
    lookup -- callable taking a field and returning None when the field is
              not registered (for example the ``get`` method of a cdb reader).

    Every field, including the first column itself, is checked.  A line is
    yielded at most once no matter how many of its fields are registered.
    '''
    for line in lines:
        # Strip the terminator before splitting; otherwise the last field
        # keeps its "\n" and can never equal a stored key.
        fields = line.rstrip('\r\n').split('\t')
        # any() stops at the first hit, so a record is reported only once.
        if any(lookup(field) is not None for field in fields):
            yield fields[0]


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s cdb_file < input.tsv\n" % sys.argv[0])
        sys.exit(1)
    cdb_reader = cdb.init(sys.argv[1])
    for first_column in matching_first_columns(sys.stdin, cdb_reader.get):
        print(first_column)
#!/usr/bin/env python
import sys
import cdb
# Arguments: the cdb path first, then any number of keys to look up.
args = sys.argv
args.pop(0)  # discard the script name
reader = cdb.init(args.pop(0))  # first remaining argument is the cdb path
# Whatever is left in the argument list is treated as a key.
for lookup_key in args:
    found_value = reader.get(lookup_key)
    print("key:%s, value:%s" % (lookup_key, found_value))
#!/usr/bin/env python
import sys
import cdb
# Open the cdb named by the first argument and print one "key<TAB>value"
# line for each key returned by keys().
reader = cdb.init(sys.argv[1])
for record_key in reader.keys():
    record_value = reader.get(record_key)
    print("\t".join((record_key, record_value)))
#!/usr/bin/python
'''
needs python-cdb
'''
import cdb
import sys
def mkcdb(out_file):
    '''
    Build the cdb file out_file from tab-separated lines read on stdin.

    The key of each record is the second column when the line has one,
    otherwise the first column; the stored value is always "1".
    out_file + '.tmp' is handed to cdb.cdbmake as its second argument
    (presumably the temporary file it writes before finishing -- see the
    python-cdb documentation).

    An I/O error during the build is reported on stderr and whatever was
    added so far is still finished, as before.  Any other exception now
    propagates instead of being swallowed.
    '''
    maker = cdb.cdbmake(out_file, out_file + '.tmp')
    try:
        for line in sys.stdin:
            # rstrip() removes the newline and any other trailing whitespace.
            columns = line.rstrip().split("\t")
            key = columns[1] if len(columns) > 1 else columns[0]
            maker.add(key, "1")
    except EnvironmentError as err:
        # Catch only I/O failures; a bare except would also hide
        # KeyboardInterrupt and programming errors behind a misleading message.
        sys.stderr.write("I/O error while building %s: %s\n" % (out_file, err))
    maker.finish()
def main():
    '''
    Command-line entry point: build the cdb named by the first argument.

    When no output path is given, the usage line is written to stderr and
    the process exits with status 1.
    '''
    argvs = sys.argv
    if len(argvs) < 2:
        sys.stderr.write("usage: %s output_file\n" % (__file__))
        # Stop here: falling through would index argvs[1] and raise IndexError.
        sys.exit(1)
    mkcdb(argvs[1])


if __name__ == "__main__":
    main()