spiderChow
11/28/2017 - 8:04 AM

how to use stanford-ner.jar from python

How to use stanford-ner.jar (Stanford NER) from Python via JPype.

import os
import time

from jpype import *

# Jar files to put on the JVM class path.
jars = ["stanford-ner.jar"]
jvm_path = getDefaultJVMPath()
# Join the entries with os.pathsep rather than a hard-coded ":": the JVM
# expects the platform separator (";" on Windows, ":" on POSIX), so a literal
# ":" yields a broken class path as soon as a second jar is listed on Windows.
jvm_cp = "-Djava.class.path={}".format(os.pathsep.join(jars))
startJVM(jvm_path, jvm_cp)

# Look up the Java classes used below; this is done after startJVM because the
# lookups go through the running JVM's class path.
CRFClassifier = JClass("edu.stanford.nlp.ie.crf.CRFClassifier")
StringUtils = JClass("edu.stanford.nlp.util.StringUtils")
SeqClassifierFlags = JClass("edu.stanford.nlp.sequences.SeqClassifierFlags")

# Build a classifier instance from a set of flags.  The Java equivalent is:
#
#     Properties props = StringUtils.argsToProperties(args);
#     SeqClassifierFlags flags = new SeqClassifierFlags(props);
#     CRFClassifier<CoreLabel> crf = chooseCRFClassifier(flags);
#
# The way the arguments are passed imitates the main function in
# edu/stanford/nlp/ie/crf/CRFClassifier.java.
# StringUtils.stringToProperties converts a string into a Properties object,
# so the command-line options can be written as a single string.
# The available options are described in
# edu/stanford/nlp/sequences/SeqClassifierFlags.java; for further usage
# examples see edu/stanford/nlp/ie/demo/NERDemo.java.

# Comma-separated "key=value" options, written as two adjacent literals that
# concatenate into one string.
props = StringUtils.stringToProperties(
    " tokenizerFactory=edu.stanford.nlp.process.WhitespaceTokenizer,"
    " tokenizerOptions=tokenizeNLs=true"
)
flags = SeqClassifierFlags(props)
crf = CRFClassifier(flags)

# Load the serialized model into the classifier created above.
crf.loadClassifierNoExceptions("ner-model.ser.gz", props)

# Positional arguments: input text, output format name, and a boolean that
# presumably maps to preserveSpacing (confirm against the CRFClassifier Javadoc).
result = crf.classifyToString(
    "show the top #ticket# in this year.",
    "slashTags",
    False,
)
print(result)