yuliji
8/23/2017 - 8:53 AM

language identification langid

language identification langid

import langid
>>> langid.classify("I do not speak english")
('en', 0.57133487679900674)
>>> langid.set_languages(['de','fr','it'])
>>> langid.classify("I do not speak english")
('it', 0.99999835791478453)
>>> langid.set_languages(['en','it'])
>>> langid.classify("I do not speak english")
('en', 0.99176190378750373)


>> from langid.langid import LanguageIdentifier, model
>> identifier = LanguageIdentifier.from_modelstring(model, norm_probs=True)
>> identifier.classify("This is a test")
('en', 0.9999999909903544)