Dcrielaard
8/20/2019 - 11:38 AM

Gensim NLP

from gensim.corpora.dictionary import Dictionary

# Create a Dictionary (token -> integer id mapping) from <TOKEN>, a list of
# tokenized documents (each document is itself a list of token strings): dictionary
dictionary = Dictionary(<TOKEN>)

# Look up the integer id assigned to the token '<WORD>' (None if the token is not in the dictionary): <word>
<word> = dictionary.token2id.get('<WORD>')

# Print the token stored under id <word> (maps the id back to its token string)
print(dictionary.get(<word>))

# Create a bag-of-words corpus: one list of (token_id, count) pairs per document in <TOKEN>
corpus = [dictionary.doc2bow(<iterable>) for <iterable> in <TOKEN>]