# Dcrielaard
# 8/19/2019 - 12:13 PM
#
# Bag of Words

from nltk import word_tokenize
from collections import Counter
from nltk.corpus import stopwords

text = "Dennis is a great guy to work and live with!"

# Tokenize the lower-cased text so that differently-cased spellings of the
# same word ("Dennis" / "dennis") end up in the same bucket.
tokens = word_tokenize(text.lower())

# The sample sentence is English, so filter against the English stopword
# list. The list is converted to a set once, up front: calling
# stopwords.words() inside the comprehension would re-evaluate it for every
# token, and a set gives constant-time membership tests.
stop_words = set(stopwords.words('english'))
no_stop = [t for t in tokens if t not in stop_words]

# NOTE(review): punctuation tokens (e.g. "!") are not stopwords and are
# therefore still counted; add a `t.isalpha()` filter if that is unwanted.

# Bag of words: how many times each remaining token occurs.
BoW = Counter(no_stop)
print(BoW)
print(BoW.most_common(2))