Associations Measures NLTK
bigram_measures = nltk.collocations.BigramAssocMeasures()
trigram_measures = nltk.collocations.TrigramAssocMeasures()
# change this to read in your data
finder = BigramCollocationFinder.from_words(
nltk.corpus.genesis.words('C:\CisionWikiSubject\data.txt'))
# only bigrams that appear 3+ times
finder.apply_freq_filter(2)
# return the 10 n-grams with the highest PMI
x = finder.nbest(bigram_measures.pmi, 20)