wut0n9
1/9/2019 - 11:48 AM

compute_idf_df_example.py

"""Build per-document term counts, corpus document frequencies, and IDF values.

For every document in ``self.corpus`` this fragment:
  * appends the document's length to ``self.doc_len``;
  * appends a ``{term: occurrences in this document}`` dict to ``self.f``;
  * adds one to ``self.df[term]`` for each distinct term in the document.

Afterwards ``self.idf[term]`` is set for every term in ``self.df`` to
``log(corpus_size - df + 0.5) - log(df + 0.5)``.
"""
for doc in self.corpus:
    self.doc_len.append(len(doc))

    # Raw occurrence count of each term within this single document.
    term_counts = {}
    for term in doc:
        term_counts[term] = term_counts.get(term, 0) + 1
    self.f.append(term_counts)

    # A term contributes once per document to its document frequency,
    # no matter how many times it occurs inside that document.
    for term in term_counts:
        if term in self.df:
            self.df[term] += 1
        else:
            self.df[term] = 1

# NOTE(review): presumably self.corpus_size == len(self.corpus) -- confirm
# where it is set; math.log raises ValueError if corpus_size - df + 0.5 <= 0.
# The resulting value is negative whenever df exceeds corpus_size - df.
for term, doc_freq in iteritems(self.df):
    log_absent = math.log(self.corpus_size - doc_freq + 0.5)
    log_present = math.log(doc_freq + 0.5)
    self.idf[term] = log_absent - log_present