lzykevin
5/3/2020 - 4:21 AM

实现朴素贝叶斯算法

# 实现朴素贝叶斯算法
# 2019-6-22
# by llllzy
import os
# NOTE: the return value of getcwd() is discarded, so this call has no effect
# on the rest of the script.
os.getcwd()
# Switch the working directory to a hard-coded Windows path. Presumably this
# is where the local `bayes` module imported below lives -- TODO confirm.
os.chdir('E:\\python')
import numpy as np


import bayes
# Fetch the sample posts and their class labels from the local bayes module.
listOPosts, listClasses = bayes.loadDataSet()
# Build the vocabulary from all posts (presumably the set of distinct words;
# verify against bayes.createVocabList).
myVocabList = bayes.createVocabList(listOPosts)
# Convert the first post into a vector over the vocabulary. The result is not
# used by any later statement in the visible part of the script.
setWordsVec = bayes.setofWords2Vec(myVocabList, listOPosts[0])

# 4-2 Naive Bayes classifier training function
def trainNB0(trainMatrix, trainCategory):
    """Estimate per-class log word probabilities and the class-1 prior.

    A document belongs to class 1 when its label equals 1; every other label
    is treated as class 0. The returned prior is computed with that same rule,
    so it always agrees with how the documents were split.

    Args:
        trainMatrix: 2-D sequence (or array) of numeric word vectors, one row
            per training document and one column per vocabulary word.
        trainCategory: 1-D sequence of labels, one per row of trainMatrix.

    Returns:
        A tuple ``(p0Vect, p1Vect, pAbusive)`` where ``p0Vect`` / ``p1Vect``
        are NumPy arrays holding the log of the smoothed word probabilities
        for class 0 / class 1, and ``pAbusive`` is the fraction of training
        documents whose label equals 1.

    Raises:
        ValueError: if trainMatrix is empty or not 2-D, or if trainCategory
            does not contain exactly one label per document.
    """
    docs = np.asarray(trainMatrix, dtype=float)
    labels = np.asarray(trainCategory)
    if docs.ndim != 2 or docs.shape[0] == 0:
        raise ValueError(
            "trainMatrix must be a non-empty 2-D collection of word vectors")
    numTrainDocs = docs.shape[0]  # total number of documents
    if labels.shape != (numTrainDocs,):
        raise ValueError(
            "trainCategory must contain exactly one label per training document")

    isClass1 = labels == 1
    pAbusive = float(np.count_nonzero(isClass1)) / numTrainDocs

    class1Docs = docs[isClass1]
    class0Docs = docs[~isClass1]

    # Smoothing: word counts start at 1 and totals at 2.0, so no probability
    # is ever exactly zero and the logarithm below stays finite.
    p1Num = 1.0 + class1Docs.sum(axis=0)
    p0Num = 1.0 + class0Docs.sum(axis=0)
    p1Denom = 2.0 + class1Docs.sum()
    p0Denom = 2.0 + class0Docs.sum()

    p1Vect = np.log(p1Num / p1Denom)
    p0Vect = np.log(p0Num / p0Denom)
    return p0Vect, p1Vect, pAbusive


# Vectorize every post against the vocabulary to form the training matrix.
trainMat = [
    bayes.setofWords2Vec(myVocabList, post) for post in listOPosts
]

# Fit the model: trainNB0 returns the two per-class log-probability vectors
# and the class-1 prior.
p0V, p1V, pAb = trainNB0(trainMat, listClasses)

# Run the test routine provided by the bayes module.
bayes.testingNB()