# Implementation of the naive Bayes algorithm
# 2019-6-22
# by llllzy
import os
# The return value is discarded, so this call has no effect when the file is
# run as a script (it only echoes the directory in an interactive session).
os.getcwd()
# Switch the working directory to a hard-coded Windows path before importing
# `bayes` -- presumably that is where the local bayes.py lives; TODO confirm.
os.chdir('E:\\python')
import numpy as np
import bayes
# loadDataSet() is unpacked into two values -- presumably the sample posts and
# their class labels; verify against the bayes module.
listOPosts, listClasses = bayes.loadDataSet()
# Vocabulary derived from all loaded posts (exact form is defined by bayes).
myVocabList = bayes.createVocabList(listOPosts)
# Vectorize the first post against the vocabulary; the result is not used
# again in the code visible here.
setWordsVec = bayes.setofWords2Vec(myVocabList, listOPosts[0])
# 4-2 Naive Bayes classifier training function
def trainNB0(trainMatrix, trainCategory):
    """Train a two-class naive Bayes model from word-count vectors.

    Args:
        trainMatrix: Sequence of documents, each a numeric vector with one
            entry per vocabulary word. All vectors must have the same length
            as the first one.
        trainCategory: Sequence of class labels, one per document. A label
            equal to 1 assigns the document to class 1; any other value
            assigns it to class 0. Labels are summed to compute the class-1
            prior, so they are expected to be 0 or 1.

    Returns:
        A tuple ``(p0Vect, p1Vect, pAbusive)`` where ``p0Vect`` and ``p1Vect``
        are NumPy arrays holding the natural log of the smoothed per-word
        frequency for class 0 and class 1, and ``pAbusive`` is the fraction
        of documents labelled 1.

    Raises:
        IndexError: If ``trainMatrix`` is an empty list, or ``trainCategory``
            has fewer entries than ``trainMatrix``.
    """
    numTrainDocs = len(trainMatrix)  # total number of documents
    numWords = len(trainMatrix[0])   # total number of words (vector length)
    pAbusive = sum(trainCategory) / float(numTrainDocs)

    # Start every word count at 1 and every denominator at 2.0 so that no
    # word ends up with zero frequency (which would make np.log return -inf).
    p0Num = np.ones(numWords)
    p1Num = np.ones(numWords)
    p0Denom = 2.0
    p1Denom = 2.0

    for i, docVec in enumerate(trainMatrix):
        if trainCategory[i] == 1:
            p1Num += docVec
            p1Denom += sum(docVec)
        else:
            p0Num += docVec
            p0Denom += sum(docVec)

    # Return log-frequencies rather than raw frequencies.
    p1Vect = np.log(p1Num / p1Denom)
    p0Vect = np.log(p0Num / p0Denom)
    return p0Vect, p1Vect, pAbusive
# Build the training matrix: one word vector per post, each produced by
# bayes.setofWords2Vec against the shared vocabulary.
trainMat = []
for postinDoc in listOPosts:
    trainMat.append(bayes.setofWords2Vec(myVocabList, postinDoc))

# Fit the model: per-class log word frequencies plus the class-1 prior.
p0V, p1V, pAb = trainNB0(trainMat, listClasses)

# Run the test routine provided by the bayes module.
bayes.testingNB()