import matplotlib.pyplot as plt
# Scratch arithmetic. Each bare expression below is evaluated and its value
# discarded when this file runs as a script; the value is only shown when the
# line is the last expression of an interactive cell.
0.2 * 0.95 + 0.8 * 0.32
0.2 * 0.95 / 0.85
# NOTE(review): 0.2235294117647059 looks like the value of the previous
# expression pasted back in by hand -- confirm.
(0.2235294117647059 - 0.2 * 0.95) / 0.8
# Sum of k * (1/5) for k = 2..6.
2 * (1 / 5) + 3 * (1 / 5) + 4 * (1 / 5) + 5 * (1 / 5) + 6 * (1 / 5)

# Tabulate y = 4 * k * (5/6)**k for k = 1..20 and draw it as a scatter plot.
x = list(range(1, 21))
y = [4 * k * (5 / 6) ** k for k in x]
plt.scatter(x, y)

# Look at the values for k = 6 and k = 7 (list indices are zero-based).
y[5]
y[6]
import numpy as np
def load_data(path='enron.npz'):
    """Load every array stored in a NumPy ``.npz`` archive into a dict.

    Args:
        path: Archive to read; anything ``np.load`` accepts. Defaults to
            ``'enron.npz'``, resolved against the current working
            directory, so existing zero-argument calls behave as before.

    Returns:
        A plain ``dict`` mapping each array name in the archive to the
        array stored under that name.
    """
    with np.load(path) as archive:
        # Indexing reads each array while the file is still open, so the
        # returned dict stays usable after the archive has been closed.
        return {name: archive[name] for name in archive.files}
# Read the archive and pull out the train/test splits and the vocabulary.
data = load_data()
trainFeat, trainLabels = data['trainFeat'], data['trainLabels']
testFeat, testLabels = data['testFeat'], data['testLabels']
vocab = data['vocab']

# Select every vocabulary column.
W = len(vocab)
vocabInds = np.arange(W)

# Split the training rows by label: 0 goes to "ham", 1 goes to "spam".
trainHam = trainFeat[trainLabels == 0][:, vocabInds]
trainSpam = trainFeat[trainLabels == 1][:, vocabInds]
numHam, numSpam = len(trainHam), len(trainSpam)

# Column totals within each class.
countsHam = trainHam.sum(axis=0)
countsSpam = trainSpam.sum(axis=0)

# Per-column class averages and their complements.
# Naming: p_<word><label>_, e.g. p_11_ is "word present, label 1".
# NOTE(review): these are only probabilities if the features are 0/1
# indicators -- confirm against the data set.
p_11_ = countsSpam / numSpam
p_10_ = countsHam / numHam
p_01_ = 1 - p_11_
p_00_ = 1 - p_10_

for estimate in (p_11_, p_10_, p_01_, p_00_):
    print(estimate)
# Single-word rule built on vocabulary column 179.
vocabInds = 179

# Training values of that one column, split by label (0 = ham, 1 = spam).
trainHam = trainFeat[trainLabels == 0, vocabInds]
trainSpam = trainFeat[trainLabels == 1, vocabInds]
numHam, numSpam = len(trainHam), len(trainSpam)
countsHam = trainHam.sum(axis=0)
countsSpam = trainSpam.sum(axis=0)

# Average of the column within each class.
p_11, p_10 = countsSpam / numSpam, countsHam / numHam
print(p_11)
print(p_10)

# Use the test column itself as the predicted label and report how often
# it agrees with the true label.
predictLables = testFeat[:, vocabInds]
matchedPrediction = predictLables == testLabels
numMatched = np.sum(matchedPrediction)
accuracy = numMatched / len(testLabels)
print(accuracy)
# Single-word rule built on vocabulary column 859.
vocabInds = 859

# Training values of that one column, split by label (0 = ham, 1 = spam).
trainHam = trainFeat[trainLabels == 0, vocabInds]
trainSpam = trainFeat[trainLabels == 1, vocabInds]
numHam, numSpam = len(trainHam), len(trainSpam)
countsHam = trainHam.sum(axis=0)
countsSpam = trainSpam.sum(axis=0)

# Average of the column within each class.
p_11, p_10 = countsSpam / numSpam, countsHam / numHam
print(p_11)
print(p_10)

# Compare the test column with the true labels. The reported figure is
# 1 minus the agreement rate, i.e. how often the column value and the label
# differ.
# NOTE(review): presumably this is meant as the accuracy of the inverted
# rule (predict label 1 when the word is absent); predictLables itself is
# left un-inverted -- confirm the intent.
predictLables = testFeat[:, vocabInds]
matchedPrediction = predictLables == testLabels
numMatched = np.sum(matchedPrediction)
accuracy = 1 - (numMatched / len(testLabels))
print(accuracy)
# Single-word rule built on vocabulary column 2211.
vocabInds = 2211

# Training values of that one column, split by label (0 = ham, 1 = spam).
trainHam = trainFeat[trainLabels == 0, vocabInds]
trainSpam = trainFeat[trainLabels == 1, vocabInds]
numHam, numSpam = len(trainHam), len(trainSpam)
countsHam = trainHam.sum(axis=0)
countsSpam = trainSpam.sum(axis=0)

# Average of the column within each class.
p_11, p_10 = countsSpam / numSpam, countsHam / numHam
print(p_11)
print(p_10)

# Use the test column itself as the predicted label and report how often
# it agrees with the true label.
predictLables = testFeat[:, vocabInds]
matchedPrediction = predictLables == testLabels
numMatched = np.sum(matchedPrediction)
accuracy = numMatched / len(testLabels)
print(accuracy)
# Naive Bayes restricted to three vocabulary columns.
vocabInds = [179, 859, 2211]

# Per-class estimates for the selected columns, for "word present" and
# "word absent".
p_wordsSpam = p_11_[vocabInds]
p_wordsNotspam = p_10_[vocabInds]
p_notwordsSpam = p_01_[vocabInds]
p_notwordsNotSpam = p_00_[vocabInds]

emailsWord = testFeat[:, vocabInds]
n = len(testFeat)

# For each test row, multiply the three per-word factors under each class and
# keep the class with the strictly larger product (ties go to 0). No class
# prior enters the comparison.
predictLables = []
for i in range(n):
    p_xy1 = 1
    p_xy0 = 1
    for k in range(3):
        # Explicit comparison rather than truthiness: any value that does not
        # compare equal to True is treated as "word absent".
        wordPresent = emailsWord[i, k] == True
        p_xy1 = p_xy1 * (p_wordsSpam[k] if wordPresent else p_notwordsSpam[k])
        p_xy0 = p_xy0 * (p_wordsNotspam[k] if wordPresent else p_notwordsNotSpam[k])
    predictLables.append(1 if p_xy1 > p_xy0 else 0)
# Naive Bayes over the whole vocabulary, scored in log space so that a product
# of W small factors does not underflow.
vocabInds = np.arange(W)

# Per-class estimates for "word present" and "word absent".
p_wordsSpam = p_11_[vocabInds]
p_wordsNotspam = p_10_[vocabInds]
p_notwordsSpam = p_01_[vocabInds]
p_notwordsNotSpam = p_00_[vocabInds]

emailsWord = testFeat[:, vocabInds]
n = len(testFeat)

# Take each logarithm once, up front, instead of once per (email, word) pair.
# np.log is used rather than math.log for two reasons: `math` is not imported
# in the visible part of this script, and np.log maps an estimate of exactly 0
# to -inf instead of raising, so a class that assigns zero probability to an
# observed word simply loses the comparison. The divide-by-zero warning that
# np.log(0) would emit is silenced because -inf is the intended result.
# NOTE(review): assumes the estimates lie in [0, 1], i.e. 0/1 features.
with np.errstate(divide='ignore'):
    logWordsSpam = np.log(p_wordsSpam)
    logWordsNotspam = np.log(p_wordsNotspam)
    logNotwordsSpam = np.log(p_notwordsSpam)
    logNotwordsNotSpam = np.log(p_notwordsNotSpam)

# Compare against 1 explicitly (not truthiness), matching the three-word
# classifier: any other value is treated as "word absent".
wordPresent = emailsWord == 1

# One row at a time keeps the temporaries at size W. np.where selects the
# "present" or "absent" log term per word; selecting (rather than multiplying
# by a 0/1 mask) avoids 0 * -inf = nan. No class prior enters the comparison,
# and ties (including both scores being -inf) go to class 0.
predictLables = []
for wordFlags in wordPresent:
    p_xy1 = np.where(wordFlags, logWordsSpam, logNotwordsSpam).sum()
    p_xy0 = np.where(wordFlags, logWordsNotspam, logNotwordsNotSpam).sum()
    predictLables.append(1 if p_xy1 > p_xy0 else 0)