%pylab inline
import pandas as pd
Populating the interactive namespace from numpy and matplotlib
# Load the survey results; the bare `data` expression displays the table in a notebook.
data = pd.read_csv('covid.csv')
data

# Feature matrix, one row per respondent: a constant 1 (bias input) followed by
# the 'loves going out' and 'cautious of diseases' answers.
# Built column-wise so it does not rely on the DataFrame index being 0..n-1.
Trainingdata = np.column_stack([
    np.ones(len(data), dtype=int),
    data['loves going out'].to_numpy(),
    data['cautious of diseases'].to_numpy(),
])

# Labels: +1 where 'has coronavirus' is 'Yes', -1 for every other value.
Traininganswers = np.where(data['has coronavirus'].to_numpy() == 'Yes', 1, -1)

print(Trainingdata)
print(Traininganswers)
[[1 3 1]
[1 2 5]
[1 5 3]
[1 5 2]
[1 2 3]
[1 5 3]
[1 2 4]
[1 3 1]
[1 1 4]
[1 3 2]
[1 2 3]
[1 2 2]
[1 2 1]
[1 2 5]
[1 1 3]
[1 5 2]
[1 1 3]
[1 3 5]
[1 2 1]
[1 2 5]
[1 1 3]
[1 5 2]
[1 3 4]
[1 4 3]
[1 4 1]
[1 5 4]
[1 5 3]
[1 1 2]
[1 3 3]
[1 4 1]
[1 2 5]
[1 1 3]
[1 5 2]
[1 3 2]
[1 1 5]
[1 2 4]
[1 2 5]
[1 1 1]
[1 3 4]
[1 4 3]]
[ 1 -1 1 1 -1 1 -1 1 -1 1 -1 1 1 -1 -1 1 -1 -1 1 -1 -1 1 -1 1
1 1 1 -1 1 1 -1 -1 1 1 -1 -1 -1 -1 -1 1]
# Scatter the two survey answers against each other, coloured by label.
have_covid = Traininganswers == 1  # boolean mask: rows whose label is +1
not_have_covid = ~have_covid  # complementary mask: all remaining rows

xlim(0, 6)  # x axis runs from 0 to 6
ylim(0, 6)  # y axis runs from 0 to 6
title("Covid data")
xlabel("Do you like going out?")
ylabel("Are you cautious of diseases?")

# Columns 1 and 2 are plotted as x and y; unpacking the transposed slice
# passes them to plot() as two separate sequences.
plot(*Trainingdata[have_covid, 1:3].T, 'b.', label="covid")
plot(*Trainingdata[not_have_covid, 1:3].T, 'r.', label="no covid")
legend()
grid()
def calc_error(y_arr, x_arr, w, train=False, learning_rate=None):
    """Count misclassified samples for a linear threshold unit.

    Each sample is predicted as +1 when ``x.dot(w) >= 0`` and -1 otherwise,
    then compared with its target label.

    Parameters:
        y_arr: sequence of target labels (the callers in this file pass +1/-1).
        x_arr: sequence of feature vectors, aligned with ``y_arr``; each must
            support ``.dot(w)``.
        w: weight vector. Updated IN PLACE when ``train`` is true.
        train: when true, nudge ``w`` after every misclassified sample.
        learning_rate: step size for the weight update. When omitted, the
            module-level ``eta`` is used, as in the original code.

    Returns:
        The number of misclassified samples, as a float.
    """
    if train and learning_rate is None:
        learning_rate = eta  # fall back to the global step size

    err_sum = 0.0
    for target, features in zip(y_arr, x_arr):
        y_out = 1 if features.dot(w) >= 0.0 else -1  # thresholded prediction
        err = target - y_out  # zero when the prediction matches the label
        if err:
            err_sum += 1  # each wrong sample counts once, whatever its size
            if train:
                # Move the weights toward the correct side for this sample.
                w += learning_rate * err * features
    return err_sum
def train(y, x, w):
    """Run one training pass over the samples and return the error count.

    Delegates to ``calc_error`` with ``train=True``, so ``w`` is adjusted
    by that call.
    """
    error_count = calc_error(y, x, w, train=True)
    return error_count
def test(y, x, w):
    """Return the error count for the samples without training.

    Delegates to ``calc_error`` with its default ``train=False``.
    """
    error_count = calc_error(y, x, w)
    return error_count
eta = 0.1  # step size for each weight nudge (read by calc_error)
w = rand(Trainingdata.shape[1]) * .05  # small random starting weights, one per column

train_iters = 50
err_train = []
err_test = []

# NOTE(review): only the first 6 rows are used for training and all remaining
# rows for testing — confirm this split is intended.
train_x, train_y = Trainingdata[:6], Traininganswers[:6]
test_x, test_y = Trainingdata[6:], Traininganswers[6:]

for epoch in range(train_iters):
    # Train first, then score the held-out rows with the updated weights.
    err_train.append(train(train_y, train_x, w))
    err_test.append(test(test_y, test_x, w))
# Plot the error counts recorded at every training iteration.
iterations = range(train_iters)
plot(iterations, err_train, 'b', label="Train")
plot(iterations, err_test, 'r', label="Test")
title("Training and testing error.")
xlabel("Iteration")
ylabel("Error")
legend()
grid()
# Evaluate the learned linear score on a 100x100 grid that extends one unit
# beyond the observed range of each plotted feature.
x_lo, x_hi = Trainingdata[:, 1].min() - 1, Trainingdata[:, 1].max() + 1
y_lo, y_hi = Trainingdata[:, 2].min() - 1, Trainingdata[:, 2].max() + 1
xx, yy = meshgrid(linspace(x_lo, x_hi, 100), linspace(y_lo, y_hi, 100))
z = w[0] + xx * w[1] + yy * w[2]

# Fill the two sides of the z == 0 boundary. The band defaults to +/-10, but is
# widened when the scores exceed it; otherwise contourf would leave those
# grid cells unfilled.
z_peak = float(np.abs(z).max())
z_bound = z_peak + 1.0 if z_peak >= 10.0 else 10.0
contourf(xx, yy, z, levels=[-z_bound, 0, z_bound])

title("Covid data")
xlabel("Do you like going out?")
ylabel("Are you cautious of diseases?")
plot(Trainingdata[have_covid, 1], Trainingdata[have_covid, 2], 'b.', label="covid")
plot(Trainingdata[not_have_covid, 1], Trainingdata[not_have_covid, 2], 'r.', label="no covid")
legend()
grid()