import numpy as np
import pandas as pd
df = pd.read_csv("C:/Users/Umaima/Untitled Folder/bank_data.csv", index_col=0, na_values=["?"])
Execution error
FileNotFoundError: [Errno 2] No such file or directory: 'C:/Users/Umaima/Untitled Folder/bank_data.csv'
df.shape
vs = df.reindex(np.random.permutation(df.index))
vs.head(10)
vs_names = vs.columns.values
vs_names
vs_target = vs.pep
df.dtypes
vs = pd.get_dummies(df) #generating dummy data
vs.head(10)
from sklearn.model_selection import train_test_split
vs_train, vs_test, vs_target_train, vs_target_test = train_test_split(vs, vs_target, test_size=0.2, random_state=33)
print (vs_test.shape)
vs_test[0:5]
(120, 21)
print (vs_train.shape)
vs_train[0:5]
(480, 21)
np.set_printoptions(suppress=True, linewidth=120)
vs_train.head(10)
vs_test
from sklearn import preprocessing
min_max_scaler = preprocessing.MinMaxScaler().fit(vs_train)
vs_train_norm = min_max_scaler.transform(vs_train)
vs_test_norm = min_max_scaler.transform(vs_test) #preprocessing
np.set_printoptions(precision=2, linewidth=80, suppress=True) #setting precision to 2
vs_train_norm[0:5]
vs_test_norm[0:5]
from sklearn import neighbors, tree, naive_bayes
n_neighbors = 5 #checking for k 5 intially
knnclf = neighbors.KNeighborsClassifier(n_neighbors, weights='distance')
knnclf.fit(vs_train_norm, vs_target_train)
knnpreds_test = knnclf.predict(vs_test_norm)
print (knnpreds_test)
['YES' 'NO' 'YES' 'YES' 'NO' 'NO' 'NO' 'NO' 'NO' 'YES' 'NO' 'NO' 'NO' 'NO'
'YES' 'NO' 'YES' 'YES' 'NO' 'NO' 'YES' 'YES' 'NO' 'NO' 'NO' 'YES' 'NO' 'NO'
'NO' 'NO' 'NO' 'YES' 'NO' 'YES' 'NO' 'NO' 'YES' 'YES' 'NO' 'YES' 'YES' 'YES'
'NO' 'NO' 'YES' 'YES' 'NO' 'YES' 'NO' 'NO' 'NO' 'NO' 'NO' 'YES' 'YES' 'YES'
'NO' 'YES' 'YES' 'YES' 'NO' 'NO' 'YES' 'YES' 'NO' 'NO' 'YES' 'YES' 'NO' 'NO'
'NO' 'YES' 'NO' 'YES' 'YES' 'NO' 'NO' 'YES' 'YES' 'YES' 'NO' 'NO' 'YES' 'NO'
'NO' 'NO' 'NO' 'YES' 'YES' 'YES' 'NO' 'YES' 'NO' 'NO' 'NO' 'NO' 'YES' 'YES'
'YES' 'NO' 'YES' 'YES' 'NO' 'NO' 'YES' 'YES' 'NO' 'NO' 'NO' 'YES' 'YES' 'YES'
'YES' 'NO' 'NO' 'NO' 'YES' 'YES' 'NO' 'NO']
from sklearn.metrics import classification_report
print(classification_report(vs_target_test, knnpreds_test))
precision recall f1-score support
NO 0.58 0.55 0.56 69
YES 0.43 0.45 0.44 51
accuracy 0.51 120
macro avg 0.50 0.50 0.50 120
weighted avg 0.51 0.51 0.51 120
from sklearn.metrics import confusion_matrix
knncm = confusion_matrix(vs_target_test, knnpreds_test)
print ("KNN confusion Matrix: ")
print (knncm)
KNN confusion Matrix:
[[38 31]
[28 23]]
print ("KNN Clasification Score test norm: "+str(knnclf.score(vs_test_norm, vs_target_test)))
KNN Clasification Score test norm: 0.5083333333333333
print ("KNN Clasification Score train norm: "+str(knnclf.score(vs_train_norm, vs_target_train)))
KNN Clasification Score train norm: 1.0
treeclf = tree.DecisionTreeClassifier(criterion='entropy', min_samples_split=3)
treeclf = treeclf.fit(vs_train, vs_target_train)
treepreds_test = treeclf.predict(vs_test)
print (treepreds_test)
['YES' 'NO' 'YES' 'NO' 'NO' 'NO' 'YES' 'NO' 'NO' 'NO' 'NO' 'YES' 'YES' 'NO'
'YES' 'NO' 'YES' 'YES' 'YES' 'YES' 'YES' 'NO' 'NO' 'YES' 'YES' 'YES' 'YES'
'NO' 'YES' 'YES' 'NO' 'YES' 'YES' 'YES' 'NO' 'YES' 'YES' 'YES' 'NO' 'NO' 'NO'
'NO' 'YES' 'NO' 'NO' 'YES' 'YES' 'NO' 'YES' 'YES' 'NO' 'YES' 'YES' 'NO' 'YES'
'YES' 'YES' 'YES' 'NO' 'YES' 'NO' 'YES' 'YES' 'YES' 'YES' 'YES' 'NO' 'YES'
'YES' 'NO' 'YES' 'NO' 'NO' 'NO' 'YES' 'NO' 'NO' 'YES' 'YES' 'YES' 'NO' 'NO'
'YES' 'NO' 'NO' 'YES' 'NO' 'YES' 'YES' 'YES' 'NO' 'NO' 'YES' 'NO' 'YES' 'NO'
'YES' 'NO' 'NO' 'NO' 'NO' 'YES' 'YES' 'YES' 'NO' 'YES' 'NO' 'YES' 'YES' 'NO'
'YES' 'NO' 'NO' 'NO' 'NO' 'YES' 'NO' 'NO' 'NO' 'YES']
print (treeclf.score(vs_test, vs_target_test))
0.49166666666666664
print (treeclf.score(vs_train, vs_target_train))
0.975
print(classification_report(vs_target_test, treepreds_test))
precision recall f1-score support
NO 0.57 0.46 0.51 69
YES 0.42 0.53 0.47 51
accuracy 0.49 120
macro avg 0.50 0.50 0.49 120
weighted avg 0.51 0.49 0.49 120
treecm = confusion_matrix(vs_target_test, treepreds_test, labels=['YES','NO'])
print (treecm)
[[27 24]
[37 32]]
import pylab as plt
%matplotlib inline
plt.matshow(treecm)
plt.title('Confusion matrix')
plt.colorbar()
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()
n_neighbors = 11
knnclf = neighbors.KNeighborsClassifier(n_neighbors, weights='distance')
knnclf.fit(vs_train_norm, vs_target_train)
knnpreds_test = knnclf.predict(vs_test_norm)
print(classification_report(vs_target_test, knnpreds_test))
knncm = confusion_matrix(vs_target_test, knnpreds_test)
print("Confusion Martix:")
print(knncm)
print("Score Test")
print (knnclf.score(vs_test_norm, vs_target_test))
print("Score Train")
print (knnclf.score(vs_train_norm, vs_target_train))
precision recall f1-score support
NO 0.63 0.62 0.63 69
YES 0.50 0.51 0.50 51
accuracy 0.57 120
macro avg 0.57 0.57 0.57 120
weighted avg 0.58 0.57 0.58 120
Confusion Martix:
[[43 26]
[25 26]]
Score Test
0.575
Score Train
1.0
n_neighbors = 11
knnclf = neighbors.KNeighborsClassifier(n_neighbors, weights='uniform')
knnclf.fit(vs_train_norm, vs_target_train)
knnpreds_test = knnclf.predict(vs_test_norm)
print(classification_report(vs_target_test, knnpreds_test))
knncm = confusion_matrix(vs_target_test, knnpreds_test)
print("Confusion Martix:")
print(knncm)
print("Score Test")
print (knnclf.score(vs_test_norm, vs_target_test))
print("Score Train")
print (knnclf.score(vs_train_norm, vs_target_train))
precision recall f1-score support
NO 0.60 0.70 0.64 69
YES 0.47 0.37 0.42 51
accuracy 0.56 120
macro avg 0.54 0.53 0.53 120
weighted avg 0.55 0.56 0.55 120
Confusion Martix:
[[48 21]
[32 19]]
Score Test
0.5583333333333333
Score Train
0.625
n_neighbors = 8
knnclf = neighbors.KNeighborsClassifier(n_neighbors, weights='distance')
knnclf.fit(vs_train_norm, vs_target_train)
knnpreds_test = knnclf.predict(vs_test_norm)
print(classification_report(vs_target_test, knnpreds_test))
knncm = confusion_matrix(vs_target_test, knnpreds_test)
print("Confusion Martix:")
print(knncm)
print("Score Test")
print (knnclf.score(vs_test_norm, vs_target_test))
print("Score Train")
print (knnclf.score(vs_train_norm, vs_target_train))
precision recall f1-score support
NO 0.63 0.61 0.62 69
YES 0.49 0.51 0.50 51
accuracy 0.57 120
macro avg 0.56 0.56 0.56 120
weighted avg 0.57 0.57 0.57 120
Score Test
0.5666666666666667
Score Train
1.0
0.5666666666666667
0.975
precision recall f1-score support
NO 0.64 0.57 0.60 69
YES 0.49 0.57 0.53 51
accuracy 0.57 120
macro avg 0.57 0.57 0.56 120
weighted avg 0.58 0.57 0.57 120
n_neighbors = 8
knnclf = neighbors.KNeighborsClassifier(n_neighbors, weights='uniform')
knnclf.fit(vs_train_norm, vs_target_train)
knnpreds_test = knnclf.predict(vs_test_norm)
print(classification_report(vs_target_test, knnpreds_test))
knncm = confusion_matrix(vs_target_test, knnpreds_test)
print("Confusion Martix:")
print(knncm)
print("Score Test")
print (knnclf.score(vs_test_norm, vs_target_test))
print("Score Train")
print (knnclf.score(vs_train_norm, vs_target_train))
precision recall f1-score support
NO 0.57 0.75 0.65 69
YES 0.41 0.24 0.30 51
accuracy 0.53 120
macro avg 0.49 0.49 0.47 120
weighted avg 0.50 0.53 0.50 120
Confusion Martix:
[[52 17]
[39 12]]
Score Test
0.5333333333333333
Score Train
0.65625
n_neighbors =20
knnclf = neighbors.KNeighborsClassifier(n_neighbors, weights='distance')
knnclf.fit(vs_train_norm, vs_target_train)
knnpreds_test = knnclf.predict(vs_test_norm)
print(classification_report(vs_target_test, knnpreds_test))
knncm = confusion_matrix(vs_target_test, knnpreds_test)
print("Confusion Martix:")
print(knncm)
print("Score Test:")
print (knnclf.score(vs_test_norm, vs_target_test))
print("Score Train:")
print (knnclf.score(vs_train_norm, vs_target_train))
precision recall f1-score support
NO 0.61 0.64 0.62 69
YES 0.48 0.45 0.46 51
accuracy 0.56 120
macro avg 0.55 0.54 0.54 120
weighted avg 0.56 0.56 0.56 120
Confusion Martix:
[[44 25]
[28 23]]
Score Test:
0.5583333333333333
Score Train:
1.0
n_neighbors =20
knnclf = neighbors.KNeighborsClassifier(n_neighbors, weights='uniform')
knnclf.fit(vs_train_norm, vs_target_train)
knnpreds_test = knnclf.predict(vs_test_norm)
print(classification_report(vs_target_test, knnpreds_test))
knncm = confusion_matrix(vs_target_test, knnpreds_test)
print("Confusion Martix:")
print(knncm)
print("Score Test:")
print (knnclf.score(vs_test_norm, vs_target_test))
print("Score Train:")
print (knnclf.score(vs_train_norm, vs_target_train))
precision recall f1-score support
NO 0.56 0.71 0.63 69
YES 0.39 0.25 0.31 51
accuracy 0.52 120
macro avg 0.48 0.48 0.47 120
weighted avg 0.49 0.52 0.49 120
Confusion Martix:
[[49 20]
[38 13]]
Score Test:
0.5166666666666667
Score Train:
0.58125
treeclf = tree.DecisionTreeClassifier(criterion='entropy', min_samples_split=8)
treeclf = treeclf.fit(vs_train, vs_target_train)
treepreds_test = treeclf.predict(vs_test)
print ("Score Test: "+str(treeclf.score(vs_test, vs_target_test)))
print ("Score Train: "+str(treeclf.score(vs_train, vs_target_train)))
print(classification_report(vs_target_test, treepreds_test))
treecm = confusion_matrix(vs_target_test, treepreds_test, labels=['YES','NO'])
%matplotlib inline
plt.matshow(treecm)
plt.title('Confusion matrix')
plt.colorbar()
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()
Score Test: 0.5083333333333333
Score Train: 0.8833333333333333
precision recall f1-score support
NO 0.57 0.58 0.58 69
YES 0.42 0.41 0.42 51
accuracy 0.51 120
macro avg 0.50 0.50 0.50 120
weighted avg 0.51 0.51 0.51 120
treeclf = tree.DecisionTreeClassifier(criterion='entropy', min_samples_split=12)
treeclf = treeclf.fit(vs_train, vs_target_train)
treepreds_test = treeclf.predict(vs_test)
print ("Score Test: "+str(treeclf.score(vs_test, vs_target_test)))
print ("Score Train: "+str(treeclf.score(vs_train, vs_target_train)))
print(classification_report(vs_target_test, treepreds_test))
treecm = confusion_matrix(vs_target_test, treepreds_test, labels=['YES','NO'])
%matplotlib inline
plt.matshow(treecm)
plt.title('Confusion matrix')
plt.colorbar()
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()
Score Test: 0.5416666666666666
Score Train: 0.8458333333333333
precision recall f1-score support
NO 0.59 0.64 0.62 69
YES 0.46 0.41 0.43 51
accuracy 0.54 120
macro avg 0.53 0.52 0.52 120
weighted avg 0.54 0.54 0.54 120
treeclf = tree.DecisionTreeClassifier(criterion='entropy', min_samples_split=18)
treeclf = treeclf.fit(vs_train, vs_target_train)
treepreds_test = treeclf.predict(vs_test)
print ("Score Test: "+str(treeclf.score(vs_test, vs_target_test)))
print ("Score Train: "+str(treeclf.score(vs_train, vs_target_train)))
print(classification_report(vs_target_test, treepreds_test))
treecm = confusion_matrix(vs_target_test, treepreds_test, labels=['YES','NO'])
%matplotlib inline
plt.matshow(treecm)
plt.title('Confusion matrix')
plt.colorbar()
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()
Score Test: 0.5333333333333333
Score Train: 0.7791666666666667
precision recall f1-score support
NO 0.58 0.65 0.62 69
YES 0.44 0.37 0.40 51
accuracy 0.53 120
macro avg 0.51 0.51 0.51 120
weighted avg 0.52 0.53 0.53 120