import numpy as np
import pandas as pd
# Load bank_data.csv: the first CSV column becomes the index and "?" cells are
# parsed as missing values (NaN).
DATA_PATH = "C:/Users/Umaima/Untitled Folder/bank_data.csv"
df = pd.read_csv(DATA_PATH, index_col=0, na_values=["?"])
df.shape

# Shuffle the rows once. Everything below (target AND features) is derived from
# this same shuffled frame so that row i of the features always belongs to
# row i of the target.
vs = df.reindex(np.random.permutation(df.index))
vs.head(10)
vs_names = vs.columns.values  # column names of the raw frame, `pep` included
vs_names

# `pep` is the class label the models below are trained to predict.
vs_target = vs["pep"]
df.dtypes

# One-hot encode the predictors only.
# Two fixes relative to the previous `pd.get_dummies(df)`:
#   * encode the shuffled frame, not the unshuffled `df` -- train_test_split
#     pairs X and y by row position, so mixing the two orders attached each
#     label to a different record;
#   * drop `pep` first -- leaving it in would hand the label (or its dummy
#     columns) to the classifiers as an input feature.
vs = pd.get_dummies(vs.drop(columns=["pep"]))
vs.head(10)
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state makes the split
# itself repeatable for a given input order.
_split = train_test_split(vs, vs_target, test_size=0.2, random_state=33)
vs_train, vs_test, vs_target_train, vs_target_test = _split

# Quick look at both partitions (the bare expressions only render in a notebook).
print(vs_test.shape)
vs_test.head(5)
print(vs_train.shape)
vs_train.head(5)

# Wider lines and no scientific notation when NumPy arrays are printed.
np.set_printoptions(linewidth=120, suppress=True)
vs_train.head(10)
vs_test
from sklearn import preprocessing

# Learn the min-max scaling from the training rows only, then apply that same
# fitted scaler to both partitions.
min_max_scaler = preprocessing.MinMaxScaler()
min_max_scaler.fit(vs_train)
vs_train_norm, vs_test_norm = (
    min_max_scaler.transform(part) for part in (vs_train, vs_test)
)

# Print NumPy arrays with two decimals, 80-column lines, no scientific notation.
np.set_printoptions(suppress=True, linewidth=80, precision=2)
vs_train_norm[:5]
vs_test_norm[:5]
from sklearn import neighbors, tree, naive_bayes
from sklearn.metrics import classification_report, confusion_matrix

# Baseline k-nearest-neighbours model: k = 5 with distance-weighted votes,
# trained and evaluated on the min-max scaled features.
n_neighbors = 5
knnclf = neighbors.KNeighborsClassifier(n_neighbors, weights='distance')
knnclf.fit(vs_train_norm, vs_target_train)

# Predictions for the held-out rows, followed by the per-class report.
knnpreds_test = knnclf.predict(vs_test_norm)
print(knnpreds_test)
print(classification_report(vs_target_test, knnpreds_test))

knncm = confusion_matrix(vs_target_test, knnpreds_test)
print("KNN confusion Matrix: ")
print(knncm)

# Mean accuracy on the test partition, then on the training partition.
print(f"KNN Clasification Score test norm: {knnclf.score(vs_test_norm, vs_target_test)}")
print(f"KNN Clasification Score train norm: {knnclf.score(vs_train_norm, vs_target_train)}")
# Baseline decision tree (entropy criterion, min_samples_split=3), trained on
# the unscaled feature frames.
treeclf = tree.DecisionTreeClassifier(criterion='entropy', min_samples_split=3)
treeclf = treeclf.fit(vs_train, vs_target_train)
treepreds_test = treeclf.predict(vs_test)
print(treepreds_test)

# Mean accuracy on the test partition, then on the training partition.
print(treeclf.score(vs_test, vs_target_test))
print(treeclf.score(vs_train, vs_target_train))
print(classification_report(vs_target_test, treepreds_test))

# Rows/columns are ordered YES then NO (not the default sorted label order).
treecm = confusion_matrix(vs_target_test, treepreds_test, labels=['YES','NO'])
print(treecm)

# pyplot replaces the legacy `pylab` module; it provides every call used below.
import matplotlib.pyplot as plt

# `%matplotlib inline` is IPython-only syntax and a SyntaxError in plain
# Python. Request the inline backend through the API when running under
# IPython/Jupyter and skip it otherwise.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass

# Heat-map view of the confusion matrix.
plt.matshow(treecm)
plt.title('Confusion matrix')
plt.colorbar()
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()
def _run_knn(k, weights, score_suffix=""):
    """Fit and report one k-nearest-neighbours configuration.

    Trains on the scaled training features, predicts the scaled test
    features, and prints the classification report, the confusion matrix and
    the mean accuracy on the test and training partitions.

    Args:
        k: Number of neighbours passed to ``KNeighborsClassifier``.
        weights: Vote weighting scheme, ``'distance'`` or ``'uniform'``.
        score_suffix: Text appended to the "Score Test"/"Score Train" labels.

    Returns:
        Tuple ``(classifier, test_predictions, confusion_matrix)``.
    """
    clf = neighbors.KNeighborsClassifier(k, weights=weights)
    clf.fit(vs_train_norm, vs_target_train)
    preds = clf.predict(vs_test_norm)
    print(classification_report(vs_target_test, preds))
    cm = confusion_matrix(vs_target_test, preds)
    print("Confusion Martix:")
    print(cm)
    print("Score Test" + score_suffix)
    print(clf.score(vs_test_norm, vs_target_test))
    print("Score Train" + score_suffix)
    print(clf.score(vs_train_norm, vs_target_train))
    return clf, preds, cm


# (k, weights, label suffix) for each experiment, in the order they were
# originally run.
# NOTE(review): the "Martix" typo and the trailing colon on the k=20 labels
# are reproduced verbatim so the printed output stays comparable with earlier
# runs; clean both up together if nobody depends on the old text.
_KNN_RUNS = (
    (11, 'distance', ''),
    (11, 'uniform', ''),
    (8, 'distance', ''),
    (8, 'uniform', ''),
    (20, 'distance', ':'),
    (20, 'uniform', ':'),
)

# The loop rebinds the module-level names on every pass, so after it finishes
# they describe the last configuration (k=20, uniform weights).
for n_neighbors, _knn_weights, _knn_suffix in _KNN_RUNS:
    knnclf, knnpreds_test, knncm = _run_knn(n_neighbors, _knn_weights, _knn_suffix)
def _run_tree(min_samples_split):
    """Fit, report and plot one entropy-criterion decision tree.

    Trains on the unscaled training features, prints the mean accuracy on the
    test and training partitions and the classification report, then shows
    the confusion matrix as a heat map.

    Args:
        min_samples_split: Value passed to ``DecisionTreeClassifier``.

    Returns:
        Tuple ``(classifier, test_predictions, confusion_matrix)``.
    """
    clf = tree.DecisionTreeClassifier(criterion='entropy', min_samples_split=min_samples_split)
    clf = clf.fit(vs_train, vs_target_train)
    preds = clf.predict(vs_test)
    print("Score Test: " + str(clf.score(vs_test, vs_target_test)))
    print("Score Train: " + str(clf.score(vs_train, vs_target_train)))
    print(classification_report(vs_target_test, preds))
    # Rows/columns are ordered YES then NO (not the default sorted label order).
    cm = confusion_matrix(vs_target_test, preds, labels=['YES','NO'])
    plt.matshow(cm)
    plt.title('Confusion matrix')
    plt.colorbar()
    plt.ylabel('Actual')
    plt.xlabel('Predicted')
    plt.show()
    return clf, preds, cm


# `%matplotlib inline` is IPython-only syntax and a SyntaxError in plain
# Python. Request the inline backend once through the API when running under
# IPython/Jupyter and skip it otherwise.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass

# Try progressively larger min_samples_split values. The module-level names
# are rebound on every pass, so afterwards they describe the last run (18).
for _min_split in (8, 12, 18):
    treeclf, treepreds_test, treecm = _run_tree(_min_split)