import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn.preprocessing
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
# Load the semicolon-separated file; its first row supplies the column names.
df = pd.read_csv("bank.csv", sep=";", header=0)

# Notebook-style previews: as bare expressions they only render in an
# interactive session and print nothing when the file runs as a script.
df.head(10)
df.describe()

# Report the table's dimensions followed by its column names.
print(df.shape)
print(df.columns.tolist())
import seaborn as sns
# Pairwise-relationship overview restricted to the columns listed here.
pairplot_columns = [
    'age', 'job', 'marital', 'education', 'default', 'balance',
    'housing', 'loan', 'contact', 'day', 'month', 'duration',
    'campaign', 'pdays', 'previous', 'poutcome', 'y',
]
sns.pairplot(df[pairplot_columns])
df.columns

# Columns are removed by POSITION, so this step depends on the column order
# of the loaded file staying exactly as it was when the indices were chosen.
dropped_positions = [0, 3, 7, 8, 9, 10, 11, 12, 13, 14]
df_new = df.drop(columns=df.columns[dropped_positions])
df_new

# One-hot encode the categorical predictors, keeping every level for now.
data = pd.get_dummies(
    df_new,
    columns=['job', 'marital', 'default', 'housing', 'poutcome'],
    drop_first=False,
)

# Three of the generated columns are then discarded, again by position.
# NOTE(review): which dummy levels sit at 13/22/24 depends on the category
# values present in the data - confirm against data.columns before relying
# on it.
discarded_dummies = data.columns[[13, 22, 24]]
discarded_dummies
len(data.columns)
data = data.drop(columns=discarded_dummies)
# The target is whatever column sits at position 1 of `data`; every other
# column becomes a predictor.
# NOTE(review): presumably position 1 is the 'y' label - verify.
Y = data.iloc[:, 1]
Y.head()
X = data.drop(columns=data.columns[[1]])
X.head()

# Hold out a test partition (default split size), seeded for reproducibility.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=0)

# Fit a logistic-regression classifier on the training partition.
classifier = LogisticRegression(solver="lbfgs", random_state=0)
classifier.fit(X_train, Y_train)

# Predict the held-out rows, then compute accuracy twice: once by hand as the
# fraction of matching labels, and once through the estimator's own scorer.
predicted_y = classifier.predict(X_test)
(Y_test == predicted_y).sum() / len(Y_test)
classifier.score(X_test, Y_test)
# Majority-class baseline: the share of rows carrying the most frequent label
# among the two most frequent labels in column "y".
#
# value_counts() returns a Series indexed by the label values and sorted by
# descending frequency. Subscripting it with the integers 0/1 relied on the
# deprecated "integer key falls back to position" behaviour, which emits a
# FutureWarning on recent pandas and raises KeyError once the fallback is
# removed. .iloc selects by position explicitly, and the counts are computed
# once rather than on every use.
class_counts = df["y"].value_counts()
most_common = class_counts.iloc[0]
runner_up = class_counts.iloc[1]
print(most_common / (most_common + runner_up))