Churn Project
Daniele Vecchiarelli - Júlia Révai - Antonio Gerardo Lapenna
import pandas as pd
import numpy as np

# Load the churn dataset from the local working directory.
dataset = pd.read_csv('./churn.csv') # Local File

# Keep only rows where none of the first 18 columns holds the placeholder
# value 'Unknown'.  The original `dataset[dataset.iloc[:, 0:18] != 'Unknown']`
# indexed with a boolean *DataFrame*, which masks non-matching cells — and
# every column outside the 0:18 slice — to NaN instead of dropping rows, so
# the subsequent dropna() would remove ALL rows whenever the CSV has more
# than 18 columns.  Filtering on a boolean Series drops exactly the rows
# containing 'Unknown' and leaves the other columns intact.
dataset = dataset[(dataset.iloc[:, 0:18] != 'Unknown').all(axis=1)]

# Drop rows that genuinely contain missing values.
dataset = dataset.dropna(axis=0, how='any')

# set x and y values: features are columns 3..17, target is column 2
# (the attrition status).
X = dataset.iloc[:, 3:18].values
y = dataset.iloc[:, 2].values
Data Processing
# Encoding categorical data
from sklearn import *
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.metrics import ConfusionMatrixDisplay

# Integer-encode the categorical feature columns 1 and 3-6; column 2 is
# already numeric and is skipped.
labelencoder = LabelEncoder()
categorical_cols = [col for col in range(1, 7) if col != 2]
for col in categorical_cols:
    X[:, col] = labelencoder.fit_transform(X[:, col])

# Y encoding 1 = Existing Customer / 0 = Attrited Customer
y[:] = labelencoder.fit_transform(y[:])

from sklearn.compose import ColumnTransformer

# One-hot encode column 1; every other column passes through unchanged.
ct = ColumnTransformer([("X", OneHotEncoder(dtype = int), [1])], remainder = 'passthrough')
X = ct.fit_transform(X)

# Drop the first dummy column (avoids the dummy-variable trap).
X = X[:, 1:]
print(X)
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Feature Scaling: fit the scaler on the training split only, then apply
# the identical transform to the held-out test split.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
ANN Creation / Training
# Importing the Keras libraries and packages
from tensorflow.keras import *
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import layers
from tensorflow.keras.callbacks import Callback
import tensorflow as tf

# Initialising the ANN: three 24-unit relu hidden layers, one 12-unit relu
# hidden layer, and a single sigmoid output unit for binary churn
# classification.  All layers use the 'uniform' kernel initializer.
hidden_widths = (24, 24, 24, 12)
classifier = Sequential(
    [
        layers.Dense(width, kernel_initializer='uniform', activation='relu')
        for width in hidden_widths
    ]
    + [layers.Dense(1, kernel_initializer='uniform', activation='sigmoid')]
)

# Compiling the ANN with binary cross-entropy, as befits the sigmoid output.
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Prepare the TRAINING arrays for Keras (the original comment said
# "Testing", but these are the inputs fed to fit() below): cast to float32
# and make the shapes explicitly 2-D.
X_t = np.asarray(X_train).astype('float32')
# Deriving the width from the array (instead of the hard-coded 15) keeps
# this correct if the one-hot encoding above ever yields a different number
# of feature columns; for the current data it is a no-op reshape.
X_t = X_t.reshape(-1, X_t.shape[1])
Y_t = np.asarray(y_train).astype('float32').reshape(-1, 1)
class Callback(tf.keras.callbacks.Callback):
    """Record the metrics dict Keras reports at the end of every epoch.

    Collected entries are read directly from the ``data`` attribute
    (a list of per-epoch ``logs`` dicts, e.g. loss and accuracy).

    NOTE(review): this class deliberately keeps the name ``Callback`` even
    though it shadows the one imported from ``tensorflow.keras.callbacks``
    above, because the ``Callback()`` call site below depends on it.
    """

    def __init__(self):
        # Initialise Keras's own callback state before adding ours.
        # (The original omitted this call.)
        super().__init__()
        self.data = []

    # The original also defined a `data(self)` method; it was dead code —
    # unconditionally shadowed by the `self.data` attribute — and has been
    # removed.

    def on_epoch_end(self, epoch, logs=None):
        # `logs` carries this epoch's metric values (loss, accuracy, ...).
        self.data.append(logs)
# Train the ANN on the prepared arrays, capturing per-epoch metrics through
# the custom callback for the plots below.
batch_size = 32
callback = Callback()
data = classifier.fit(
X_t,
Y_t,
batch_size = batch_size, # Default
# NOTE(review): this sets the *number of epochs* to len(X_t)/batch_size,
# i.e. the number of batches in one epoch — this looks like a mix-up
# between `epochs` and steps-per-epoch.  Confirm the intended epoch count;
# kept as-is because changing it alters training behavior.
epochs = int(len(X_t) / batch_size),
callbacks=[callback],
verbose=1
)
#print(data.params)
Results
# Part 3 - Making the predictions and evaluating the model
# Predicting the Test set results: threshold the sigmoid output at 0.5.
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix

# Flatten predictions and labels into plain Python values for the metric
# helpers below.
y_pred2 = list(map(int, y_pred))
y_test2 = list(y_test)

# Confusion Matrix Plot
ConfusionMatrixDisplay.from_predictions(y_test2, y_pred2)

# Computing Accuracy, Precision and Recall
from sklearn.metrics import accuracy_score, precision_score, recall_score

for label, metric in (
    ("Accuracy :", accuracy_score),
    ("Precision :", precision_score),
    ("Recall :", recall_score),
):
    print(label, metric(y_test2, y_pred2))
Graph Results
import matplotlib.pyplot as plt

# Pull the per-epoch loss and accuracy recorded by the training callback.
losses = [entry["loss"] for entry in callback.data]
accuracies = [entry["accuracy"] for entry in callback.data]

# Plot accuracy on the left axis and loss on a twin right axis, sharing the
# epoch number on the x axis.
fig, ax = plt.subplots(constrained_layout=True)
pl1 = ax.plot(range(0, len(accuracies)), accuracies, label='Accuracy')
ax.set_xlabel('Epoch')
ax2 = ax.twinx()
pl2 = ax2.plot(range(0, len(losses)), losses, color='#ff6f00', label='Loss Function')

# Merge both axes' line handles into a single legend.
handles = pl1 + pl2
labels = [handle.get_label() for handle in handles]
ax.legend(handles, labels, loc='center right')

fig.tight_layout()  # otherwise the right y-label is slightly clipped
plt.show()