Pre-procesado de datos
#we load the libraries
import numpy as np
import pandas as pd
import tensorflow as tf
# Load the churn dataset into a DataFrame.
df = pd.read_csv('/work/Churn_Modelling.csv')
df.head()  # bare expression: only rendered when it is the last line of a notebook cell

# Features: every column from position 3 up to, but not including, the last one.
feature_frame = df.iloc[:, 3:-1]
X = feature_frame.values

# Target: the last column (presumably the 'exited' flag -- confirm against the CSV header).
target_column = df.iloc[:, -1]
y = target_column.values
print(y)
Encoding categorical variables
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Replace feature column 2 (presumably the gender column -- confirm against
# the CSV header) with integer codes, in place.
# NOTE: LabelEncoder numbers the classes in sorted order; if the labels are
# 'Female'/'Male' that gives Female -> 0, Male -> 1. Confirm with le.classes_.
le = LabelEncoder()
X[:, 2] = le.fit_transform(X[:, 2])

df.dtypes.unique()  # notebook inspection of the column dtypes; result is not stored

# One-hot encode feature column 1 (presumably the country column -- confirm);
# every other column is passed through unchanged.
one_hot_step = ('encoder', OneHotEncoder(), [1])
ct = ColumnTransformer(transformers=[one_hot_step], remainder='passthrough')
X = np.array(ct.fit_transform(X))  # fit the encoder and apply it to X
Separating data for training and testing
`train_test_split` nos regresa 4 arreglos. Le pasamos nuestros datos, el porcentaje que vamos a reservar para test (20 % test, 80 % training) y `random_state`, que fija la semilla para que la partición aleatoria sea reproducible.
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing and train on the remaining 80%.
# random_state=0 fixes the shuffle seed so the split is reproducible.
(X_train, X_test, y_train, y_test) = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)
Reescalamiento de datos: hay que normalizar los datos para evitar que los valores extremos afecten los pesos.
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training set only, then apply the
# same fitted scaler to both splits so the test data does not influence them.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
Building the neural network
# Fully connected binary classifier: two 7-unit ReLU hidden layers followed by
# a single sigmoid output unit. No explicit input layer is declared here.
ann = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=7, activation='relu'),     # first hidden layer
    tf.keras.layers.Dense(units=7, activation='relu'),     # second hidden layer
    tf.keras.layers.Dense(units=1, activation='sigmoid'),  # output layer
])

# Adam optimizer with binary cross-entropy loss; accuracy is tracked as a metric.
ann.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# One epoch is one full pass of the training data through the network.
ann.fit(X_train, y_train, batch_size=32, epochs=20)

# Threshold the sigmoid outputs at 0.5 to obtain boolean class predictions.
y_pred = ann.predict(X_test) > 0.5

# Print predicted and actual labels side by side, one row per test sample.
predicted_column = y_pred.reshape(-1, 1)
actual_column = y_test.reshape(-1, 1)
print(np.concatenate((predicted_column, actual_column), axis=1))
Evaluation of the model
from sklearn.metrics import accuracy_score, confusion_matrix

# Both calls are bare expressions: their results are only rendered when the
# call is the last line of a notebook cell.

# Fraction of test samples whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

# Table of counts of true labels against predicted labels.
confusion_matrix(y_true=y_test, y_pred=y_pred)