import numpy as np # Librería para operaciones matemáticas con arrays
import pandas as pd # Librería para manipulación de datos en formato tabla
import matplotlib.pyplot as plt # Librería para visualización de gráficos
import seaborn as sns # Librería para visualización de datos estadísticos
from sklearn.datasets import load_digits # Conjunto de datos de dígitos escritos a mano
from sklearn.linear_model import LogisticRegression # Modelo de regresión logística
# Load the handwritten-digits dataset (8x8 images flattened to 64 features).
digits = load_digits()
# First row of the data: the first image as a 64-element vector.
digits.data[0]
# Reshape the first image's vector back into its 8x8 pixel matrix.
# (Fixed: the code used digits.data[8], contradicting the "first image"
# intent stated by the surrounding comments and the display at line above.)
image = np.reshape(digits.data[0], (8, 8))
image
# Render the first image in grayscale.
plt.imshow(image, cmap='gray')
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation; the seed pins the shuffle.
split = train_test_split(digits.data, digits.target,
                         test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split
print(f"""X_train: {X_train.shape}
y_train: {y_train.shape}
X_test: {X_test.shape}
y_test: {y_test.shape}""")
# Instantiate and fit the logistic-regression model.
logistic_reg = LogisticRegression()
logistic_reg.fit(X_train, y_train)
pred = logistic_reg.predict(X_test)
# Ignore FutureWarning noise from library deprecations.
import warnings; warnings.simplefilter(action='ignore', category=FutureWarning)
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import accuracy_score
# plot_confusion_matrix was removed in scikit-learn 1.2;
# ConfusionMatrixDisplay.from_estimator produces the same plot.
ConfusionMatrixDisplay.from_estimator(logistic_reg, X_test, y_test);
print(accuracy_score(y_test, pred)*100)
from PIL import Image  # PIL for image loading and processing
import numpy as np  # numpy to handle the image data

def classify(img_path):
    """Classify a hand-drawn digit image with the trained model.

    Parameters
    ----------
    img_path : str
        Path to the image file.

    Returns
    -------
    (PIL.Image.Image, int)
        The original grayscale image and the predicted digit.
    """
    # Open the image and convert it to 8-bit grayscale.
    image_raw = Image.open(img_path).convert('L')
    # Downsample to the 8x8 resolution the model was trained on.
    image_processed = image_raw.resize((8, 8))
    # Flatten to a 64-element feature vector and rescale 0-255 grayscale to
    # the 0-16 range used by load_digits, so the features match the data the
    # model was trained on (the original fed raw 0-255 values).
    # NOTE(review): load_digits images are light-digit-on-dark; if the
    # hand-drawn images are dark-on-light they may also need inversion —
    # confirm against sample files.
    features = np.array(image_processed).flatten() / 255.0 * 16.0
    # Predict with the previously trained model.
    prediction = logistic_reg.predict([features])[0]
    return image_raw, prediction
import matplotlib.pyplot as plt
import os

# Classify each file in the hand-made digits folder and display the
# results on a 2x5 grid of subplots.
path = './handmade_digits'
files = os.listdir(path)
fig, ax = plt.subplots(2, 5, figsize=(12,6))
for idx, axis in enumerate(ax.flatten()):
    img, pred = classify(f'./handmade_digits/{files[idx]}')
    axis.imshow(img, cmap='gray')
    axis.axis('off')
    axis.set_title(f'\n The image has a {pred}')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Seaborn theme for all subsequent plots.
sns.set(style='whitegrid', context='notebook')
# Load the Telco customer-churn dataset and take a first look.
df = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')
df.head()
df.info()
# TotalCharges is read as text; coerce it to numeric (bad values -> NaN).
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
df.info()
# Report missing values per column and the share of missing TotalCharges,
# then drop the incomplete rows and verify none remain.
divider = '=' * 52
missing = df.isnull().sum()
missing_pct = ((missing['TotalCharges'] / df.count()['TotalCharges']) * 100).round(3)
print(f'''
{divider}
DATOS FALTANTES EN TODAS LAS COLUMNAS.
{divider}
{missing}
{divider}
PROPORCIÓN DE DATOS FALTANTES EN TOTALCHARGES: {missing_pct}%
{divider}''')
df.dropna(inplace=True)
df.isna().sum()
# customerID is a unique identifier with no predictive value — drop it.
df.drop('customerID', axis=1, inplace=True)
df.head()
# Encode the target as 1/0. Plain column assignment replaces the original
# `df.Churn.replace(..., inplace=True)`, which trips the pandas 2.x
# chained-assignment FutureWarning and breaks under copy-on-write (pandas 3).
df['Churn'] = df['Churn'].replace(to_replace=['Yes', 'No'], value=[1, 0])
df_processing = df.copy()
# One-hot encode every remaining categorical column.
df_processing = pd.get_dummies(df_processing)
df_processing.head()
# Horizontal bar chart of each feature's correlation with churn,
# sorted ascending (first 45 features).
plt.figure(figsize=(9, 12))
churn_corr = (df_processing.corr()['Churn']
              .sort_values(ascending=True)
              .reset_index()
              .iloc[0:45])
sns.barplot(data=churn_corr, x='Churn', y='index')
from sklearn.preprocessing import MinMaxScaler

# Rescale every column to [0, 1] so model coefficients are comparable.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(df_processing)
scaled_values
# Wrap the scaled array back into a DataFrame with the original columns.
df_processing_scaled = pd.DataFrame(scaled_values,
                                    columns=df_processing.columns)
df_processing_scaled
# Count plots of every categorical variable, split by churn status.
columns_cat = df.select_dtypes(include='object').columns
fig, axs = plt.subplots(nrows=5, ncols=3, figsize=(23, 23))
for axis, column in zip(axs.flatten(), columns_cat):
    sns.countplot(data=df, x=column, hue='Churn', ax=axis)
# Pairwise scatter plots of numeric variables, colored by churn.
sns.pairplot(data=df, hue='Churn')
# Feature matrix / target vector split (target as a plain numpy array).
X = df_processing_scaled.drop('Churn', axis=1)
y = df_processing_scaled['Churn'].values
from sklearn.model_selection import train_test_split
# 70/30 train-test split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=.3, random_state=42)
print(f"""X_train:{X_train.shape}
X_test:{X_test.shape}
y_train:{y_train.shape}
y_test:{y_test.shape}""")
from sklearn.linear_model import LogisticRegression

# Train the churn classifier and score it on the held-out split.
model = LogisticRegression()
result = model.fit(X_train, y_train)
from sklearn import metrics
pred_test = model.predict(X_test)
print(metrics.accuracy_score(y_test, pred_test))
# Coefficients indexed by feature name: positive weights push toward churn,
# negative weights push toward retention.
weights = pd.Series(model.coef_[0], index=X.columns.values)
fig, axs = plt.subplots(1, 2, figsize=(16, 4))
ranked = weights.sort_values(ascending=False)
ranked.head(10).plot(kind='bar', ax=axs[0], title='Los 10 factores principales que influyen en el abandono de clientes')
ranked.tail(10).plot(kind='bar', ax=axs[1], title='Los 10 factores principales que retienen a los clientes')
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Confusion matrix of the test predictions, labeled with the model classes.
matrix = confusion_matrix(y_test, pred_test, labels=model.classes_)
display = ConfusionMatrixDisplay(confusion_matrix=matrix,
                                 display_labels=model.classes_)
display.plot();
# import packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, ConfusionMatrixDisplay
sns.set(style='whitegrid', context='notebook')
# Load the dry-bean dataset and take a first look.
df = pd.read_csv('Dry_Bean.csv')
df
df.Class.unique()
df.describe().T
# Remove exact duplicate rows.
df.drop_duplicates(inplace=True)
print(df.shape, df.isnull().sum(), sep='\n\n')
# Pie chart of the class distribution.
plt.figure(figsize=(7, 7))
labels, counts = np.unique(df.Class, return_counts=True)
plt.pie(counts, autopct='%1.1f%%', labels=labels)
plt.title('Proporcion de clases de frijoles')
plt.show()
print(df.Class.value_counts())
from imblearn.under_sampling import RandomUnderSampler

# Balance the classes by randomly under-sampling the larger classes.
undersample = RandomUnderSampler(random_state=42)
X = df.drop('Class', axis=1)
y = df.Class
X_over, y_over = undersample.fit_resample(X, y)
# Re-plot the class distribution: every class now has the same count.
plt.figure(figsize=(7, 7))
labels, counts = np.unique(y_over, return_counts=True)
plt.pie(counts, autopct='%1.1f%%', labels=labels)
plt.title('Proporcion de clases de frijoles')
plt.show()
print(y_over.value_counts())
# Compare dataset sizes before and after resampling.
df.shape
X_over.shape
# Class names in alphabetical order (the order used for the integer codes).
list(np.unique(y_over))
# Encode the class names as integers 1-7. An explicit mapping plus
# reassignment replaces `y_over.replace(..., inplace=True)`, which triggers
# the pandas 2.2 FutureWarning about downcasting object replacements.
# All seven classes appear in the mapping, so the result is identical.
class_codes = {
    'BARBUNYA': 1, 'BOMBAY': 2, 'CALI': 3, 'DERMASON': 4,
    'HOROZ': 5, 'SEKER': 6, 'SIRA': 7,
}
y_over = y_over.map(class_codes)
list(np.unique(y_over))
# Balanced features with the encoded target attached, for exploratory plots.
df_dea = X_over.copy(deep=True)
df_dea['Class'] = y_over
# Correlation heatmap of all features plus the encoded class.
plt.figure(figsize=(15, 15))
sns.heatmap(df_dea.corr(), annot=True, vmax=1, vmin=-1);
# Drop two features judged redundant from the heatmap.
X_over.drop(['ConvexArea', 'EquivDiameter'], inplace=True, axis=1)
X_over
# Pairwise scatter plots colored by class.
sns.pairplot(df_dea, hue='Class')
# 80/20 shuffled train-test split of the balanced data.
X_train, X_test, y_train, y_test = train_test_split(
    X_over, y_over, random_state=42, shuffle=True, test_size=.20)
# Standardize the features; fit on the training set only to avoid leakage.
st_x = StandardScaler()
X_train = st_x.fit_transform(X_train)
X_test = st_x.transform(X_test)
from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings('ignore')

# Hyper-parameter grid. The liblinear solver does not support the
# multinomial formulation, so it gets its own sub-grid restricted to 'ovr' —
# the original flat grid made GridSearchCV attempt (and silently fail) every
# liblinear+multinomial fit across all 10 folds.
# NOTE(review): `multi_class` is deprecated in scikit-learn >= 1.5; on newer
# versions this grid should be reduced to solvers only — confirm the
# installed version.
parameters = [
    {
        'solver': ['lbfgs', 'sag', 'saga', 'newton-cg'],
        'multi_class': ['ovr', 'multinomial'],
    },
    {
        'solver': ['liblinear'],
        'multi_class': ['ovr'],
    },
]
model = LogisticRegression()
# 10-fold cross-validated grid search maximizing accuracy.
clf = GridSearchCV(model,
                   param_grid=parameters,
                   scoring='accuracy',
                   cv=10)
clf.fit(X_train, y_train)
print('Mejores hiperparametros:', clf.best_params_)
print('Mejores accuracy:', clf.best_score_)
# Evaluate the best estimator on the held-out test set.
pred = clf.predict(X_test)
print(confusion_matrix(y_test, pred))
print(accuracy_score(y_test, pred))
cm = confusion_matrix(y_test, pred, labels=clf.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=clf.classes_)
disp.plot(cmap='gray');