Dispersión y Correlación

# Standard library
# ==============================================================================
import warnings

# Data handling
# ==============================================================================
import numpy as np
import pandas as pd

# Plotting
# ==============================================================================
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import style

# Preprocessing and modelling
# ==============================================================================
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy.stats import pearsonr
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

# Matplotlib configuration
# ==============================================================================
plt.rcParams['image.cmap'] = "bwr"
plt.rcParams['savefig.bbox'] = "tight"
# `style` and `plt.style` are the same module and `use()` returns None, so the
# former `style.use(...) or plt.style.use(...)` applied the style twice.
style.use('ggplot')

# Warning configuration
# ==============================================================================
# NOTE: this silences every warning raised after this point, including ones
# that signal real problems in later cells.
warnings.filterwarnings('ignore')

# Load seaborn's example iris dataset.
iris = sns.load_dataset('iris')
# Bare expression: shows the table when this is the last line of a notebook cell.
iris

Vamos a realizar el gráfico de dispersión entre la longitud del sépalo y la longitud del pétalo del conjunto de datos iris.

# Scatter plot of petal length (y axis) against sepal length (x axis).
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(6, 4))
ax.scatter(x=iris['sepal_length'], y=iris['petal_length'], alpha=0.8)
# Title and axis labels are set in a single call.
ax.set(
    title='Correlación de longitud de pétalos y Sépalos',
    xlabel='Sépalo (l)',
    ylabel='Pétalo (l)',
)
# `fig` is the current figure here, so this matches plt.tight_layout().
fig.tight_layout()

Calculamos el coeficiente de correlación de Pearson entre la longitud del sépalo y la longitud del pétalo.

# Pearson correlation coefficient between sepal length and petal length.
print(
    'Correlación Pearson: ',
    iris.sepal_length.corr(other=iris.petal_length, method='pearson'),
)

# Split the data into train and test sets
# ==============================================================================
X = iris[['sepal_length']]
y = iris['petal_length']

# X is already 2-D (one feature column), so no reshape is needed. y is kept as
# a column vector, as before, so coef_ stays 2-D and intercept_ stays a
# one-element array for the code that reads them.
X_train, X_test, y_train, y_test = train_test_split(
    X.to_numpy(),
    y.to_numpy().reshape(-1, 1),
    train_size=0.8,
    random_state=1234,
    shuffle=True,
)

# Model creation
# ==============================================================================
modelo = LinearRegression()
modelo.fit(X=X_train, y=y_train)

# Model information
# ==============================================================================
print("Intercepto (b): ", modelo.intercept_)
print("Pendiente (m): ", list(zip(X.columns, modelo.coef_.flatten())))
# R^2 is measured on the held-out test split. The previous version scored the
# full DataFrame, which mixed in the training rows and passed column names to
# a model that was fitted on a plain array.
print("Coeficiente de determinación R^2:", modelo.score(X_test, y_test))

# Scatter plot of the observations with the fitted regression line on top.
plt.style.use('seaborn-v0_8-ticks')
fig = plt.figure()
plt.scatter(x=iris.sepal_length, y=iris.petal_length)
# predict() evaluates the fitted line (slope * x + intercept) at every
# observed sepal length; a plain array is passed because the model was fitted
# on one.
plt.plot(X, modelo.predict(X.to_numpy()), color='blue')
plt.grid()
# plt.show() lets pyplot manage the display; Figure.show() does not run a GUI
# event loop, so the window may close immediately or never appear.
plt.show()

Dispersión y Correlación

Dispersión y Correlación