# Análisis de Tendencias 1
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
# Load Analisis1.csv into a DataFrame. The bare `dt` expression below has no
# effect in a plain script; in a notebook cell it displays the table.
dt = pd.read_csv('Analisis1.csv')
dt
def _prepare_category(frame):
    """Return a cleaned, date-indexed copy of one category's rows.

    Args:
        frame: Rows of the raw CSV belonging to a single 'Categoria' value.

    Returns:
        A new DataFrame indexed by 'Fecha' (parsed as day/month/year) with
        stripped column names, an added 'Fecha ordinal' column and
        'Venta total' converted to int.
    """
    # Work on an explicit copy: the input is a boolean-mask slice of the raw
    # table, and assigning columns on such a slice triggers pandas'
    # SettingWithCopyWarning (and is unreliable under chained assignment).
    frame = frame.copy()
    frame.columns = [column.strip() for column in frame.columns]
    frame['Fecha'] = pd.to_datetime(frame['Fecha'], format="%d/%m/%Y")
    frame['Fecha ordinal'] = frame['Fecha'].apply(lambda x: x.toordinal())
    # 'Venta total' is read as text containing '.' separators; drop them
    # before converting to int. regex=False keeps '.' a literal character.
    frame['Venta total'] = (
        frame['Venta total'].str.replace('.', '', regex=False).astype(int)
    )
    return frame.set_index('Fecha')


# Split the raw table into one prepared DataFrame per 'Categoria' value.
# `data` itself is left untouched (its column names are not stripped).
data = pd.read_csv('Analisis1.csv')
categoryValues = data['Categoria'].unique()
categories = {}
for categoryValue in categoryValues:
    categories[categoryValue] = _prepare_category(
        data.loc[data['Categoria'] == categoryValue]
    )
print(categoryValues)
print(data.columns)
categories['A']
def plot_line(df, category):
    """Draw two side-by-side line charts for one category.

    The left panel plots df['Venta total'] in blue and the right panel plots
    df['Clientes compradores'] in green, both against df.index.

    Args:
        df: Table providing the two plotted columns and the x-axis index.
        category: Label interpolated into the figure title.
    """
    panels = (
        ('Venta total', "blue"),
        ('Clientes compradores', "green"),
    )
    fig, axes = plt.subplots(1, 2, figsize=(12, 4))
    fig.suptitle(f'Categoria {category}', fontsize=16)
    for axis, (column, line_color) in zip(axes, panels):
        axis.plot(df.index, df[column], color=line_color, marker='o')
        # The column name doubles as panel title and y-axis label.
        axis.set_title(column, fontsize=14)
        axis.set_xlabel('Fecha', fontsize=14)
        axis.set_ylabel(column, fontsize=14)
        axis.grid(True)
# Draw the two-panel overview figure for every category value.
for category in categoryValues:
    plot_line(df=categories[category], category=category)
# Decompose each category's series with seasonal_decompose and draw one
# three-panel figure (trend / seasonal / residual) per category and variable.
# Results are kept in time_series[category][variable].
# NOTE(review): seasonal_decompose presumably needs an index with a known
# frequency or an explicit `period` -- confirm against the CSV's dates.
time_series = {}
for category in categoryValues:
    time_series[category] = {}
    for variable in ('Venta total', 'Clientes compradores'):
        # Use this category's own data. The previous code always decomposed
        # categories['A'], so every figure showed category A's components
        # under another category's title.
        time_series[category][variable] = seasonal_decompose(categories[category][variable])
        fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(16, 4))
        fig.suptitle(f'Categoria {category}: {variable}', fontsize=14)
        time_series[category][variable].trend.plot(ax=ax1)
        time_series[category][variable].seasonal.plot(ax=ax2)
        time_series[category][variable].resid.plot(ax=ax3)
        ax1.set_title('Tendencia')
        ax2.set_title('Estacionalidad')
        ax3.set_title('Ruido')
def adfuller_test(df, keys, round_value=6):
    """Run the augmented Dickey-Fuller test on several columns of *df*.

    Args:
        df: Table indexable by column name; each df[key] must expose
            `.values`.
        keys: Column names to test, in output order.
        round_value: Number of decimal places every reported figure is
            rounded to.

    Returns:
        A flat list with five numbers per key, in order: the test statistic,
        the p-value, and the 1%, 5% and 10% critical values. Empty when
        *keys* is empty.
    """
    # Read the critical values by explicit level so the output order does not
    # depend on the ordering of the dict returned by adfuller.
    critical_levels = ('1%', '5%', '10%')
    row = []
    for key in keys:
        result = adfuller(df[key].values)
        statistic, p_value, critical_values = result[0], result[1], result[4]
        row.append(round(statistic, round_value))
        row.append(round(p_value, round_value))
        row.extend(
            round(critical_values[level], round_value)
            for level in critical_levels
        )
    return row
# Collect the Dickey-Fuller figures into one table: a row per category and a
# two-level column header (tested variable x reported statistic).
time_series_keys = ['Venta total', 'Clientes compradores']
adfuller_keys = ['Estadistico ADF', 'Valor p', '1%', '5%', '10%']

adfuller_data = []
for category in categoryValues:
    row = adfuller_test(categories[category], time_series_keys)
    adfuller_data.append(row)

row_labels = pd.Index(categoryValues, name='Categoría:')
column_labels = pd.MultiIndex.from_product(
    [time_series_keys, adfuller_keys],
    names=['Variable:', 'Dickey-Fuller:'],
)
adfuller_df = pd.DataFrame(
    adfuller_data,
    index=row_labels,
    columns=column_labels,
)
# Bare expression: shows the styled table when run as a notebook cell.
adfuller_df.style