import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the comma-separated Walmex price/earnings history.
df = pd.read_csv("/content/precio-utilidad.txt", sep=",")
df.head()
# Promote the "Consolidado" column to the index, then parse its labels as
# timestamps so the frame carries a DatetimeIndex.
df = df.set_index("Consolidado")
df.index = pd.to_datetime(df.index)
# Preview the first rows of the re-indexed frame.
df.head()
# Draw every column of the frame against the date index.
plt.figure(figsize=(14, 8))
plt.plot(df)
plt.ylabel("Precio/utilidad")
plt.title("Multiplo P/U Walmex");
# Chronological hold-out split: rows before the cutoff are used for fitting,
# rows on or after it are kept for evaluation.
split_date = pd.to_datetime("2020-11-01", format='%Y-%m-%d')
train = df[df.index < split_date]
# `>=` so an observation dated exactly on the cutoff lands in the test set
# instead of being dropped from both subsets.
test = df[df.index >= split_date]

# Show both subsets on one figure: training data in black, test data in red.
plt.figure(figsize=(14, 8))
plt.plot(train, color="black")
plt.plot(test, color="red")
plt.ylabel('Precio/utilidad')
plt.xlabel('Date')
plt.xticks(rotation=45)
plt.title("Train/Test split for Walmex P/U Data")
plt.show()
import numpy as np
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Fit a SARIMAX model with order (1, 0, 1) on the training series.
# After this line `ARMAmodel` holds the fitted results object.
y = train[' Precio-utilidad']
ARMAmodel = SARIMAX(y, order=(1, 0, 1)).fit()

# Forecast one step per test observation. The interval bounds come from
# conf_int(); the point forecast is taken from the same forecast object
# instead of issuing a second predict() call over the same horizon.
y_pred = ARMAmodel.get_forecast(len(test.index))
y_pred_df = y_pred.conf_int(alpha=0.05)
y_pred_df["Predictions"] = y_pred.predicted_mean
# Re-label the forecast rows with the test dates so they plot on the same axis.
y_pred_df.index = test.index
y_pred_out = y_pred_df["Predictions"]

# Use a dedicated figure so this plot is not merged with any later one when
# the file runs as a plain script.
plt.figure(figsize=(14, 8))
plt.plot(train, color="black")
plt.plot(test, color="red")
plt.plot(y_pred_out, color='green', label='Predictions')
plt.ylabel('Precio/utilidad')
plt.xlabel('Date')
plt.xticks(rotation=45)
plt.title("Train/Test split for Walmex P/U Data")
plt.legend()
plt.show()

# Root-mean-squared error of the forecast against the held-out values.
arma_rmse = np.sqrt(mean_squared_error(test[" Precio-utilidad"].values, y_pred_df["Predictions"]))
print("RMSE: ", arma_rmse)
import numpy as np
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.arima.model import ARIMA

# Fit an ARIMA model with order (2, 2, 2) on the training series `y`.
# After this line `ARIMAmodel` holds the fitted results object.
ARIMAmodel = ARIMA(y, order=(2, 2, 2)).fit()

# Forecast one step per test observation. The interval bounds come from
# conf_int(); the point forecast is taken from the same forecast object
# instead of issuing a second predict() call over the same horizon.
y_pred = ARIMAmodel.get_forecast(len(test.index))
y_pred_df = y_pred.conf_int(alpha=0.05)
y_pred_df["Predictions"] = y_pred.predicted_mean
# Re-label the forecast rows with the test dates so they plot on the same axis.
y_pred_df.index = test.index
y_pred_out = y_pred_df["Predictions"]

# Use a dedicated figure so this plot is not drawn on top of an earlier one
# when the file runs as a plain script.
plt.figure(figsize=(14, 8))
plt.plot(train, color="black")
plt.plot(test, color="red")
plt.plot(y_pred_out, color='Yellow', label='ARIMA Predictions')
plt.ylabel('Precio/utilidad')
plt.xlabel('Date')
plt.xticks(rotation=45)
plt.legend()
plt.show()

# Stored under its own name so the ARMA score computed earlier is not
# overwritten by the ARIMA score.
arima_rmse = np.sqrt(mean_squared_error(test[" Precio-utilidad"].values, y_pred_df["Predictions"]))
print("RMSE: ", arima_rmse)
# Metricas
from sklearn.metrics import mean_absolute_error
def mean_absolute_percentage_error(y_true, y_predict):
    """Return the mean absolute percentage error (MAPE), in percent.

    Computes ``mean(|(y_true - y_predict) / y_true|) * 100``.

    Arguments:
    -----
    y_true -- observed values; must support element-wise arithmetic
              (e.g. a NumPy array or pandas Series)
    y_predict -- predicted values, same shape as ``y_true``

    Returns
    ------:
    The average relative error expressed as a percentage. Zeros in
    ``y_true`` lead to a division by zero in the corresponding terms.
    """
    # The difference must be parenthesised: without it only y_predict is
    # divided by y_true and the result is not a relative error.
    return np.mean(np.abs((y_true - y_predict) / y_true)) * 100
# Media movil
def moving_average(series, n):
    """Return the mean of the last ``n`` observations of a time series.

    Arguments:
    -----
    series -- sliceable container with the series data (e.g. np.array,
              pandas Series or DataFrame)
    n -- window length (lag): how many trailing observations to average;
         must be at least 1

    Returns
    ------:
    A scalar with the mean of the selected window. When ``n`` exceeds the
    length of ``series`` every available observation is averaged.

    Raises
    ------:
    ValueError -- if ``n`` is smaller than 1.
    """
    # series[-0:] is the whole series and a negative n would drop leading
    # items instead of selecting trailing ones, so reject both explicitly.
    if n < 1:
        raise ValueError("n must be a positive integer, got {!r}".format(n))
    return np.average(series[-n:])
# Mean of the last 24 rows of the frame.
moving_average(series=df, n=24)
def plot_MA(series, window, plot_intervals=False, scale=1.96,
            plot_anormalies=False):
    """Plot the rolling mean of a series together with its actual values.

    Arguments:
    -----
    series -- pandas Series or DataFrame holding the time series
    window -- number of observations in each rolling-mean window
    plot_intervals -- when True, also draw an upper and a lower bound around
                      the rolling mean, offset by ``mae + scale * dev``
    scale -- multiplier applied to the standard deviation of the residuals
             (actual minus rolling mean) when building the bounds
    plot_anormalies -- when True, mark the observations that fall outside
                       the bounds; only takes effect together with
                       ``plot_intervals`` because it needs those bounds

    Returns
    ------:
    None. The chart is drawn on a new matplotlib figure.
    """
    rolling_mean = series.rolling(window=window).mean()  # pandas

    plt.figure(figsize=(15, 5))
    plt.plot(rolling_mean, 'g', label='Tendencia')

    if plot_intervals:
        # The error statistics skip the first `window` rows, which include
        # the rows where the rolling mean is still undefined.
        mae = mean_absolute_error(series[window:], rolling_mean[window:])
        dev = np.std(series[window:] - rolling_mean[window:])
        upper_ = rolling_mean + (mae + scale * dev)
        lower_ = rolling_mean - (mae + scale * dev)
        plt.plot(upper_, 'r--', label='Límites superior/inferior')
        plt.plot(lower_, 'r--')

        if plot_anormalies:
            # Keep only the values outside the band and leave NaN elsewhere.
            # `where` works for both Series and DataFrame, unlike building
            # an empty frame from `series.columns`.
            outside = (series < lower_) | (series > upper_)
            anomalies = series.where(outside)
            plt.plot(anomalies, 'ro', markersize=10)

    plt.plot(series[window:], label='Valores reales')
    plt.legend(loc='upper left')
    plt.grid(True)
# Rolling mean over 4 observations, with the upper/lower bounds drawn.
plot_MA(series=df, window=4, plot_intervals=True)