# Start writing code here...
import numpy as np
import plotly.figure_factory as ff
import pandas as pd
import datetime
# Synthetic demand data: for every weekday, three independent lognormal
# samples of 156 values each. The _1/_2/_3 suffixes feed the `morning`,
# `evening` and `night` series assembled further down.
def _draw_demand(log_mean):
    """Return 156 lognormal draws with the given log-mean and sigma 0.18."""
    return np.random.lognormal(mean=log_mean, sigma=0.18, size=156)


def _interleave(*day_samples):
    """Stack per-day samples as columns and flatten row by row to (1092, 1).

    The result lists the first value of every day, then the second value of
    every day, and so on, in the order the samples were passed.
    """
    return np.column_stack(day_samples).reshape((1092, 1))


# The draw order is kept exactly as before so a seeded generator yields the
# same values for every variable.
Lunes_1 = _draw_demand(5.5)
Lunes_2 = _draw_demand(5)
Lunes_3 = _draw_demand(5)
Martes_1 = _draw_demand(5)
Martes_2 = _draw_demand(5)
Martes_3 = _draw_demand(5)
Miercoles_1 = _draw_demand(5)
Miercoles_2 = _draw_demand(4.5)
Miercoles_3 = _draw_demand(5)
Jueves_1 = _draw_demand(5)
Jueves_2 = _draw_demand(4.5)
Jueves_3 = _draw_demand(5)
Viernes_1 = _draw_demand(5)
Viernes_2 = _draw_demand(5)
Viernes_3 = _draw_demand(5)
Sabado_1 = _draw_demand(5)
Sabado_2 = _draw_demand(5)
Sabado_3 = _draw_demand(5)
Domingo_1 = _draw_demand(5)
Domingo_2 = _draw_demand(5.5)
Domingo_3 = _draw_demand(5)
# `hist_data` ends up bound to the last sample drawn.
hist_data = Domingo_3

# Weekday label for each of the 1092 rows (7 names repeated 156 times),
# stored as a single-column DataFrame.
days = pd.DataFrame(
    pd.Series(
        ['Lunes', 'Martes', 'Miercoles', 'Jueves', 'Viernes', 'Sábado', 'Domingo'] * 156
    )
)

# One (1092, 1) column per series, cycling Monday..Sunday.
morning = _interleave(
    Lunes_1, Martes_1, Miercoles_1, Jueves_1, Viernes_1, Sabado_1, Domingo_1
)
evening = _interleave(
    Lunes_2, Martes_2, Miercoles_2, Jueves_2, Viernes_2, Sabado_2, Domingo_2
)
night = _interleave(
    Lunes_3, Martes_3, Miercoles_3, Jueves_3, Viernes_3, Sabado_3, Domingo_3
)
morning.shape  # bare shape inspection; has no effect when run as a script

# Each series side by side with its weekday label.
morning_final = pd.concat([pd.DataFrame(morning), days], axis=1)
evening_final = pd.concat([pd.DataFrame(evening), days], axis=1)
night_final = pd.concat([pd.DataFrame(night), days], axis=1)
# Build one calendar date per observation: 1092 consecutive days starting
# 35064 hours (= 1461 days) before today.
date = datetime.date.today()
time_delta = datetime.timedelta(hours=35064)
First_Date = date - time_delta
# 1092 entries in total: the start date plus 1091 one-day steps. The count
# matches the 1092 rows of the demand series built earlier in the file.
date_time = [First_Date + datetime.timedelta(days=offset) for offset in range(1092)]
# Kept for compatibility: `aux_time` holds the last generated date.
aux_time = date_time[-1]
# Large lognormal reference sample; it is not passed to the plot below.
hist_data = np.random.lognormal(mean=5, sigma=0.18, size=50000)
# Disabled filter kept from the original: hist_data = [x for x in hist_data if x <= 200]

# Overlay the distributions of three of the synthetic samples, keyed by the
# label shown in the legend.
samples_by_label = {
    'Martes en la noche': Martes_3,
    'Lunes en la Mañana': Lunes_1,
    'Miercoles por la tarde': Miercoles_2,
}
group_labels = list(samples_by_label)  # name of the dataset
fig = ff.create_distplot(list(samples_by_label.values()), group_labels)
fig.update_layout(title_text='Demandas para una semana')
fig.show()
# Using plotly.express
import plotly.express as px

# Assemble one table with the three demand series, the weekday label and
# the calendar date, column by column.
column_sources = (morning, evening, night, days, date_time)
df = pd.concat([pd.DataFrame(source) for source in column_sources], axis=1)
df.columns = ['Morning', 'Evening', 'Night', 'Weekday', 'Date']

# Plot only the first 150 rows of the three series against the date.
df_aux = df.head(150)
fig = px.line(df_aux, x='Date', y=['Morning', 'Evening', 'Night'])
fig.show()
# Aplicando ARIMA (applying an ARIMA model to the morning demand series)
!pip install statsmodels
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
from math import sqrt
from matplotlib import pyplot
# Split the morning series into train and test sets.
# Work on an independent copy: assigning columns on a slice of `df` triggers
# pandas' SettingWithCopyWarning.
X = df_morning = df[['Date', 'Morning']].copy()
X['Morning'] = pd.to_numeric(df["Morning"], downcast="float")
X['Date'] = pd.to_datetime(X['Date'])
print(X)
X.set_index('Date', inplace=True)
print(X)
# From here on X is a 2-D array with a single column (the 'Morning' values).
X = X.values
print(X.shape)
size = int(len(X) * 0.66)  # first 66% of the rows for training
train, test = X[:size], X[size:]
history = list(train)
predictions = []
# Walk-forward validation: refit on everything seen so far, forecast one
# step, then add the true observation to the history.
for t in range(len(test)):
    model = ARIMA(history, order=(7, 1, 7))
    model_fit = model.fit()
    output = model_fit.forecast()
    yhat = output[0]
    predictions.append(yhat)
    obs = test[t]
    history.append(obs)
    # `obs` is a one-element row; format its scalar explicitly, because
    # implicit conversion of a non-0-d array to float is deprecated in NumPy.
    print('predicted=%f, expected=%f' % (yhat, obs[0]))
    # NOTE(review): the source lost its indentation; these two progress
    # prints are assumed to belong inside the loop - confirm.
    print(len(test))
    print(t)
# These names are already imported earlier in the file; the re-imports are
# kept so they are guaranteed to be in scope at this point.
from sklearn.metrics import mean_squared_error
from math import sqrt

# Evaluate the forecasts once. The original repeated this same evaluation
# and plot three times verbatim, producing identical output each time.
rmse = sqrt(mean_squared_error(test, predictions))
print('Test RMSE: %.3f' % rmse)
# Plot forecasts (red) against the actual outcomes (default colour).
pyplot.plot(test)
pyplot.plot(predictions, color='red')
pyplot.show()
import statsmodels.api as sm
import itertools
# Candidate orders: every (p, d, q) combination with each term in {0, 1}.
p = d = q = range(2)
pdq = list(itertools.product(p, d, q))
# Same combinations with a trailing 12 appended (presumably the seasonal
# period used for the seasonal order - confirm).
seasonal_pdq = [(*order, 12) for order in itertools.product(p, d, q)]

print('Examples of parameter combinations for Seasonal ARIMA...')
# Index pairs (into pdq, into seasonal_pdq) of the examples to display.
for order_idx, seasonal_idx in ((1, 1), (1, 2), (2, 3), (2, 4)):
    print('SARIMAX: {} x {}'.format(pdq[order_idx], seasonal_pdq[seasonal_idx]))