import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
from datetime import datetime
# Load the raw bike records and take a first look at their size and contents.
ruta_datos = "./Data/bikes.csv"
df_bicis = pd.read_csv(ruta_datos)
print(df_bicis.shape)
df_bicis.head(8)
df_bicis.describe()
import seaborn as sns
# Distribution plots, one figure per column. Each entry pairs a column name
# with the histplot options that are specific to that column.
histogramas = [
    ("bikes", {"stat": "density", "bins": 20}),
    ("isHoliday", {"stat": "density", "bins": 20}),
    ("year", {"stat": "density", "bins": 5}),
    ("day", {"stat": "density", "bins": 31}),
    ("month", {"stat": "density", "bins": 12}),
    ("precipitation.l.m2", {"stat": "count"}),
]
for columna, opciones in histogramas:
    figura = plt.figure()
    axes = figura.add_subplot()
    # histplot receives its target Axes through `ax`. `axes=` is not a
    # histplot parameter; it is forwarded to matplotlib as an extra keyword
    # instead of selecting where to draw.
    sns.histplot(x=df_bicis[columna], ax=axes, kde=True, **opciones)
def crear_target(df=None, horas=3):
    """Add the ``bikes3h`` column: the ``bikes`` value ``horas`` rows ahead.

    The column is written in place on ``df``. The last ``horas`` rows have no
    later row to read from and are left as NaN.

    Args:
        df: DataFrame with a ``bikes`` column. Defaults to the module-level
            ``df_bicis`` so existing ``crear_target()`` calls keep working.
        horas: Number of rows to look ahead; the previously fixed value was 3.
            Presumably one row per hour in chronological order - TODO confirm
            against the dataset.
    """
    if df is None:
        df = df_bicis
    # The former mean() assignment was overwritten immediately by the shift,
    # so only the shift is kept.
    df["bikes3h"] = df["bikes"].shift(-horas)
# Create the `bikes3h` column and keep a handle on it as the target.
crear_target()
df_bicis["bikes"]
target = df_bicis["bikes3h"]
target.head()

# Work on a copy so the frame loaded from disk is left as it is.
df_bicis_bruto = df_bicis.copy()
df_bicis_bruto.isnull().sum()

# Correlation between the two wind-speed columns, drawn as an annotated heatmap.
columnas_viento = ["windMaxSpeed.m.s", "windMeanSpeed.m.s"]
df_corr = df_bicis_bruto[columnas_viento]
correlacion = df_corr.corr()
sn.heatmap(correlacion, annot=True)

# Remove the weather-related columns from the working copy.
columnas_meteo = [
    "windDirection.grades",
    "relHumidity.HR",
    "airPressure.mb",
    "windMaxSpeed.m.s",
    "windMeanSpeed.m.s",
    "temperature.C",
    "precipitation.l.m2",
]
df_bicis_bruto = df_bicis_bruto.drop(columns=columnas_meteo)
print(df_bicis_bruto.shape)
df_bicis_bruto.head()
df_bicis_bruto.isnull().sum()
# viento_media= df_bicis_bruto["windMeanSpeed.m.s"].mean()
# temp_media= df_bicis_bruto["temperature.C"].mean()
# prec_media= df_bicis_bruto["precipitation.l.m2"].mean()
# print(viento_media)
# print(temp_media)
# print(prec_media)
# df_bicis_bruto["windMeanSpeed.m.s"][df_bicis_bruto["windMeanSpeed.m.s"].isnull() == True] = viento_media
# df_bicis_bruto
# df_bicis_bruto["windMeanSpeed.m.s"][df_bicis_bruto["windMeanSpeed.m.s"].isnull() == True]
# df_bicis_bruto["temperature.C"][df_bicis_bruto["temperature.C"].isnull() == True] = temp_media
# df_bicis_bruto[df_bicis_bruto["temperature.C"].isnull() == True]
# df_bicis_bruto["precipitation.l.m2"][df_bicis_bruto["precipitation.l.m2"].isnull() == True] = prec_media
# df_bicis_bruto[df_bicis_bruto["precipitation.l.m2"].isnull() == True]
df_bicis_bruto.isnull().sum()
# Fill the gaps in both columns with the mean of "bikes". fillna is used
# instead of `df[col][mask] = value`: that chained form may write into a
# temporary copy (SettingWithCopyWarning) and has no effect at all when pandas
# copy-on-write is active.
media_bikes = df_bicis_bruto["bikes"].mean()
df_bicis_bruto["bikes3h"] = df_bicis_bruto["bikes3h"].fillna(media_bikes)
df_bicis_bruto["bikes"] = df_bicis_bruto["bikes"].fillna(media_bikes)
# Remove, one step at a time, the columns that are not kept as features.
df_bicis_bruto.shape
df_bicis_bruto = df_bicis_bruto.drop(columns="station")
df_bicis_bruto.shape
df_bicis_bruto = df_bicis_bruto.drop(columns=["latitude", "longitude"])
print(df_bicis_bruto.shape)
df_bicis_bruto

# Look at the rows whose dock count is not 20 before dropping that column.
docks_distintos = df_bicis_bruto["numDocks"] != 20
df_bicis_bruto[docks_distintos]
df_bicis_bruto = df_bicis_bruto.drop(columns=["numDocks"])
print(df_bicis_bruto.shape)
df_bicis_bruto

df_bicis_bruto = df_bicis_bruto.drop(columns=["timestamp"])
print(df_bicis_bruto.shape)
df_bicis_bruto

df_bicis_bruto = df_bicis_bruto.drop(columns=["weekhour"])
print(df_bicis_bruto.shape)
df_bicis_bruto

# Save the frame and read it straight back from disk.
ruta_no_nulos = "./Data/Bikes_no_nulos.csv"
df_bicis_bruto.to_csv(ruta_no_nulos, index=False)
df_bicis_bruto = pd.read_csv(ruta_no_nulos)
df_bicis_bruto.info()

# Store the numeric columns as 32-bit integers.
columnas_enteras = ["year", "month", "day", "hour", "isHoliday", "bikes", "bikes3h"]
df_bicis_bruto[columnas_enteras] = df_bicis_bruto[columnas_enteras].astype("int32")
df_bicis_bruto.info()
df_bicis_bruto.dtypes
from sklearn.preprocessing import OrdinalEncoder
# Frequency of each weekday label in the data.
df_bicis_bruto["weekday"].value_counts()
# Encode the weekday names as integers, following the Monday..Sunday order below.
weekorder = ["Monday", "Tuesday", "Wednesday","Thursday", "Friday", "Saturday", "Sunday"]
enc_ord = OrdinalEncoder(categories = [weekorder])
df_bicis_bruto["weekday"] = enc_ord.fit_transform(df_bicis_bruto[["weekday"]]).astype("int32")[:]
enc_ord.categories_
df_bicis_bruto.head(100)
# NOTE(review): the weekday column is dropped right after being encoded, so the
# encoding above never reaches the saved files - confirm this is intended.
df_bicis_bruto.drop(columns=["weekday"],axis=1, inplace=True)
# Save the cleaned frame and read it straight back from disk.
df_bicis_bruto.to_csv("./Data/Bikes_limpios.csv",index=False)
df_bicis_bruto = pd.read_csv("./Data/Bikes_limpios.csv")
# Write three files: the target column alone, the full frame, and the frame
# without the target column.
df_bicis_bruto["bikes3h"].to_csv("./Data/target.csv", index=False)
df_bicis_bruto.to_csv("./Data/Bikes_limpios_con_target.csv",index=False)
df_bicis_bruto.drop(columns=["bikes3h"], axis=1, inplace=True)
df_bicis_bruto.to_csv("./Data/Bikes_limpios_sin_target.csv",index=False)
print(df_bicis_bruto.shape)
df_bicis_bruto.head()
# Reload features and target from the files written above. Note that `df_bicis`
# and `target` are rebound here to the frames read from those CSV files.
df_bicis = pd.read_csv("./Data/Bikes_limpios_sin_target.csv")
target = pd.read_csv("./Data/target.csv")
df_bicis
from sklearn.model_selection import train_test_split
# Hold out 30% of the rows for testing. A fixed random_state makes the split
# the same on every run; without it each run trains and evaluates on a
# different partition, so results cannot be compared between runs.
X_train, X_test, y_train, y_test = train_test_split(
    df_bicis, target, test_size=0.3, random_state=5
)
print(X_train.shape)
print(X_test.shape)
X_train
# Disabled experiment: min-max scaling of the training columns. The code is
# wrapped in a string literal, so none of it is executed.
# NOTE(review): if this is ever re-enabled, the names `max` and `min` would
# shadow the builtins, and only X_train (not X_test) would be scaled.
"""max = X_train['year'].max()
min = X_train['year'].min()
normalized_year=(X_train['year']-min)/(max-min)
X_train["year"] = normalized_year
X_train
max = X_train['day'].max()
min = X_train['day'].min()
normalized_day=(X_train['day']-min)/(max-min)
X_train["day"] = normalized_day
X_train
max = X_train['month'].max()
min = X_train['month'].min()
normalized_month=(X_train['month']-min)/(max-min)
print(normalized_month)
X_train["month"] = normalized_month
max = X_train['hour'].max()
min = X_train['hour'].min()
normalized_hour=(X_train['hour']-min)/(max-min)
print(normalized_hour)
X_train["hour"] = normalized_hour
max = X_train['weekday'].max()
min = X_train['weekday'].min()
normalized_weekday=(X_train['weekday']-min)/(max-min)
X_train["weekday"] = normalized_weekday
max = X_train['bikes'].max()
# min = X_train['bikes'].min()
normalized_bikes=(X_train['bikes'])/(max)
X_train["bikes"] = normalized_bikes
"""
X_train
!pip install tensorflow
import tensorflow as tf
from tensorflow import keras
from keras.layers import Dense
import numpy as np
from keras.models import Sequential
def build_model(capas,epocas,X,y):
    """Build, compile and train a fully connected network.

    Args:
        capas: Sequence with the number of units of each Dense layer, in
            order. The last entry is the output layer.
        epocas: Number of training epochs.
        X: Training features; must be two-dimensional (rows x columns).
        y: Training targets.

    Returns:
        Tuple ``(model, history)``: the trained model and the object
        returned by ``model.fit``.
    """
    # Fixed seeds so that repeated runs start from the same random state.
    np.random.seed(5)
    tf.random.set_seed(5)

    # Take the input width from the data instead of hard-coding 6 columns,
    # so the function keeps working if the feature set changes.
    n_features = np.shape(X)[1]
    ultima = len(capas) - 1

    model = Sequential()
    for posicion, unidades in enumerate(capas):
        # The output layer is linear and every other layer uses ReLU. The
        # last-layer check comes first so a single-layer list also gets a
        # linear output (it used to get ReLU).
        activacion = 'linear' if posicion == ultima else 'relu'
        if posicion == 0:
            model.add(Dense(unidades, activation=activacion, input_dim=n_features))
        else:
            model.add(Dense(unidades, activation=activacion))

    # No optimizer is passed, so the Keras default is used.
    model.compile(loss='mean_squared_error')
    history = model.fit(X, y, epochs=epocas, verbose=False)
    return model, history
# Units per layer for each candidate network.
capas_red1 = [8, 16, 8, 1]
capas_red2 = [8, 16, 8, 4, 1]
capas_red3 = [16, 32, 16, 8, 1]
capas_red4 = [32, 64, 32, 8, 1]
capas_red5 = [64, 128, 64, 8, 1]
capas_red6 = [8, 16, 32, 64, 32, 16, 8, 1]
epocas = 50

# Train every candidate, in order, on the same training split.
arquitecturas = (
    capas_red1,
    capas_red2,
    capas_red3,
    capas_red4,
    capas_red5,
    capas_red6,
)
entrenados = [
    build_model(arquitectura, epocas, X_train, y_train)
    for arquitectura in arquitecturas
]
(
    (modelo1, history1),
    (modelo2, history2),
    (modelo3, history3),
    (modelo4, history4),
    (modelo5, history5),
    (modelo6, history6),
) = entrenados
def plot_metrics(axes,history,title):
    """Draw the loss curve of one training run on ``axes``.

    Args:
        axes: Matplotlib Axes to draw on.
        history: Object whose ``history["loss"]`` holds one loss value per
            epoch (as returned by ``model.fit``).
        title: Label for the plot; shown followed by ``:`` and the final
            loss rounded to two decimals.
    """
    perdidas = history.history["loss"]
    x = np.linspace(0, len(perdidas), len(perdidas))
    axes.set_ylim(0, 40)
    axes.plot(x, perdidas, color="blue", label="loss")
    # Read the final loss from the `history` argument. The previous version
    # read the module-level `histories[index]`, which only matched because of
    # how the calling loop happened to name its variables.
    axes.set_title(str(title) + ":" + str(round(perdidas[-1], 2)))
    axes.set_xlabel("Épocas")
    axes.set_ylabel("loss")
    axes.legend()
    axes.set_facecolor("lightblue")
figura = plt.figure(figsize=(15,8))
# 2x3 grid with one subplot per trained model.
axeses = [figura.add_subplot(2, 3, posicion) for posicion in range(1, 7)]
axes, axes2, axes3, axes4, axes5, axes6 = axeses
capas = [capas_red1,capas_red2,capas_red3,capas_red4,capas_red5,capas_red6]
histories = [history1,history2,history3,history4,history5,history6]

# Pick the model with the lowest final training loss. The search starts from
# infinity rather than a fixed threshold of 100, so `best_model` is defined
# even when every final loss is 100 or more. NaN losses never compare as
# smaller and are skipped; `best_model` stays None only if all of them are NaN.
rank = float("inf")
best_model = None
for numero, candidato in enumerate(histories, start=1):
    perdida_final = candidato.history['loss'][-1]
    if perdida_final < rank:
        rank = perdida_final
        best_model = numero
print("El mejor modelo es el número " + str(best_model))

for index,ax in enumerate(axeses):
    plot_metrics(ax,histories[index],capas[index])
figura.tight_layout()
def day_predict(dia,mes,anyo, datos, modelo=None):
    """Predict the bike count for every row of one calendar day.

    Args:
        dia: Day of the month to select.
        mes: Month to select.
        anyo: Year to select.
        datos: DataFrame with ``day``, ``month``, ``year``, ``hour`` and
            ``bikes`` columns. All columns of the selected rows are passed to
            the model.
        modelo: Object with a ``predict`` method. Defaults to the
            module-level ``modelo3`` so existing calls keep working.

    Returns:
        DataFrame with the ``hour``, ``bikes`` and ``prediction`` columns of
        the selected rows, with predictions rounded to whole numbers. It has
        no rows when nothing in ``datos`` matches the date.
    """
    if modelo is None:
        modelo = modelo3

    seleccion = (datos.day == dia) & (datos.month == mes) & (datos.year == anyo)
    # Copy the selection so that adding the prediction column neither writes
    # into `datos` nor triggers pandas' SettingWithCopyWarning.
    resultado = datos[seleccion].copy()

    if resultado.empty:
        # Nothing to predict: return the expected columns with no rows
        # instead of calling the model on empty input.
        resultado["prediction"] = float("nan")
        return resultado[['hour', 'bikes', 'prediction']]

    # Pass the feature frame itself (not wrapped in nested lists) and flatten
    # the model output to one value per row so it fits a single column.
    prediccion = np.asarray(modelo.predict(resultado)).ravel()
    resultado["prediction"] = np.round(prediccion, 0)
    return resultado[['hour', 'bikes', 'prediction']]
# Load the dataset that does not contain the target column.
df_bicis_limpio = pd.read_csv("./Data/Bikes_limpios_sin_target.csv")
# Cast the date columns to int32, because the dtypes are reset when the file
# is loaded again with pandas.
columnas_fecha = ["year", "month", "day"]
df_bicis_limpio[columnas_fecha] = df_bicis_limpio[columnas_fecha].astype("int32")

# Date to predict.
d_predict = 19
month_predict = 6
year_predict = 2012
res = day_predict(d_predict, month_predict, year_predict, df_bicis_limpio)
print(f'El tamaño del resultado es:{res.shape[0]} filas y {res.shape[1]} columnas.')
res

# Actual vs predicted bike counts for the chosen day.
figura = plt.figure(figsize=(8, 8))
axes = figura.add_subplot()
axes.set_xlabel("Horas del día")
axes.set_ylabel("Número de bicicletas")
hours = list(range(24))
bikes = list(range(21))
axes.xaxis.set_ticks(hours)
axes.yaxis.set_ticks(bikes)
axes.set_ylim(0, 20)
axes.set_title(
    f"Predicción vs real para el día {d_predict}/{month_predict}/{year_predict}"
)
axes.plot(res["hour"], res['bikes'], label="real", color="blue")
axes.plot(res["hour"], res['prediction'], label="predicción", color="green")
axes.set_facecolor("lightblue")
axes.legend()