import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import glob
import os
import datetime
# Aggregate every trip CSV under data/ into one table of hourly trip counts.
#
# Each file is reduced to one row per (month, day, hour) holding the number
# of non-null start times in that hour; the per-file tables are then stacked
# into `correct_frame` (columns: month, day, hour, starttime).
frames = []
# glob gives no ordering guarantee, so sort the paths: this keeps the row
# order of `correct_frame` and the "last file" summary below reproducible.
for csv_path in sorted(glob.glob('data/*.csv')):
    data = pd.read_csv(csv_path)
    data["starttime"] = pd.to_datetime(data["starttime"])
    # Vectorised datetime accessors instead of a Python lambda per row.
    start_parts = data["starttime"].dt
    data["hour"] = start_parts.hour
    data["day"] = start_parts.day
    data["month"] = start_parts.month
    data_grouped = data.groupby(["month", "day", "hour"])["starttime"].count().reset_index()
    frames.append(data_grouped)

if not frames:
    # Fail with a clear message instead of an obscure concat error.
    raise FileNotFoundError("No CSV files found matching 'data/*.csv'")

correct_frame = pd.concat(frames, ignore_index=True)
print(len(correct_frame))
# `data` is the last file read by the loop, so these figures describe that
# single file (including the three helper columns added above).
print(f"Number of samples: {len(data)}")
print(f"Number of variables: {len(data.columns)}")
# Bare expression: only displays something in an interactive session.
data.head()
# Build one timestamp per row from the month/day/hour columns and use it as
# the index. pd.to_datetime assembles the whole column in one vectorised call
# instead of constructing a datetime object row by row.
# NOTE(review): the year is hard-coded to 2014 — confirm that data/ only
# ever holds 2014 files.
correct_frame["date"] = pd.to_datetime(
    correct_frame[["month", "day", "hour"]].assign(year=2014)
)
# Sort chronologically: the rows arrive in file-read order, while the line
# plots and positional slices further down expect time order.
correct_frame = correct_frame.set_index("date").sort_index()
# Overview figure: the full hourly trip-count series against its date index.
hourly_trip_counts = correct_frame["starttime"]
plt.figure(figsize=(10, 6))
plt.plot(hourly_trip_counts)
plt.xlabel("Date")
plt.ylabel("Number of trips")
plt.title("Hourly trips for 2014")
# Split out the two months that are compared below; both names are reused by
# later plots.
december = correct_frame[correct_frame["month"] == 12]
june = correct_frame[correct_frame["month"] == 6]

# One figure, two stacked panels: December on top, June underneath.
plt.figure(figsize=(10, 6))
monthly_panels = (
    (211, december, "Hourly bike trips in december 2014"),
    (212, june, "Hourly bike trips in June 2014"),
)
for position, month_frame, heading in monthly_panels:
    plt.subplot(position)
    plt.plot(month_frame["starttime"])
    # Tilt the date tick labels so they do not overlap.
    plt.xticks(rotation=70)
    plt.title(heading)
    plt.xlabel("Date")
    plt.ylabel("Bike trips")
plt.tight_layout()
# Hourly trips for 1-7 June.
# The week is selected by calendar day, not by taking the first 7*24 rows:
# an hour with no trips has no row at all, so a positional slice would
# silently run into the following days (and it also relies on row order).
first_week_of_june = june.loc[june["day"] <= 7, "starttime"].sort_index()
plt.figure(figsize=(10, 6))
plt.plot(first_week_of_june)
plt.title("Hourly bike trips for first week of june 2014")
plt.xlabel("Date")
plt.ylabel("Bike trips")
# Compare the hourly profile of 1 June (weekend) with 2 June (weekday).
# Each day is picked by its `day` value and ordered by hour instead of
# slicing rows 0-23 / 24-47, which only matches the intended days when the
# rows are in time order and no hour of either day is missing.
plt.figure(figsize=(10, 6))
daily_panels = (
    (211, 1, "Hourly bike trips on 01.06.2014 (Weekend)"),
    (212, 2, "Hourly bike trips on 02.06.2014 (Weekday)"),
)
for position, day_of_month, heading in daily_panels:
    single_day = june[june["day"] == day_of_month].sort_values("hour")
    plt.subplot(position)
    plt.plot(single_day["hour"], single_day["starttime"])
    plt.title(heading)
    plt.xlabel("Hour")
    plt.ylabel("Bike trips")
    # Fix the axis to the full day so both panels are directly comparable.
    plt.xlim([0, 23])
plt.tight_layout()
# Hourly trips on 25 December.
christmas = december[december["day"] == 25]
# Open a dedicated figure first: without one, pandas may draw onto whatever
# axes are currently active (here the last subplot of the previous figure).
plt.figure(figsize=(10, 6))
christmas["starttime"].plot()
# Title and axis labels follow the convention of the other figures.
plt.title("Hourly bike trips on 25.12.2014")
plt.xlabel("Date")
plt.ylabel("Bike trips")