Prerequisites for Understanding this Article
The Standard Operating Workflow as a Data Analytics Professional ⚙️
What is an Integration? 🤷
import time
import pandas as pd

# Benchmark cell: time how long pandas takes to read the CSV file into a
# DataFrame, then report the number of rows that were loaded.
print("Reading Data Via Pandas")

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed intervals; time.time() is wall-clock time and can jump if the system
# clock is adjusted. The banner print above is kept outside the timed region
# so only the read itself is measured.
start_time = time.perf_counter()
citibike_df = pd.read_csv("/work/201306-citibike-tripdata.csv")
end_time = time.perf_counter()

time_diff = end_time - start_time  # elapsed seconds as a float
print("Time to Load Data is: " + str(time_diff))

# Row count of the loaded DataFrame.
print(len(citibike_df))
#!pip install dask
import dask.dataframe as dd

# Benchmark cell: time the dd.read_csv call on the same CSV file, then report
# the row count. Relies on `time` having been imported by an earlier cell.
print("Reading Data Via Dask")

# perf_counter() is a monotonic clock intended for interval timing, unlike the
# wall-clock time.time(). The banner print above stays outside the timed
# region so only the read_csv call is measured.
#
# NOTE(review): per the Dask documentation, dd.read_csv is lazy -- it builds a
# task graph rather than loading the rows. The figure printed below therefore
# most likely excludes the actual file read, which is triggered by len()
# further down. Confirm before comparing it directly with an eager pandas
# read time.
start_time = time.perf_counter()
dask_df = dd.read_csv('/work/201306-citibike-tripdata.csv')
end_time = time.perf_counter()

time_diff = end_time - start_time  # elapsed seconds as a float
print("Time to Load Data is: " + str(time_diff) + " seconds")

# Row count of the Dask DataFrame.
print(len(dask_df))
#!pip install dask
import time
import dask.dataframe as dd

# Benchmark cell: time the dd.read_csv call, report the row count, and show
# the per-column memory usage of the resulting Dask DataFrame.
print("Reading Data Via Dask")

# perf_counter() is a monotonic clock intended for interval timing, unlike the
# wall-clock time.time(). The banner print above stays outside the timed
# region so only the read_csv call is measured.
#
# NOTE(review): per the Dask documentation, dd.read_csv is lazy -- it builds a
# task graph rather than loading the rows. The figure printed below therefore
# most likely excludes the actual file read, which is triggered by len()
# further down. Confirm before comparing it directly with an eager pandas
# read time.
start_time = time.perf_counter()
dask_df = dd.read_csv('/work/201306-citibike-tripdata.csv')
end_time = time.perf_counter()

time_diff = end_time - start_time  # elapsed seconds as a float
print("Time to Load Data is: " + str(time_diff) + " seconds")

# Row count of the Dask DataFrame.
print(len(dask_df))

# memory_usage() on a Dask DataFrame returns a lazy Series; compute() is
# needed to obtain the actual per-column byte counts. As the final expression
# of a notebook cell the computed result is what gets displayed.
dask_df.memory_usage().compute()
Visualising your SQL on-the-go with SQL Cells, Plotly and Viz Cells 📈
SQL Cells on-the-go 🏃
-- Row counts per user type and per hour of the trip start time,
-- queried directly from the CSV file.
-- Sorted by user type, then by ascending row count.
SELECT
    COUNT(*)        AS "Number_Of_Trips",
    "usertype"      AS "Customer",
    HOUR(starttime) AS "Hour"
FROM '/work/201306-citibike-tripdata.csv'
GROUP BY
    "Hour",
    "Customer"
ORDER BY
    "Customer" ASC,
    "Number_Of_Trips" ASC
-- Row counts per user type and per day name of the trip start time,
-- queried directly from the CSV file.
-- Sorted by user type, then by ascending row count.
SELECT
    COUNT(*)           AS "Number_Of_Trips",
    "usertype"         AS "Customer",
    DAYNAME(starttime) AS "Day"
FROM '/work/201306-citibike-tripdata.csv'
GROUP BY
    "Day",
    "Customer"
ORDER BY
    "Customer" ASC,
    "Number_Of_Trips" ASC