Does temperature affect crimes being committed?
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime as dt
import seaborn as sns
# Load the weather observations; DATE is stored as day/month/year text.
weather = pd.read_csv('san_fran_weather.csv')
weather['DATE'] = pd.to_datetime(weather['DATE'], format='%d/%m/%Y')
weather.info()
weather.head()

# Keep only rows dated 2018 or later; pandas parses the string '2018'
# into a timestamp before comparing it with the datetime column.
from_2018 = weather['DATE'] >= '2018'
weather_new = weather.loc[from_2018]
weather_new.info()
weather_new.tail()
# comment: just curious, why after 2018?
# Load the crime records.
crimes = pd.read_csv('san_fran_crime_18_21.csv')
crimes.info()

# 'Incident Date' is day/month/year text; convert it to datetime so it can be
# matched against the weather DATE column later on.
crimes['Incident Date'] = pd.to_datetime(
    crimes['Incident Date'], format='%d/%m/%Y'
)
crimes.sort_values(by='Incident Date').tail()

# Count rows per 'Incident Date' and expose the count as a 'crimes' column,
# indexed by date.
crimes_grouped = crimes.groupby('Incident Date').size()
crimes_df = crimes_grouped.to_frame(name='crimes')
crimes_df.head()
# Attach the daily crime count to every weather row. A left join keeps weather
# days that have no matching 'Incident Date'; those rows get NaN in 'crimes'.
weath_crim = pd.merge(
    weather_new,
    crimes_df,
    how='left',
    left_on='DATE',
    right_on='Incident Date',
)
weath_crim.info()
# comment: well done!
weath_crim.head()

# Report how many weather days ended up without a crime count.
number_of_missing_fin = weath_crim['crimes'].isna().sum()
print(number_of_missing_fin)
# comment: nice check on NaN values!
# Headline figures from the merged frame: mean and maximum of the daily
# average temperature (TAVG) and the mean daily crime count.
tavg_col = weath_crim['TAVG']
crimes_col = weath_crim['crimes']

average_crime = crimes_col.mean()
average_temp = tavg_col.mean()
max_temp = tavg_col.max()

for label, value in (
    ('The average # of crimes in SF is ', average_crime),
    ('The average temperature in SF is ', average_temp),
    ('The max average temp in SF over the 3 years is ', max_temp),
):
    print(label, value)
# comment: why not use TAVG as mean temperature instead of TMAX?
# this part may be unnecessary for
# Scatter of daily crime count against average temperature, with a fitted
# regression line drawn by seaborn.
sns.regplot(x='TAVG', y='crimes', data=weath_crim)
plt.show()
# comment: why use TMAX instead of TAVG hahaha?
# the graph does not show a very strong positive correlation
!pip install statsmodels
import statsmodels.api as sm
# Fit and summarize an OLS model of daily crime count on average temperature.
# - 'crimes' is the response (endog) and TAVG the predictor (exog), which is
#   the direction the question "does temperature affect crime?" asks about and
#   the same pair of columns the regplot above uses.
# - sm.OLS does not add an intercept on its own, so a constant column is added
#   explicitly; without it the line is forced through the origin.
# - The left merge leaves NaN in 'crimes' for days with no matching record;
#   missing='drop' removes those rows instead of feeding NaN into the fit.
exog = sm.add_constant(weath_crim['TAVG'])
mod = sm.OLS(weath_crim['crimes'], exog, missing='drop')
res = mod.fit()
# alpha=0.01 makes the summary report 99% confidence intervals.
print(res.summary(alpha=0.01))
# Comment: Generally very well done! :)