import pandas as pd
# to create spatial data
import geopandas as gpd
# for basemaps
import contextily as ctx
# For spatial statistics
import esda
from esda.moran import Moran, Moran_Local
import splot
from splot.esda import moran_scatterplot, plot_moran, lisa_cluster,plot_moran_simulation
import libpysal as lps
# Graphics
import matplotlib.pyplot as plt
import plotly.express as px
df=pd.read_csv('/work/US_Accidents_Dec20_updated.csv')
df.head()
This chart is empty
Chart was probably not set up properly in the notebook
df.info()
# we will analyze these columns
df=df[['ID','Start_Lat','Start_Lng','City','State','Temperature(F)','Weather_Condition','Sunrise_Sunset']]
df.head()
cities_by_accident = df.City.value_counts()
cities_by_accident
states_by_accident = df.State.value_counts()
states_by_accident
# plot top 20 city and states by accidents
cities_by_accident[:20].plot(kind='bar')
states_by_accident[:20].plot(kind='bar')
US_states=gpd.read_file('/work/us-state-boundaries.geojson')
US_states
US_states.drop(US_states.index[[0,1,18,20,26,36,49]], inplace=True) # drop unnecessary rows
US_states.to_file('/work/US_states.geojson', driver="GeoJSON") # save changed file
US_states=gpd.read_file('/work/US_states.geojson') # read it
US_states.info()
US_states.crs
US_states=US_states.to_crs(epsg=3857)
#plot it
fig, ax=plt.subplots(figsize=(15,15))
US_states.plot(ax=ax,
color='black',
edgecolor='white',
lw=0.5,
alpha=0.4
)
ax.axis('off') #no axis
# add a basemap
ctx.add_basemap(ax,source=ctx.providers.CartoDB.Positron)
# convert accidents .csv into geopandas dataframe
from shapely import geometry
gdf=gpd.GeoDataFrame(df,crs='EPSG:4326',
geometry=gpd.points_from_xy(df.Start_Lng, df.Start_Lat))
gdf=gdf.to_crs(epsg=3857)
gdf.crs
minx, miny, maxx, maxy=gdf.geometry.total_bounds
print(minx)
print(miny)
print(maxx)
print(maxy)
#sub plot for multilayer maps
fig,ax=plt.subplots(1,1, figsize=(15,15))
US_states.plot(ax=ax,
color='black',
edgecolor='white',
alpha=0.5
)
gdf.plot(ax=ax,
color='red',
markersize=1,
alpha=0.2)
# use the bounding box coordinates to set the x and y limits
ax.set_xlim(minx-1000,maxx+1000) # added/substracted value is to give some margin around total bounds
ax.set_ylim(miny-1000,maxy+1000)
ax.axis('off')
# add a basemap
# ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron)
# Do the spatial join
join=gpd.sjoin(gdf,US_states, how='left')
join.head()
accidents_by_states=join.State.value_counts().rename_axis('State').reset_index(name='accidents_count')
accidents_by_states.head()
#bar chart of 20 geographies
accidents_by_states[:20].plot.bar(figsize=(20,4),
x='State',
y='accidents_count')
#rename stusab column name to State
US_states.rename(columns = {'stusab':'State'}, inplace = True)
US_states.head()
## join value counts to the US_states
US_states=US_states.merge(accidents_by_states, on='State')
US_states.head()
US_states.to_file('/work/US_states.geojson', driver="GeoJSON")
US_states=gpd.read_file('/work/US_states.geojson')
US_states.head()
Choropleth map of accidents by states
fig, ax=plt.subplots(figsize=(15,15))
US_states.plot(ax=ax,
column='accidents_count',
legend=True,
alpha=0.8,
cmap='RdYlGn_r',
scheme='quantiles')
ax.axis('off')
ax.set_title('US accidents per state', fontsize=22)
ctx.add_basemap(ax,source=ctx.providers.CartoDB.Positron)