import geopandas as gdp
import pysal
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# from sklearn.cluster import DBSCAN
db = pd.read_csv('faults_clean.csv')
red_faults= db.loc[db["LAMP_STATUS"]==1, ['LONGITUDE','LATITUDE','MESSAGE_DATE_TIME','VEHICLE_ID','RED_LAMP_STATUS']]
# Convert a dataframe to a geodataframe
red= geopandas.GeoDataFrame(red_faults,
geometry=geopandas.points_from_xy(red_faults.LONGITUDE,red_faults.LATITUDE),
crs='EPSG:4269')
yellow= db.loc[db["LAMP_STATUS"]==2, ['LONGITUDE','LATITUDE','MESSAGE_DATE_TIME']]
yellow.info()
db_faults=db[['VEHICLE_ID', 'LONGITUDE','LATITUDE','LAMP_STATUS']]
db_city= pd.get_dummies(db_faults, prefix='STATUS', columns=['LAMP_STATUS'])
# Convert a de point dataframe to a geodataframe
xy_faults = geopandas.GeoDataFrame(db_faults,
geometry=geopandas.points_from_xy(db_city.LONGITUDE,db_city.LATITUDE),
crs='EPSG:4269')
xy_faults.head()
# sns.reset_defaults()
#import the shape file of US states into a gdf
us49 = geopandas.read_file('us49/us_49.shp')
fig = plt.figure(figsize=(10, 6))
ax = us49.plot(facecolor='LightGray', edgecolor='k', linewidth=.2)
x, y = red_faults['LONGITUDE'].values, red_faults['LATITUDE'].values
ax.scatter(x,y, color='red', alpha=0.7, s=0.5)
plt.show
import contextily
#convert the data to web mercartor projection
red_wm = red.to_crs(epsg=3857)
ax = red_wm.plot(color='red', markersize=0.5)
contextily.add_basemap(ax);
joint_axes = sns.jointplot(
x='LONGITUDE', y='LATITUDE', data=red_faults, s=0.5
)
contextily.add_basemap(
joint_axes.ax_joint,
crs="EPSG:4326",
source=contextily.providers.CartoDB.PositronNoLabels
);
Creating interactive maps with hvplot
import hvplot.pandas
import geoviews as gv
from cartopy import crs
#convert the data to web mercartor projection
xyfaults_wm = xy_faults.to_crs(epsg=3857)
fig = plt.figure(figsize=(10, 6))
ax = us49.plot(facecolor='LightGray', edgecolor='k', linewidth=.2)
x, y = yellow['LONGITUDE'].values, yellow['LATITUDE'].values
ax.scatter(x,y, color='Gold', alpha=0.7, s=0.5)
plt.show
uscities = geopandas.read_file('cb_2020_us_county_500k/cb_2020_us_county_500k.shp')
notstate=['Hawaii', 'United States Virgin Islands',
'Commonwealth of the Northern Mariana Islands', 'Guam',
'American Samoa', 'Puerto Rico','Alaska']
us_cities= uscities[~uscities.STATE_NAME.isin(notstate)]
us_cities.shape
# GeoPandas Spatial join Points to polygons
sj_faults = geopandas.sjoin(xy_faults,us_cities,how='inner', predicate='intersects',)
#GroupBy to count red alarms per county
pivot_faults=pd.pivot_table(sj_faults,
index='GEOID',
columns='LAMP_STATUS',
aggfunc={'LAMP_STATUS':len})
pivot_faults.columns = pivot_faults.columns.droplevel()
# Merging the data back
county_faults = us_cities.merge(pivot_faults, how='left', on='GEOID')
# fill in missing's with 0, valid in this case.
county_faults.fillna(0, inplace=True)
# Rename the columns just to make it easier to understand
county_faults= county_faults.rename(columns = {0:'Other',1:'Major',
2:'Minor', 3:'Exaust',
4:'Non_electric'})
# Creating a column that sums all the faults
county_faults['SUM_ALARM'] = county_faults[['Other','Major','Minor','Exaust', 'Non_electric']].sum(axis=1)
county_faults.shape
county_faults.plot(
column='SUM_ALARM',
figsize=(16, 9),
scheme='Quantiles',
k=10,
cmap='viridis_r',
legend=False,
)
plt.title('COUNT OF ALARMS IN US COUNTIES');
county_faults.plot(
column='Major',
figsize=(16, 9),
cmap='viridis_r',
legend=True,
)
plt.title('COUNT OF RED FAULT ALARMS IN US COUNTIES');
# plotting a ECDF graph
import numpy as np
x = np.sort(county_faults['SUM_ALARM'])
y = np.arange(1, len(x)+1) / len(x)
_ = plt.plot(x, y, marker='.', linestyle='none')
_ = plt.xlabel('Count of faults per county')
_ = plt.ylabel('ECDF')
plt.margins(0.02) # Keeps data off plot edges
plt.show()
#removing the island from the geodataframe
island= [1277,1460,2330]
county_faults.loc[[1277,1460,2330]]
gdf = county_faults.drop(labels=island, axis=0)
from libpysal.weights.contiguity import Queen
import os
import splot
y = gdf['SUM_ALARM'].values
w = Queen.from_dataframe(gdf)
w.transform = 'r'
y_r = gdf['Major'].values
from splot.libpysal import plot_spatial_weights
plot_spatial_weights(w, gdf);
from esda.moran import Moran
# w = Queen.from_dataframe(gdf)
moran = Moran(y, w)
moran.I
# Now for the REd Alarm faults
moran_r = Moran(y_r, w)
moran_r.I
from splot.esda import plot_moran
#
plot_moran(moran, zstandard=True, figsize=(10,4))
plt.show()
# Calculate if our observed value is statistically significant:
moran.p_sim
from splot.esda import moran_scatterplot
from esda.moran import Moran_Local
# calculate Moran_Local and plot
moran_loc = Moran_Local(y, w)
fig, ax = moran_scatterplot(moran_loc, p=0.05)
ax.set_xlabel('Faults')
ax.set_ylabel('Spatial Lag of faults')
plt.show()
moran_loc.q
# Counting the number of polygons with p value lower than 0.5
(moran_loc.p_sim < 0.05).sum()
# that's the most simple way of visualising, but you can't customize
from splot.esda import lisa_cluster
lisa_cluster(moran_loc, gdf, p=0.05, figsize = (9,9))
plt.show()
#Change the limit of p to 0.1
lisa_cluster(moran_loc, gdf, p=0.10, figsize = (9,9))
plt.show()
from splot.esda import plot_local_autocorrelation
plot_local_autocorrelation(moran_loc, gdf, 'SUM_ALARM', quadrant=1)
plt.show()