import geopandas as gdp
import pysal
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# from sklearn.cluster import DBSCAN
db = pd.read_csv('faults_clean.csv')
/var/folders/gc/gy5k76fn1cn487595g52tlzr0000gn/T/ipykernel_99685/3551357135.py:1: DtypeWarning: Columns (12,13) have mixed types. Specify dtype option on import or set low_memory=False.
db = pd.read_csv('faults_clean.csv')
red_faults= db.loc[db["LAMP_STATUS"]==1, ['LONGITUDE','LATITUDE','MESSAGE_DATE_TIME','VEHICLE_ID','RED_LAMP_STATUS']]
# Convert a dataframe to a geodataframe
red= geopandas.GeoDataFrame(red_faults,
geometry=geopandas.points_from_xy(red_faults.LONGITUDE,red_faults.LATITUDE),
crs='EPSG:4269')
yellow= db.loc[db["LAMP_STATUS"]==2, ['LONGITUDE','LATITUDE','MESSAGE_DATE_TIME']]
yellow.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 577862 entries, 3 to 3806561
Data columns (total 3 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 LONGITUDE 577862 non-null float64
1 LATITUDE 577862 non-null float64
2 MESSAGE_DATE_TIME 577862 non-null object
dtypes: float64(2), object(1)
memory usage: 17.6+ MB
db_faults=db[['VEHICLE_ID', 'LONGITUDE','LATITUDE','LAMP_STATUS']]
db_city= pd.get_dummies(db_faults, prefix='STATUS', columns=['LAMP_STATUS'])
# Convert a de point dataframe to a geodataframe
xy_faults = geopandas.GeoDataFrame(db_faults,
geometry=geopandas.points_from_xy(db_city.LONGITUDE,db_city.LATITUDE),
crs='EPSG:4269')
xy_faults.head()
# sns.reset_defaults()
#import the shape file of US states into a gdf
us49 = geopandas.read_file('us49/us_49.shp')
Execution error
NameError: name 'geopandas' is not defined
fig = plt.figure(figsize=(10, 6))
ax = us49.plot(facecolor='LightGray', edgecolor='k', linewidth=.2)
x, y = red_faults['LONGITUDE'].values, red_faults['LATITUDE'].values
ax.scatter(x,y, color='red', alpha=0.7, s=0.5)
plt.show
import contextily
Execution error
ModuleNotFoundError: No module named 'contextily'
#convert the data to web mercartor projection
red_wm = red.to_crs(epsg=3857)
ax = red_wm.plot(color='red', markersize=0.5)
contextily.add_basemap(ax);
joint_axes = sns.jointplot(
x='LONGITUDE', y='LATITUDE', data=red_faults, s=0.5
)
contextily.add_basemap(
joint_axes.ax_joint,
crs="EPSG:4326",
source=contextily.providers.CartoDB.PositronNoLabels
);
Creating interactive maps with hvplot
import hvplot.pandas
import geoviews as gv
from cartopy import crs
Execution error
ModuleNotFoundError: No module named 'hvplot'
#convert the data to web mercartor projection
xyfaults_wm = xy_faults.to_crs(epsg=3857)
fig = plt.figure(figsize=(10, 6))
ax = us49.plot(facecolor='LightGray', edgecolor='k', linewidth=.2)
x, y = yellow['LONGITUDE'].values, yellow['LATITUDE'].values
ax.scatter(x,y, color='Gold', alpha=0.7, s=0.5)
plt.show
Execution error
NameError: name 'us49' is not defined
uscities = geopandas.read_file('cb_2020_us_county_500k/cb_2020_us_county_500k.shp')
notstate=['Hawaii', 'United States Virgin Islands',
'Commonwealth of the Northern Mariana Islands', 'Guam',
'American Samoa', 'Puerto Rico','Alaska']
us_cities= uscities[~uscities.STATE_NAME.isin(notstate)]
us_cities.shape
# GeoPandas Spatial join Points to polygons
sj_faults = geopandas.sjoin(xy_faults,us_cities,how='inner', predicate='intersects',)
#GroupBy to count red alarms per county
pivot_faults=pd.pivot_table(sj_faults,
index='GEOID',
columns='LAMP_STATUS',
aggfunc={'LAMP_STATUS':len})
pivot_faults.columns = pivot_faults.columns.droplevel()
# Merging the data back
county_faults = us_cities.merge(pivot_faults, how='left', on='GEOID')
# fill in missing's with 0, valid in this case.
county_faults.fillna(0, inplace=True)
# Rename the columns just to make it easier to understand
county_faults= county_faults.rename(columns = {0:'Other',1:'Major',
2:'Minor', 3:'Exaust',
4:'Non_electric'})
# Creating a column that sums all the faults
county_faults['SUM_ALARM'] = county_faults[['Other','Major','Minor','Exaust', 'Non_electric']].sum(axis=1)
county_faults.shape
county_faults.plot(
column='SUM_ALARM',
figsize=(16, 9),
scheme='Quantiles',
k=10,
cmap='viridis_r',
legend=False,
)
plt.title('COUNT OF ALARMS IN US COUNTIES');
county_faults.plot(
column='Major',
figsize=(16, 9),
cmap='viridis_r',
legend=True,
)
plt.title('COUNT OF RED FAULT ALARMS IN US COUNTIES');
# plotting a ECDF graph
import numpy as np
x = np.sort(county_faults['SUM_ALARM'])
y = np.arange(1, len(x)+1) / len(x)
_ = plt.plot(x, y, marker='.', linestyle='none')
_ = plt.xlabel('Count of faults per county')
_ = plt.ylabel('ECDF')
plt.margins(0.02) # Keeps data off plot edges
plt.show()
#removing the island from the geodataframe
island= [1277,1460,2330]
county_faults.loc[[1277,1460,2330]]
gdf = county_faults.drop(labels=island, axis=0)
from libpysal.weights.contiguity import Queen
import os
import splot
y = gdf['SUM_ALARM'].values
w = Queen.from_dataframe(gdf)
w.transform = 'r'
y_r = gdf['Major'].values
from splot.libpysal import plot_spatial_weights
plot_spatial_weights(w, gdf);
/Users/iraci/opt/anaconda3/envs/geo_env/lib/python3.9/site-packages/splot/_viz_libpysal_mpl.py:115: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
centroids_shp = gdf.centroid.values
/Users/iraci/opt/anaconda3/envs/geo_env/lib/python3.9/site-packages/splot/_viz_libpysal_mpl.py:154: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
gdf.centroid.plot(ax=ax, **node_kws)
from esda.moran import Moran
# w = Queen.from_dataframe(gdf)
moran = Moran(y, w)
moran.I
# Now for the REd Alarm faults
moran_r = Moran(y_r, w)
moran_r.I
from splot.esda import plot_moran
#
plot_moran(moran, zstandard=True, figsize=(10,4))
plt.show()
# Calculate if our observed value is statistically significant:
moran.p_sim
from splot.esda import moran_scatterplot
from esda.moran import Moran_Local
# calculate Moran_Local and plot
moran_loc = Moran_Local(y, w)
fig, ax = moran_scatterplot(moran_loc, p=0.05)
ax.set_xlabel('Faults')
ax.set_ylabel('Spatial Lag of faults')
plt.show()
moran_loc.q
# Counting the number of polygons with p value lower than 0.5
(moran_loc.p_sim < 0.05).sum()
# that's the most simple way of visualising, but you can't customize
from splot.esda import lisa_cluster
lisa_cluster(moran_loc, gdf, p=0.05, figsize = (9,9))
plt.show()
#Change the limit of p to 0.1
lisa_cluster(moran_loc, gdf, p=0.10, figsize = (9,9))
plt.show()
from splot.esda import plot_local_autocorrelation
plot_local_autocorrelation(moran_loc, gdf, 'SUM_ALARM', quadrant=1)
plt.show()