import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
# Load the shootings dataset; later cells read its 'state', 'city', 'gender',
# 'age', 'armed', 'signs_of_mental_illness' and 'race' columns.
dfShooting = pd.read_csv('shootings.csv')
def count_clean(df: pd.DataFrame, column) -> pd.DataFrame:
    """Count how often each distinct value occurs in one column.

    Args:
        df: DataFrame that contains ``column``.
        column: Label of the column whose values are tallied.

    Returns:
        A DataFrame with a single column named ``counts``, indexed by the
        distinct values of ``df[column]`` and ordered as
        ``Series.value_counts`` orders them (most frequent first).

    Raises:
        KeyError: If ``column`` is not a column of ``df``.
    """
    # Passing name= to to_frame pins the column label to 'counts' regardless
    # of what name value_counts gives its result.
    return df[column].value_counts().to_frame(name='counts')
# Shootings per state; the index of state_df carries the state codes.
state_df = count_clean(dfShooting, 'state')

# One choropleth trace over the US states, shaded by shooting count.
fig = go.Figure(
    data=[
        go.Choropleth(
            locationmode='USA-states',  # interpret `locations` as US state codes
            locations=state_df.index,
            z=state_df['counts'],  # value that drives the colour
            colorscale='Reds',
            marker=dict(line=dict(color='white')),  # white borders between states
            colorbar=dict(title="Numbers of Police Shootings"),
        )
    ]
)

# Title the figure and restrict the map to the USA.
fig.update_layout(
    title=dict(text='2015-2020 US Police Shootings'),
    geo=dict(scope='usa'),
)
fig.show()
# Keep only California rows and tally shootings per city.
dfCA = dfShooting[dfShooting.state == 'CA']
CA_df = count_clean(dfCA, 'city')

# Attach coordinates to each city by matching the city name (the index of
# CA_df) against the 'Location' column of the lookup file. A left join keeps
# every city, so cities missing from the lookup end up with NaN coordinates.
city_df = pd.read_csv('CA cities.csv')
df = CA_df.merge(city_df, left_on=CA_df.index, right_on='Location', how='left')

# dropna() returns a new frame rather than modifying df, so the result has to
# be assigned; the bare call previously here had no effect. Only the two
# plotted coordinate columns are checked, so a gap in some unrelated lookup
# column does not discard a city that can still be drawn.
df = df.dropna(subset=['Latitude', 'Longitude'])

# Heat map of shooting counts centred roughly on California.
fig = px.density_mapbox(df, lat='Latitude', lon='Longitude', z='counts', radius=20,
                        center=dict(lat=35.5, lon=-120), zoom=5,
                        mapbox_style="stamen-terrain")
fig.show()
# Notebook display cells: number of states present, and states by ascending count.
len(state_df)
state_df.sort_values(by='counts')

# Swap the state codes for full state names using the abbreviation lookup.
dfAbb = pd.read_csv('abb.csv')
df = state_df.merge(
    dfAbb,
    how='left',
    left_on=state_df.index,
    right_on='abbreviation',
)
df = df.drop(columns='abbreviation')
df = df.loc[:, ['state full name', 'counts']]

# Last five rows of the table, shown in reverse order.
df.tail().iloc[::-1]
# Frequency of each gender value, and of age split into five equal-width bins.
gender = dfShooting.loc[:, 'gender'].value_counts()
age = dfShooting.loc[:, 'age'].value_counts(bins=5)
(gender, age)

# Frequency of each 'armed' value and of the mental-illness flag.
armed = dfShooting.loc[:, 'armed'].value_counts()
mental = dfShooting.loc[:, 'signs_of_mental_illness'].value_counts()
(armed, mental)
# Work on an explicit copy: assigning columns onto a slice of dfShooting is
# what triggers pandas' SettingWithCopyWarning.
filtered_df = dfShooting[['state', 'race']].copy()

# Total shootings per state, used as the denominator below. Rows with a
# missing race still count towards their state's total.
sum_df = filtered_df['state'].value_counts()

# Shootings per (state, race) pair. groupby skips rows whose state or race is
# missing, which matches counting over a concatenated "state, race" key.
count_df = (
    filtered_df.groupby(['state', 'race'])
    .size()
    .reset_index(name='counts')
)

# Look up each row's state total directly by state code. This avoids relying
# on the column name a merge would derive from the value_counts result.
count_df['sum'] = count_df['state'].map(sum_df)

# Share of a state's shootings attributed to each race.
count_df['percentage'] = count_df['counts'] / count_df['sum']

# Attach each race's share of the population from the lookup file.
dfPercentage = pd.read_csv('US population by race.csv')
final_df = count_df.merge(dfPercentage, left_on='race', right_on='self-identified race', how='left')
final_df = final_df[['state', 'race', 'counts', 'percentage', 'percentage of the population']]

# Sunburst: inner ring = state, outer ring = race sized by its share of that
# state's shootings and coloured by that race's share of the population.
fig = px.sunburst(final_df, path=['state', 'race'], values='percentage',
                  color='percentage of the population', color_continuous_scale='Rdbu_r')
fig.update_layout(autosize=False, width=800, height=600)
fig.show()
/shared-libs/python3.7/py-core/lib/python3.7/site-packages/ipykernel_launcher.py:3: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
/shared-libs/python3.7/py-core/lib/python3.7/site-packages/ipykernel_launcher.py:4: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy