!pip install ipywidgets
Requirement already satisfied: ipywidgets in /root/venv/lib/python3.7/site-packages (7.6.5)
Requirement already satisfied: nbformat>=4.2.0 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipywidgets) (5.1.3)
Requirement already satisfied: ipykernel>=4.5.1 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipywidgets) (5.5.5)
Requirement already satisfied: jupyterlab-widgets>=1.0.0; python_version >= "3.6" in /root/venv/lib/python3.7/site-packages (from ipywidgets) (1.0.2)
Requirement already satisfied: ipython>=4.0.0; python_version >= "3.3" in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipywidgets) (7.29.0)
Requirement already satisfied: widgetsnbextension~=3.5.0 in /root/venv/lib/python3.7/site-packages (from ipywidgets) (3.5.2)
Requirement already satisfied: ipython-genutils~=0.2.0 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipywidgets) (0.2.0)
Requirement already satisfied: traitlets>=4.3.1 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipywidgets) (4.3.3)
Requirement already satisfied: jupyter-core in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from nbformat>=4.2.0->ipywidgets) (4.7.1)
Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from nbformat>=4.2.0->ipywidgets) (3.2.0)
Requirement already satisfied: jupyter-client in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipykernel>=4.5.1->ipywidgets) (6.1.12)
Requirement already satisfied: tornado>=4.2 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipykernel>=4.5.1->ipywidgets) (6.1)
Requirement already satisfied: decorator in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (5.1.0)
Requirement already satisfied: jedi>=0.16 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (0.17.2)
Requirement already satisfied: pexpect>4.3; sys_platform != "win32" in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (4.8.0)
Requirement already satisfied: backcall in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (0.2.0)
Requirement already satisfied: matplotlib-inline in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (0.1.3)
Requirement already satisfied: setuptools>=18.5 in /root/venv/lib/python3.7/site-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (47.1.0)
Requirement already satisfied: pickleshare in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (0.7.5)
Requirement already satisfied: pygments in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (2.10.0)
Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (3.0.22)
Requirement already satisfied: notebook>=4.4.1 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from widgetsnbextension~=3.5.0->ipywidgets) (6.3.0)
Requirement already satisfied: six in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from traitlets>=4.3.1->ipywidgets) (1.16.0)
Requirement already satisfied: importlib-metadata; python_version < "3.8" in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets) (4.8.2)
Requirement already satisfied: pyrsistent>=0.14.0 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets) (0.18.0)
Requirement already satisfied: attrs>=17.4.0 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets) (21.2.0)
Requirement already satisfied: python-dateutil>=2.1 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from jupyter-client->ipykernel>=4.5.1->ipywidgets) (2.8.2)
Requirement already satisfied: pyzmq>=13 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from jupyter-client->ipykernel>=4.5.1->ipywidgets) (22.3.0)
Requirement already satisfied: parso<0.8.0,>=0.7.0 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from jedi>=0.16->ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (0.7.1)
Requirement already satisfied: ptyprocess>=0.5 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from pexpect>4.3; sys_platform != "win32"->ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (0.7.0)
Requirement already satisfied: wcwidth in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (0.2.5)
Requirement already satisfied: Send2Trash>=1.5.0 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (1.8.0)
Requirement already satisfied: prometheus-client in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.12.0)
Requirement already satisfied: terminado>=0.8.3 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.12.1)
Requirement already satisfied: nbconvert==6.0.7 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (6.0.7)
Requirement already satisfied: jinja2 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (3.0.3)
Requirement already satisfied: argon2-cffi in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (21.1.0)
Requirement already satisfied: zipp>=0.5 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from importlib-metadata; python_version < "3.8"->jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets) (3.6.0)
Requirement already satisfied: typing-extensions>=3.6.4; python_version < "3.8" in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from importlib-metadata; python_version < "3.8"->jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets) (3.10.0.2)
Requirement already satisfied: mistune<2,>=0.8.1 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from nbconvert==6.0.7->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.8.4)
Requirement already satisfied: jupyterlab-pygments in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from nbconvert==6.0.7->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.1.2)
Requirement already satisfied: nbclient<0.6.0,>=0.5.0 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from nbconvert==6.0.7->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.5.8)
Requirement already satisfied: defusedxml in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from nbconvert==6.0.7->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.7.1)
Requirement already satisfied: testpath in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from nbconvert==6.0.7->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.5.0)
Requirement already satisfied: pandocfilters>=1.4.1 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from nbconvert==6.0.7->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (1.5.0)
Requirement already satisfied: bleach in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from nbconvert==6.0.7->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (4.1.0)
Requirement already satisfied: entrypoints>=0.2.2 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from nbconvert==6.0.7->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.3)
Requirement already satisfied: MarkupSafe>=2.0 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from jinja2->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (2.0.1)
Requirement already satisfied: cffi>=1.0.0 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (1.15.0)
Requirement already satisfied: nest-asyncio in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from nbclient<0.6.0,>=0.5.0->nbconvert==6.0.7->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (1.5.1)
Requirement already satisfied: webencodings in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from bleach->nbconvert==6.0.7->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.5.1)
Requirement already satisfied: packaging in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from bleach->nbconvert==6.0.7->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (21.2)
Requirement already satisfied: pycparser in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from cffi>=1.0.0->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (2.21)
Requirement already satisfied: pyparsing<3,>=2.0.2 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from packaging->bleach->nbconvert==6.0.7->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (2.4.7)
WARNING: You are using pip version 20.1.1; however, version 21.3.1 is available.
You should consider upgrading via the '/root/venv/bin/python -m pip install --upgrade pip' command.
! pip install graphviz
Requirement already satisfied: graphviz in /root/venv/lib/python3.7/site-packages (0.19.1)
WARNING: You are using pip version 20.1.1; however, version 21.3.1 is available.
You should consider upgrading via the '/root/venv/bin/python -m pip install --upgrade pip' command.
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import beta, binom
import itertools
from ipywidgets import interact, interactive
import pymc3 as pm
pm.__version__
EDA
# Load the current fatal-police-shootings dataset from the Washington Post GitHub repository.
data = pd.read_csv('https://raw.githubusercontent.com/washingtonpost/data-police-shootings/master/fatal-police-shootings-data.csv')
data.head()
# Number of records per race code (rows with a missing race are dropped by groupby).
counts_by_race = data[['race']].groupby('race').size()
counts_by_race
# O:'Other', N: 'Native American', A: 'Asian', H: 'Hispanic', B: 'Black, non-Hispanic', W: 'White, non-Hispanic'
# x and y are passed by keyword: positional x/y is deprecated in seaborn 0.11
# and rejected outright by later releases.
sns.barplot(x=counts_by_race.index,
            y=counts_by_race.values,
            order=['O', 'N', 'A', 'H', 'B', 'W'],
            palette='Blues_d')
plt.xlabel('Race')
plt.ylabel('# Of Police Shootings')
plt.title('Number of Police Shootings by Race')
plt.savefig('1.jpeg')
# Keep only the rows where both the armed status and the race are recorded.
armed_race_data = data[['armed', 'race']].dropna()
# Race codes present after dropping nulls.
races = list(armed_race_data['race'].unique())
# Collapse the detailed weapon descriptions to 'armed' / 'unarmed' BEFORE
# grouping, so every (race, armed) pair ends up as exactly one row holding the
# total count. Grouping first leaves one row per weapon type, and a bar plot
# of that frame shows a mean per weapon type instead of the total.
armed_race_data = armed_race_data.assign(
    armed=armed_race_data['armed'].where(armed_race_data['armed'] == 'unarmed', 'armed')
)
# Count records per (race, armed) pair and expose the count as a 'count' column.
armed_race_data = (
    armed_race_data
    .groupby(['race', 'armed'])
    .size()
    .sort_index(ascending=False)
    .reset_index(name='count')
)
armed_race_data.head(3)
# Race codes -> O: 'Other', N: 'Native American', A: 'Asian', H: 'Hispanic',
# B: 'Black, non-Hispanic', W: 'White, non-Hispanic'
# Grouped bar chart of counts per race, split by armed status, saved as 2.jpeg.
armed_ax = sns.barplot(
    data=armed_race_data,
    x='race',
    y='count',
    hue='armed',
    order=['O', 'N', 'A', 'H', 'B', 'W'],
    palette='Blues_d',
)
armed_ax.set_xlabel('Race')
armed_ax.set_ylabel('# Of Police Shootings')
armed_ax.set_title('Number of Police Shootings by \nRace and Armed Status')
armed_ax.figure.savefig('2.jpeg')
# Work on an explicit copy so the column assignment below does not write into
# a slice of `data` (which triggers pandas' SettingWithCopyWarning).
deaths_by_month = data[['date']].copy()
# Reduce every date string to its first 'YYYY-MM' occurrence (vectorised).
deaths_by_month['date'] = deaths_by_month['date'].str.extract(r'(\d{4}-\d{2})', expand=False)
# One row per month with the number of records in that month.
deaths_by_month = deaths_by_month.groupby('date').size().reset_index(name='deaths')
deaths_by_month.head(3)
# Scatter plot of the monthly counts, saved as 3.jpeg.
plot = sns.scatterplot(data=deaths_by_month, x='date', y='deaths')
# Hide every x tick label except each 12th one.
for position, tick_label in enumerate(plot.xaxis.get_ticklabels()):
    if position % 12 != 0:
        tick_label.set_visible(False)
plot.set_xlabel('Year-Month')
plot.set_ylabel('# Deaths per month')
plot.set_title('Number of Police Shootings by Month')
plot.figure.savefig('3.jpeg')
def by_quarter(date):
    '''
    Map a date string to its calendar quarter label
    (not as crowded as months and more precise than years).

    date: a string containing a date in the form YYYY-MM-DD (only the
          leading YYYY-MM part is used)
    Returns a string such as '2015-Q1'.
    Raises ValueError if no YYYY-MM pattern is found or the month is not 1-12.
    '''
    # Parse year and month in one pass so both come from the same match.
    parsed = re.search(r'(\d{4})-(\d{2})', date)
    if parsed is None:
        raise ValueError(f'no YYYY-MM pattern found in {date!r}')
    year = parsed.group(1)
    month = int(parsed.group(2))
    # Fail loudly instead of silently returning None for an impossible month.
    if not 1 <= month <= 12:
        raise ValueError(f'month out of range in {date!r}')
    # Months 1-3 -> Q1, 4-6 -> Q2, 7-9 -> Q3, 10-12 -> Q4.
    quarter = (month - 1) // 3 + 1
    return f'{year}-Q{quarter}'
# NOTE: the variable keeps its historical name but holds QUARTERLY counts from here on.
# Work on an explicit copy so the column assignment does not write into a slice of `data`.
deaths_by_month = data[['date', 'race']].copy()
# Replace each date string by its quarter label.
deaths_by_month['date'] = deaths_by_month['date'].map(by_quarter)
# One row per (quarter, race) with the number of records; a single
# reset_index is enough (a second one only adds a stray 'index' column).
deaths_by_month = deaths_by_month.groupby(['date', 'race']).size().reset_index(name='deaths')
deaths_by_month.head(3)
# O:'Other', N: 'Native American', A: 'Asian', H: 'Hispanic', B: 'Black, non-Hispanic', W: 'White, non-Hispanic'
# Line plot of the quarterly counts with one line per race, saved as 4.jpeg.
plot = sns.lineplot(x='date', y='deaths', hue='race', data=deaths_by_month)
# The x axis holds one tick per quarter, so keep every 4th label (one per
# year); the stride of 12 only made sense for the monthly plot.
for position, tick_label in enumerate(plot.xaxis.get_ticklabels()):
    if position % 4 != 0:
        tick_label.set_visible(False)
# Labels describe quarters, matching the data actually plotted.
plot.set_xlabel('Year-Quarter')
plot.set_ylabel('# Deaths per quarter')
plot.set_title('Number of Police Shootings by Quarter and Race')
plot.figure.savefig('4.jpeg')
# Table 1: number of records per race (index: race code, column: 'decedents').
race_only = (
    data[['race']]
    .dropna()
    .groupby('race')
    .size()
    .to_frame(name='decedents')
)

# Table 2: number of records per (race, armed/unarmed).
# Rows missing either field are dropped, every weapon description other than
# 'unarmed' is relabelled 'armed', then the pairs are counted.
race_armed = (
    data[['armed', 'race']]
    .dropna()
    .assign(armed=lambda frame: frame['armed'].where(frame['armed'] == 'unarmed', 'armed'))
    .groupby(['race', 'armed'])
    .size()
    .sort_index(ascending=False)
    .reset_index(name='count')
)

# Table 3: number of records per (race, body camera flag); the flag column is
# exposed as 'camera'.
race_camera = (
    data[['race', 'body_camera']]
    .dropna()
    .groupby(['race', 'body_camera'])
    .size()
    .sort_index(ascending=False)
    .reset_index(name='count')
    .rename(columns={'body_camera': 'camera'})
)
# Total number of records with a known race.
total_shot = race_only['decedents'].sum()
# Look the 'B' row up by label; positional `[0]` on a label-indexed Series is
# deprecated in recent pandas.
black_share = race_only.loc['B', 'decedents'] / total_shot
print('Percentage African American: ' + str(black_share))
Percentage African American: 0.2659265584970111
# Proportion assumed under the null hypothesis in every test below.
# NOTE(review): 0.142 is presumably the Black share of the reference
# population -- confirm the source of this figure.
NULL_SHARE = 0.142


def _upper_tail(observed, total, share=NULL_SHARE):
    """Return P(X >= observed) for X ~ Binomial(total, share)."""
    # binom.sf(k) is P(X > k), so shift by one to include the observed count
    # itself. sf also keeps precision where `1 - cdf` would round to 0.
    return binom.sf(observed - 1, total, share)


# One-sided p-values; each call is (observed count, group size).
# 1557 / 5855 is the overall share printed earlier (~0.266).
p_race_only = _upper_tail(1557, 5855)
p_race_only
p_armed = _upper_tail(1364, 5284)
p_armed
p_unarmed = _upper_tail(137, 410)
p_unarmed
p_NoCamera = _upper_tail(1258, 5032)
p_NoCamera
p_Camera = _upper_tail(299, 832)
p_Camera
p_val_list = np.sort([p_race_only, p_armed, p_unarmed, p_NoCamera, p_Camera])
p_val_list
# Benjamini-Hochberg thresholds: rank * alpha / M
values = np.arange(1, p_val_list.size + 1) * 0.05 / p_val_list.size
# The cutoff is the LARGEST sorted p-value that sits at or below its
# threshold; None means no hypothesis is rejected.
passing = np.flatnonzero(p_val_list <= values)
cutoff = p_val_list[passing[-1]] if passing.size else None
cutoff
# Drop the records whose race is undefined.
data = data.dropna(subset=['race'])
# Select the features; copy so that adding a column below does not write into
# a slice of `data` (pandas' SettingWithCopyWarning).
Xf = data[['armed', 'age', 'gender', 'signs_of_mental_illness', 'threat_level', 'flee', 'body_camera']].copy()
# Target indicator: True when the race code is 'B'.
Xf['is_black'] = data['race'] == 'B'
# One-hot encode the categorical features (with an explicit NaN level,
# dropping the first level of each feature).
Xf = pd.get_dummies(Xf, dummy_na=True, drop_first=True)
# Impute missing ages with the mean age.
Xf['age'] = Xf['age'].fillna(Xf['age'].mean())
def pooled_inference(alpha, beta, study_df, draws=1000, tune=1000, target_accept=0.95):
    """
    Create and fit a PyMC3 model with a single shared probability `theta`:

        theta    ~ Beta(alpha, beta)
        is_black ~ Bernoulli(theta)      (observed: study_df['is_black'])

    Inputs:
        alpha, beta : floats, parameters of the Beta prior on theta
        study_df : DataFrame with an 'is_black' column of observations
        draws, tune, target_accept : sampler settings forwarded to pm.sample
            (the defaults reproduce the previously hard-coded values)
    Outputs: (model, trace)
    """
    with pm.Model() as model:
        theta = pm.Beta('theta', alpha=alpha, beta=beta)
        # The observed variable registers itself with the model; no local
        # reference to it is needed.
        pm.Bernoulli('is_black', p=theta, observed=study_df['is_black'])
        trace = pm.sample(draws, tune=tune, target_accept=target_accept)
    return (model, trace)
# Prior on theta: the call passes alpha=3, beta=6, i.e. Beta(3, 6), whose mean
# 3 / (3 + 6) ~= 0.33 lies above the share of 0.266 printed earlier.
mdl, trace = pooled_inference(3, 6, Xf)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (2 chains in 1 job)
NUTS: [theta]
Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 6 seconds.
# Plot the sampled trace.
pm.traceplot(trace)
# Summarise the theta draws: mean plus the 2.5th / 97.5th percentiles.
theta_draws = trace['theta']
mean = np.mean(theta_draws)
low, high = np.percentile(theta_draws, [2.5, 97.5])
print(f"Estimated theta {mean:.3f}")
print(f"95% BCI: [{low:.3f}, {high:.3f}]")
Got error No model on context stack. trying to find log_likelihood in translation.
Got error No model on context stack. trying to find log_likelihood in translation.
Estimated theta 0.266
95% BCI: [0.255, 0.277]
# Mean of the body_camera flag per race. The variable keeps the name
# `counts_by_race`, but from here on it holds rates, not counts.
counts_by_race = data.groupby(['race'])['body_camera'].mean()
# x and y are passed by keyword: positional x/y is deprecated in seaborn 0.11
# and rejected outright by later releases.
sns.barplot(x=counts_by_race.index,
            y=counts_by_race.values,
            order=['O', 'N', 'A', 'H', 'B', 'W'],
            palette='Blues_d')
plt.xlabel('Race')
plt.ylabel('Rate Body Camera Open')
plt.title('Rate of Body Camera')
def hierarchical_inference(alpha, beta, study_df, draws=1000, tune=1000, target_accept=0.95):
    """
    Create and fit a PyMC3 model in which the success probability is scaled
    down for records without a body camera:

        theta    ~ Beta(alpha, beta)
        beta     ~ Uniform (library default bounds)   [random variable named 'beta']
        is_black ~ Bernoulli(theta * exp(-beta * no_camera))

    where no_camera = 1 - study_df['body_camera'].

    Inputs:
        alpha, beta : floats, parameters of the Beta prior on theta
        study_df : DataFrame with 'body_camera' and 'is_black' columns
        draws, tune, target_accept : sampler settings forwarded to pm.sample
            (the defaults reproduce the previously hard-coded values)
    Outputs: (model, trace) -- the trace holds the variables 'theta' and 'beta'
    """
    with pm.Model() as model:
        # Indicator that is 1 when NO body camera was recorded.
        no_camera = pm.Data("no_camera", 1 - study_df["body_camera"])
        theta = pm.Beta('theta', alpha=alpha, beta=beta)
        # Kept under the variable name 'beta' so trace['beta'] still works,
        # but bound to a distinct local so the `beta` argument is not shadowed.
        decay = pm.Uniform('beta')
        # pm.math.exp is the symbolic exponential; it avoids relying on
        # NumPy dispatching np.exp onto a tensor variable.
        p_observed = theta * pm.math.exp(-decay * no_camera)
        pm.Bernoulli('is_black', p=p_observed, observed=study_df['is_black'])
        trace = pm.sample(draws, tune=tune, target_accept=target_accept)
    return (model, trace)
# Prior on theta: the call passes alpha=3, beta=6, i.e. Beta(3, 6), whose mean
# 3 / (3 + 6) ~= 0.33 lies above the share of 0.266 printed earlier.
mdl, trace = hierarchical_inference(3, 6, Xf)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (2 chains in 1 job)
NUTS: [beta, theta]
# Render the model structure as a graph.
pm.model_to_graphviz(mdl)
# Plot the sampled trace.
pm.traceplot(trace)
# Summarise the theta draws: mean plus the 2.5th / 97.5th percentiles.
theta_draws = trace['theta']
mean = np.mean(theta_draws)
low, high = np.percentile(theta_draws, [2.5, 97.5])
print(f"Estimated theta {mean:.3f}")
print(f"95% BCI: [{low:.3f}, {high:.3f}]")
# Report the mean and 95% interval of 1 - exp(-beta).
# The transform is applied to every draw first, so the mean is the mean of
# the transformed quantity rather than the transform of the mean.
under_report = 1 - np.exp(-trace['beta'])
mean = np.mean(under_report)
# 1 - exp(-x) is increasing in x, so the 2.5th percentile is the LOWER bound
# and the 97.5th the upper one (the bounds used to be printed swapped).
low, high = np.percentile(under_report, [2.5, 97.5])
print(f"Estimated probability of under-report if no camera present {mean:.3f}")
print(f"95% BCI: [{low:.3f}, {high:.3f}]")
Got error No model on context stack. trying to find log_likelihood in translation.
Got error No model on context stack. trying to find log_likelihood in translation.
Estimated theta 0.362
95% BCI: [0.330, 0.393]
Estimated probability of under-report if no camera present 0.309
95% BCI: [0.370, 0.237]