import pandas as pd
# Traffic-stop records; first look at size, missingness, and dtypes.
ri = pd.read_csv('police.csv')
ri.head()
ri.shape
ri.isnull().sum()
ri.dtypes
ri.is_arrested.unique()
# RI = Rhode Island
ri.state.unique()
# 'state' appears to be a single constant value (inspected above) and
# county_name carries no useful information -- drop both columns in place.
ri.drop(['state', 'county_name'], axis='columns', inplace=True)
# Rows without driver_gender are unusable for the gender comparisons below.
ri.dropna(subset=['driver_gender'], inplace=True)
ri.isnull().sum()
# NOTE(review): astype('bool') silently maps any remaining NaN to True --
# assumes is_arrested has no nulls after the dropna above; verify with the
# isnull() check printed just before this line.
ri['is_arrested'] = ri.is_arrested.astype('bool')
# Fuse the separate stop_date and stop_time strings into one datetime
# column, then promote it to the index so the data can be grouped and
# resampled by time later in the script.
ri['stop_datetime'] = pd.to_datetime(ri.stop_date.str.cat(ri.stop_time, sep=' '))
ri.set_index('stop_datetime', inplace=True)
ri.index
# Overall distribution of violation types, as proportions.
ri.violation.value_counts(normalize=True)
# Split the stops by driver gender and compare the violation mixes.
women = ri.loc[ri.driver_gender == 'F']
men = ri.loc[ri.driver_gender == 'M']
women.violation.value_counts(normalize=True)
men.violation.value_counts(normalize=True)
# Restrict to speeding stops only and compare outcomes by gender.
women_speeding = ri.loc[(ri.driver_gender == 'F') & (ri.violation == 'Speeding')]
men_speeding = ri.loc[(ri.driver_gender == 'M') & (ri.violation == 'Speeding')]
women_speeding.stop_outcome.value_counts(normalize=True)
men_speeding.stop_outcome.value_counts(normalize=True)
# How often is a search conducted -- overall, by gender, and by
# (violation, gender) pair?
ri['search_conducted'].value_counts(normalize=True)
ri.groupby('driver_gender')['search_conducted'].mean()
ri.groupby(['violation', 'driver_gender'])['search_conducted'].mean()
ri['search_type'].value_counts()
# Include:
#Probable Cause,Protective Frisk; Incident to Arrest,Protective Frisk; Incident to Arrest,Inventory,Protective Frisk ...
# A stop counts as a frisk when 'Protective Frisk' appears anywhere in
# search_type; stops with no search (NaN) are treated as False.
ri['frisk'] = ri['search_type'].str.contains('Protective Frisk', na=False)
ri['frisk'].sum()
# Among stops where a search actually happened, compare frisk rates by gender.
searched_stops = ri.loc[ri['search_conducted'] == True]
searched_stops.groupby('driver_gender')['frisk'].mean()
import seaborn as sns
sns.set()
#Calculating the hourly arrest rate
# Group by hour of day (0-23) taken from the DatetimeIndex; the mean of the
# boolean is_arrested column is the fraction of stops ending in arrest.
hourly_arrest_rate = ri.groupby(ri.index.hour).is_arrested.mean()
import matplotlib.pyplot as plt
hourly_arrest_rate.plot()
plt.xlabel('Hour')
plt.ylabel('Arrest Rate')
plt.title('Arrest Rate by Time of Day')
plt.show()
# Annual (year-end) rate of drug-related stops.
# NOTE(review): the 'A' resample alias is deprecated in pandas >= 2.2 in
# favour of 'YE' -- update when the project's pandas version allows.
annual_drug_rate = ri.drugs_related_stop.resample('A').mean()
annual_drug_rate.plot()
plt.xlabel('Year')
plt.ylabel('Drug-related stops Rate')
plt.title('Drug-related stops Rate by Year')
plt.show()
# Compare the drug-stop trend with the annual search rate, side by side
# as stacked subplots sharing the time axis.
annual_search_rate = ri.search_conducted.resample('A').mean()
annual = pd.concat([annual_drug_rate, annual_search_rate], axis='columns')
annual.plot(subplots=True)
plt.xlabel('Year')
plt.show()
# Tallying violations by district
# Cross-tabulate stop counts per (district, violation) pair, then keep only
# the K zones; note the label-based .loc slice is inclusive of 'Zone K3'.
all_zones = pd.crosstab(ri.district, ri.violation)
k_zones = all_zones.loc['Zone K1':'Zone K3']
k_zones.plot(kind='bar', stacked=True)
plt.ylabel('Count of violations')
plt.title("Violations for K's zones")
plt.show()
# Convert the categorical stop_duration labels to representative minute
# values so durations can be averaged numerically.
duration_minutes = {'0-15 Min': 8, '16-30 Min': 23, '30+ Min': 45}
ri['stop_minutes'] = ri['stop_duration'].map(duration_minutes)
# Mean stop length per raw violation description, plotted shortest-first.
avg_minutes = ri.groupby('violation_raw')['stop_minutes'].mean()
avg_minutes.sort_values().plot(kind='barh')
plt.show()
# Load the daily weather records that will later be merged with the stops,
# and take a first look at size and missingness.
weather = pd.read_csv('weather.csv')
weather.head()
weather.shape
weather.isnull().sum()
# Sanity-check the temperature columns (TMIN <= TAVG <= TMAX should hold).
weather[['TAVG', 'TMIN', 'TMAX']].describe()
weather[['TMIN', 'TAVG', 'TMAX']].plot(kind='box')
plt.show()
# Daily temperature range.
weather['TDIFF'] = weather['TMAX'] - weather['TMIN']
weather.TDIFF.plot(kind='hist', bins=20)
# Fix: this was the only chart in the script not followed by plt.show();
# without it, the figure never renders in script execution and the next
# histogram is drawn onto the same axes.
plt.show()
# Each of the WT01..WT22 indicator columns flags one adverse weather event
# for the day; bad_conditions counts how many such events were recorded.
weather['bad_conditions'] = weather.loc[:, 'WT01':'WT22'].sum(axis='columns')
# Fill any missing totals with zero and store as plain integers.
weather['bad_conditions'] = weather['bad_conditions'].fillna(0).astype(int)
weather['bad_conditions'].plot(kind='hist')
plt.show()
# Distribution of the adverse-event counts, in ascending order.
weather['bad_conditions'].value_counts().sort_index()
# Bucket the counts into a coarse three-level weather rating.
condition_rating = {0: 'good',
                    1: 'bad', 2: 'bad', 3: 'bad', 4: 'bad',
                    5: 'worse', 6: 'worse', 7: 'worse', 8: 'worse', 9: 'worse'}
weather['rating'] = weather['bad_conditions'].map(condition_rating)
weather['rating'].value_counts()
# Make rating an ordered categorical (good < bad < worse) so sorting and
# comparisons respect severity instead of alphabetical order.
from pandas.api.types import CategoricalDtype
rating_dtype = CategoricalDtype(categories=['good', 'bad', 'worse'], ordered=True)
weather['rating'] = weather['rating'].astype(rating_dtype)
# Move stop_datetime back to a column so the stops can be joined to the
# weather table on the date string, then restore the datetime index.
ri.reset_index(inplace=True)
daily_rating = weather[['DATE', 'rating']]
ri_weather = ri.merge(daily_rating, left_on='stop_date', right_on='DATE', how='left')
ri_weather.set_index('stop_datetime', inplace=True)
# Arrest rate under each weather rating, overall and per violation type.
ri_weather.groupby('rating').is_arrested.mean()
ri_weather.pivot_table(index='violation', columns='rating', values='is_arrested')