# Library imports
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Load the dataset into a DataFrame
# Read the raw crime records from the CSV file into the DataFrame that every
# later section works on.
df = pd.read_csv(filepath_or_buffer="/work/crime-data.csv")
# Data cleaning
# Discard the rows whose hour slot is the placeholder 'S/D', then the rows
# that have no commune.
rows_without_slot = df.loc[df['franja_horaria'] == 'S/D'].index
df = df.drop(index=rows_without_slot)
df = df.dropna(subset=['comuna'])

# With the unusable rows gone, both columns can be stored as integers.
df = df.astype({'franja_horaria': int, 'comuna': int})

# Parse the date column and derive the month number from it.
df['fecha'] = pd.to_datetime(df['fecha'])
df['mes'] = df['fecha'].dt.month.astype(int)
# Crime count per commune
# Count the rows recorded for each commune in a single pass instead of
# filtering the whole frame once per commune. Sorting the index keeps the
# bars ordered by commune number.
crimes_per_comuna = df['comuna'].value_counts().sort_index()
comunas = crimes_per_comuna.index.to_numpy()
a = crimes_per_comuna.tolist()

plt.barh(comunas, a)
# Render this chart now; without it, the next plotting call in the file keeps
# drawing on the same figure when the file is run as a script.
plt.show()
# Crime count per neighborhood (barrio)
# Neighborhood names in order of first appearance. Missing names are left out:
# an equality filter can never match them, so they would only add an empty bar.
barrios = df['barrio'].dropna().unique()

# One pass over the column gives every count; reindexing puts the counts in
# the same order as `barrios`.
a = df['barrio'].value_counts().reindex(barrios).tolist()

plt.bar(barrios, a)
# Vertical, small tick labels so the many neighborhood names stay legible.
plt.xticks(rotation=90,font={'size': 7})
plt.show()
# Crime count per month
# Month numbers in order of first appearance; the bar positions are numeric,
# so the chart is ordered by month regardless.
months = df['mes'].unique()

# Count every month in one pass and align the counts with `months`.
a = df['mes'].value_counts().reindex(months, fill_value=0).tolist()

plt.bar(months, a)
# Render this chart now; without it, the next plotting call in the file keeps
# drawing on the same figure when the file is run as a script.
plt.show()
# Crime count per hour slot (franja horaria)
# Count the rows recorded in each hour slot in a single pass instead of
# filtering the whole frame once per slot. Sorting the index keeps the slots
# in ascending order.
crimes_per_slot = df['franja_horaria'].value_counts().sort_index()
f_horaria = crimes_per_slot.index.to_numpy()
a = crimes_per_slot.tolist()

plt.bar(f_horaria, a)
# Render this chart now; without it, the next plotting call in the file keeps
# drawing on the same figure when the file is run as a script.
plt.show()
# Crime count per type of crime - overall
# Crime types in order of first appearance. Missing values are left out: an
# equality filter can never match them, so they would only add an empty bar.
tipos = df['tipo_delito'].dropna().unique()

# Count every type in one pass and align the counts with `tipos`.
a = df['tipo_delito'].value_counts().reindex(tipos).tolist()

plt.bar(tipos, a)
# Render this chart now; without it, the next plotting call in the file keeps
# drawing on the same figure when the file is run as a script.
plt.show()
# Crime count per type of crime in one neighborhood
# Crime types in order of first appearance. Missing values are left out: an
# equality filter can never match them, so they would only add an empty bar.
tipos = df['tipo_delito'].dropna().unique()
barrio = 'Constitución'

# Select the neighborhood's rows once, then count its crime types in one pass.
# Types that never occur in this neighborhood are reported as 0.
crimes_in_barrio = df.loc[df['barrio'] == barrio, 'tipo_delito']
a = crimes_in_barrio.value_counts().reindex(tipos, fill_value=0).tolist()

plt.bar(tipos, a)
# Render this chart now; without it, the next plotting call in the file keeps
# drawing on the same figure when the file is run as a script.
plt.show()
# Crime count per type of crime in one commune
# Crime types in order of first appearance. Missing values are left out: an
# equality filter can never match them, so they would only add an empty bar.
tipos = df['tipo_delito'].dropna().unique()
comuna = 3

# Select the commune's rows once, then count its crime types in one pass.
# Types that never occur in this commune are reported as 0.
crimes_in_comuna = df.loc[df['comuna'] == comuna, 'tipo_delito']
a = crimes_in_comuna.value_counts().reindex(tipos, fill_value=0).tolist()

plt.bar(tipos, a)
# Render this chart now; without it, the next plotting call in the file keeps
# drawing on the same figure when the file is run as a script.
plt.show()
# Share of each type of crime in every neighborhood
# Neighborhoods and crime types in order of first appearance. Missing values
# are left out because an equality filter can never match them.
barrios = df['barrio'].dropna().unique()
tipos = df['tipo_delito'].dropna().unique()

# Tabulate neighborhood x crime type in one pass instead of filtering the
# whole frame once per pair. Reindexing restores the first-appearance order
# and fills pairs that never occur with 0.
crimes_by_barrio = pd.crosstab(df['barrio'], df['tipo_delito']).reindex(
    index=barrios, columns=tipos, fill_value=0
)

# One pie chart per neighborhood, each shown as its own figure.
for i in barrios:
    y = crimes_by_barrio.loc[i].tolist()
    plt.pie(y,labels = tipos, autopct='%1.1f%%', shadow=True)
    plt.title(i)
    plt.show()
# Heat grid of crimes by weekday and hour slot in one neighborhood
# Hour slots present in the data, ascending; these are the grid columns.
data = df['franja_horaria'].unique()
data.sort()
horarios = list(data)

# Weekday number of every record (pandas numbers Monday as 0), stored on the
# frame; the distinct values, ascending, are the grid rows.
fecha = df['fecha']
df['dia_semana'] = fecha.dt.weekday
t = df['dia_semana'].unique()
t.sort()
weekdays = list(t)

barrio = 'Villa Devoto'

# Tabulate weekday x hour slot for the chosen neighborhood in one pass instead
# of filtering the whole frame once per cell. Reindexing guarantees a full
# grid: combinations with no records become 0.
crimes_in_barrio = df[df['barrio'] == barrio]
grid = (
    pd.crosstab(crimes_in_barrio['dia_semana'], crimes_in_barrio['franja_horaria'])
    .reindex(index=weekdays, columns=horarios, fill_value=0)
    .astype(int)
)
h = grid.values.tolist()
j = grid.to_numpy()

fig, ax = plt.subplots()
im = ax.imshow(j)

# One tick per row/column, labelled with the weekday number / hour slot.
ax.set_yticks(np.arange(len(weekdays)))
ax.set_xticks(np.arange(len(horarios)))
ax.set_yticklabels(weekdays)
ax.set_xticklabels(horarios)
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")

# Write each count inside its cell; imshow puts columns on x and rows on y.
for (row, col), count in np.ndenumerate(j):
    ax.text(col, row, count, ha="center", va="center", color="w")

ax.set_title(barrio)
fig.tight_layout()
plt.show()
# Share of each type of crime for every weekday/hour-slot combination
# NOTE(review): `barrios` is not used in this section; it is still assigned so
# the name stays defined for any code that follows.
barrios = df['barrio'].unique()
# Crime types in order of first appearance. Missing values are left out
# because an equality filter can never match them.
tipos = df['tipo_delito'].dropna().unique()
barrio = 'Villa Devoto'

# `weekdays`, `horarios` and the 'dia_semana' column come from the heat-grid
# section above.
# The neighborhood filter does not change inside the loops, so apply it once.
crimes_in_barrio = df[df['barrio'] == barrio]

for i in weekdays:
    # The weekday filter is shared by every hour slot of that day.
    crimes_on_day = crimes_in_barrio[crimes_in_barrio['dia_semana'] == i]
    for j in horarios:
        crimes_in_slot = crimes_on_day.loc[
            crimes_on_day['franja_horaria'] == j, 'tipo_delito'
        ]
        # Count every crime type in one pass; types absent from this
        # weekday/hour combination are reported as 0.
        y = crimes_in_slot.value_counts().reindex(tipos, fill_value=0).tolist()
        plt.pie(y,labels=tipos,autopct='%1.1f%%', textprops={'color': 'white', 'size': 'large'})
        plt.title(f'Day: {i} - Hour: {j}')
        plt.show()