# Import libraries to be used for the project
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
# Load the FBI crime table from crime.csv. The file is read without a header
# row so the column labels can be assigned explicitly right after loading.
crime_df = pd.read_csv('crime.csv', header=None)
crime_df.columns = [
    "Year",
    "Population",
    "Violent Crime",
    "Violent Crime Rate",
    "Murder and nonnegligent manslaughter",
    "Murder and nonnegligent manslaughter Rate",
    "Rape Revised def",
    "Rape Revised def rate",
    "Rape Legacy def",
    "Rape Legacy def rate",
    "Robbery",
    "Robbery rate",
    "Aggrevated Assult",
    "Aggrevated Assult rate",
    "Property Crime",
    "Property crime rate",
    "Burglary",
    "Burglary rate",
    "Larceny-theft",
    "Larceny-theft rate",
    "Motor vehicle theft",
    "Motor vehicle theft rate",
]
# Skip the first row of the raw file (presumably the file's own header line --
# confirm against crime.csv) and keep the rest as df1, the frame that every
# later step works on.
df1 = crime_df.iloc[1:]
df1
# Strip every comma from the cell text (regex replace across all columns) so
# the count columns can be cast to integers.
df1 = df1.replace(',', '', regex=True)
# Cast all count columns to int with a single astype call. The rate columns,
# "Year" and "Rape Revised def" are intentionally left untouched.
count_columns = [
    "Population",
    "Violent Crime",
    "Murder and nonnegligent manslaughter",
    "Rape Legacy def",
    "Robbery",
    "Aggrevated Assult",
    "Property Crime",
    "Burglary",
    "Larceny-theft",
    "Motor vehicle theft",
]
df1 = df1.astype({column: int for column in count_columns})
# Preview the cleaned frame to make sure everything looks right
df1
# Line chart of the yearly counts for every offence category, to see which
# types of crime are committed most often.
crime_series = [
    "Violent Crime",
    "Murder and nonnegligent manslaughter",
    "Rape Legacy def",
    "Robbery",
    "Aggrevated Assult",
    "Property Crime",
    "Burglary",
    "Larceny-theft",
    "Motor vehicle theft",
]
# DataFrame.plot.line returns the Axes it drew on; labels, title and legend
# are set on that Axes (and its parent figure) directly.
ax = df1.plot.line(x="Year", y=crime_series)
ax.set_xlabel("Year")
ax.set_ylabel("Crime")
ax.figure.suptitle('Crime rate from 1997 to 2016')
ax.legend(ncol=1, loc='best')
# Captured notebook output (warning emitted when the cell above was run):
# /opt/venv/lib/python3.7/site-packages/pandas/plotting/_matplotlib/core.py:1192: UserWarning: FixedFormatter should only be used together with FixedLocator
#   ax.set_xticklabels(xticklabels)
# Summary statistics for the data frame (describe() with default arguments)
df1.describe()
# Expose the columns to plot under the short names 'x' and 'y':
# x is the year and y is the property crime count. The 'y' column is also
# read by the density plots further down.
df1['x'] = df1["Year"]
df1['y'] = df1["Property Crime"]
# Seaborn scatter plot of x vs. y; fit_reg=False disables the regression line.
# x and y are passed by keyword: seaborn has deprecated passing them
# positionally, and from version 0.12 `data` is the only valid positional
# argument, so the positional form stops working there.
sns.lmplot(x='x', y='y', data=df1, fit_reg=False)
# Captured notebook output (warning emitted when the cell above was run):
# /opt/venv/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
#   FutureWarning
# Density (KDE) plot of the 'y' column using seaborn
sns.kdeplot(df1["y"])
# Another version of the density plot, with the histogram in the background.
# histplot is the axes-level replacement for the deprecated distplot:
#   kde=True        overlays the kernel density estimate on the histogram,
#   stat="density"  normalises the bars so they share the KDE's scale,
#   kde_kws cut=3   extends the KDE curve past the data range the way
#                   distplot did.
sns.histplot(df1["y"], kde=True, stat="density", kde_kws={"cut": 3})
# Captured notebook output (warning emitted when the cell above was run):
# /opt/venv/lib/python3.7/site-packages/seaborn/distributions.py:2551: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
#   warnings.warn(msg, FutureWarning)
# Re-point the helper columns: x is now the larceny-theft count and y is the
# property crime count.
df1['x'] = df1["Larceny-theft"]
df1['y'] = df1["Property Crime"]
# Seaborn scatter plot of x vs. y; fit_reg=False disables the regression line.
# x and y are passed by keyword: seaborn has deprecated passing them
# positionally, and from version 0.12 `data` is the only valid positional
# argument, so the positional form stops working there.
sns.lmplot(x='x', y='y', data=df1, fit_reg=False)
# Captured notebook output (warning emitted when the cell above was run):
# /opt/venv/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
#   FutureWarning