# Imports
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import csv
import plotly.graph_objects as go
import iplot
import plotly as py
import plotly.express as px
import scipy as sci
import scipy.stats as stat
import sklearn.cluster as cl
import sklearn.manifold as mani
import seaborn as sns
import joypy
# Load the raw CSV and build the working frame `newDF` from a fixed set of
# demographic, DAST and per-substance *_USE columns plus every "*_YR" column
# found from position 166 onward in the raw file.
dfRaw = pd.read_csv('/work/US/us_18Q1.csv')

base_columns = [
    'DEM_AGE', 'DEM_REGION', 'DEM_INCOME', 'DEM_EDU', 'DAST_SUM', 'DEM_STATE',
    'CAN_REC_USE', 'COKE_USE', 'CRACK_USE', 'MDMA_USE', 'GHB_USE', 'INH_USE',
    'SPEED_USE', 'METHAM_USE', 'NPFENT_USE', 'HEROIN_USE', 'KET_USE',
    'MEPH_USE', 'LSD_USE', 'MUSH_USE', 'PCP_USE', 'MESC_USE', 'SPICE_USE',
    'SAL_USE', 'STER_USE',
]

# Keep columns whose last '_'-separated token is exactly 'YR'.
# NOTE(review): 166 is a hard-coded positional offset into the raw file; it
# silently selects the wrong columns if the file layout changes - confirm.
yr_columns = [name for name in dfRaw.columns[166:] if name.split('_')[-1] == 'YR']

# One selection replaces the per-column insert loop; the *_YR columns still end
# up appended after the base columns, in raw-file order.
newDF = dfRaw[base_columns + yr_columns].copy()

# Recode the categorical demographics. Income codes 1-10 become dollar amounts
# and code 11 becomes missing; education code 4 becomes missing; region codes
# become region names.
# NOTE(review): the dollar amounts are presumably representative values for
# income brackets - verify against the data dictionary.
dem_income = {1: 10000, 2: 25000, 3: 35000, 4: 45000, 5: 55000, 6: 65000,
              7: 75000, 8: 85000, 9: 95000, 10: 105000, 11: np.nan}
dem_edu = {4: np.nan}
dem_region = {1: 'Northeast', 2: 'Midwest', 3: 'South', 4: 'West'}

# Bracket assignment is used because attribute-style assignment only updates a
# column when that column already exists.
newDF['DEM_INCOME'] = newDF['DEM_INCOME'].replace(dem_income)
newDF['DEM_EDU'] = newDF['DEM_EDU'].replace(dem_edu)
newDF['DEM_REGION'] = newDF['DEM_REGION'].replace(dem_region)
# Load the crime estimates and keep only the rows for 2018.
crimeDF = pd.read_csv('estimated_crimes_1979_2019.csv')
is_2018 = crimeDF['year'] == 2018
crimeDF2018 = crimeDF.loc[is_2018].copy()

# Crime rate per 100,000 population: sum the ten columns that precede the last
# column, divide by population, and scale.
# NOTE(review): the slice is positional; if those ten columns contain both
# aggregate and component offence counts the sum double-counts - confirm
# against the CSV header.
offence_totals = crimeDF2018.iloc[:, -11:-1].sum(axis=1)
crimeDF2018['CRIME_RATE'] = (offence_totals / crimeDF2018['population']) * 100000
crimeDF2018 = crimeDF2018.set_index('state_abbr')

# Per-state non-null counts of every column, and per-state mean DAST_SUM.
participants = newDF.groupby('DEM_STATE').count()
avgStateDAST = newDF.groupby(['DEM_STATE']).DAST_SUM.mean()

# Drop the row(s) carrying the index label of the final row, then squeeze the
# one-column frame.
# NOTE(review): presumably the last row is a non-state aggregate - verify.
last_label = crimeDF2018.tail(1).index
crimeRate2018 = crimeDF2018[['CRIME_RATE']].drop(last_label).squeeze()
# Build per-respondent 0/1 drug indicators and aggregate them per state.
use_columns = [
    'CAN_REC_USE', 'COKE_USE', 'CRACK_USE', 'MDMA_USE', 'GHB_USE', 'INH_USE',
    'SPEED_USE', 'METHAM_USE', 'NPFENT_USE', 'HEROIN_USE', 'KET_USE',
    'MEPH_USE', 'LSD_USE', 'MUSH_USE', 'PCP_USE', 'MESC_USE', 'SPICE_USE',
    'SAL_USE', 'STER_USE',
]
past_year_columns = ['OP_NMU_YR', 'BENZ_NMU_YR', 'STIM_NMU_YR', 'GABA_NMU_YR', 'ILL_YR']

drugUse = newDF[['DEM_STATE'] + use_columns + past_year_columns].copy()

# Codes 2, 3 and 4 count as use (1); every other value, including missing,
# counts as 0. `isin` replaces the element-wise `applymap` lambda, which is
# deprecated in recent pandas, and yields the same 0/1 integers.
# NOTE(review): the meaning of codes 2/3/4 is not visible here - confirm
# against the data dictionary.
drugUse[use_columns] = drugUse[use_columns].isin([2, 3, 4]).astype(int)

# Map textual answers to 0/1; values other than 'Yes'/'No' pass through unchanged.
drugUse[past_year_columns] = drugUse[past_year_columns].replace({'Yes': 1, 'No': 0})

# Group once and reuse the per-state sums and respondent counts.
by_state = drugUse.groupby('DEM_STATE')
state_totals = by_state.sum()
# CAN_REC_USE has no missing values after the recode above, so its per-state
# count is the number of rows in each state.
respondents = by_state.count()['CAN_REC_USE']

# Total flagged drugs in the state divided by respondents.
drugState = pd.DataFrame(state_totals.sum(axis=1), columns=['Drug_Use'])
drugState = drugState.div(respondents, axis=0)

# Per-drug share of respondents in each state.
indDrugState = state_totals.div(respondents, axis=0)
# Per-state mean income, taken from the recoded working frame.
incomeState = newDF[['DEM_STATE', 'DEM_INCOME']].groupby('DEM_STATE').mean()

# Per-state mean education code.
# NOTE(review): this reads DEM_EDU from dfRaw, so code 4 (recoded to NaN in
# newDF) is included in the mean - confirm that is intended.
edu = dfRaw[['DEM_STATE', 'DEM_EDU']].groupby('DEM_STATE').mean()

# Assemble the state-level table: start from the crime rate and join each
# per-state summary on the state index. `merge` defaults to an inner join, so
# only states present in every input survive.
stateDF = pd.DataFrame(crimeRate2018)
for per_state in (incomeState, edu, avgStateDAST, drugState, indDrugState):
    stateDF = stateDF.merge(per_state, left_index=True, right_index=True)
# Scatter of per-state crime rate against mean DAST score, coloured by the
# average number of flagged drugs per respondent.
plt.figure(figsize=(25, 25))
sc = plt.scatter(stateDF['CRIME_RATE'], stateDF['DAST_SUM'], c=stateDF['Drug_Use'], cmap='flare')
plt.colorbar(sc, label='Average Amount of Drugs Used per Person')

# Label each plotted point with its state. Iterating stateDF (the inner-merged
# table that was actually plotted) rather than the crime-rate index avoids a
# KeyError, or a label without a point, for states missing from the survey.
for state, crime_rate, dast in zip(stateDF.index, stateDF['CRIME_RATE'], stateDF['DAST_SUM']):
    plt.text(crime_rate + 0.0003, dast + 0.001, state)

plt.xlabel('Crime rate')
plt.ylabel('Average state DAST')
plt.title('Crime Rate vs. DAST-10')
plt.show()
# Choropleth of the per-state crime rate on a US map.
data = {
    'type': 'choropleth',
    'locations': stateDF.index,
    'locationmode': 'USA-states',
    'colorscale': 'Reds',
    'reversescale': False,
    'text': 'Crime Rate',
    'z': stateDF['CRIME_RATE'],
    'colorbar': {'title': "Crime Rate"},
}
layout = {'title': 'Crime Rate by State', 'geo': {'scope': 'usa'}}
crime = go.Figure(data=[data], layout=layout)
crime.show()
# Combine crime rate and drug use into one score per state: the infinity norm
# of the pair, i.e. the larger of the two absolute values.
# NOTE(review): the title and labels say "DAST-10" but the score is built from
# Drug_Use, not DAST_SUM - confirm which was intended. Drug_Use is at most 24
# (19 + 5 indicator columns) while CRIME_RATE is scaled per 100,000, so the
# maximum is presumably always the crime rate.
cpy = stateDF[['CRIME_RATE', 'Drug_Use']].copy()
dCrime = [np.linalg.norm(pair, ord=np.inf) for pair in cpy.values.tolist()]

data = dict(
    type='choropleth',
    locations=stateDF.index,
    locationmode='USA-states',
    colorscale='Burgyl',
    text='DAST-10 Crime score',
    z=dCrime,
    colorbar={'title': "DAST-10 and Crime Rate"},
)
layout = dict(title='DAST-10 and Crime Rate by State', geo=dict(scope='usa'))
dastCrime = go.Figure(data=[data], layout=layout)
dastCrime.show()
# Choropleth of the per-state mean income.
data = {
    'type': 'choropleth',
    'locations': stateDF.index,
    'locationmode': 'USA-states',
    'colorscale': 'emrld',
    'reversescale': False,
    'text': 'Average Income',
    'z': stateDF['DEM_INCOME'],
    'colorbar': {'title': "Income in USD"},
}
layout = {'title': 'Average Income by State', 'geo': {'scope': 'usa'}}
choromap = go.Figure(data=[data], layout=layout)
choromap.show()

# Mann-Whitney U test between the per-state crime rates and mean incomes.
# NOTE(review): this compares the two samples' distributions, not their
# association across states - confirm this is the intended test.
crime_sample = stateDF['CRIME_RATE']
income_sample = stateDF['DEM_INCOME']
statistic, pval = stat.mannwhitneyu(crime_sample, income_sample)
print("pval: ", pval)
# Choropleth of the per-state mean education code. The map reads the DEM_EDU
# column already merged into stateDF, so the former standalone groupby into
# `temp` was never used and has been removed.
data = dict(
    type='choropleth',
    locations=stateDF.index,
    locationmode='USA-states',
    colorscale='Blues',
    text='Average Education level',
    z=stateDF['DEM_EDU'],
    colorbar={'title': "Average Education Level"},
)
layout = dict(title='Average Education Level by State', geo=dict(scope='usa'))
choromap = go.Figure(data=[data], layout=layout)
choromap.show()

# Mann-Whitney U test between the per-state crime rates and mean education codes.
statistic, pval = stat.mannwhitneyu(stateDF['CRIME_RATE'], stateDF['DEM_EDU'])
print("pval: ", pval)
# Crime rate scaled by cannabis's share of each state's overall drug use:
# (CAN_REC_USE / Drug_Use) * CRIME_RATE.
# The frame is an explicit copy so the new columns are not written into a
# slice of stateDF, and the ratio uses this frame's own columns instead of the
# name `y`, which is not defined until later in the script (NameError when run
# top to bottom).
weedCrime = stateDF[['CAN_REC_USE']].copy()
weedCrime['Crime'] = stateDF['CRIME_RATE']
weedCrime['Drug_Use'] = stateDF['Drug_Use']
weedCrime['pivot'] = (
    weedCrime['CAN_REC_USE']
    .div(weedCrime['Drug_Use'], axis=0)
    .mul(weedCrime['Crime'], axis=0)
)

data = dict(
    type='choropleth',
    locations=stateDF.index,
    locationmode='USA-states',
    colorscale='Greens',
    text='Crime Rate for Cannibis Users',
    z=weedCrime['pivot'],
    colorbar={'title': "Crime Rate"},
)
layout = dict(title='Crime Rate for Cannibis Users by State', geo=dict(scope='usa'))
choromap = go.Figure(data=[data], layout=layout)
choromap.show()
# Crime rate scaled by opioids' share of each state's overall drug use:
# (OP_NMU_YR / Drug_Use) * CRIME_RATE.
# The frame is an explicit copy so the new columns are not written into a
# slice of stateDF, and the ratio uses this frame's own columns instead of the
# name `y`, which is not defined until later in the script (NameError when run
# top to bottom).
opiodCrime = stateDF[['OP_NMU_YR']].copy()
opiodCrime['Crime'] = stateDF['CRIME_RATE']
opiodCrime['Drug_Use'] = stateDF['Drug_Use']
opiodCrime['pivot'] = (
    opiodCrime['OP_NMU_YR']
    .div(opiodCrime['Drug_Use'], axis=0)
    .mul(opiodCrime['Crime'], axis=0)
)

data = dict(
    type='choropleth',
    locations=stateDF.index,
    locationmode='USA-states',
    colorscale='Purples',
    text='Crime Rate for Opioid Users',
    z=opiodCrime['pivot'],
    colorbar={'title': "Crime Rate"},
)
layout = dict(title='Crime Rate for Opioid Users by StaTE', geo=dict(scope='usa'))
choromap = go.Figure(data=[data], layout=layout)
choromap.show()
# Mann-Whitney U tests of the opioid-scaled crime rate against, in order, the
# cannabis-scaled crime rate, the mean education code and the mean income.
# Each p-value is printed as it is computed.
comparisons = (
    (opiodCrime['pivot'], weedCrime['pivot']),
    (opiodCrime['pivot'], stateDF['DEM_EDU']),
    (opiodCrime['pivot'], stateDF['DEM_INCOME']),
)
for left_sample, right_sample in comparisons:
    statistic, pval = stat.mannwhitneyu(left_sample, right_sample)
    print("pval: ", pval)
def _joyplot_usage_by(group_col, title):
    """Draw a joyplot of standardised drug-usage totals grouped by *group_col*.

    Sums the drug columns of ``newDF`` within each value of *group_col*,
    combines sixteen of them into one "other drugs" total, z-scores the three
    resulting columns across groups, and shows them as a joyplot.

    Args:
        group_col: Name of the ``newDF`` column to group by.
        title: Figure title passed to ``joypy.joyplot``.

    Returns:
        The ``(fig, axes)`` pair returned by ``joypy.joyplot``.
    """
    # NOTE(review): these sums are over the column values as stored in newDF;
    # the 0/1 recoding was applied only to the separate drugUse copy - confirm
    # that summing the stored codes is intended. MESC_USE and STER_USE are not
    # part of this list.
    other_drugs = [
        'COKE_USE', 'CRACK_USE', 'MDMA_USE', 'GHB_USE', 'INH_USE', 'SPEED_USE',
        'METHAM_USE', 'NPFENT_USE', 'HEROIN_USE', 'KET_USE', 'MEPH_USE',
        'LSD_USE', 'MUSH_USE', 'PCP_USE', 'SPICE_USE', 'SAL_USE',
    ]
    totals = (
        newDF[[group_col, *other_drugs, 'OP_NMU_YR', 'CAN_REC_USE']]
        .groupby(group_col)
        .sum()
    )
    usage = pd.DataFrame({
        'Other Drug Usage': totals[other_drugs].sum(axis=1),
        'Opium Usage': totals['OP_NMU_YR'],
        'Cannabis Usage': totals['CAN_REC_USE'],
    })
    # Standardise each column (zero mean, unit sample std) so the three ridges
    # share a scale.
    standardised = (usage - usage.mean()) / usage.std()
    fig, axes = joypy.joyplot(standardised, title=title, color=['#E34132', '#645394', '#17AB6F'])
    fig.show()
    return fig, axes


# The same plot for education, income and age.
fig, axes = _joyplot_usage_by('DEM_EDU', 'Education on Illicit Drug Use')
fig, axes = _joyplot_usage_by('DEM_INCOME', 'Income on Illicit Drug Use ')
fig, axes = _joyplot_usage_by('DEM_AGE', 'Age on Illicit Drug Use')
# Stacked bar chart: for each education code, each drug column's share of the
# summed values across all listed drug columns.
share_columns = [
    'CAN_REC_USE', 'OP_NMU_YR', 'COKE_USE', 'CRACK_USE', 'MDMA_USE', 'GHB_USE',
    'INH_USE', 'SPEED_USE', 'METHAM_USE', 'NPFENT_USE', 'HEROIN_USE',
    'KET_USE', 'MEPH_USE', 'LSD_USE', 'MUSH_USE', 'PCP_USE', 'SPICE_USE',
    'SAL_USE',
]
temp = newDF[['DEM_EDU'] + share_columns].groupby('DEM_EDU').sum()
# The row total must be computed before the columns are overwritten with shares.
temp['Total'] = temp.sum(axis=1)
temp[share_columns] = temp[share_columns].div(temp['Total'], axis=0)
temp[share_columns].plot(kind='bar', stacked=True, title='')
# Stacked bar chart: for each state, each past-year column's share of the
# summed values across the five past-year columns (read from the raw frame).
past_year = ['OP_NMU_YR', 'BENZ_NMU_YR', 'STIM_NMU_YR', 'GABA_NMU_YR', 'ILL_YR']
temp = dfRaw[['DEM_STATE'] + past_year].groupby('DEM_STATE').sum()
# The row total must be computed before the columns are overwritten with shares.
temp['Total'] = temp.sum(axis=1)
temp[past_year] = temp[past_year].div(temp['Total'], axis=0)
temp[past_year].plot(kind='bar', stacked=True)
# Mann-Whitney U test between the per-state crime rates and drug-use averages.
statistic, pval = stat.mannwhitneyu(stateDF['CRIME_RATE'], stateDF['Drug_Use'])
print("pval: ", pval)

sns.set(rc={'figure.figsize': (15, 10)})

# Per-drug state shares plus the two scaling columns. The explicit copy keeps
# the added columns from being written into a slice of stateDF.
y = stateDF[['COKE_USE', 'CRACK_USE', 'MDMA_USE', 'GHB_USE', 'INH_USE',
             'SPEED_USE', 'METHAM_USE', 'NPFENT_USE', 'HEROIN_USE', 'KET_USE',
             'MEPH_USE', 'LSD_USE', 'MUSH_USE', 'PCP_USE', 'SPICE_USE',
             'SAL_USE']].copy()
y['Crime'] = stateDF['CRIME_RATE']
y['Drug_Use'] = stateDF['Drug_Use']

# Every column except the last two ('Crime', 'Drug_Use') is a drug share.
colChange = y.columns[:-2]
# (drug share / overall drug use) * crime rate, transposed so drugs are rows
# and states are columns.
pivot = y[colChange].div(y.Drug_Use, axis=0).mul(y.Crime, axis=0).T

# Labels follow the column order of `y` above.
y_axis_labels = ['Coke', 'Crack', 'MDMA', 'GHB', 'Inhalant', 'Speed', 'Methamphetamemes', 'Fentanyl', 'Heroin', 'Ketamine', 'Mephedrone', 'LSD', 'Mushrooms', 'PCP', 'Spice', 'Salvia']
ax = sns.heatmap(pivot, cmap='flare', yticklabels=y_axis_labels)
plt.xlabel('States')
plt.title('Crime Rate by State Scaled by Usage of Each Illicit Drug')
plt.show()
# This is intentionally left blank