Untitled Python Project

# Imports
import csv

import iplot
import joypy
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly as py
import plotly.express as px
import plotly.graph_objects as go
import scipy as sci
import scipy.stats as stat
import seaborn as sns
import sklearn.cluster as cl
import sklearn.manifold as mani

# Load the raw CSV export into dfRaw.
RAW_CSV_PATH = '/work/US/us_18Q1.csv'
dfRaw = pd.read_csv(RAW_CSV_PATH)

# Working frame: demographic, DAST and per-drug "*_USE" columns copied from dfRaw.
BASE_COLUMNS = [
    'DEM_AGE', 'DEM_REGION', 'DEM_INCOME', 'DEM_EDU', 'DAST_SUM', 'DEM_STATE',
    'CAN_REC_USE', 'COKE_USE', 'CRACK_USE', 'MDMA_USE', 'GHB_USE', 'INH_USE',
    'SPEED_USE', 'METHAM_USE', 'NPFENT_USE', 'HEROIN_USE', 'KET_USE',
    'MEPH_USE', 'LSD_USE', 'MUSH_USE', 'PCP_USE', 'MESC_USE', 'SPICE_USE',
    'SAL_USE', 'STER_USE',
]
newDF = dfRaw[BASE_COLUMNS].copy()

# Append, in order, every raw column from position 166 onward whose last
# underscore-separated token is "YR".
# NOTE(review): 166 is a positional offset into the raw CSV layout — confirm it
# still matches if the export changes.
for raw_name in dfRaw.columns[166:]:
    if raw_name.split('_')[-1] == 'YR':
        newDF.insert(len(newDF.columns), raw_name, dfRaw[raw_name].copy())

# Recode maps: income codes 1-10 become numeric amounts and code 11 becomes NaN;
# education code 4 becomes NaN; region codes become region names.
dem_income = {
    1: 10000, 2: 25000, 3: 35000, 4: 45000, 5: 55000, 6: 65000,
    7: 75000, 8: 85000, 9: 95000, 10: 105000, 11: np.nan,
}
dem_edu = {4: np.nan}
dem_region = {1: 'Northeast', 2: 'Midwest', 3: 'South', 4: 'West'}

newDF['DEM_INCOME'] = newDF['DEM_INCOME'].replace(dem_income)
newDF['DEM_EDU'] = newDF['DEM_EDU'].replace(dem_edu)
newDF['DEM_REGION'] = newDF['DEM_REGION'].replace(dem_region)

crimeDF = pd.read_csv('estimated_crimes_1979_2019.csv')

# Keep only the 2018 rows; copy so the new column lands on an independent frame.
is_2018 = crimeDF['year'] == 2018
crimeDF2018 = crimeDF.loc[is_2018].copy()

# CRIME_RATE = row-wise sum of the columns at positions -11..-2, divided by
# population and scaled to a rate per 100,000.
# NOTE(review): the positional slice depends on the CSV column order — confirm
# it selects exactly the intended offense counts (no aggregate columns mixed in).
offense_total = crimeDF2018.iloc[:, -11:-1].sum(axis=1)
crimeDF2018['CRIME_RATE'] = (offense_total / crimeDF2018['population']) * 100000

crimeDF2018 = crimeDF2018.set_index('state_abbr')

# Build stateDF: one row per state, combining the 2018 crime rate with per-state
# aggregates of newDF (income, education, DAST score, drug-use indicators).

participants = newDF.groupby('DEM_STATE').count()
avgStateDAST = newDF.groupby(['DEM_STATE']).DAST_SUM.mean()

# Crime rate as a Series indexed by state_abbr, with the index label of the
# last row of crimeDF2018 dropped.
crimeRate2018 = crimeDF2018[['CRIME_RATE']].drop(crimeDF2018.tail(1).index).squeeze()

# Column groups, defined once so the selection and the recoding cannot drift apart.
USE_COLUMNS = [
    'CAN_REC_USE', 'COKE_USE', 'CRACK_USE', 'MDMA_USE', 'GHB_USE', 'INH_USE',
    'SPEED_USE', 'METHAM_USE', 'NPFENT_USE', 'HEROIN_USE', 'KET_USE',
    'MEPH_USE', 'LSD_USE', 'MUSH_USE', 'PCP_USE', 'MESC_USE', 'SPICE_USE',
    'SAL_USE', 'STER_USE',
]
YEAR_FLAG_COLUMNS = ['OP_NMU_YR', 'BENZ_NMU_YR', 'STIM_NMU_YR', 'GABA_NMU_YR', 'ILL_YR']

drugUse = newDF[['DEM_STATE'] + USE_COLUMNS + YEAR_FLAG_COLUMNS].copy()

# Binarise the "*_USE" codes: 2, 3 and 4 become 1, everything else (including
# missing values) becomes 0. Vectorised `isin` replaces the deprecated
# elementwise DataFrame.applymap and yields the same 0/1 integers.
drugUse[USE_COLUMNS] = drugUse[USE_COLUMNS].isin([2, 3, 4]).astype(int)
drugUse[YEAR_FLAG_COLUMNS] = drugUse[YEAR_FLAG_COLUMNS].replace({'Yes': 1, 'No': 0})

# Per-state totals and per-state row counts, computed once and reused below.
by_state = drugUse.groupby('DEM_STATE')
state_totals = by_state.sum()
state_counts = by_state.count()['CAN_REC_USE']

# Drug_Use: summed indicators across all substances, divided by the state's row count.
drugState = pd.DataFrame(state_totals.sum(axis=1), columns=['Drug_Use'])
drugState = drugState.div(state_counts, axis=0)

# Same normalisation, kept per substance.
indDrugState = state_totals.div(state_counts, axis=0)

incomeState = newDF[['DEM_STATE', 'DEM_INCOME']].groupby('DEM_STATE').mean()

# NOTE(review): education is averaged from dfRaw rather than newDF, so the
# DEM_EDU recode applied to newDF does not affect this mean — confirm intended.
edu = dfRaw[['DEM_STATE', 'DEM_EDU']].groupby('DEM_STATE').mean()

# Index-on-index merges (default inner join), in the original order.
stateDF = pd.DataFrame(crimeRate2018)
for state_part in (incomeState, edu, avgStateDAST, drugState, indDrugState):
    stateDF = stateDF.merge(state_part, left_index=True, right_index=True)

# Scatter of crime rate against DAST_SUM, one point per row of stateDF,
# coloured by the Drug_Use column.
plt.figure(figsize=(25, 25))
sc = plt.scatter(
    stateDF['CRIME_RATE'],
    stateDF.DAST_SUM,
    c=stateDF['Drug_Use'],
    cmap='flare',
)
plt.colorbar(sc, label='Average Amount of Drugs Used per Person')

# Label each plotted point with its index value. Labels are taken from stateDF
# (the frame that was plotted) so every label has a matching point and a state
# missing from one of the source series cannot raise KeyError.
for state, crime_rate, dast_mean in zip(
    stateDF.index, stateDF['CRIME_RATE'], stateDF['DAST_SUM']
):
    plt.text(crime_rate + 0.0003, dast_mean + 0.001, state)

plt.xlabel('Crime rate')
plt.ylabel('Average state DAST')
plt.title('Crime Rate vs. DAST-10')
plt.show()

# Choropleth over US states of stateDF's CRIME_RATE column, located by the
# frame's index.
data = {
    'type': 'choropleth',
    'locations': stateDF.index,
    'locationmode': 'USA-states',
    'colorscale': 'Reds',
    'reversescale': False,
    'text': 'Crime Rate',
    'z': stateDF['CRIME_RATE'],
    'colorbar': dict(title="Crime Rate"),
}
layout = {'title': 'Crime Rate by State', 'geo': {'scope': 'usa'}}

crime = go.Figure(data=[data], layout=layout)
crime.show()

# Per-state score: the infinity norm (largest absolute value) of the
# (CRIME_RATE, Drug_Use) pair, shown as a choropleth.
# NOTE(review): the labels below say "DAST-10" but the data column is Drug_Use —
# confirm which was intended.
cpy = stateDF[['CRIME_RATE', 'Drug_Use']].copy()
dCrime = [
    np.linalg.norm([crime_rate, drug_use], ord=np.inf)
    for crime_rate, drug_use in cpy.values.tolist()
]

data = {
    'type': 'choropleth',
    'locations': stateDF.index,
    'locationmode': 'USA-states',
    'colorscale': 'Burgyl',
    'text': 'DAST-10 Crime score',
    'z': dCrime,
    'colorbar': dict(title="DAST-10 and Crime Rate"),
}
layout = {'title': 'DAST-10 and Crime Rate by State', 'geo': {'scope': 'usa'}}

dastCrime = go.Figure(data=[data], layout=layout)
dastCrime.show()

# Choropleth over US states of stateDF's DEM_INCOME column.
data = {
    'type': 'choropleth',
    'locations': stateDF.index,
    'locationmode': 'USA-states',
    'colorscale': 'emrld',
    'reversescale': False,
    'text': 'Average Income',
    'z': stateDF['DEM_INCOME'],
    'colorbar': dict(title="Income in USD"),
}
layout = {'title': 'Average Income by State', 'geo': {'scope': 'usa'}}

choromap = go.Figure(data=[data], layout=layout)
choromap.show()

# Mann-Whitney U test between stateDF's CRIME_RATE and DEM_INCOME columns.
# NOTE(review): the two samples are in different units, so a rank test between
# them mostly reflects scale — confirm a correlation test was not intended.
crime_sample = stateDF['CRIME_RATE']
income_sample = stateDF['DEM_INCOME']
statistic, pval = stat.mannwhitneyu(crime_sample, income_sample)
print("pval: ", pval)

# Per-state mean of the raw DEM_EDU column. The choropleth below reads
# stateDF['DEM_EDU'], not this frame.
temp = dfRaw[['DEM_STATE', 'DEM_EDU']].groupby('DEM_STATE').mean()

# Choropleth over US states of stateDF's DEM_EDU column.
data = {
    'type': 'choropleth',
    'locations': stateDF.index,
    'locationmode': 'USA-states',
    'colorscale': 'Blues',
    'text': 'Average Education level',
    'z': stateDF['DEM_EDU'],
    'colorbar': dict(title="Average Education Level"),
}
layout = {'title': 'Average Education Level by State', 'geo': {'scope': 'usa'}}

choromap = go.Figure(data=[data], layout=layout)
choromap.show()

# Mann-Whitney U test between stateDF's CRIME_RATE and DEM_EDU columns.
crime_sample = stateDF['CRIME_RATE']
education_sample = stateDF['DEM_EDU']
statistic, pval = stat.mannwhitneyu(crime_sample, education_sample)
print("pval: ", pval)

# Crime rate weighted by the CAN_REC_USE share of overall drug use, per state,
# shown as a choropleth.
x = stateDF.index.to_list()

# Copy the slice so adding columns does not write into a view of stateDF.
weedCrime = stateDF[['CAN_REC_USE']].copy()
weedCrime['Crime'] = stateDF['CRIME_RATE']
weedCrime['Drug_Use'] = stateDF['Drug_Use']

# pivot = CAN_REC_USE / Drug_Use * Crime. The original read Drug_Use and Crime
# from a frame `y` that is only created in a later cell; this frame's own
# columns hold the same stateDF values and are always defined here.
weedCrime['pivot'] = (
    weedCrime['CAN_REC_USE']
    .div(weedCrime['Drug_Use'], axis=0)
    .mul(weedCrime['Crime'], axis=0)
)

data = dict(
    type='choropleth',
    locations=stateDF.index,
    locationmode='USA-states',
    colorscale='Greens',
    text='Crime Rate for Cannibis Users',
    z=weedCrime['pivot'],
    colorbar={'title': "Crime Rate"},
)
layout = dict(title='Crime Rate for Cannibis Users by State', geo=dict(scope='usa'))
choromap = go.Figure(data=[data], layout=layout)
choromap.show()

# Crime rate weighted by the OP_NMU_YR share of overall drug use, per state,
# shown as a choropleth.
x = stateDF.index.to_list()

# Copy the slice so adding columns does not write into a view of stateDF.
opiodCrime = stateDF[['OP_NMU_YR']].copy()
opiodCrime['Crime'] = stateDF['CRIME_RATE']
opiodCrime['Drug_Use'] = stateDF['Drug_Use']

# pivot = OP_NMU_YR / Drug_Use * Crime. The original read Drug_Use and Crime
# from a frame `y` that is only created in a later cell; this frame's own
# columns hold the same stateDF values and are always defined here.
opiodCrime['pivot'] = (
    opiodCrime['OP_NMU_YR']
    .div(opiodCrime['Drug_Use'], axis=0)
    .mul(opiodCrime['Crime'], axis=0)
)

data = dict(
    type='choropleth',
    locations=stateDF.index,
    locationmode='USA-states',
    colorscale='Purples',
    text='Crime Rate for Opioid Users',
    z=opiodCrime['pivot'],
    colorbar={'title': "Crime Rate"},
)
layout = dict(title='Crime Rate for Opioid Users by StaTE', geo=dict(scope='usa'))
choromap = go.Figure(data=[data], layout=layout)
choromap.show()

# Mann-Whitney U test between the 'pivot' columns of opiodCrime and weedCrime.
opioid_sample = opiodCrime['pivot']
cannabis_sample = weedCrime['pivot']
statistic, pval = stat.mannwhitneyu(opioid_sample, cannabis_sample)
print("pval: ", pval)

# Mann-Whitney U test between opiodCrime's 'pivot' column and stateDF's DEM_EDU.
opioid_sample = opiodCrime['pivot']
education_sample = stateDF['DEM_EDU']
statistic, pval = stat.mannwhitneyu(opioid_sample, education_sample)
print("pval: ", pval)

# Mann-Whitney U test between opiodCrime's 'pivot' column and stateDF's DEM_INCOME.
opioid_sample = opiodCrime['pivot']
income_sample = stateDF['DEM_INCOME']
statistic, pval = stat.mannwhitneyu(opioid_sample, income_sample)
print("pval: ", pval)

# Ridge plot of standardised drug-use sums grouped by DEM_EDU.
OTHER_DRUG_COLUMNS = [
    'COKE_USE', 'CRACK_USE', 'MDMA_USE', 'GHB_USE', 'INH_USE', 'SPEED_USE',
    'METHAM_USE', 'NPFENT_USE', 'HEROIN_USE', 'KET_USE', 'MEPH_USE',
    'LSD_USE', 'MUSH_USE', 'PCP_USE', 'SPICE_USE', 'SAL_USE',
]

# Column sums per education value, then one row-wise total across those columns.
temp = newDF[['DEM_EDU'] + OTHER_DRUG_COLUMNS].groupby(['DEM_EDU']).sum()
temp1 = newDF[['DEM_EDU', 'OP_NMU_YR', 'CAN_REC_USE']].groupby('DEM_EDU').sum()
temp['Other Drug Usage'] = temp.sum(axis=1)
temp = temp.merge(temp1, left_index=True, right_index=True)

# Standardise each of the three series (subtract column mean, divide by column std).
data = pd.DataFrame(temp[['Other Drug Usage', 'OP_NMU_YR', 'CAN_REC_USE']])
norm = data.sub(data.mean()).div(data.std())
norm.columns = ['Other Drug Usage', 'Opium Usage', 'Cannabis Usage']

fig, axes = joypy.joyplot(
    norm,
    title='Education on Illicit Drug Use',
    color=['#E34132', '#645394', '#17AB6F'],
)
fig.show()

# Ridge plot of standardised drug-use sums grouped by DEM_INCOME.
OTHER_DRUG_COLUMNS = [
    'COKE_USE', 'CRACK_USE', 'MDMA_USE', 'GHB_USE', 'INH_USE', 'SPEED_USE',
    'METHAM_USE', 'NPFENT_USE', 'HEROIN_USE', 'KET_USE', 'MEPH_USE',
    'LSD_USE', 'MUSH_USE', 'PCP_USE', 'SPICE_USE', 'SAL_USE',
]

# Column sums per income value, then one row-wise total across those columns.
temp = newDF[['DEM_INCOME'] + OTHER_DRUG_COLUMNS].groupby(['DEM_INCOME']).sum()
temp1 = newDF[['DEM_INCOME', 'OP_NMU_YR', 'CAN_REC_USE']].groupby('DEM_INCOME').sum()
temp['Drug Usage'] = temp.sum(axis=1)
temp = temp.merge(temp1, left_index=True, right_index=True)

# Standardise each of the three series (subtract column mean, divide by column std).
data = pd.DataFrame(temp[['Drug Usage', 'OP_NMU_YR', 'CAN_REC_USE']])
norm = data.sub(data.mean()).div(data.std())
norm.columns = ['Other Drug Usage', 'Opium Usage', 'Cannabis Usage']

fig, axes = joypy.joyplot(
    norm,
    title='Income on Illicit Drug Use ',
    color=['#E34132', '#645394', '#17AB6F'],
)
fig.show()

# Ridge plot of standardised drug-use sums grouped by DEM_AGE.
OTHER_DRUG_COLUMNS = [
    'COKE_USE', 'CRACK_USE', 'MDMA_USE', 'GHB_USE', 'INH_USE', 'SPEED_USE',
    'METHAM_USE', 'NPFENT_USE', 'HEROIN_USE', 'KET_USE', 'MEPH_USE',
    'LSD_USE', 'MUSH_USE', 'PCP_USE', 'SPICE_USE', 'SAL_USE',
]

# Column sums per age value, then one row-wise total across those columns.
temp = newDF[['DEM_AGE'] + OTHER_DRUG_COLUMNS].groupby(['DEM_AGE']).sum()
temp1 = newDF[['DEM_AGE', 'OP_NMU_YR', 'CAN_REC_USE']].groupby('DEM_AGE').sum()
temp['Drug Usage'] = temp.sum(axis=1)
temp = temp.merge(temp1, left_index=True, right_index=True)

# Standardise each of the three series (subtract column mean, divide by column std).
data = pd.DataFrame(temp[['Drug Usage', 'OP_NMU_YR', 'CAN_REC_USE']])
norm = data.sub(data.mean()).div(data.std())
norm.columns = ['Other Drug Usage', 'Opium Usage', 'Cannabis Usage']

fig, axes = joypy.joyplot(
    norm,
    title='Age on Illicit Drug Use',
    color=['#E34132', '#645394', '#17AB6F'],
)
fig.show()

# Stacked bar chart: each substance column's share of the row total, one bar
# per DEM_EDU value.
SUBSTANCE_COLUMNS = [
    'CAN_REC_USE', 'OP_NMU_YR', 'COKE_USE', 'CRACK_USE', 'MDMA_USE',
    'GHB_USE', 'INH_USE', 'SPEED_USE', 'METHAM_USE', 'NPFENT_USE',
    'HEROIN_USE', 'KET_USE', 'MEPH_USE', 'LSD_USE', 'MUSH_USE', 'PCP_USE',
    'SPICE_USE', 'SAL_USE',
]

temp = newDF[['DEM_EDU'] + SUBSTANCE_COLUMNS]
temp = temp.groupby('DEM_EDU').sum()

# Row total across all substance columns, then each column as a fraction of it.
temp['Total'] = temp.sum(axis=1)
temp[SUBSTANCE_COLUMNS] = temp[SUBSTANCE_COLUMNS].div(temp['Total'], axis=0)

temp[SUBSTANCE_COLUMNS].plot(kind='bar', stacked=True, title='')

# Stacked bar chart: each "*_YR" column's share of the row total, one bar per
# DEM_STATE value, computed from the raw frame.
YEAR_FLAG_COLUMNS = ['OP_NMU_YR', 'BENZ_NMU_YR', 'STIM_NMU_YR', 'GABA_NMU_YR', 'ILL_YR']

temp = dfRaw[['DEM_STATE'] + YEAR_FLAG_COLUMNS]
temp = temp.groupby('DEM_STATE').sum()

# Row total across the five columns, then each column as a fraction of it.
temp['Total'] = temp.sum(axis=1)
temp[YEAR_FLAG_COLUMNS] = temp[YEAR_FLAG_COLUMNS].div(temp['Total'], axis=0)

temp[YEAR_FLAG_COLUMNS].plot(kind='bar', stacked=True)

# Mann-Whitney U test between stateDF's CRIME_RATE and Drug_Use columns.
crime_sample = stateDF['CRIME_RATE']
drug_use_sample = stateDF['Drug_Use']
statistic, pval = stat.mannwhitneyu(crime_sample, drug_use_sample)
print("pval: ", pval)

# Heatmap: for every state and substance, the substance's share of overall
# drug use multiplied by the state's crime rate.
sns.set(rc={'figure.figsize': (15, 10)})
x = stateDF.index.to_list()

# Copy the slice: columns are added below, and writing into an un-copied
# selection of stateDF triggers pandas' SettingWithCopyWarning.
y = stateDF[[
    'COKE_USE', 'CRACK_USE', 'MDMA_USE', 'GHB_USE', 'INH_USE', 'SPEED_USE',
    'METHAM_USE', 'NPFENT_USE', 'HEROIN_USE', 'KET_USE', 'MEPH_USE',
    'LSD_USE', 'MUSH_USE', 'PCP_USE', 'SPICE_USE', 'SAL_USE',
]].copy()
y['Crime'] = stateDF['CRIME_RATE']
y['Drug_Use'] = stateDF['Drug_Use']

# Every column except the two helper columns just appended.
colChange = y.columns[:-2]

# substance / Drug_Use * Crime, transposed so substances are rows and states
# are columns.
pivot = y[colChange].div(y.Drug_Use, axis=0).mul(y.Crime, axis=0).T

# Display names, in the same order as the substance columns selected above.
y_axis_labels = [
    'Coke', 'Crack', 'MDMA', 'GHB', 'Inhalant', 'Speed', 'Methamphetamemes',
    'Fentanyl', 'Heroin', 'Ketamine', 'Mephedrone', 'LSD', 'Mushrooms', 'PCP',
    'Spice', 'Salvia',
]

ax = sns.heatmap(pivot, cmap='flare', yticklabels=y_axis_labels)
plt.xlabel('States')
plt.title('Crime Rate by State Scaled by Usage of Each Illicit Drug')
plt.show()

# This is intentionally left blank