Resident Voting Project

import pandas as pd

df = pd.read_csv('rawdata.csv')

len(df.index)

dup_cols = [col for col in df.columns if col not in ['Submission ID', 'Submitted at']]

df_clean = df.drop_duplicates(subset=dup_cols)

len(df_clean)

df_response = df_clean.groupby('PGY year?').agg( count = ('Submission ID', 'nunique') ) df_response

(df_clean['Did you vote in the 2022 Midterm Election?'] == 'I am not eligible to vote (i.e. non-US citizen)').sum()

df_eligible = df_clean[df_clean['Did you vote in the 2022 Midterm Election?'] != 'I am not eligible to vote (i.e. non-US citizen)']

len(df_eligible.index)

def did_vote(response): if response == 'Yes': return 1 else: return 0 df_eligible['did_vote'] = df_eligible.apply( lambda x: did_vote(x['Did you vote in the 2022 Midterm Election?']), axis=1 )

df_samestate = df_eligible.groupby('Are you registered to vote in the same state as your institution?').agg( count = ('Respondent ID', 'nunique') ) df_samestate

df_state = df_eligible.groupby('In which state are you registered?').agg( count = ('Respondent ID', 'nunique') ) df_state

df_eligible['did_vote'].sum()

turnout = df_eligible['did_vote'].sum()/len(df_eligible.index) turnout

df_turnout = df_eligible.groupby('PGY year?').agg( vote = ('did_vote', 'sum'), count = ('Respondent ID', 'nunique') ) df_turnout.reset_index(inplace=True) df_turnout['pct_voting'] = df_turnout['vote']/df_turnout['count'] df_turnout

df_not_vote = df_eligible[df_eligible['did_vote'] != 1] len(df_not_vote.index)

df_not_vote.columns = df_not_vote.columns.str.replace(r'What reasons contributed to your decision not to vote\?', '') df_not_vote.columns = df_not_vote.columns.str.replace(r'\n\(Select all that apply\)', '') df_not_vote.columns = df_not_vote.columns.str.replace(r'\(', '') df_not_vote.columns = df_not_vote.columns.str.replace(r'\)', '')

barrier_cols = df_not_vote.columns[13:22].to_list() barrier_cols

df_melt = df_not_vote.melt( id_vars = ['Respondent ID'], value_vars = barrier_cols, var_name='barrier' ) df_melt['value'] = df_melt['value'].replace({True: 1, False: 0}) df_melt.dropna(inplace=True)

df_barriers = df_melt.groupby('barrier').agg( count = ('value', 'sum'), total = ('Respondent ID', 'nunique') ) df_barriers.reset_index(inplace=True) df_barriers

df_barriers['count'] = df_barriers['count'].replace(6, 7) df_barriers = df_barriers[~df_barriers['barrier'].isin(['Other', 'Please describe...'])] df_barriers.sort_values(by='count', ascending=False, inplace=True) df_barriers['pct'] = df_barriers['count']/df_barriers['total'] df_barriers