import pandas as pd
# --- Daily station data (file USW00094850.csv) ------------------------------
# Bare expressions below only display a value in an interactive session
# (notebook / REPL); they are kept so the cells can be inspected one by one.
df = pd.read_csv('USW00094850.csv', low_memory=False)

# Parse the date column. The month directive is lowercase %m; uppercase %M
# means *minutes* and would silently place every row in January.
df['Date'] = pd.to_datetime(df['Year-Month-Day'], format="%Y-%m-%d")
df[['Date', 'DlySum']].head()

# Rows dated within calendar year 1990 (half-open interval). ISO-8601 date
# strings are used so the bounds cannot be misread as day-first/month-first.
df_1990 = df[(df['Date'] >= '1990-01-01') & (df['Date'] < '1991-01-01')]
# Total of DlySum over 1990, scaled by 1/100.
# NOTE(review): presumably DlySum is stored in hundredths of a unit - confirm.
df_1990['DlySum'].sum() / 100

# Rows dated 2000-01-01 through 2009-12-31.
df_2000s = df[(df['Date'] >= '2000-01-01') & (df['Date'] < '2010-01-01')]
# Percentage of those rows whose DlySum is at least 100.
(df_2000s['DlySum'] >= 100).sum() / len(df_2000s) * 100

# idxmax() returns the index *label* of the largest DlySum, which is what
# .loc expects; argmax() returns a position and only matches the label when
# the frame still has its default RangeIndex.
highest_index = df['DlySum'].idxmax()
df.loc[highest_index, 'Date']
# Every row that ties the maximum DlySum, not just the first one.
df[df['DlySum'] == df.loc[highest_index, 'DlySum']]
import numpy as np
# --- Mortgage applications and 2016 election results ------------------------
# Bare expressions only display a value in an interactive session.
df_mortgages = pd.read_csv('practice-project-dataset-1.csv')

# The property_value column contains the text 'Exempt' in some rows; turn
# those into NaN first so the whole column can be converted to float.
df_mortgages['property_value'] = df_mortgages['property_value'].replace(
    to_replace='Exempt', value=np.nan
)
df_mortgages['property_value'] = df_mortgages['property_value'].astype(float)
df_mortgages.head()

df_election = pd.read_csv('npr-2016-election-data.csv')
df_election.head()

# Median property value per state.
df_mortgages.groupby('state_code')['property_value'].median()
# Median property value per derived_race group, largest first.
(
    df_mortgages.groupby('derived_race')['property_value']
    .median()
    .sort_values(ascending=False)
)

# Lookup from the election table's 'State' values to its 'Trump' values,
# attached to each application through its state_code.
state_Trump_pct = {
    state: pct
    for state, pct in zip(df_election['State'], df_election['Trump'])
}
df_mortgages['Trump2016%'] = df_mortgages['state_code'].map(state_Trump_pct)
import matplotlib.pyplot as plt
# --- Scatter plot: property value vs. state-level Trump share ---------------
# Keep only applications whose property value is at most 500,000.
focus = df_mortgages.loc[df_mortgages['property_value'] <= 500_000]

# Very low alpha so that dense regions of overlapping points show up darker.
plt.scatter(x=focus['property_value'], y=focus['Trump2016%'], alpha=0.025)
plt.ylim(25, 75)
plt.xlabel('Property Value')
plt.ylabel('Percent of State Voting\nfor Trump in 2016')
plt.title('Mortgage Applications in 2020')
plt.show()
import scipy.stats as stats
# --- Two-sample t-test on property values -----------------------------------
# Bare expressions only display a value in an interactive session.
α = 0.05  # significance level

# Property values (NaNs dropped) for applications whose Trump2016% value is
# at least 60 (reps) or at most 40 (dems).
reps = df_mortgages.loc[
    df_mortgages['Trump2016%'] >= 60, 'property_value'
].dropna()
dems = df_mortgages.loc[
    df_mortgages['Trump2016%'] <= 40, 'property_value'
].dropna()

# equal_var=False: do not assume the two groups share a variance
# (Welch's t-test).
t_statistics, p_value = stats.ttest_ind(a=dems, b=reps, equal_var=False)
reject_H0 = p_value < α
(α, p_value, reject_H0)
reps
(reps.mean(), dems.mean())