# Week 3 Homework - Duplicate

import pandas as pd

# Read the practice dataset (a CSV expected in the working directory).
df = pd.read_csv(filepath_or_buffer='practice-project-dataset-1.csv')

# Peek at the leading rows; five is head()'s default.
df.head(n=5)

# Print each column's name, dtype and non-null count.
df.info()

# Narrow the frame to the columns used in the rest of the analysis.
# The explicit .copy() makes `df` an independent DataFrame rather than a
# selection derived from the full table, so the column assignments that
# follow do not raise pandas' SettingWithCopyWarning.
df = df[[
    'interest_rate',
    'property_value',
    'state_code',
    'tract_minority_population_percent',
    'derived_race',
    'derived_sex',
    'applicant_age',
]].copy()
df.info()

import numpy as np

# The literal string 'Exempt' appears where a number is expected; turn it
# into NaN first so the column can then be cast to float.
df['interest_rate'] = (
    df['interest_rate']
    .replace(to_replace='Exempt', value=np.nan)
    .astype(float)
)

# Same two-step clean-up for the property values.
df['property_value'] = (
    df['property_value']
    .replace(to_replace='Exempt', value=np.nan)
    .astype(float)
)
df.info()

# Tabulate how many rows carry each distinct applicant_age value.
df['applicant_age'].value_counts()

# Re-store these three columns using the pandas 'category' dtype.
for column in ('derived_race', 'derived_sex', 'applicant_age'):
    df[column] = df[column].astype('category')
df.info()

# Rows whose property value is strictly below 500,000.
lower_prices = df.loc[df['property_value'].lt(500000)]

# Within those rows, split off the two ends of the tract minority
# percentage: strictly above 75 and strictly below 25.
high_minority = lower_prices.loc[
    lower_prices['tract_minority_population_percent'].gt(75)
]
low_minority = lower_prices.loc[
    lower_prices['tract_minority_population_percent'].lt(25)
]

import matplotlib.pyplot as plt

# Draw the property-value histograms of the two groups side by side.
# density=True rescales each histogram so its area is 1, which means the
# bar heights are probability densities, not proportions of applications;
# the y-axis is labelled accordingly.
plt.hist(
    [high_minority['property_value'], low_minority['property_value']],
    bins=20,
    density=True,
)
plt.legend(['High % minority', 'Low % minority'])
plt.title('Sample of 2018 Home Mortgage Applications')
plt.xlabel('Property Value')
plt.ylabel('Density')
plt.show()

# Pair of mean property values: high-minority group first, low-minority second.
tuple(
    subset['property_value'].mean()
    for subset in (high_minority, low_minority)
)

from scipy import stats

# Significance level for the test.
alpha = 0.05

# Two-sample t-test on the property values of the two groups;
# equal_var=False selects Welch's variant, which does not assume the
# groups share a variance.
statistic, pvalue = stats.ttest_ind(
    high_minority['property_value'],
    low_minority['property_value'],
    equal_var=False,
)

pvalue < alpha  # reject H_0?