import pandas as pd
# Load the raw dataset and take a first look at its contents and dtypes.
df = pd.read_csv('practice-project-dataset-1.csv')

df.head()

df.info()

# Keep only the columns used in the rest of this analysis.
columns_of_interest = [
    'interest_rate',
    'property_value',
    'state_code',
    'tract_minority_population_percent',
    'derived_race',
    'derived_sex',
    'applicant_age',
]
df = df[columns_of_interest]
df.info()

import numpy as np
# Both columns can contain the literal string 'Exempt'; swap it for NaN so the
# column can then be cast to float.
for numeric_col in ('interest_rate', 'property_value'):
    cleaned = df[numeric_col].replace('Exempt', np.nan)
    df[numeric_col] = cleaned.astype(float)
df.info()

# Inspect the distinct applicant_age values before converting the column.
df['applicant_age'].value_counts()

# Store the demographic columns as pandas categoricals.
df = df.astype({
    'derived_race': 'category',
    'derived_sex': 'category',
    'applicant_age': 'category',
})
df.info()

# Build three DataFrames from "df" for the comparison below:
#   lower_prices  - rows with property_value below 500000
#   high_minority - lower_prices rows whose tract minority percentage is above 75
#   low_minority  - lower_prices rows whose tract minority percentage is below 25
below_price_cap = df['property_value'] < 500000
lower_prices = df[below_price_cap]

minority_pct = lower_prices['tract_minority_population_percent']
high_minority = lower_prices[minority_pct > 75]
low_minority = lower_prices[minority_pct < 25]

# Plot the property values of the high- and low-minority groups side by side
# as histograms with 20 bins, so the two distributions can be compared.
import matplotlib.pyplot as plt
plt.hist(
    [high_minority['property_value'], low_minority['property_value']],
    bins=20,
    # Normalize each group separately so groups of different sizes are
    # comparable on the same axes.
    density=True,
)
plt.legend(['High % minority', 'Low % minority'])
plt.title('Sample of 2018 Home Mortgage Applications')
plt.xlabel('Property Value')
# With density=True the bar heights are probability densities (each group's
# bars integrate to 1), not per-bin proportions, so label the axis accordingly.
plt.ylabel('Density')
plt.show()
# Compute the mean property value of the high- and low-minority groups (in
# that order) as a quick, informal comparison ahead of the hypothesis test.
tuple(
    group['property_value'].mean()
    for group in (high_minority, low_minority)
)

# Test H_0 (mean property value is the same in high- and low-minority groups)
# at the 5% significance level. equal_var=False means the two groups are not
# assumed to share a variance (Welch's t-test).
from scipy import stats
alpha = 0.05
welch_result = stats.ttest_ind(
    high_minority['property_value'],
    low_minority['property_value'],
    equal_var=False,
)
statistic, pvalue = welch_result
pvalue < alpha  # reject H_0?