# Day 11 - Predicting Mortgage Outcomes
# Jame Zou | MA346-SN1F
import pandas as pd
import numpy as np
# load the mortgage application records
df = pd.read_csv("mortgage-testing-data.csv")
# inspect the dtype pandas inferred for each column
df.dtypes

# each entry: (new 0/1 column, source column, source value that maps to 1)
indicator_specs = [
    ('conforming', 'conforming_loan_limit', 'C'),
    ('not_conforming', 'conforming_loan_limit', 'NC'),
    ('derived_sex_female', 'derived_sex', 'Female'),
    ('derived_sex_male', 'derived_sex', 'Male'),
    # an action_taken value of 1 is treated as an accepted application
    ('accepted', 'action_taken', 1),
]
# add the indicator columns in the order listed; rows whose source value
# does not match (including missing values) get 0
for indicator_name, source_name, flagged_value in indicator_specs:
    is_flagged = df[source_name].isin([flagged_value])
    df[indicator_name] = np.where(is_flagged, 1, 0)
# list the distinct raw values found in debt_to_income_ratio
df['debt_to_income_ratio'].unique()

# exact percentages stored as strings ('36' .. '49') become the same integer
dti_recode = {str(pct): pct for pct in range(36, 50)}
# range labels become a single representative integer
dti_recode.update({
    '<20%': 10,
    '20%-<30%': 25,
    '30%-<36%': 33,
    '50%-60%': 55,
    '>60%': 70,
})
# recode only the debt_to_income_ratio column; values that are not keys of
# the table are left as they are
df = df.replace({'debt_to_income_ratio': dti_recode})
df

# save the transformed table with a header row and without the index
df.to_csv('mortgage-numerical.csv', header=True, index=False)