# Mortgage Data Prep
# By Krtin
# importing and viewing the dataset
import pandas as pd
# Load the dataset from a CSV file; the path is relative, so it is resolved
# against the current working directory.
df = pd.read_csv('mortgage-testing-data.csv')
# Bare expression: displays the dtype of each column when run interactively
# (e.g. as the last line of a notebook cell); it has no visible effect when
# the file is run as a plain script.
df.dtypes
# creating functions to simplify filtering
def conforming_changer(name):
    """Return 1 if *name* equals ``'C'``, otherwise 0.

    Any value other than the exact string ``'C'`` (including missing
    values) yields 0.
    """
    return 1 if name == 'C' else 0
def non_conforming_changer(name):
    """Return 1 if *name* equals ``'NC'``, otherwise 0.

    Any value other than the exact string ``'NC'`` (including missing
    values) yields 0.
    """
    if name == 'NC':
        return 1
    return 0
def male_changer(name):
    """Return 1 if *name* equals ``'Male'``, otherwise 0.

    The comparison is exact and case-sensitive; every other value
    (including missing values) yields 0.
    """
    return 1 if name == 'Male' else 0
def female_changer(name):
    """Return 1 if *name* equals ``'Female'``, otherwise 0.

    The comparison is exact and case-sensitive; every other value
    (including missing values) yields 0.
    """
    if name == 'Female':
        return 1
    return 0
def action_changer(name):
    """Return 1 if *name* compares equal to the number 1, otherwise 0.

    Because the check uses ``==``, ``1.0`` and ``True`` also count as 1,
    while the string ``'1'`` does not.
    """
    return 1 if name == 1 else 0
# Fixed numeric stand-ins for the bucketed debt-to-income labels.
_D_TO_I_BUCKET_VALUES = {
    '20%-<30%': 25,
    '50%-60%': 55,
    '30%-<36%': 33,
    '<20%': 10,
    '>60%': 70,
}


def d_to_i_changer(name):
    """Convert a debt-to-income label into a single number.

    Bucketed labels are replaced by a fixed representative value
    (``'<20%'`` -> 10, ``'20%-<30%'`` -> 25, ``'30%-<36%'`` -> 33,
    ``'50%-60%'`` -> 55, ``'>60%'`` -> 70). Any other value is converted
    with ``int()``, so ``'36'`` becomes 36 and ``42.0`` becomes 42.

    Missing values (``None`` or NaN) return NaN instead of raising, so a
    single empty cell does not abort a whole ``Series.apply`` call.

    Raises:
        ValueError: if *name* is a non-bucket string that ``int()``
            cannot parse.
    """
    if name in _D_TO_I_BUCKET_VALUES:
        return _D_TO_I_BUCKET_VALUES[name]
    # NaN is the only value that is not equal to itself; int() would raise
    # on it (and on None), so pass missing values through as NaN.
    if name is None or name != name:
        return float('nan')
    return int(name)
# Derive the encoded columns from the raw ones, then discard the raw columns.
# Each entry is (new column name, source column name, per-value converter).
_derived_columns = [
    ('conforming', 'conforming_loan_limit', conforming_changer),
    ('non conforming', 'conforming_loan_limit', non_conforming_changer),
    ('male', 'derived_sex', male_changer),
    ('female', 'derived_sex', female_changer),
    ('application accepted', 'action_taken', action_changer),
    ('central debt to income ratio', 'debt_to_income_ratio', d_to_i_changer),
]
for _new_col, _source_col, _changer in _derived_columns:
    df[_new_col] = df[_source_col].apply(_changer)

# The source columns are no longer needed once their encoded versions exist.
df = df.drop(
    columns=[
        'conforming_loan_limit',
        'derived_sex',
        'action_taken',
        'debt_to_income_ratio',
    ]
)
# Bare expressions: only display output when run interactively.
df.head()
df.dtypes