import pandas as pd
import numpy as np
import re
# Load the mortgage test data set (path is relative to the current working
# directory) into the module-level DataFrame used by every step below.
mortgage = pd.read_csv('mortgage-testing-data.csv')
# Bare expression: its value is discarded when run as a plain script.
# NOTE(review): presumably left over from interactive/notebook use, where it
# displays the per-column dtypes -- confirm before removing.
mortgage.dtypes
# Encode ``conforming_loan_limit`` as two complementary 0/1 indicator columns:
#   'NC' -> NC_CFL = 1, C_CFL = 0
#   'C'  -> NC_CFL = 0, C_CFL = 1
# Any other value (including missing) becomes NaN in both columns, because
# Series.map yields NaN for keys absent from the mapping.
#
# Whole-column assignment replaces the previous row-by-row chained assignment
# (``mortgage[col][i] = ...``), which failed because the target columns did
# not exist yet and is not a reliable way to write into a DataFrame.
mortgage['NC_CFL'] = mortgage['conforming_loan_limit'].map({'NC': 1, 'C': 0})
mortgage['C_CFL'] = mortgage['conforming_loan_limit'].map({'NC': 0, 'C': 1})
# Encode ``derived_sex`` as two complementary 0/1 indicator columns:
#   'Female' -> is_female = 1, is_male = 0
#   'Male'   -> is_female = 0, is_male = 1
# Every other value is left as NaN in both columns (Series.map yields NaN for
# keys absent from the mapping), matching the original loop, which assigned
# nothing for such rows.
#
# Whole-column assignment is used because the indicator columns do not exist
# beforehand, so element-wise chained assignment cannot create them.
mortgage['is_female'] = mortgage['derived_sex'].map({'Female': 1, 'Male': 0})
mortgage['is_male'] = mortgage['derived_sex'].map({'Female': 0, 'Male': 1})
# Encode ``action_taken`` as two complementary 0/1 indicator columns:
#   1 -> application_accepted = 1, application_denied = 0
#   0 -> application_accepted = 0, application_denied = 1
# Any other code is left as NaN in both columns (Series.map yields NaN for
# keys absent from the mapping), matching the original branches.
# NOTE(review): assumes the CSV stores action_taken as numeric 0/1 -- confirm
# against the data; string codes such as '1' would not match these keys.
mortgage['application_accepted'] = mortgage['action_taken'].map({1: 1, 0: 0})
mortgage['application_denied'] = mortgage['action_taken'].map({1: 0, 0: 1})
# Matches each unsigned integer or decimal number embedded in a bucket label.
_DTI_NUMBER = re.compile(r'\d+(?:\.\d+)?')


def _dti_to_number(raw):
    """Convert one ``debt_to_income_ratio`` cell to a single float.

    Rules (same arithmetic the original loop intended):

    * plain number (``'36'`` or an already-numeric cell) -> that number
    * range with two bounds (e.g. ``'20<-30'``)           -> midpoint of the bounds
    * leading ``'>'`` (e.g. ``'>60'``)                     -> bound + 1
    * leading ``'<'`` (e.g. ``'<20'``)                     -> bound - 10
    * missing value, or text containing no number         -> NaN

    NOTE(review): the example label spellings are assumptions taken from the
    slicing in the original loop; numbers are extracted with a regex so other
    spellings of the same buckets (percent signs, different separators) are
    handled too -- confirm against the actual CSV values.
    """
    if pd.isna(raw):
        return np.nan
    if isinstance(raw, (int, float, np.number)):
        # The column may already have been parsed as numeric by read_csv.
        return float(raw)

    text = str(raw).strip()
    bounds = [float(token) for token in _DTI_NUMBER.findall(text)]
    if not bounds:
        return np.nan

    # Open-ended buckets: offsets (+1 / -10) are kept from the original script.
    if text.startswith('>'):
        return bounds[0] + 1
    if text.startswith('<'):
        return bounds[0] - 10

    if len(bounds) >= 2:
        lower, upper = bounds[0], bounds[1]
        return lower + (upper - lower) * 0.5
    return bounds[0]


# Replace the mixed text/number column with a purely numeric one in a single
# whole-column assignment, so no branch can overwrite the entire column with
# one row's value.
mortgage['debt_to_income_ratio'] = mortgage['debt_to_income_ratio'].map(_dti_to_number)
# Remove the raw categorical columns now that they have been encoded as
# indicator columns. DataFrame.drop returns a new frame rather than modifying
# in place, so the result is bound back to ``mortgage``; the keyword is
# ``columns`` (plural).
mortgage = mortgage.drop(columns=['conforming_loan_limit', 'action_taken', 'derived_sex'])