import pandas as pd
# Columns written to the cleaned CSV, in output order.
OUTPUT_COLUMNS = [
    'conforming_loan_limit',
    'nonconforming_loan_limit',
    'male_sex',
    'female_sex',
    'action_taken',
    'loan_amount',
    'property_value',
    'income',
    'debt_to_income_ratio',
]

# Bucket label -> single representative number used in place of the label.
# Any debt_to_income_ratio value that is not a key here is left untouched.
DTI_BUCKET_VALUES = {
    '<20%': 10,
    '20%-<30%': 25,
    '30%-<36%': 33,
    '50%-60%': 55,
    '>60%': 70,
}


def _flag(column, label):
    """Return an integer Series that is 1 where *column* equals *label*, else 0.

    Missing values compare unequal to every label and therefore become 0.
    """
    return column.eq(label).astype(int)


def clean_mortgage_frame(frame):
    """Return the cleaned, column-reduced copy of a raw mortgage frame.

    The input is not modified.

    Args:
        frame: DataFrame holding at least the columns
            ``conforming_loan_limit``, ``derived_sex``, ``action_taken``,
            ``loan_amount``, ``property_value``, ``income`` and
            ``debt_to_income_ratio``.

    Returns:
        A new DataFrame containing exactly ``OUTPUT_COLUMNS``:

        * ``conforming_loan_limit`` / ``nonconforming_loan_limit``: 1 where
          the raw ``conforming_loan_limit`` is ``'C'`` / ``'NC'``, else 0.
        * ``male_sex`` / ``female_sex``: 1 where ``derived_sex`` is
          ``'Male'`` / ``'Female'``, else 0.
        * ``action_taken``: 1 recoded to 0 and 3 recoded to 1; every other
          code is kept as-is.
        * ``debt_to_income_ratio``: labels listed in ``DTI_BUCKET_VALUES``
          swapped for their number; everything else is kept as-is.
        * ``loan_amount``, ``property_value``, ``income``: passed through.

    Raises:
        KeyError: if a required column is missing from *frame*.
    """
    loan_limit = frame['conforming_loan_limit']
    sex = frame['derived_sex']

    # Order matters: 1 -> 0 must run before 3 -> 1, otherwise the freshly
    # written 1s would be turned into 0s as well.
    # NOTE(review): 1 and 3 are presumably the "originated" / "denied"
    # action codes - confirm against the data dictionary.
    action = frame['action_taken'].replace(1, 0).replace(3, 1)

    # Applied one label at a time, in mapping order.
    dti = frame['debt_to_income_ratio']
    for label, number in DTI_BUCKET_VALUES.items():
        dti = dti.replace(label, number)

    cleaned = frame.assign(
        conforming_loan_limit=_flag(loan_limit, 'C'),
        nonconforming_loan_limit=_flag(loan_limit, 'NC'),
        male_sex=_flag(sex, 'Male'),
        female_sex=_flag(sex, 'Female'),
        action_taken=action,
        debt_to_income_ratio=dti,
    )
    return cleaned[OUTPUT_COLUMNS]


# File I/O runs only when this file is executed as a script, so the helpers
# above can be imported without touching the filesystem.
if __name__ == '__main__':
    df = pd.read_csv('mortgage-testing-data.csv')
    df = clean_mortgage_frame(df)
    # The index is left at its default, so it is written as the first column.
    df.to_csv('mortgage-testing-data-cleaned.csv', header=True)