import pandas as pd
# Prepare the mortgage testing data: turn two categorical columns into
# indicator columns, recode action_taken, make debt_to_income_ratio numeric,
# and write the result to testing_data.csv.

# Recoding applied to action_taken: 1 becomes 0 and 3 becomes 1. Any other
# value is left untouched. The pairs are applied in a single replace call, so
# the 0/1 outputs are never re-matched against the keys.
ACTION_TAKEN_RECODE = {1: 0, 3: 1}

# Numeric stand-ins for the bucketed debt-to-income labels. Values not listed
# here pass through unchanged and must already be parseable by pd.to_numeric.
DEBT_TO_INCOME_BUCKETS = {
    "<20%": 10,
    "20%-<30%": 25,
    "30%-<36%": 33,
    "50%-60%": 55,
    ">60%": 70,
}

df = pd.read_csv("mortgage-testing-data.csv")

# Swap conforming_loan_limit for the 0/1 indicator columns produced by
# str.get_dummies; the indicators are appended after the existing columns.
df1 = df["conforming_loan_limit"].str.get_dummies()
df = pd.concat([df, df1], axis=1).drop(columns=["conforming_loan_limit"])

# Same treatment for derived_sex.
df2 = df["derived_sex"].str.get_dummies()
df = pd.concat([df, df2], axis=1).drop(columns=["derived_sex"])

df["action_taken"] = df["action_taken"].replace(ACTION_TAKEN_RECODE)

# NOTE(review): pd.to_numeric raises on any remaining non-numeric label that
# is not covered by DEBT_TO_INCOME_BUCKETS -- confirm the input never has one.
df["debt_to_income_ratio"] = pd.to_numeric(
    df["debt_to_income_ratio"].replace(DEBT_TO_INCOME_BUCKETS)
)

# NOTE(review): the default index=True also writes the row index as an
# unnamed first column -- confirm downstream readers expect that.
df.to_csv("testing_data.csv")