#
import pandas as pd
# Load the rejections data set from the "data" folder; pandas infers the gzip
# compression from the ".gz" suffix.
df = pd.read_csv(filepath_or_buffer="../data/rejections.csv.gz")
df.head(n=5)
#
# Row and column counts.
df.shape
#
# Column types as parsed by pandas.
df.dtypes
#
# Missing-value count per column, then another look at the first rows.
df.isna().sum()
df.head(n=5)
#
# Range of the requested amounts.
requested = df["Amount Requested"]
print(requested.min())
print(requested.max())
#
# Inspect the extremes: zero-amount requests and the row(s) at the maximum.
df.loc[requested.eq(0)]
df.loc[requested.eq(requested.max())]
# Keep only strictly positive amounts; rows with a missing amount fail the
# comparison and are dropped as well.
df = df.loc[requested.gt(0)]
#
# Number of rows with no state recorded.
int(df["State"].isna().sum())
# `subset` is the dropna argument that restricts the missing-value check to
# the listed columns. The other arguments (axis, how, thresh, inplace) control
# the drop direction, the any/all rule, a non-null count threshold and
# in-place mutation; none of them takes a list of column names.
df = df.dropna(axis="index", subset=["State"])
# Average requested amount per state.
df["Amount Requested"].groupby(df["State"]).mean()
# Frequency of each employment-length value, ordered by label.
(
    df["Employment Length"]
    .value_counts()
    .sort_index()
)
#
#
# Titles are required for the text search, so drop rows without one and add
# an upper-cased copy of the title so the search is case-insensitive.
df = df.dropna(subset=["Loan Title"]).assign(
    loan_title_upper=lambda frame: frame["Loan Title"].str.upper()
)
# Count the titles mentioning "DEBT" and print the count followed by its
# percentage of the remaining rows.
has_debt = df["loan_title_upper"].str.contains("DEBT")
debt_count = df.loc[has_debt].shape[0]
print(debt_count)
print(100 * (debt_count / df.shape[0]))
#
# Flag every row whose upper-cased loan title contains at least one key word,
# then print the number of flagged rows and their percentage of all rows.
key_words = ["DEBT", "CONSOLIDATE", "CONSOLIDATING"]
# Start from an all-False mask so the column always exists and stays boolean,
# even when no key word matches any title.
matches_any = pd.Series(False, index=df.index)
for word in key_words:
    # na=False treats a missing title as "no match" instead of propagating NaN
    # into the mask.
    matches_any |= df["loan_title_upper"].str.contains(word, na=False)
# Assign the finished mask in one step rather than filling the column in place
# through a chained selection, which does not update df under Copy-on-Write.
df["matches_key_word"] = matches_any
total_count = int(matches_any.sum())
print(total_count)
print(100 * (total_count / len(df)))