#
import pandas as pd
Run to view results
# The rejections data set lives in the sibling "data" folder.
rejections_csv = "../data/rejections.csv.gz"
df = pd.read_csv(rejections_csv)
# Preview the first rows to confirm the load worked.
df.head()
Run to view results
#
# Dimensions of the table as a (rows, columns) tuple.
df.shape
Run to view results
#
# Data type pandas assigned to each column when reading the CSV.
df.dtypes
Run to view results
#
# Number of missing values in each column.
df.isna().sum()
Run to view results
# Look at the first rows again before digging into individual columns.
df.head()
Run to view results
#
# Range of the requested amounts: smallest first, then largest.
requested = df["Amount Requested"]
print(requested.min())
print(requested.max())
Run to view results
#
# Show the rows where the requested amount is exactly zero.
is_zero_request = df["Amount Requested"] == 0
df[is_zero_request]
Run to view results
# Show the row(s) holding the largest requested amount.
largest_request = df["Amount Requested"].max()
df[df["Amount Requested"] == largest_request]
Run to view results
# Keep only the applications that asked for a positive amount.
is_positive_request = df["Amount Requested"].gt(0)
df = df[is_positive_request]
Run to view results
#
# How many rows have no State recorded.
int(df["State"].isna().sum())
Run to view results
# Remove the rows that have no State. This is the same selection as
# df.dropna(subset=["State"]): of dropna's arguments only `subset` accepts
# column labels, while axis, how, thresh and inplace control other things
# and cannot be given ["State"].
state_is_known = df["State"].notna()
df = df.loc[state_is_known]
Run to view results
# Average requested amount within each state.
amount_by_state = df.groupby("State")["Amount Requested"]
amount_by_state.mean()
Run to view results
# Frequency of each employment-length label, ordered by label rather than
# by how often it occurs.
employment_length_counts = df["Employment Length"].value_counts()
employment_length_counts.sort_index()
#
Run to view results
#
# Rows without a loan title cannot be searched, so drop them first.
df = df.dropna(subset=["Loan Title"])

# Upper-case the titles so the search below is case-insensitive.
df["loan_title_upper"] = df["Loan Title"].str.upper()

# Count the titles mentioning "DEBT", then report that count and its
# percentage of all remaining rows.
mentions_debt = df["loan_title_upper"].str.contains("DEBT")
debt_count = len(df.loc[mentions_debt])
print(debt_count)
print((debt_count / len(df)) * 100)
Run to view results
#
Run to view results
# Flag every loan whose upper-cased title contains any of the key words,
# then report how many rows match and what percentage of the data that is.
#
# The key words are joined into a single regex alternation so one vectorized
# pass over the column replaces the per-word loop. na=False makes
# str.contains return a plain boolean column directly, which removes the need
# for a chained `df[col].fillna(..., inplace=True)`; that form is deprecated
# and does not write back to the frame when Copy-on-Write is enabled.
# `assign` returns a new frame with the column attached, so nothing is
# written into a frame that was produced by earlier filtering.
key_words = ["DEBT", "CONSOLIDATE", "CONSOLIDATING"]
key_word_pattern = "|".join(key_words)
df = df.assign(
    matches_key_word=df["loan_title_upper"].str.contains(
        key_word_pattern, na=False
    )
)

# Summing a boolean column counts its True values.
total_count = int(df["matches_key_word"].sum())
print(total_count)
print(100 * (total_count / len(df)))
Run to view results