import pandas as pd
Run to view results
# Load the rejections CSV (a .gz file under /work/data) into a DataFrame
# and preview the first rows.
df = pd.read_csv('/work/data/rejections.csv.gz')
df.head()
Run to view results
# Number of columns in the frame.
len(df.columns)
Run to view results
# Data type pandas assigned to each column.
df.dtypes
Run to view results
# Count of missing values in each column.
df.isna().sum()
Run to view results
# Per-column missing-value count again; isnull() is an alias of isna().
df.isnull().sum()
Run to view results
# Report the largest and smallest values in 'Amount Requested'.
print(
    f" The highest loan requested was {df['Amount Requested'].max()}"
    f" and the smallest loan requested was {df['Amount Requested'].min()}"
)
Run to view results
# Show every row whose requested amount equals the overall maximum.
df.loc[df['Amount Requested'].eq(df['Amount Requested'].max())]
Run to view results
# Show every row whose requested amount equals the overall minimum.
df.loc[df['Amount Requested'].eq(df['Amount Requested'].min())]
Run to view results
# Keep only rows with a strictly positive requested amount.
df = df.loc[df['Amount Requested'].gt(0)]
Run to view results
# Sanity check: list the rows with the smallest requested amounts;
# none of them should be 0 any more.
df.sort_values(by='Amount Requested').head(5)
Run to view results
# Discard rows that have no State value.
df = df.loc[df['State'].notna()]
Run to view results
# Mean requested amount per state, rounded to 2 decimals, keyed by state
# in sorted order.
state_avg_dict = dict(
    df.groupby(by='State')['Amount Requested']
    .mean()
    .round(decimals=2)
    .sort_index()
)
Run to view results
# Display the state -> average requested amount mapping.
state_avg_dict
Run to view results
def state_avg_amount(State, averages=None):
    """Print the average requested amount stored for one state.

    Args:
        State: Key to look up in the averages mapping, e.g. ``'PA'``.
        averages: Optional mapping of state -> average amount. When
            omitted, the module-level ``state_avg_dict`` is used.

    Raises:
        KeyError: If ``State`` is not a key of the mapping.
    """
    if averages is None:
        # Resolved at call time so the function always sees the current dict.
        averages = state_avg_dict
    print(f" {State}: {averages[State]}")
# Example: print the stored average for the key 'PA'.
state_avg_amount('PA')
Run to view results
# Preview the first rows of the current frame.
df.head()
Run to view results
# Number of rows per 'Employment Length' value, ordered by the value label
# rather than by frequency.
df['Employment Length'].value_counts().sort_index()
Run to view results
# Count the loan titles that mention "DEBT" (compared case-insensitively
# through an upper-cased copy of the title) and print the count and percentage.
df = df.dropna(subset=["Loan Title"])
# assign() returns a new frame, so the derived column is never written into a
# frame that pandas still tracks as a slice of an earlier one.
df = df.assign(loan_title_upper=df["Loan Title"].str.upper())
# na=False: a non-string title upper-cases to NaN; treat it as "no match"
# instead of letting NaN leak into the boolean mask.
debt_count = int(df["loan_title_upper"].str.contains("DEBT", na=False).sum())
print(debt_count)
print(100*(debt_count / len(df)))
Run to view results
# Flag every row whose upper-cased title contains any of the keywords, then
# print the number of flagged rows and their percentage of the frame.
keywords = ["DEBT", "CONSOLIDATE", "CONSOLIDATING"]
# Start from an all-False mask and OR in each keyword's matches. This yields
# a genuine boolean column with no NaN placeholders, so no chained in-place
# fillna is needed, and re-running with other keywords leaves no stale flags.
keyword_mask = pd.Series(False, index=df.index)
for word in keywords:
    # Each keyword is still interpreted as a regex pattern (str.contains default).
    keyword_mask = keyword_mask | df["loan_title_upper"].str.contains(word, na=False)
df = df.assign(matches_keyword=keyword_mask)
total_count = int(df["matches_keyword"].sum())
print(total_count)
print(100*(total_count / len(df)))
Run to view results
Run to view results