Lesson 13 - Aggregating Data

import pandas as pd

Run to view results

# Look in the "data" folder.
# The file is gzip-compressed; pandas infers the compression from the ".gz" suffix.
df = pd.read_csv('/work/data/rejections.csv.gz')
# Preview the first rows to confirm the load worked.
df.head()

Run to view results

# Number of columns in the DataFrame.
len(df.columns)

Run to view results

# Show the dtype pandas assigned to each column.
df.dtypes

Run to view results

# Count the missing values in each column.
df.isna().sum()

Run to view results

# `isnull` is an alias of `isna`, so this gives the same per-column missing counts.
df.isnull().sum()

Run to view results

# Report the largest and smallest values in the 'Amount Requested' column.
print(
    f" The highest loan requested was {df['Amount Requested'].max()}"
    f" and the smallest loan requested was {df['Amount Requested'].min()}"
)

Run to view results

# Return every row whose 'Amount Requested' equals the column maximum.
df[df['Amount Requested'] == df['Amount Requested'].max()]

Run to view results

# Return every row whose 'Amount Requested' equals the column minimum.
df[df['Amount Requested'] == df['Amount Requested'].min()]

Run to view results

# Drop rows with 0 amount requested: keep only strictly positive amounts.
# Rows with a missing amount are dropped too, because NaN > 0 is False.
df = df[df['Amount Requested'] > 0]

Run to view results

# Sanity check after the filter: list the rows with the smallest requested
# amounts; no 0 values should remain.
df.sort_values('Amount Requested', ascending=True).head()

Run to view results

# Drop rows that have no 'State' value.
df = df.dropna(subset=['State'])

Run to view results

# Average requested amount per state, rounded to 2 decimals and ordered by
# the 'State' value.
# `.to_dict()` converts the values to plain Python floats, so the dict
# displays as numbers rather than NumPy scalar wrappers.
state_avg_dict = (
    df.groupby('State')['Amount Requested']
    .mean()
    .round(2)
    .sort_index()
    .to_dict()
)

Run to view results

# Display the state -> average requested amount mapping.
state_avg_dict

Run to view results

def state_avg_amount(State):
    """Print the average requested amount for one state.

    Args:
        State: Key to look up in the module-level ``state_avg_dict``
            (for example ``'PA'``).

    Raises:
        KeyError: If ``State`` is not a key of ``state_avg_dict``.
    """
    print(f" {State}: {state_avg_dict[State]}")


# Example lookup.
state_avg_amount('PA')

Run to view results

# Preview the first rows of the current DataFrame.
df.head()

Run to view results

# Count rows per 'Employment Length' value, ordered by the value itself
# rather than by frequency.
df['Employment Length'].value_counts().sort_index()

Run to view results

# The text matching below needs a title, so drop rows without one.
df = df.dropna(subset=["Loan Title"])
# Upper-case once so keyword matching ignores the original casing.
df["loan_title_upper"] = df["Loan Title"].str.upper()
# Count matches by summing the boolean mask; this avoids building a filtered
# copy of the whole frame just to take its length.
debt_count = int(df["loan_title_upper"].str.contains("DEBT", na=False).sum())
print(debt_count)
# Share of remaining rows whose title mentions "DEBT", as a percentage.
print(100*(debt_count / len(df)))

Run to view results

keywords = ["DEBT", "CONSOLIDATE", "CONSOLIDATING"]
# Match any keyword in a single pass using one alternation pattern.
keyword_pattern = "|".join(keywords)
# Assign the boolean mask as a whole column. Filling NaN in place on a column
# selection (`df[col].fillna(..., inplace=True)`) acts on a temporary object
# under pandas Copy-on-Write and would leave the frame unchanged.
df["matches_keyword"] = df["loan_title_upper"].str.contains(keyword_pattern, na=False)
total_count = int(df["matches_keyword"].sum())
print(total_count)
# Share of rows whose title contains at least one keyword, as a percentage.
print(100*(total_count / len(df)))

Run to view results