# freecodecamp.org | Demographic Data Analyzer

import pandas as pd

# Location of the census extract analysed by this script.
DATA_PATH = '/work/adult.data.csv'

# Education levels counted as "higher education".
ADVANCED_DEGREES = ('Bachelors', 'Masters', 'Doctorate')

# Label used in the `salary` column for people earning more than 50K.
RICH_SALARY = '>50K'


def _percent(mask):
    """Return the share of True values in a boolean Series, as a percentage
    rounded to one decimal place (NaN when the Series is empty)."""
    return round(mask.mean() * 100, 1)


def analyze_demographics(df):
    """Compute the demographic summary statistics for a census DataFrame.

    Args:
        df: DataFrame providing the columns `age`, `sex`, `race`,
            `education`, `hours-per-week`, `salary`, `native-country`
            and `occupation`.

    Returns:
        A dict keyed by statistic name:
            race_count: Series with the number of rows per race.
            average_age_men: mean age of rows whose sex is 'Male' (1 decimal).
            percentage_bachelors: % of rows with a 'Bachelors' education.
            higher_education: % of rows with an advanced degree.
            lower_education: % of rows without an advanced degree.
            higher_education_rich: % of advanced-degree rows earning >50K.
            lower_education_rich: % of the remaining rows earning >50K.
            min_work_hours: smallest value of `hours-per-week`.
            num_min_workers: number of rows working that minimum.
            rich_percentage: % of those minimum-hours rows earning >50K.
            highest_earning_country: country with the largest >50K share
                (None when `df` has no rows).
            highest_earning_country_percentage: that share, in percent.
            top_IN_occupation: most common occupation among >50K earners
                from India (None when there are none).
    """
    # Boolean masks are used instead of value_counts(...)[label] so that a
    # category missing from a subset yields 0.0 rather than a KeyError.
    is_rich = df['salary'] == RICH_SALARY
    is_advanced = df['education'].isin(ADVANCED_DEGREES)

    race_count = df.groupby('race')['race'].count()

    # Select men by label; an integer position on a string-labelled Series
    # is deprecated and depends on the sort order of the group keys.
    average_age_men = round(df.loc[df['sex'] == 'Male', 'age'].mean(), 1)

    percentage_bachelors = _percent(df['education'] == 'Bachelors')

    # Both education shares are reported as rounded percentages.
    higher_education = _percent(is_advanced)
    lower_education = _percent(~is_advanced)

    higher_education_rich = _percent(is_rich[is_advanced])
    lower_education_rich = _percent(is_rich[~is_advanced])

    # Use the computed minimum instead of assuming it is 1 hour.
    min_work_hours = df['hours-per-week'].min()
    works_min_hours = df['hours-per-week'] == min_work_hours
    num_min_workers = int(works_min_hours.sum())
    rich_percentage = _percent(is_rich[works_min_hours])

    # Fraction of >50K earners per country (groups are sorted by name, so
    # ties resolve to the alphabetically first country).
    rich_share_by_country = is_rich.groupby(df['native-country']).mean()
    if rich_share_by_country.empty:
        highest_earning_country = None
        highest_earning_country_percentage = float('nan')
    else:
        highest_earning_country = rich_share_by_country.idxmax()
        highest_earning_country_percentage = round(
            rich_share_by_country.max() * 100, 1
        )

    india_rich_occupations = df.loc[
        (df['native-country'] == 'India') & is_rich, 'occupation'
    ].value_counts()
    top_IN_occupation = (
        india_rich_occupations.idxmax()
        if not india_rich_occupations.empty
        else None
    )

    return {
        'race_count': race_count,
        'average_age_men': average_age_men,
        'percentage_bachelors': percentage_bachelors,
        'higher_education': higher_education,
        'lower_education': lower_education,
        'higher_education_rich': higher_education_rich,
        'lower_education_rich': lower_education_rich,
        'min_work_hours': min_work_hours,
        'num_min_workers': num_min_workers,
        'rich_percentage': rich_percentage,
        'highest_earning_country': highest_earning_country,
        'highest_earning_country_percentage': highest_earning_country_percentage,
        'top_IN_occupation': top_IN_occupation,
    }


if __name__ == "__main__":
    df = pd.read_csv(DATA_PATH)
    print(df.head())

    # Optional exploratory plot, left disabled as in the original notebook:
    # pd.plotting.scatter_matrix(df, alpha=0.1, figsize=(10, 10), diagonal="kde")

    for label, value in analyze_demographics(df).items():
        print(f"{label}: {value}")