# Don't change this cell; just run it.
# Setup: numerical and plotting libraries plus the okpy autograder client.
import numpy as np
import pandas as pd
# Safe settings for Pandas.
# Raise an error (rather than only warn) on chained assignment, so a write
# that would land on a temporary copy is caught immediately.
pd.set_option('mode.chained_assignment', 'raise')
# IPython magic (not plain Python): draw matplotlib figures inline.
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
from client.api.notebook import Notebook
# `ok` is used throughout the notebook to run per-question checks via
# ok.grade('<question>').
ok = Notebook('pandering.ok')
# Load the unemployment table. The columns used later in this notebook are
# "Date", "NEI" and "NEI-PTER".
unemployment = pd.read_csv("unemployment.csv")
unemployment
_ = ok.grade('q1_1')

# Two copies of the table, each ordered from the highest to the lowest value
# of one of the two measures.
by_nei, by_nei_pter = (
    unemployment.sort_values(by=column, ascending=False)
    for column in ("NEI", "NEI-PTER")
)
_ = ok.grade('q1_2')

# The ten rows with the largest NEI values.
greatest_nei = by_nei.iloc[:10]
greatest_nei
_ = ok.grade('q1_3')
# PTER is derived row by row as the gap between the "NEI-PTER" and "NEI"
# columns. The subtraction yields a Series that shares the table's index.
pter = unemployment["NEI-PTER"] - unemployment["NEI"]
# Show the first five values.
pter.head()
_ = ok.grade('q1_4')

# Attach the derived values to the table as a new "PTER" column, then rank
# the rows from the highest to the lowest PTER.
unemployment["PTER"] = pter
by_pter = unemployment.sort_values("PTER", ascending=False)
# Show the first five values.
by_pter.head()
_ = ok.grade('q1_5')
# Two-column table (date and PTER) taken from the PTER-ranked rows, then put
# back into ascending date order for a line plot of PTER against date.
highpter = by_pter[["Date", "PTER"]].copy()
by_date = highpter.sort_values(by="Date")
by_date.plot(x="Date", xticks=[0, 15, 30, 45, 60, 75])
# Compare the mean PTER of 2005-2007 with that of 2008-2010.
#
# Rows are addressed by position in the date-sorted table. Positions 56-67
# are the 2008-2010 window; the 2005-2007 window is taken to be the twelve
# rows immediately before it.
# NOTE(review): this assumes one row per quarter in unemployment.csv, so that
# three years span twelve rows -- confirm against the data.
pter_2005_2007 = by_date['PTER'].iloc[44:56]
pter_2008_2010 = by_date['PTER'].iloc[56:68]

# Totals and row counts for each window (kept under their original names).
summation = pter_2005_2007.sum()
count = len(pter_2005_2007)
summation_2 = pter_2008_2010.sum()
count_2 = len(pter_2008_2010)

mean_pter_2005_2007 = summation / count
mean_pter_2008_2010 = summation_2 / count_2
print ("The mean percentage of people who were PTER between 2008-2010 was", round(mean_pter_2008_2010, ndigits=2))

# Percent increase is measured relative to the earlier (baseline) period.
difference = mean_pter_2008_2010 - mean_pter_2005_2007
difference_of_percent = (difference / mean_pter_2005_2007) * 100
print ("Meaning that PTER increased by", round(difference_of_percent, ndigits=2), "percent in the period 2008-2010 compared to 2005-2007")
# Don't change this cell; just run it.
# Builds `pop`, the state-level population table used by the questions below.
# From http://www2.census.gov/programs-surveys/popest/datasets/2010-2016/national/totals/nst-est2016-alldata.csv
# See https://www2.census.gov/programs-surveys/popest/datasets/2010-2016/national/totals/nst-est2016-alldata.pdf
# for column descriptions.
# Read the table
raw_pop = pd.read_csv('nst-est2016-alldata.csv')
# Select rows giving summaries at US state level (summary level).
# Only rows whose SUMLEV value equals 40 are kept.
pop = raw_pop[raw_pop['SUMLEV'] == 40]
# Select only the columns we're interested in
good_cols = ['REGION', 'NAME', 'POPESTIMATE2015', 'POPESTIMATE2016',
'BIRTHS2016', 'DEATHS2016', 'NETMIG2016', 'RESIDUAL2016']
pop = pop.loc[:, good_cols]
# Give the columns new names
# Renaming is positional: good_names[i] replaces good_cols[i], so the two
# lists must stay the same length and in the same order.
good_names = ['REGION', 'NAME', '2015', '2016',
'BIRTHS', 'DEATHS', 'MIGRATION', 'OTHER']
pop.columns = good_names
# Show the first five rows.
pop.head()
# National birth rate: births summed over every row of `pop`, divided by the
# 2015 population summed over the same rows.
total_births, total_us_population = (
    sum(pop[column]) for column in ("BIRTHS", "2015")
)
us_birth_rate = total_births / total_us_population
us_birth_rate
_ = ok.grade('q2_1')
# Annual growth rate for every row of `pop`: the change in population from
# 2015 to 2016 relative to the 2015 population. 2015 is the same base year
# that the birth-rate and migration-rate calculations in this notebook use.
# Computed for the whole column at once, so it does not depend on the table
# having a particular number of rows.
pop_growth = ((pop["2016"] - pop["2015"]) / pop["2015"]).values
pop["GROWTH RATE"] = pop_growth

# Rank the rows from the fastest to the slowest growth and keep the names of
# the top five (a Series indexed like `pop`).
growth = pop.sort_values("GROWTH RATE", ascending=False)
fastest_growth = growth["NAME"].head(5)
fastest_growth
_ = ok.grade('q2_2')
# Net migration as a percentage of the 2015 population, one entry per row of
# `pop`. Computed for the whole column at once, so it does not depend on the
# table having a particular number of rows.
annual_rate_migration = (pop["MIGRATION"] / pop["2015"] * 100).values

# Number of rows whose migration rate exceeds +1%.
# NOTE(review): only positive rates above 1% are counted; rows with a rate
# below -1% are not. Confirm this matches the question (it may call for the
# absolute value of the rate).
n_movers = np.count_nonzero(annual_rate_migration > 1.0)
n_movers
_ = ok.grade('q2_3')
# Total births over the rows whose REGION equals "4" (the West, going by the
# variable names).
# NOTE(review): the comparison is against the string "4", which assumes the
# REGION column was read as text rather than integers -- confirm.
region_4 = pop["REGION"].eq("4")
west_births = pop.loc[region_4, "BIRTHS"]
n_west_births = sum(west_births)
n_west_births
_ = ok.grade('q2_4')

# How many rows have a 2016 population smaller than that births total.
n_less_than_west_births = np.count_nonzero(pop["2016"].lt(n_west_births))
n_less_than_west_births
_ = ok.grade('q2_5')
#- Generate a chart here to support your conclusion
# Per-row birth and death rates, both relative to the 2015 population, and
# the ratio of the two.
death_rate = pop["DEATHS"].div(pop["2015"])
birth_rate = pop["BIRTHS"].div(pop["2015"])
association = birth_rate.div(death_rate)

# Store the three derived series as new columns of `pop`.
pop["BIRTH RATE"] = birth_rate
pop["DEATH RATE"] = death_rate
pop["BIRTH/DEATH RATE"] = association

# Scatter plot of death rate against birth rate, one point per row.
pop.plot.scatter(x="BIRTH RATE", y="DEATH RATE")
# Just run this cell.
complaints = pd.read_csv("complaints.csv")
complaints.head()

# Number of complaints recorded for each distinct value of "product".
complaints_per_product = complaints["product"].value_counts()
complaints_per_product
_ = ok.grade('q3_1')

# Re-sort the counts in ascending order, then draw them as horizontal bars
# with a labelled x-axis.
complaints_per_product_p = complaints_per_product.sort_values()
product_axes = complaints_per_product_p.plot.barh()
product_axes.set_xlabel("complaints")
# Number of complaints recorded for each distinct value of "company".
complaints_per_company = complaints["company"].value_counts()
complaints_per_company
_ = ok.grade('q3_3')

# Take the first ten entries of the counts, re-sort them in ascending order,
# and draw them as horizontal bars with a labelled x-axis.
complaints_per_company_plot = complaints_per_company.iloc[:10].sort_values()
company_axes = complaints_per_company_plot.plot.barh()
company_axes.set_xlabel("complaints")
# Each company's share of all complaints: its count divided by the total
# across every company. The first ten entries of that series are kept and
# re-sorted in ascending order for the horizontal bar chart.
proportion_complaints_per_company = (
    (complaints_per_company / complaints_per_company.sum())
    .head(10)
    .sort_values(ascending=True)
)
proportion_complaints_per_company.plot(kind="barh").set_xlabel("proportion of total complaints")
# For your convenience, you can run this cell to run all the tests at once!
import os
# Every entry of the "tests" directory whose name starts with 'q' is treated
# as a test file; dropping its last three characters gives the question name
# that is passed to ok.grade.
question_names = [
    entry[:-3] for entry in os.listdir("tests") if entry.startswith('q')
]
_ = [ok.grade(question) for question in question_names]