# Don't change this cell; just run it.
import numpy as np
import pandas as pd
# Safe settings for Pandas.
pd.set_option('mode.chained_assignment', 'raise')
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
from client.api.notebook import Notebook
ok = Notebook('religion.ok')
# Load the full table from the CSV file.
religion = pd.read_csv('oliner_tab6_6.csv')
religion
_ = ok.grade('q_01_religion')

# Keep only the first four rows, and label them by the 'level' column so
# that later cells can select rows by level name with .loc.
top_religion = religion.head(4).set_index('level')
top_religion
_ = ok.grade('q_02_top_religion')

# Select the two group columns compared in the rest of the notebook.
# The selection already returns a new data frame, so there is no need to
# create an empty placeholder frame first.
rescuers_actives = top_religion.loc[:, ['rescuers', 'actives']]
rescuers_actives
_ = ok.grade('q_03_rescuers_actives')
# Run this cell
# Observed count for the 'Very' row of the 'actives' column.
actual_by_very = rescuers_actives.loc['Very', 'actives']
actual_by_very

# Rebuild one label per individual from the table of counts.
# Column totals give the number of individuals in each group; row totals
# give the number at each religiousness level.
# NOTE(review): the level labels are matched to rows by position, so this
# assumes the rows are ordered Very, Somewhat, Not very, Not at all - confirm.
group = np.repeat(['rescuers', 'actives'],
                  rescuers_actives.sum(axis=0))
religiousness = np.repeat(['Very', 'Somewhat', 'Not very', 'Not at all'],
                          rescuers_actives.sum(axis=1))

# Permute the religiousness labels in place, so that any pairing between
# religiousness and group is random.
np.random.shuffle(religiousness)

# Cross-tabulate the shuffled labels against group to get one fake table.
fake_table = pd.crosstab(religiousness, group)
fake_table

# Run this cell
# The same cell of the table as actual_by_very, but from the fake table.
fake_by_very = fake_table.loc['Very', 'actives']
fake_by_very
# Repeat the shuffle-and-tabulate trial many times, recording the
# ('Very', 'actives') count from each fake table.
n_trials = 1000
very_actives = np.zeros(n_trials)
for i in np.arange(n_trials):
    # New random association between religiousness and group.
    np.random.shuffle(religiousness)
    # Rebuild the table from the freshly shuffled labels.  The count must be
    # read from *this* table; reading a table built before the loop would
    # store the same value on every iteration.
    fake_table = pd.crosstab(religiousness, group)
    very_actives[i] = fake_table.loc['Very', 'actives']
# Show the first five values
very_actives[:5]
_ = ok.grade('q_04_very_actives')
#- Plot a histogram of the very_actives values, calculate the proportion
plt.hist(very_actives)
# NOTE(review): the proportion mentioned above is not calculated yet; it
# should compare very_actives with actual_by_very - confirm which direction
# (>= or <=) the question asks for before adding it.
# Run this cell
# Turn the counts for each group into one numeric score per individual.
# Scores are assigned by row position: the first row gets 3, the second 2,
# the third 1 and the fourth 0.
# NOTE(review): presumably 3 means 'Very' and 0 means 'Not at all'; this
# relies on the row order of rescuers_actives - confirm.
rescuer_counts = rescuers_actives['rescuers']
rescuer_scores = np.repeat([3, 2, 1, 0], rescuer_counts)
rescuer_scores
rescuer_scores.mean()

# Same construction for the actives column.
active_counts = rescuers_actives['actives']
active_scores = np.repeat([3, 2, 1, 0], active_counts)
active_scores
_ = ok.grade('q_05_active_scores')

# Run this cell
# Observed statistic: mean rescuer score minus mean active score.
observed = rescuer_scores.mean() - active_scores.mean()
observed
# Permutation test on the difference in mean scores.
# Pool all the scores, then repeatedly shuffle the pool and split it into
# two fake groups of the same sizes as the real ones.
pooled = np.append(rescuer_scores, active_scores)
# Size of the first fake group, taken from the data rather than hard-coded.
n_rescuers = len(rescuer_scores)
fake_mean_diffs = np.zeros(10000)
for i in np.arange(10000):
    # Without this shuffle every trial would see the same ordering.
    np.random.shuffle(pooled)
    # The two fake groups must not overlap: the first n_rescuers values
    # stand in for rescuers, and the remainder for actives.
    fake_rescuers_scores = pooled[:n_rescuers]
    fake_actives_scores = pooled[n_rescuers:]
    fake_mean = np.mean(fake_rescuers_scores) - np.mean(fake_actives_scores)
    fake_mean_diffs[i] = fake_mean
# Show the first five values
fake_mean_diffs[:5]
_ = ok.grade('q_06_fake_mean_diffs')
#- Use this cell to plot histogram and calculate proportion.
plt.hist(fake_mean_diffs)
# Proportion of simulated mean differences that are at least as large as
# the observed difference.
at_least_observed = fake_mean_diffs >= observed
very_fake_prop = np.count_nonzero(at_least_observed) / len(at_least_observed)
very_fake_prop
# For your convenience, you can run this cell to run all the tests at once!
import os
# Every entry in the "tests" directory whose name starts with 'q' is taken
# to be a test file; dropping the last three characters of the file name
# gives the name passed to ok.grade.
test_names = [fname[:-3] for fname in os.listdir("tests")
              if fname.startswith('q')]
_ = [ok.grade(test_name) for test_name in test_names]