# Don't change this cell; just run it.
import numpy as np
import pandas as pd
# Safe settings for Pandas.
pd.set_option('mode.chained_assignment', 'raise')
import matplotlib.pyplot as plt
%matplotlib inline
# The OKpy testing system.
from client.api.notebook import Notebook
# Grader handle built from the assignment's .ok file; the ok.grade(...) calls
# later in this notebook go through it.
ok = Notebook('nobler_monkeys.ok')
# Load the data set; the 'RNA copies' column is the one analysed below.
nobler = pd.read_csv('nobler_monkeys.csv')
# Bare expression: shows the data frame when this is run as a notebook cell.
nobler
# RNA copy counts rescaled to thousands, as a plain NumPy array.
rna_copies_k = np.asarray(nobler['RNA copies']) / 1000
# Plot the two distributions side by side.
fig, axes = plt.subplots(1, 2)
# The first 8 values are plotted as the Modvax group, the rest as Placebo.
panels = [
    ('Modvax group', rna_copies_k[:8]),
    ('Placebo group', rna_copies_k[8:]),
]
for ax, (title, values) in zip(axes, panels):
    ax.hist(values)
    ax.set_title(title)
# Observed group means (first 8 values: Modvax; remaining values: Placebo)
# and the observed statistic, placebo mean minus modvax mean.
modvax_mean = rna_copies_k[:8].mean()
placebo_mean = rna_copies_k[8:].mean()
plac_modvax_diff = placebo_mean - modvax_mean
print('Modvax mean', modvax_mean)
print('Placebo mean', placebo_mean)
print('Placebo - modvax mean difference', plac_modvax_diff)
#- You may want to simulate a single trial here.
# One trial in the null world: pool every value, shuffle so the group labels
# carry no information, then split back into two fake groups of the original
# sizes.
modvax = rna_copies_k[:8]
placebo = rna_copies_k[8:]
pooled = np.append(modvax, placebo)
shuffled = np.random.permutation(pooled)
fake_modvax = shuffled[:len(modvax)]
fake_placebo = shuffled[len(modvax):]
# Subtract in the same direction as the observed statistic
# (placebo mean - modvax mean) so the fake value is directly comparable to it.
fake_difference = np.mean(fake_placebo) - np.mean(fake_modvax)
fake_difference
# Run n simulated trials. Each trial draws 17 random integers from 0 through
# 99 and records how many of them are <= 90.
n = 10000
counts = np.zeros(n)
for i in np.arange(n):
    # Everything below is the loop body and must be indented to run once
    # per trial.
    trials = np.random.randint(0, 100, size=17)
    # NOTE(review): `<= 90` accepts 91 of the 100 possible values (0..90).
    # If a 90% chance was intended this should be `< 90` - confirm.
    cured = trials <= 90
    count = np.count_nonzero(cured)
    counts[i] = count
# Build up the sampling distribution from the ideal (null) world.
n_iters = 10000
fake_diffs = np.zeros(n_iters)
# The real groups and their pooled values never change between trials, so
# build them once instead of on every iteration.
modvax = rna_copies_k[:8]
placebo = rna_copies_k[8:]
pooled = np.append(modvax, placebo)
n_modvax = len(modvax)
for i in np.arange(n_iters):
    # Shuffle the pooled values and split them into two fake groups of the
    # original sizes.
    shuffled = np.random.permutation(pooled)
    fake_modvax = shuffled[:n_modvax]
    fake_placebo = shuffled[n_modvax:]
    # Same direction as the observed statistic (placebo mean - modvax mean),
    # so each fake difference can be compared with it directly.
    fake_difference = np.mean(fake_placebo) - np.mean(fake_modvax)
    fake_diffs[i] = fake_difference
# Show the first 10 values.
fake_diffs[:10]
_ = ok.grade('q_fake_diffs')
#- Consider a histogram of the sampling distribution here.
plt.hist(fake_diffs)
# Proportion of simulated differences at least as large as the observed one.
# Divide by the number of simulated trials (not a fixed constant) so the
# result is a true proportion between 0 and 1.
prop_ge = np.count_nonzero(fake_diffs >= plac_modvax_diff) / len(fake_diffs)
# Show the proportion.
prop_ge
_ = ok.grade('q_prop_ge')
# For your convenience, you can run this cell to run all the tests at once!
import os
# Every entry in "tests" whose name starts with 'q' is treated as a test;
# the last three characters of the entry name are dropped to get the name
# passed to ok.grade.
test_names = [fname[:-3] for fname in os.listdir("tests") if fname.startswith('q')]
_ = [ok.grade(test_name) for test_name in test_names]