# Don't change this cell; just run it.
# It loads the libraries and creates the grader object that later cells use.
import numpy as np
import pandas as pd
# Safe settings for Pandas: raise an error on chained assignment.
pd.set_option('mode.chained_assignment', 'raise')
import matplotlib.pyplot as plt
# IPython magic, not plain Python: this line only works inside a notebook.
%matplotlib inline
# The OKpy testing system.
from client.api.notebook import Notebook
# `ok` is the object whose `grade` method is called after each exercise.
ok = Notebook('oxford_astrazeneca.ok')
# Run this cell.
# Load the trial summary table from the CSV file and display it.
ox_vax = pd.read_csv('ox_astra_cov002.csv')
ox_vax
# Overall proportion of cases: total of "Cases" divided by total of "N".
# This is computed directly because `calc_prop_cases` is only defined further
# down; calling it here fails with a NameError when the notebook is run from
# top to bottom.
prop_covid = ox_vax['Cases'].sum() / ox_vax['N'].sum()
prop_covid
_ = ok.grade('q_prop_covid')
def calc_prop_cases(df):
    """ Return the overall proportion of cases in `df`

    Parameters
    ----------
    df : data frame
        Must have numeric columns "Cases" and "N".

    Returns
    -------
    prop : number
        Sum of the "Cases" column divided by the sum of the "N" column.
    """
    # Work on the `df` argument, not the global `ox_vax` table, so that the
    # function gives the right answer for any table passed in.
    return df['Cases'].sum() / df['N'].sum()
# Check this function returns the same value as you calculated above, when
# called on the whole table.
calc_prop_cases(ox_vax)
_ = ok.grade('q_calc_prop_cases')
# Vaccine efficiency is 1 minus the relative risk, where the relative risk is
# the proportion of cases outside the control group divided by the proportion
# of cases in the control group.
# This is computed inline because no `relative_risk` function is defined or
# imported in this notebook.
in_control = ox_vax['Group'] == 'Control'
control_risk = (ox_vax.loc[in_control, 'Cases'].sum() /
                ox_vax.loc[in_control, 'N'].sum())
vaccine_risk = (ox_vax.loc[~in_control, 'Cases'].sum() /
                ox_vax.loc[~in_control, 'N'].sum())
vax_eff = 1 - vaccine_risk / control_risk
vax_eff
_ = ok.grade('q_vax_eff')
def calc_efficiency(df):
    """ Return vaccine efficiency (1 - relative risk) for the rows of `df`

    Rows whose "Group" value is "Control" form the control arm; all other rows
    are pooled as the vaccinated arm.

    Parameters
    ----------
    df : data frame
        Must have a "Group" column and numeric columns "Cases" and "N".

    Returns
    -------
    efficiency : number
        One minus (proportion of cases in the vaccinated rows divided by the
        proportion of cases in the control rows).
    """
    # Use the `df` argument throughout.  Assigning to the name `ox_vax` inside
    # the function would make it a local variable and hide the global table.
    in_control = df['Group'] == 'Control'
    # Risk is cases per participant, not the raw case count, so that arms of
    # different sizes are comparable.
    control_risk = (df.loc[in_control, 'Cases'].sum() /
                    df.loc[in_control, 'N'].sum())
    vaccine_risk = (df.loc[~in_control, 'Cases'].sum() /
                    df.loc[~in_control, 'N'].sum())
    return 1 - vaccine_risk / control_risk
# Run this cell, check you get the same answer as previously.
# The call is needed so that there is a value to compare.
calc_efficiency(ox_vax)
_ = ok.grade('q_calc_efficiency')
# Template placeholders: replace each `...` with your calculation.  As
# written, the three names are bound to Ellipsis and the prints below show
# "Ellipsis".
# NOTE(review): "LD" and "SD" presumably refer to the labels in the "Dose"
# column of the table -- confirm against the exercise text.
ld_vax_eff = ...
sd_vax_eff = ...
vax_eff_diff = ...
print('LD efficiency', ld_vax_eff)
print('SD efficiency', sd_vax_eff)
print('Efficiency difference', vax_eff_diff)
_ = ok.grade('q_vax_eff_diff')
def calc_ld_sd_ediff(df):
    """ Exercise stub: not implemented yet

    As written the body does nothing with `df` and the function returns
    Ellipsis.

    NOTE(review): judging by the name and the grader label, this is presumably
    meant to return the difference between the LD and SD vaccine efficiencies
    for `df` -- confirm the intended sign against the exercise text.
    """
    ...
    return ...
# Run this cell, check you get the same answer as previously.
# NOTE(review): "previously" presumably means the `vax_eff_diff` value from the
# earlier cell -- confirm.
calc_ld_sd_ediff(ox_vax)
_ = ok.grade('q_calc_ld_sd_ediff')
# What the first five rows will look like.
person_start = pd.DataFrame()
person_start['Dose'] = np.repeat(['LD'], 5)
person_start['Group'] = np.repeat(['Control'], 5)
# Use the boolean False, not the string 'False': the "Case" column of the real
# per-person table is built from boolean True / False values.
person_start['Case'] = np.repeat([False], 5)
person_start
# Start an empty data frame that the following cells fill in column by column.
person_df = pd.DataFrame()
# The "N" value from each row of `ox_vax`, as an array.
n_per_cell = np.array(ox_vax['N'])
# Repeat each dose label by the matching value in `n_per_cell`.
# NOTE(review): the hard-coded labels assume `ox_vax` has exactly four rows, in
# the order LD, LD, SD, SD -- confirm against the CSV file.
person_df['Dose'] = np.repeat(['LD', 'LD', 'SD', 'SD'], n_per_cell)
# Show the first five rows
person_df.head()
# Template placeholder: the next line is not valid Python as written.
# NOTE(review): judging by the grader label below, it should presumably be
# replaced by a statement that adds a "Group" column -- confirm.
person_df...
# Show the first five rows so far.
person_df.head()
_ = ok.grade('q_person_df_group')
# Run this cell.
# "Case" will be False for non-case, True for case.
# Start with all False
# There are two entries for each element of `n_per_cell`.
to_repeat = np.repeat([False], len(n_per_cell) * 2)
# Set every other value, from the second, to True
to_repeat[1:len(to_repeat):2] = True
# Note that we can do the same thing with this short-cut.
# (The array is unchanged by this line; the values are already True.)
to_repeat[1::2] = True
# Show the result
to_repeat
# Start with an integer array of zeros, of the right length.
# That is one entry for each element of `to_repeat`.
repeat_nos = np.repeat([0], len(to_repeat))
# Template placeholder.
# NOTE(review): presumably `repeat_nos` should be filled here with the number
# of times to repeat each element of `to_repeat` -- confirm.
...
# Template placeholder: as written this assigns Ellipsis to the "Case" column.
person_df['Case'] = ...
# Show the first five rows.
person_df.head()
_ = ok.grade('q_person_df_with_case')
# Build a group-by object that collects the rows sharing the same pair of
# labels in the "Dose" and "Group" columns.
grouped = person_df.groupby(by=['Dose', 'Group'])
# Summarize each group with two named aggregations of the "Case" column:
# * "N" is the number of rows in the group
# * "Cases" is the number of True values in the group
aggregated = grouped.agg(
    N=pd.NamedAgg(column='Case', aggfunc=len),
    Cases=pd.NamedAgg(column='Case', aggfunc=np.count_nonzero),
)
# Turn the ("Dose", "Group") row labels back into ordinary columns.
tabulated = aggregated.reset_index()
tabulated
def cases_to_counts(full_df):
    """ Count rows and True "Case" values per ("Dose", "Group") pair

    Returns a data frame with one row per pair and columns "Dose", "Group",
    "N" (number of rows) and "Cases" (number of non-zero "Case" values).
    """
    by_cell = full_df.groupby(['Dose', 'Group'])
    counts = by_cell.agg(N=('Case', len),
                         Cases=('Case', np.count_nonzero))
    # Move the group labels from the index back into ordinary columns.
    return counts.reset_index()
# This should return values identical to the original "ox_vax" data frame.
cases_to_counts(person_df)
# This should evaluate to (and show) True.
cases_to_counts(person_df).equals(ox_vax)
# Work on a copy, so that `person_df` itself is left unchanged.
fake_df = person_df.copy()
# Replace the "Dose" column with a random shuffle of the original labels.
fake_df['Dose'] = np.random.permutation(person_df['Dose'])
# Show the first five rows of the shuffled copy.
fake_df.head()
# NOTE(review): presumably the number of shuffled data sets to generate --
# confirm against the exercise text.
n_iters = 1000
# Template placeholder: as written `fake_ediffs` is bound to Ellipsis.
fake_ediffs = ...
# Show the first 10 efficiency differences.
# (This raises a TypeError until `fake_ediffs` is replaced by a sequence.)
fake_ediffs[:10]
_ = ok.grade('q_fake_ediffs')
#- Histogram of eff_diffs
# NOTE(review): "eff_diffs" above presumably means `fake_ediffs` -- confirm.
# Template placeholder: as written `prop_ediff_ge` is bound to Ellipsis.
prop_ediff_ge = ...
# Show the result
prop_ediff_ge
_ = ok.grade('q_prop_ediff_ge')
# For your convenience, you can run this cell to run all the tests at once!
import os
# Grade every test file in the "tests" directory whose name starts with "q".
# Only ".py" files are used, because the name passed to `ok.grade` is the file
# name with its last three characters (".py") removed; any other entry would
# give a mangled test name.  Sorting makes the tests run in the same order
# every time, since `os.listdir` returns entries in arbitrary order.
_ = [ok.grade(q[:-3])
     for q in sorted(os.listdir("tests"))
     if q.startswith('q') and q.endswith('.py')]