import scipy as sc
import numpy as np
# Run to view results
# Simulation comparing three p-values for "group 0's success rate is below
# group 1's", where group 0 has k_0 successes out of n_0 trials and group 1
# has k_1 successes out of n_1 trials:
#   * binom_test       - exact one-sided binomial test of k_0/n_0 against p_1
#   * p_value_base     - two-sided normal approximation with the plain
#                        sqrt(p * (1 - p) / n) standard errors
#   * p_value_inflated - the same approximation with adjusted standard errors
#                        that stay positive when p is 0 or 1
# NOTE(review): the binomial test is one-sided ('less') while both normal
# approximations are doubled into two-sided p-values - confirm the comparison
# downstream is meant to mix the two.

Z_95 = 1.96  # normal quantile used by the adjusted standard error
N_DRAWS = 10000
K_0_CHOICES = [0]
K_1_CHOICES = [20, 50, 80, 150, 200, 300, 350]
n_0 = 100
n_1 = 1000


def _standard_errors(p, n):
    """Return ``(base, inflated)`` standard errors for a proportion p over n trials.

    ``base`` is sqrt(p * (1 - p) / n). ``inflated`` adds Z_95**2 / (4 * n**2)
    under the root and divides by (1 + Z_95**2 / n); this has the same form as
    the Wilson score interval half-width divided by z.
    """
    variance = p * (1 - p) / n
    base = np.sqrt(variance)
    inflated = np.sqrt(variance + Z_95**2 / (4 * n**2)) / (1 + Z_95**2 / n)
    return base, inflated


def _two_sided_normal_p(diff, se):
    """Return the two-sided p-value of 0 under a Normal(diff, se) distribution."""
    lower_tail = sc.stats.norm.cdf(0, diff, se)
    return np.minimum(lower_tail, 1 - lower_tail) * 2


def _result_row(k_0, k_1):
    """Return one result row for the given success counts.

    Layout: (k_0, n_0, p_0, k_1, n_1, p_1, binom_test, p_value_base,
    p_value_inflated).
    """
    p_0 = k_0 / n_0
    p_1 = k_1 / n_1
    se_0_base, se_0 = _standard_errors(p_0, n_0)
    se_1_base, se_1 = _standard_errors(p_1, n_1)
    diff = p_1 - p_0
    return (
        k_0,
        n_0,
        p_0,
        k_1,
        n_1,
        p_1,
        sc.stats.binomtest(k_0, n_0, p_1, alternative='less').pvalue,
        _two_sided_normal_p(diff, np.sqrt(se_0_base**2 + se_1_base**2)),
        _two_sided_normal_p(diff, np.sqrt(se_0**2 + se_1**2)),
    )


results = []
# A row depends only on the drawn (k_0, k_1) pair, so each distinct pair is
# evaluated once and reused; the random draws still happen on every iteration.
_row_cache = {}
for _ in range(N_DRAWS):
    k_0 = np.random.choice(K_0_CHOICES)
    k_1 = np.random.choice(K_1_CHOICES)
    key = (k_0, k_1)
    if key not in _row_cache:
        _row_cache[key] = _result_row(k_0, k_1)
    # Copy into a fresh list so rows never alias each other.
    results.append(list(_row_cache[key]))
# Run to view results
import pandas as pd
# One row per simulated draw; the column order mirrors the order in which the
# values were appended to each entry of `results`.
df = pd.DataFrame(
    results,
    columns=[
        'k_0',
        'n_0',
        'p_0',
        'k_1',
        'n_1',
        'p_1',
        'binom_test',
        'p_value_base',
        'p_value_inflated',
    ],
)
# Run to view results
# Turn each p-value column into a boolean "significant at the 0.05 level" flag.
df['binom_test_decision'] = df['binom_test'].lt(0.05)
df['p_value_inflated_decision'] = df['p_value_inflated'].lt(0.05)
df['p_value_base_decision'] = df['p_value_base'].lt(0.05)
# Run to view results
# Mean absolute gap between each normal-approximation p-value and the binomial
# test p-value (base first, inflated second).
print(
    'abs diff in p-values:',
    df['p_value_base'].sub(df['binom_test']).abs().mean(),
    df['p_value_inflated'].sub(df['binom_test']).abs().mean(),
)
# Run to view results
# Share of rows where each approximation reaches the same 0.05-level decision
# as the binomial test (base first, inflated second).
print(
    'accuracy:',
    df['p_value_base_decision'].eq(df['binom_test_decision']).mean(),
    df['p_value_inflated_decision'].eq(df['binom_test_decision']).mean(),
)
# Run to view results
# Rows each approximation flags as significant, then the share of those rows
# that the binomial test does NOT flag (base first, inflated second).
# NOTE(review): the denominator here is the approximation's positives, so this
# is the fraction of its rejections the binomial test disagrees with; a rate
# conditioned on the binomial test's non-rejections would select on
# 'binom_test_decision' instead - confirm which one is intended.
df_inflated_postive = df.loc[df['p_value_inflated_decision']]
df_base_postive = df.loc[df['p_value_base_decision']]
print(
    'type I error rate:',
    (~df_base_postive['binom_test_decision']).mean(),
    (~df_inflated_postive['binom_test_decision']).mean(),
)
# Run to view results
# Rows each approximation does NOT flag as significant, then the share of
# those rows that the binomial test does flag (base first, inflated second).
# NOTE(review): the denominator here is the approximation's negatives, so this
# is the fraction of its non-rejections the binomial test disagrees with; a
# rate conditioned on the binomial test's rejections would select on
# 'binom_test_decision' instead - confirm which one is intended.
df_inflated_negative = df.loc[~df['p_value_inflated_decision']]
df_base_negative = df.loc[~df['p_value_base_decision']]
print(
    'type II error rate:',
    df_base_negative['binom_test_decision'].mean(),
    df_inflated_negative['binom_test_decision'].mean(),
)
# Run to view results
# Bare expression: as the last statement of a notebook cell this renders the
# frame (rows the inflated approximation did not flag) as the cell output; it
# has no effect when the file is run as a plain script.
df_inflated_negative
# Run to view results
# Run to view results