import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
import scipy
# Load the data; the first CSV column becomes the row index.
df = pd.read_csv('policez.csv', index_col = 0)

# Overlay the N(0, 1) density on a density-normalized histogram of column 'x'
# so the observed distribution can be compared against that curve by eye.
fig, ax = plt.subplots(figsize = [12, 7])
x_axis = np.arange(-5, 5, 0.1)
null_density = stats.norm.pdf(x_axis, 0, 1)
hist_bins = np.arange(-10, 10, .5)
ax.plot(x_axis, null_density)
ax.hist('x', data = df, density = True, bins = hist_bins)
plt.show()
# Upper-tail p-value of each 'x' under N(0, 1).
# The survival function is used instead of 1 - cdf: the subtraction loses
# precision for large x and eventually rounds to exactly 0.
df['pvalues'] = stats.norm.sf(df['x'])
df.head()
xfloat64
pvaluesfloat64
1
2.411365343
0.00794645969
2
0.1607881011
0.4361301486
3
-0.85217084
0.8029403607
4
0.1510155077
0.4399817414
5
1.836084478
0.0331725818
# Benjamini-Hochberg procedure on df at level 0.2.
# Sort ascending so that row position + 1 is the rank of each p-value.
df = df.sort_values('pvalues').reset_index(drop = True)
n_tests = len(df)
df['rank'] = np.arange(1, n_tests + 1)
# Critical value for rank k out of m tests: (k / m) * 0.2.
df['critval'] = df['rank'] / n_tests * 0.2
# The rejection threshold is the largest p-value that lies at or below its
# own critical value.
under_line = df.loc[df['pvalues'] <= df['critval'], 'pvalues']
# If no p-value is under the line, nothing is rejected; -inf makes the
# comparison below False for every row instead of max() failing on an
# empty selection.
kth_pval = under_line.max() if not under_line.empty else -np.inf
# 1 = rejected (discovery), 0 = not rejected; computed for the whole column
# at once rather than one row at a time.
df['decisions'] = (df['pvalues'] <= kth_pval).astype(int)
df.head()
xfloat64
pvaluesfloat64
0
6.952482685
1.794564497e-12
1
6.506209798
3.853528607e-11
2
5.703130111
5.881356291e-9
3
5.360467288
4.150347865e-8
4
4.934228717
4.023405108e-7
# Report how many rows were flagged as discoveries. This is printed
# explicitly: a bare string expression in the middle of a cell/script
# produces no output.
n_discoveries = int((df['decisions'] == 1).sum())
print(f"{n_discoveries} discoveries")

# One horizontal strip per decision value, positioned by p-value.
sns.stripplot(
    data=df, x='pvalues', y='decisions',
    alpha = 0.8, order = [0, 1], orient = "h",
)

# The boundary is reported as the smallest critical value among the rows
# that were not rejected; guard the case where every row was rejected.
not_rejected = df.loc[df['decisions'] == 0, 'critval']
if not_rejected.empty:
    print("decision boundary: none (every hypothesis was rejected)")
else:
    print(f"decision boundary at {not_rejected.min()}")
decision boundary at 0.014696253182975627
# Repeat the analysis with a shifted, wider normal curve in place of N(0, 1).
# NOTE(review): unlike the first load of this file, index_col is not set
# here, so the first CSV column is kept as an ordinary column - confirm
# that this is intended.
df1 = pd.read_csv('policez.csv')

# Parameters of the normal curve, named once so the plotted density and the
# p-values below cannot drift apart.
null_mean, null_sd = 0.1, 1.4
fdr_level = 0.2

fig, ax = plt.subplots(figsize = [12, 7])
x_axis = np.arange(-5, 5, 0.1)
plt.plot(x_axis, stats.norm.pdf(x_axis, null_mean, null_sd))
plt.hist(data = df1, x = 'x', density = True, bins = np.arange(-10, 10, .5))
plt.show()

# Upper-tail p-values under N(null_mean, null_sd); sf avoids the precision
# loss of 1 - cdf for large x.
df1['pvalues'] = stats.norm.sf(df1['x'], null_mean, null_sd)

# Benjamini-Hochberg: sort ascending so row position + 1 is the rank.
df1 = df1.sort_values('pvalues').reset_index(drop = True)
n_tests = len(df1)
df1['rank'] = np.arange(1, n_tests + 1)
# Critical value for rank k out of m tests: (k / m) * fdr_level.
df1['critval'] = df1['rank'] / n_tests * fdr_level
# The rejection threshold is the largest p-value that lies at or below its
# own critical value.
under_line = df1.loc[df1['pvalues'] <= df1['critval'], 'pvalues']
# If no p-value is under the line, nothing is rejected; -inf makes the
# comparison below False for every row instead of max() failing on an
# empty selection.
kth_pval = under_line.max() if not under_line.empty else -np.inf
# 1 = rejected (discovery), 0 = not rejected.
df1['decisions'] = (df1['pvalues'] <= kth_pval).astype(int)
# Report how many rows of df1 were flagged as discoveries.
n_discoveries = int((df1['decisions'] == 1).sum())
print(f"{n_discoveries} discoveries")

# One horizontal strip per decision value, positioned by p-value.
sns.stripplot(
    data=df1, x='pvalues', y='decisions',
    alpha = 0.8, order = [0, 1], orient = "h",
)

# The boundary is reported as the smallest critical value among the rows
# that were not rejected; guard the case where every row was rejected.
not_rejected = df1.loc[df1['decisions'] == 0, 'critval']
if not_rejected.empty:
    print("decision boundary: none (every hypothesis was rejected)")
else:
    print(f"decision boundary at {not_rejected.min()}")
5 discoveries
decision boundary at 0.00043652237177155327