import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
import scipy
# Load the data set; the file's first column becomes the row index.
df = pd.read_csv('policez.csv', index_col=0)

# Density-normalised histogram of column 'x' with the N(0, 1) pdf drawn on the
# same axes for visual comparison.
fig, ax = plt.subplots(figsize=(12, 7))
x_axis = np.arange(-5, 5, 0.1)
standard_normal_pdf = stats.norm.pdf(x_axis, loc=0, scale=1)
ax.plot(x_axis, standard_normal_pdf)
hist_bins = np.arange(-10, 10, .5)
ax.hist(data=df, x='x', bins=hist_bins, density=True)
plt.show()
# Upper-tail p-value of every value in column 'x' under a standard normal null.
# norm.sf evaluates the survival function (1 - cdf) directly, so very large
# values keep a tiny positive p-value instead of rounding to exactly 0.
df['pvalues'] = scipy.stats.norm.sf(df['x'])
df.head()

# Benjamini-Hochberg step-up procedure: sort the p-values ascending, rank them
# 1..m and compare each with its critical value rank / m * 0.2.
df = df.sort_values('pvalues').reset_index(drop = True)
df['rank'] = np.arange(1, len(df) + 1)
df['critval'] = df['rank'] / len(df) * 0.2

# kth_pval is the largest p-value that sits at or below its own critical value
# (the BH rule is "<="); every p-value up to it counts as a discovery.
# If no p-value qualifies, -inf turns every decision into 0 instead of raising
# ValueError from max() on an empty selection.
passing_pvalues = df.loc[df['pvalues'] <= df['critval'], 'pvalues']
kth_pval = passing_pvalues.max() if len(passing_pvalues) else -np.inf

# 1 = rejected (discovery), 0 = not rejected; computed column-wise rather than
# with a Python loop over row labels.
df['decisions'] = (df['pvalues'] <= kth_pval).astype(int)
df.head()
# Print the count explicitly: a bare f-string expression shows nothing when the
# file runs as a script.
print(f"{len(df[df['decisions'] == 1])} discoveries")
# Horizontal strip plot: p-values along the x axis, one row per value of the
# 0/1 'decisions' column.
sns.stripplot(data=df, x='pvalues', y='decisions', order=[0, 1], orient="h", alpha=0.8)

# Report the smallest critical value among the rows whose decision is 0.
not_rejected_critvals = df.loc[df['decisions'] == 0, 'critval']
print(f"decision boundary at {min(not_rejected_critvals)}")
# Fresh copy of the same file for the second analysis.
# NOTE(review): unlike the first load, no index_col=0 is passed here, so the
# file's first column stays a regular column - confirm this is intentional.
df1 = pd.read_csv('policez.csv')

# Same density histogram of column 'x', this time compared with a normal pdf
# that has mean 0.1 and standard deviation 1.4.
fig, ax = plt.subplots(figsize=(12, 7))
x_axis = np.arange(-5, 5, 0.1)
shifted_normal_pdf = stats.norm.pdf(x_axis, loc=.1, scale=1.4)
ax.plot(x_axis, shifted_normal_pdf)
hist_bins = np.arange(-10, 10, .5)
ax.hist(data=df1, x='x', bins=hist_bins, density=True)
plt.show()
# Upper-tail p-value of every value in column 'x' under a normal null with
# mean 0.1 and standard deviation 1.4.  norm.sf evaluates the survival function
# (1 - cdf) directly, so very large values keep a tiny positive p-value instead
# of rounding to exactly 0.
df1['pvalues'] = scipy.stats.norm.sf(df1['x'], 0.1, 1.4)

# Benjamini-Hochberg step-up procedure: sort the p-values ascending, rank them
# 1..m and compare each with its critical value rank / m * 0.2.
df1 = df1.sort_values('pvalues').reset_index(drop = True)
df1['rank'] = np.arange(1, len(df1) + 1)
df1['critval'] = df1['rank'] / len(df1) * .2

# kth_pval is the largest p-value that sits at or below its own critical value
# (the BH rule is "<="); every p-value up to it counts as a discovery.
# If no p-value qualifies, -inf turns every decision into 0 instead of raising
# ValueError from max() on an empty selection.
passing_pvalues = df1.loc[df1['pvalues'] <= df1['critval'], 'pvalues']
kth_pval = passing_pvalues.max() if len(passing_pvalues) else -np.inf

# 1 = rejected (discovery), 0 = not rejected; computed column-wise rather than
# with a Python loop over row labels.
df1['decisions'] = (df1['pvalues'] <= kth_pval).astype(int)
print(f"{len(df1[df1['decisions'] == 1])} discoveries")
# Horizontal strip plot: p-values along the x axis, one row per value of the
# 0/1 'decisions' column.
sns.stripplot(data=df1, x='pvalues', y='decisions', order=[0, 1], orient="h", alpha=0.8)

# Report the smallest critical value among the rows whose decision is 0.
not_rejected_critvals = df1.loc[df1['decisions'] == 0, 'critval']
print(f"decision boundary at {min(not_rejected_critvals)}")