import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
data = pd.read_csv('data.csv')
data.head()
# count the participants in each experimental group
target_agg = data['new feature'].value_counts().rename(index={False: 'WITHOUT\nNew Feature', True: 'WITH\nNew Feature'})
target_bar = plt.bar(x=target_agg.index, height=target_agg.values)
plt.bar_label(target_bar, padding=1)
plt.yticks(ticks=np.arange(0,141,20))
plt.title('The Number of Participants')
plt.show()
with_hours = data[data['new feature'] == True]['hours']
without_hours = data[data['new feature'] == False]['hours']
print("The Average Hours Spent on Gaming (With New Feature): ", np.mean(with_hours))
print("The Average Hours Spent on Gaming (Without New Feature): ", np.mean(without_hours))
print("The Difference in the Average Hours Spent on Gaming: ", np.mean(with_hours) - np.mean(without_hours))
# function for calculating the t-test for two independent samples
def independent_ttest(data1, data2, alpha=0.05, alternative = 'two-sided'):
"""
Calculate the t-test on TWO INDEPENDENT samples of scores, data1 and data2.
This is a test for the null hypothesis that 2 independent samples
have identical average (expected) values. This test assumes that the
populations do not have identical variances.
Parameters
----------
data1, data2 : array_like
The arrays must have the same shape.
alpha : int or float, default: 0.05
The significance level
alternative : {'two-sided', 'less', 'greater'}, optional
Defines the alternative hypothesis.
The following options are available (default is 'two-sided'):
* 'two-sided': the means of the distributions underlying the samples
are unequal.
* 'less': the mean of the distribution underlying the first sample
is less than the mean of the distribution underlying the second
sample.
* 'greater': the mean of the distribution underlying the first
sample is greater than the mean of the distribution underlying
the second sample.
Returns
-------
t_stat : The t-statistic.
p : The p-value associated with the given alternative.
df : The number of degrees of freedom used in calculation of the
t-statistic; this is one less than the size of the sample
cv: critical value
"""
    # sample means and (sample) standard deviations
    mean1, mean2 = np.average(data1), np.average(data2)
    std1, std2 = np.std(data1, ddof=1), np.std(data2, ddof=1)
    # sample sizes
    n1 = len(data1)
    n2 = len(data2)
    # squared standard errors of each sample mean
    sqrd_se1 = std1**2/n1
    sqrd_se2 = std2**2/n2
    # standard error of the difference between the sample means
    sed = np.sqrt(sqrd_se1 + sqrd_se2)
    # calculate the t statistic
    t_stat = (mean1 - mean2) / sed
    # Welch-Satterthwaite degrees of freedom
    df_n = (sqrd_se1 + sqrd_se2)**2
    df_d = (sqrd_se1**2)/(n1-1) + (sqrd_se2**2)/(n2-1)
    df = df_n/df_d
    # lower and upper one-tailed critical values at the given alpha
    cv = (stats.t.ppf(alpha, df), stats.t.ppf(1.0 - alpha, df))
    # calculate the p-value for the requested alternative
    if alternative == 'two-sided':
        p = 2.0 * (1.0 - stats.t.cdf(abs(t_stat), df))
    elif alternative == 'greater':
        p = 1.0 - stats.t.cdf(t_stat, df)
    elif alternative == 'less':
        p = stats.t.cdf(t_stat, df)
    else:
        raise ValueError("alternative must be 'two-sided', 'less' or 'greater'")
    # return everything
    return t_stat, df, cv, p
t_stat, df, cv, p_value = independent_ttest(with_hours, without_hours, alpha=0.01, alternative='greater')
alpha = 0.01
print("The result of the two sample t-test")
print("t statistic: %.8f" % t_stat)
print("p value: %.8f" % p_value)
if p_value < alpha:
    print("Reject null hypothesis")
else:
    print("Fail to reject null hypothesis")
ttest, p_value = stats.ttest_ind(with_hours, without_hours, equal_var=False, alternative='greater')
print("The result of the two sample t-test for each feature")
print("t statistic:%.8f" % ttest)
print("p value:%.8f" % p_value)
if p_value < 0.01:
    print("Reject null hypothesis")
else:
    print("Fail to reject null hypothesis")
hours_after = data[data['new feature'] == True]['hours']
hours_before = data[data['new feature'] == True]['monthly hours (before experiment)']
diff = hours_after-hours_before
print("The average difference in monthly hours of gaming (before the experiemt vs during the experiment):", np.mean(diff))
def paired_ttest(data1, data2, alpha=0.05, alternative='two-sided'):
"""
Calculate the t-test on TWO RELATED samples of scores, data1 and data2.
This is a test for the null hypothesis that two related or
repeated samples have identical average (expected) values.
Parameters
----------
data1, data2 : array_like
The arrays must have the same shape.
alpha : int or float, default: 0.05
The significance level
alternative : {'two-sided', 'less', 'greater'}, optional
Defines the alternative hypothesis.
The following options are available (default is 'two-sided'):
* 'two-sided': the means of the distributions underlying the samples
are unequal.
* 'less': the mean of the distribution underlying the first sample
is less than the mean of the distribution underlying the second
sample.
* 'greater': the mean of the distribution underlying the first
sample is greater than the mean of the distribution underlying
the second sample.
Returns
-------
t_stat : The t-statistic.
p : The p-value associated with the given alternative.
df : The number of degrees of freedom used in calculation of the
t-statistic; this is one less than the size of the sample
cv: critical value
"""
    if len(data1) != len(data2):
        raise ValueError("data1 and data2 must have the same length")
    data1 = np.array(data1)
    data2 = np.array(data2)
    # number of pairs
    n = len(data1)
    # degrees of freedom
    df = n - 1
    # paired differences and their mean
    diff = data1 - data2
    mean_diff = np.average(diff)
    # standard deviation of the differences
    s_diff = np.sqrt((1/(n-1)) * np.sum((diff - mean_diff)**2))
    # t-test statistic
    t_stat = mean_diff / (s_diff/np.sqrt(n))
    # calculate the p-value for the requested alternative
    if alternative == 'two-sided':
        p = 2.0 * (1.0 - stats.t.cdf(abs(t_stat), df))
    elif alternative == 'greater':
        p = 1.0 - stats.t.cdf(t_stat, df)
    elif alternative == 'less':
        p = stats.t.cdf(t_stat, df)
    else:
        raise ValueError("alternative must be 'two-sided', 'less' or 'greater'")
    # lower and upper one-tailed critical values at the given alpha
    cv = (stats.t.ppf(alpha, df), stats.t.ppf(1.0 - alpha, df))
    # return everything
    return t_stat, df, cv, p
t_stat, df, cv, p_value = paired_ttest(hours_after, hours_before, alpha=0.01, alternative='greater')
alpha = 0.01
print("The result of the paired t-test")
print("t statistic: %.8f" % t_stat)
print("p value: %.8f" % p_value)
if p_value < alpha:
    print("Reject null hypothesis")
else:
    print("Fail to reject null hypothesis")
ttest, p_value = stats.ttest_rel(hours_after, hours_before, alternative='greater')
print("The result of the paired t-test")
print("The test statistic: ", ttest)
print("The p value:%.8f" % p_value)
if p_value < alpha:
    print("=> Reject the null hypothesis")
else:
    print("=> Fail to reject the null hypothesis")
from statsmodels.stats.power import TTestIndPower
# size of samples (e.g. in a previous study)
n1, n2 = len(with_hours), len(without_hours)
# sample variances of the two groups (e.g. from a previous study)
v1, v2 = np.var(with_hours, ddof=1), np.var(without_hours, ddof=1)
# pooled standard deviation
pooled_s = np.sqrt(((n1 - 1) * v1 + (n2 - 1) * v2) / (n1 + n2 - 2))
# calculate the effect size (Cohen's d) for a minimum detectable
# difference of 2.5 hours in mean monthly gaming time
d = 2.5 / pooled_s
print(f'Effect size: {d}')
# factors for power analysis
alpha = 0.01
power = 0.8
# power analysis to find sample size for given effect
ttest_ind = TTestIndPower()
n = ttest_ind.solve_power(effect_size=d, alpha=alpha, power=power,
                          ratio=1, alternative='larger')
print('Sample size needed in each group: {:.0f}'.format(np.round(n)))
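# For context, the power achieved with the current group sizes can be checked
# with the same TTestIndPower object (a sketch; it reuses d, alpha, n1 and n2
# computed above).
achieved_power = ttest_ind.power(effect_size=d, nobs1=n1, alpha=alpha,
                                 ratio=n2/n1, alternative='larger')
print('Power with the current sample sizes: {:.3f}'.format(achieved_power))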
from statsmodels.stats.power import TTestPower
# size of samples (e.g. in a previous study)
n1, n2 = len(hours_after), len(hours_before)
# sample variances of the before/during measurements (e.g. from a previous study)
v1, v2 = np.var(hours_after, ddof=1), np.var(hours_before, ddof=1)
# pooled standard deviation of the two measurements
# (note: for a paired design the effect size is often defined relative to the
# standard deviation of the differences; the pooled SD is used here instead)
pooled_s = np.sqrt(((n1 - 1) * v1 + (n2 - 1) * v2) / (n1 + n2 - 2))
# calculate the effect size (Cohen's d) for a minimum detectable
# difference of 2.5 hours
d = 2.5 / pooled_s
print(f'Effect size: {d}')
# factors for power analysis
alpha = 0.01
power = 0.8
# power analysis to find sample size for given effect
ttest_paired = TTestPower()
n = ttest_paired.solve_power(effect_size=d, alpha=alpha, power=power, alternative='larger')
print('Sample size needed : {:.0f}'.format(np.round(n)))
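# Similarly, the power achieved by the paired test with the current number of
# participants can be checked (a sketch; it reuses d, alpha and the n1 pairs
# computed above).
achieved_power = ttest_paired.power(effect_size=d, nobs=n1, alpha=alpha,
                                    alternative='larger')
print('Power with the current number of pairs: {:.3f}'.format(achieved_power))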