# Checking distribution: one panel per numerical column (histogram with a
# fitted normal curve), laid out on a 4x4 grid of a 30x30 figure.
sns.set(rc={'figure.figsize': (30, 30)})
# NOTE: a 4x4 grid holds at most 16 panels, so this assumes `numerical` has
# no more than 16 entries.
for i, column in enumerate(numerical):
    plt.subplot(4, 4, i + 1)
    # NOTE(review): distplot is deprecated in recent seaborn releases; kept
    # here because its replacements have no direct `fit=` equivalent.
    sns.distplot(df[column], fit=norm)

# Q-Q plot of customer_age. Open a fresh figure first: with plot=plt the
# points are drawn on the current axes, which would otherwise be the last
# histogram panel created by the loop above.
plt.figure()
res = stats.probplot(df["customer_age"], plot=plt)
# Draw four seeded random samples of equal size from each gender so the two
# groups can be compared on same-sized samples.
SAMPLE_SIZE = 1941  # rows drawn per sample

# Filter each gender once instead of re-evaluating the mask for every sample.
male_customers = df[df.gender == "M"]
female_customers = df[df.gender == "F"]

# Every sample has its own random_state, so each draw is reproducible.
male1 = male_customers.sample(n=SAMPLE_SIZE, random_state=71, replace=False)
male2 = male_customers.sample(n=SAMPLE_SIZE, random_state=72, replace=False)
male3 = male_customers.sample(n=SAMPLE_SIZE, random_state=73, replace=False)
male4 = male_customers.sample(n=SAMPLE_SIZE, random_state=77, replace=False)
female1 = female_customers.sample(n=SAMPLE_SIZE, random_state=74, replace=False)
female2 = female_customers.sample(n=SAMPLE_SIZE, random_state=75, replace=False)
female3 = female_customers.sample(n=SAMPLE_SIZE, random_state=76, replace=False)
female4 = female_customers.sample(n=SAMPLE_SIZE, random_state=78, replace=False)

# Quick look at the first rows of one sample per gender (only rendered when
# run interactively, e.g. as the last expression of a notebook cell).
male2.head()
female1.head()
# Normality checks on customer_age for the male1 sample.
# H0 for each test: the sample comes from a normal distribution.
alpha = 0.05  # significance threshold shared by the p-value based tests

# Shapiro-Wilk test
stat, p = shapiro(male1["customer_age"])
print('Statistics=%.3f, p=%.3f' % (stat, p))
if p > alpha:
    print("Sample seems Gaussian - fail to reject H0")
else:
    print("Sample does not seem Gaussian - reject H0")

# D’Agostino’s test
stat, p = normaltest(male1["customer_age"])
print('Statistics=%.3f, p=%.3f' % (stat, p))
if p > alpha:
    print("Sample seems Gaussian - fail to reject H0")
else:
    print("Sample does not seem Gaussian - reject H0")

# Anderson-Darling test: no p-value is used here; the statistic is compared
# against the critical value reported for each significance level.
result = anderson(male1["customer_age"])
print('Statistic: %.3f' % result.statistic)
for sl, cv in zip(result.significance_level, result.critical_values):
    if result.statistic < cv:
        print('%.3f: %.3f, data looks normal (fail to reject H0)' % (sl, cv))
    else:
        print('%.3f: %.3f, data does not look normal (reject H0)' % (sl, cv))
# Normality checks on customer_age for the male3 sample.
# H0 for each test: the sample comes from a normal distribution.
alpha = 0.05  # significance threshold shared by the p-value based tests

# Shapiro-Wilk test
stat, p = shapiro(male3["customer_age"])
print('Statistics=%.3f, p=%.3f' % (stat, p))
if p > alpha:
    print("Sample seems Gaussian - fail to reject H0")
else:
    print("Sample does not seem Gaussian - reject H0")

# D’Agostino’s test
stat, p = normaltest(male3["customer_age"])
print('Statistics=%.3f, p=%.3f' % (stat, p))
if p > alpha:
    print("Sample seems Gaussian - fail to reject H0")
else:
    print("Sample does not seem Gaussian - reject H0")

# Anderson-Darling test: no p-value is used here; the statistic is compared
# against the critical value reported for each significance level.
result = anderson(male3["customer_age"])
print('Statistic: %.3f' % result.statistic)
for sl, cv in zip(result.significance_level, result.critical_values):
    if result.statistic < cv:
        print('%.3f: %.3f, data looks normal (fail to reject H0)' % (sl, cv))
    else:
        print('%.3f: %.3f, data does not look normal (reject H0)' % (sl, cv))
# Normality checks on customer_age for the female2 sample.
# H0 for each test: the sample comes from a normal distribution.
alpha = 0.05  # significance threshold shared by the p-value based tests

# Shapiro-Wilk test
stat, p = shapiro(female2["customer_age"])
print('Statistics=%.3f, p=%.3f' % (stat, p))
if p > alpha:
    print("Sample seems Gaussian - fail to reject H0")
else:
    print("Sample does not seem Gaussian - reject H0")

# D’Agostino’s test
stat, p = normaltest(female2["customer_age"])
print('Statistics=%.3f, p=%.3f' % (stat, p))
if p > alpha:
    print("Sample seems Gaussian - fail to reject H0")
else:
    print("Sample does not seem Gaussian - reject H0")

# Anderson-Darling test: no p-value is used here; the statistic is compared
# against the critical value reported for each significance level.
result = anderson(female2["customer_age"])
print('Statistic: %.3f' % result.statistic)
for sl, cv in zip(result.significance_level, result.critical_values):
    if result.statistic < cv:
        print('%.3f: %.3f, data looks normal (fail to reject H0)' % (sl, cv))
    else:
        print('%.3f: %.3f, data does not look normal (reject H0)' % (sl, cv))
# Bartlett's test on customer_age for each paired male/female sample.
# The results are printed explicitly: as bare expressions they were discarded
# when run as a script, and only the last one would show in a notebook cell.
bartlett_pairs = (
    (male1, female1),
    (male2, female2),
    (male3, female3),
    (male4, female4),
)
for pair_no, (male_sample, female_sample) in enumerate(bartlett_pairs, start=1):
    bartlett_stat, bartlett_p = stats.bartlett(
        male_sample['customer_age'], female_sample['customer_age']
    )
    print('Bartlett test %d: statistic=%.3f, p=%.3f'
          % (pair_no, bartlett_stat, bartlett_p))
# Two-sample z-test on customer_age for each paired male/female sample, with
# a hypothesised difference of 0 (value=0).
# The results are printed explicitly: as bare expressions they were discarded
# when run as a script, and only the last one would show in a notebook cell.
ztest_pairs = (
    (male1, female1),  # 1st test
    (male2, female2),  # 2nd test
    (male3, female3),  # 3rd test
    (male4, female4),  # 4th test
)
for test_no, (male_sample, female_sample) in enumerate(ztest_pairs, start=1):
    z_stat, z_p = ztest(
        male_sample["customer_age"], female_sample["customer_age"], value=0
    )
    print('z-test %d: statistic=%.3f, p=%.3f' % (test_no, z_stat, z_p))