# Checking distribution: draw one distribution plot per column listed in
# `numerical`, each with a fitted curve overlaid (fit=norm; `norm` is
# presumably scipy.stats.norm - confirm against the imports).
# NOTE(review): sns.distplot is deprecated in recent seaborn releases; its
# replacements (histplot/displot) have no `fit=` argument, so it is kept here.
sns.set(rc={'figure.figsize': (30, 30)})
# The 4x4 grid has room for at most 16 columns; subplot positions are 1-based.
for position, column in enumerate(numerical, start=1):
    plt.subplot(4, 4, position)
    sns.distplot(df[column], fit=norm)

# Probability (Q-Q) plot of customer_age against the normal distribution,
# drawn through the pyplot interface; `res` keeps the values probplot returns.
res = stats.probplot(x=df["customer_age"], dist="norm", plot=plt)

# Draw four random samples of customers per gender, without replacement.
# Every sample has its own fixed seed so the draws are reproducible.
# NOTE(review): the reason for a sample size of 1941 is not visible here -
# presumably chosen to fit the smaller gender group; confirm.
GENDER_SAMPLE_SIZE = 1941

# Filter each gender once instead of re-running the same mask for every sample.
male_customers = df[df.gender == "M"]
female_customers = df[df.gender == "F"]

male1 = male_customers.sample(n=GENDER_SAMPLE_SIZE, random_state=71, replace=False)

male2 = male_customers.sample(n=GENDER_SAMPLE_SIZE, random_state=72, replace=False)

male3 = male_customers.sample(n=GENDER_SAMPLE_SIZE, random_state=73, replace=False)

male4 = male_customers.sample(n=GENDER_SAMPLE_SIZE, random_state=77, replace=False)

female1 = female_customers.sample(n=GENDER_SAMPLE_SIZE, random_state=74, replace=False)

female2 = female_customers.sample(n=GENDER_SAMPLE_SIZE, random_state=75, replace=False)

female3 = female_customers.sample(n=GENDER_SAMPLE_SIZE, random_state=76, replace=False)

female4 = female_customers.sample(n=GENDER_SAMPLE_SIZE, random_state=78, replace=False)

# Preview the first five rows of one male sample as a sanity check.
male2.head(n=5)

# Preview the first five rows of one female sample as a sanity check.
female1.head(n=5)

# Shapiro-Wilk test on customer_age of the male1 sample.
# H0: the sample comes from a normal (Gaussian) distribution.
stat, p = shapiro(male1["customer_age"])
print('Statistics=%.3f, p=%.3f' % (stat, p))
alpha = 0.05  # significance level
# Compare against alpha itself so the threshold is defined in one place only.
if p > alpha:
    print("Sample seems Gaussian - fail to reject H0")
else:
    print("Sample does not seem Gaussian - reject H0")

```
Statistics=0.995, p=0.000
Sample does not seem Gaussian - reject H0
```

# D'Agostino's test on customer_age of the male1 sample.
# H0: the sample comes from a normal (Gaussian) distribution.
stat, p = normaltest(male1["customer_age"])
print('Statistics=%.3f, p=%.3f' % (stat, p))
alpha = 0.05  # significance level
# Compare against alpha itself so the threshold is defined in one place only.
if p > alpha:
    print("Sample seems Gaussian - fail to reject H0")
else:
    print("Sample does not seem Gaussian - reject H0")

```
Statistics=11.625, p=0.003
Sample does not seem Gaussian - reject H0
```

# Anderson-Darling test on customer_age of the male1 sample.
# H0: the sample comes from a normal distribution.
result = anderson(male1["customer_age"])
print('Statistic: %.3f' % result.statistic)
# The result is read as one critical value per significance level rather
# than a p-value: H0 is rejected at a level when the statistic is not below
# that level's critical value.
for sl, cv in zip(result.significance_level, result.critical_values):
    if result.statistic < cv:
        print('%.3f: %.3f, data looks normal (fail to reject H0)' % (sl, cv))
    else:
        print('%.3f: %.3f, data does not look normal (reject H0)' % (sl, cv))

```
Statistic: 1.864
15.000: 0.575, data does not look normal (reject H0)
10.000: 0.655, data does not look normal (reject H0)
5.000: 0.785, data does not look normal (reject H0)
2.500: 0.916, data does not look normal (reject H0)
1.000: 1.090, data does not look normal (reject H0)
```

# Shapiro-Wilk test on customer_age of the male3 sample.
# H0: the sample comes from a normal (Gaussian) distribution.
stat, p = shapiro(male3["customer_age"])
print('Statistics=%.3f, p=%.3f' % (stat, p))
alpha = 0.05  # significance level
# Compare against alpha itself so the threshold is defined in one place only.
if p > alpha:
    print("Sample seems Gaussian - fail to reject H0")
else:
    print("Sample does not seem Gaussian - reject H0")

```
Statistics=0.996, p=0.000
Sample does not seem Gaussian - reject H0
```

# D'Agostino's test on customer_age of the male3 sample.
# H0: the sample comes from a normal (Gaussian) distribution.
stat, p = normaltest(male3["customer_age"])
print('Statistics=%.3f, p=%.3f' % (stat, p))
alpha = 0.05  # significance level
# Compare against alpha itself so the threshold is defined in one place only.
if p > alpha:
    print("Sample seems Gaussian - fail to reject H0")
else:
    print("Sample does not seem Gaussian - reject H0")

```
Statistics=9.441, p=0.009
Sample does not seem Gaussian - reject H0
```

# Anderson-Darling test on customer_age of the male3 sample.
# H0: the sample comes from a normal distribution.
result = anderson(male3["customer_age"])
print('Statistic: %.3f' % result.statistic)
# The result is read as one critical value per significance level rather
# than a p-value: H0 is rejected at a level when the statistic is not below
# that level's critical value.
for sl, cv in zip(result.significance_level, result.critical_values):
    if result.statistic < cv:
        print('%.3f: %.3f, data looks normal (fail to reject H0)' % (sl, cv))
    else:
        print('%.3f: %.3f, data does not look normal (reject H0)' % (sl, cv))

```
Statistic: 1.662
15.000: 0.575, data does not look normal (reject H0)
10.000: 0.655, data does not look normal (reject H0)
5.000: 0.785, data does not look normal (reject H0)
2.500: 0.916, data does not look normal (reject H0)
1.000: 1.090, data does not look normal (reject H0)
```

# Shapiro-Wilk test on customer_age of the female2 sample.
# H0: the sample comes from a normal (Gaussian) distribution.
stat, p = shapiro(female2["customer_age"])
print('Statistics=%.3f, p=%.3f' % (stat, p))
alpha = 0.05  # significance level
# Compare against alpha itself so the threshold is defined in one place only.
if p > alpha:
    print("Sample seems Gaussian - fail to reject H0")
else:
    print("Sample does not seem Gaussian - reject H0")

```
Statistics=0.996, p=0.000
Sample does not seem Gaussian - reject H0
```

# D'Agostino's test on customer_age of the female2 sample.
# H0: the sample comes from a normal (Gaussian) distribution.
stat, p = normaltest(female2["customer_age"])
print('Statistics=%.3f, p=%.3f' % (stat, p))
alpha = 0.05  # significance level
# Compare against alpha itself so the threshold is defined in one place only.
if p > alpha:
    print("Sample seems Gaussian - fail to reject H0")
else:
    print("Sample does not seem Gaussian - reject H0")

```
Statistics=8.873, p=0.012
Sample does not seem Gaussian - reject H0
```

# Anderson-Darling test on customer_age of the female2 sample.
# H0: the sample comes from a normal distribution.
result = anderson(female2["customer_age"])
print('Statistic: %.3f' % result.statistic)
# The result is read as one critical value per significance level rather
# than a p-value: H0 is rejected at a level when the statistic is not below
# that level's critical value.
for sl, cv in zip(result.significance_level, result.critical_values):
    if result.statistic < cv:
        print('%.3f: %.3f, data looks normal (fail to reject H0)' % (sl, cv))
    else:
        print('%.3f: %.3f, data does not look normal (reject H0)' % (sl, cv))

```
Statistic: 1.836
15.000: 0.575, data does not look normal (reject H0)
10.000: 0.655, data does not look normal (reject H0)
5.000: 0.785, data does not look normal (reject H0)
2.500: 0.916, data does not look normal (reject H0)
1.000: 1.090, data does not look normal (reject H0)
```

# Bartlett's test for equal variances of customer_age, run once per
# male/female sample pair (1 through 4).
# NOTE(review): Bartlett's test assumes normally distributed samples, while
# the recorded outputs of the normality tests in this file reject normality
# for the customer_age samples they checked; Levene's test is the usual
# alternative in that case - confirm which is intended.

# Pair 1
stats.bartlett(male1["customer_age"], female1["customer_age"])

# Pair 2
stats.bartlett(male2["customer_age"], female2["customer_age"])

# Pair 3
stats.bartlett(male3["customer_age"], female3["customer_age"])

# Pair 4
stats.bartlett(male4["customer_age"], female4["customer_age"])

# Two-sample z-tests comparing customer_age between each male/female sample
# pair. value=0 is passed explicitly on every call (presumably the
# hypothesised difference in means under H0 - confirm against the ztest
# implementation imported by this file).

# 1st test: male1 vs. female1
ztest(male1["customer_age"], x2=female1["customer_age"], value=0)

# 2nd test: male2 vs. female2
ztest(male2["customer_age"], x2=female2["customer_age"], value=0)

# 3rd test: male3 vs. female3
ztest(male3["customer_age"], x2=female3["customer_age"], value=0)

# 4th test: male4 vs. female4
ztest(male4["customer_age"], x2=female4["customer_age"], value=0)