#Importing useful libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from copy import deepcopy
from scipy.stats import shapiro
from statsmodels.graphics.gofplots import qqplot
from warnings import filterwarnings
filterwarnings(action = 'ignore')
%matplotlib inline
# Load the survey data and work on a copy so that `raw` stays untouched.
raw = pd.read_csv('surveyofbodyfat.csv')
df = raw.copy()

# Correlation of every other feature with BodyFat, strongest positive first.
# Features whose coefficient is above 0.5 or below -0.5 are the ones most
# strongly related to BodyFat.
# The target's own entry (always 1.0) is removed by label rather than by
# position, so the result does not depend on where it lands after sorting.
(
    df.corr()['BodyFat']
    .drop('BodyFat')
    .to_frame()
    .sort_values(by='BodyFat', ascending=False)
    .T
)

# Distribution of the target variable.
sns.displot(df['BodyFat'], bins=20, color='grey')
plt.show()
def normality_visual(data):
    """Draw a normal Q-Q plot for every numeric column of ``data``.

    Each column is passed to ``qqplot`` with ``fit=True`` and a 45-degree
    reference line, resized to 15x8 inches, labelled, and shown on its own
    figure.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose columns are plotted. Non-numeric columns are skipped and
        missing values are dropped before plotting; a column with no
        remaining observations produces no figure.

    Returns
    -------
    None
    """
    # A Q-Q plot needs numeric samples; filtering here avoids a failure
    # inside qqplot when the frame carries text or categorical columns.
    numeric = data.select_dtypes(include='number')
    for column, series in numeric.items():
        values = series.dropna()
        if values.empty:
            # Nothing left to fit or plot for this column.
            continue
        fig = qqplot(values, line='45', fit=True)
        # Use the axes of the figure qqplot returned instead of relying on
        # pyplot's notion of the "current" axes.
        ax = fig.axes[0]
        fig.set_size_inches(15, 8)
        ax.set_xlabel('Theoretical Quantiles', fontsize=13)
        ax.set_ylabel(f'Sample Quantiles of the {column} column', fontsize=13)
        plt.show()
# One Q-Q plot per column of the working frame.
normality_visual(df)

# Annotated heatmap of the pairwise correlation matrix; the figure size is
# set through seaborn's rc settings before drawing.
sns.set(rc={'figure.figsize': (10, 7)})
sns.heatmap(data=df.corr(), annot=True)
plt.show()