# Fetch the course-modules page (web scraping via requests).
import requests

con = requests.get("https://canvas.colorado.edu/courses/84839/modules")
# BUG FIX: fail loudly on HTTP errors (4xx/5xx) instead of silently
# continuing with an error page.
con.raise_for_status()
# BUG FIX: the original bare `con.content` expression did nothing outside a
# notebook — bind the raw HTML bytes so later code can actually use them.
html = con.content
# Load the Big Mart sales dataset and show the frequency of each outlet type.
import pandas as pd

df = pd.read_csv("bigmartsales.csv")
outlet_counts = df['Outlet_Type'].value_counts()
# NOTE(review): `display` is an IPython/notebook builtin — this script is
# presumably run in a notebook; confirm, or swap for print() in plain Python.
display(outlet_counts)
# Descriptive statistics for the Item_MRP column.
mrp = df['Item_MRP']  # hoist the repeated column lookup
print("For Item_MRP Column:")
print("Max is:" + str(mrp.max()))
print("Min is:" + str(mrp.min()))
print("Mean is:" + str(mrp.mean()))
print("Median is:" + str(mrp.median()))
# mode() returns a Series (there can be ties); take the first mode.
print("Mode is :" + str(mrp.mode()[0]))
print("Range is :" + str(mrp.max() - mrp.min()))
# BUG FIX: the midrange is the midpoint of the extremes, (max + min) / 2.
# The original computed (max - min) / 2, which is half the range.
print("Midrange is :" + str((mrp.max() + mrp.min()) / 2))
# Quartiles: q2 (the median) is computed for the outlier test below,
# though only q1 and q3 are printed here.
q1 = mrp.quantile(0.25)
q2 = mrp.median()
q3 = mrp.quantile(0.75)
print("Quartiles are: " + str(q1) + ", " + str(q3))
IQR = q3 - q1
print("IQR is: " + str(IQR))
var1 = mrp.var()
print("Variance :" + str(var1))
std1 = mrp.std()
print('Standard deviation is :' + str(std1))
# NOTE(review): flags values more than one standard deviation from the
# MEDIAN — a nonstandard rule. The conventional choices are the 1.5*IQR
# fences (q1 - 1.5*IQR, q3 + 1.5*IQR) or mean +/- k*std; kept as written
# to preserve behavior — confirm intent.
outliers = (mrp - q2).abs() > std1
print(mrp[outliers])
# Structural overview of the frame (dtypes, non-null counts, memory).
df.info()

import seaborn as sns

# Price vs. profit scatter, coloured by outlet type.
sns.relplot(data=df, x='Item_MRP', y='Profit', hue='Outlet_Type')
# Per-outlet-type spread of outlet sales.
sns.catplot(data=df, x='Outlet_Type', y='Item_Outlet_Sales')
# Univariate distribution of profit.
sns.displot(data=df, x='Profit')
# Joint kernel-density estimate of weight vs. visibility.
sns.jointplot(data=df, x='Item_Weight', y='Item_Visibility', kind='kde')
# Pairwise relationships across numeric columns, coloured by outlet type.
sns.pairplot(data=df, hue='Outlet_Type')

# Per-column count of missing values (display is IPython/notebook-only).
display(df.isnull().sum())
# Impute missing Item_Weight values with the column mean (kept in a new
# column so the original stays untouched).
mean_value = df['Item_Weight'].mean()
df['Item_Weight_mean'] = df['Item_Weight'].fillna(mean_value)

# Data reduction (discretization): bin weight into 3 equal-width buckets.
# BUG FIX: pd.cut assigns labels to bins in ascending order, so the first
# label goes to the LIGHTEST bin. The original order
# ['Heavy', 'Middleweight', 'Light'] labeled the lightest items 'Heavy'
# and the heaviest 'Light'.
df['IW_bucket'] = pd.cut(df['Item_Weight_mean'], 3,
                         labels=['Light', 'Middleweight', 'Heavy'])
df.head(10)
# Data transformation: standardize Profit with a z-score
# ((value - mean) / standard deviation).
profit = df['Profit']
df['Profit_Zscore'] = (profit - profit.mean()) / profit.std()
# Bare trailing expression: displays the new column when run in a notebook.
df['Profit_Zscore']