import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Load the iris measurements. The path is absolute, so the CSV has to exist at
# exactly this location for the script to run.
iris_data = pd.read_csv("/work/ASSIGN_4/iris.csv")
# Bare expression: previews the first rows when run as a notebook cell and is
# a no-op when the file is executed as a plain script.
iris_data.head()

# (column name, label used in the range report, label used in the std report).
# Both labels are spelled out because the two reports capitalise differently.
measurement_labels = [
    ("sepal_length", "Sepal length", "Sepal Length"),
    ("sepal_width", "Sepal width", "Sepal Width"),
    ("petal_length", "Petal length", "Petal Length"),
    ("petal_width", "Petal width", "Petal Width"),
]

# Series.min()/max() are vectorised and skip NaN; the builtin min()/max()
# iterate element by element and give position-dependent results as soon as a
# NaN is present in the column.
for column, range_label, _ in measurement_labels:
    values = iris_data[column]
    print("%s range: [%s, %s]" % (range_label, values.min(), values.max()))

# np.std uses ddof=0 (population standard deviation); pandas' own
# Series.std() would default to ddof=1 and print slightly larger numbers.
for column, _, std_label in measurement_labels:
    print("%s standard deviation :\t %f" % (std_label, np.std(iris_data[column])))
# Pearson correlation between the numeric measurement columns.
# iris_data still contains the string-valued "species" column (it is mapped to
# integers further down in this file). pandas >= 2.0 raises on non-numeric
# columns instead of silently dropping them, so restrict to numeric dtypes
# explicitly; this form also works on older pandas releases.
corr = iris_data.select_dtypes(include="number").corr(method="pearson")

# `display` only exists inside IPython/Jupyter. Fall back to a plain-text
# print so the file also runs as a regular script.
try:
    display(corr)
except NameError:
    print(corr)

# Annotated heatmap of the correlation matrix, drawn on the current axes.
sns.heatmap(
    corr,
    cmap=sns.diverging_palette(10, 0, as_cmap=True),
    annot=True,
    fmt="f",
)
from sklearn.decomposition import PCA

# Feature matrix: every column except the class label. Built once and reused
# by both PCA fits below instead of re-dropping the column each time.
iris_features = iris_data.drop(["species"], axis=1)

# First fit keeps all components so the share of variance carried by each one
# can be inspected.
pca = PCA()
x_new1 = pca.fit_transform(iris_features)
x_new1[:5]  # notebook-style preview; no effect when run as a script
explained_variance = pca.explained_variance_ratio_
explained_variance  # notebook-style preview; no effect when run as a script

# Bar chart of the per-component explained-variance ratio. The bar positions
# are derived from the data so the plot stays valid if the number of feature
# columns (and therefore components) changes.
plt.figure(figsize=(6, 4))
plt.bar(
    range(len(explained_variance)),
    explained_variance,
    alpha=0.5,
    align="center",
    label="Individual Explained Variance",
    color="darkblue",
)
plt.ylabel("Explained Variance Ratio")
plt.xlabel("Principal Component")
plt.legend(loc="best")
plt.tight_layout()

# Second fit keeps only the first three components. `pca` is deliberately
# rebound to this reduced model, and `x_new` holds the projected samples used
# by the scatter grid later in the file.
pca = PCA(n_components=3)
x_new = pca.fit_transform(iris_features)
x_new[:5]  # notebook-style preview; no effect when run as a script
# Encode the species names as integers so they can drive the scatter colours.
categ_num = {"species": {"setosa": 0, "versicolor": 1, "virginica": 2}}
# Series.map produces a numeric column directly and avoids the deprecated
# implicit downcasting that DataFrame.replace relies on for str -> int
# replacement. Any label missing from the mapping becomes NaN.
iris_data1 = iris_data.assign(
    species=iris_data["species"].map(categ_num["species"])
)

# The first four columns are taken as the measurement features.
columns = list(iris_data.columns[:4])

# Grid of scatter plots: one row per principal component in x_new, one column
# per original feature, points coloured by the encoded species.
fig, axes = plt.subplots(3, 4, figsize=(15, 10))
species_codes = iris_data1["species"]
for component_idx, axes_row in enumerate(axes):
    for ax, feature in zip(axes_row, columns):
        ax.scatter(x_new[:, component_idx], iris_data[feature], c=species_codes)
plt.show()