Importing the Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import graphviz
Loading the Dataset and Preparing it
# Read the Iris CSV into a DataFrame and peek at the first rows.
iris_data = pd.read_csv('/work/Iris.csv')
iris_data.head()

# Double brackets keep the label column as a one-column DataFrame; this is
# the object handed to the encoder below.
species = iris_data[['Species']]
species.head()
iris_data.columns
iris_data['Species'].unique()

from sklearn.preprocessing import OrdinalEncoder

# Encode the species names as numeric codes so the label is easier to
# handle numerically; the codes are stored in a new 'encoded_species' column.
ordinal_encoder = OrdinalEncoder()
iris_data['encoded_species'] = ordinal_encoder.fit_transform(species)
ordinal_encoder.categories_

# One sub-frame per species (taken after the encoded column was added).
iris_setosa = iris_data[iris_data['Species'] == 'Iris-setosa']
iris_versicolor = iris_data[iris_data['Species'] == 'Iris-versicolor']
iris_virginica = iris_data[iris_data['Species'] == 'Iris-virginica']
iris_data.head()
Data Overview and Analysis
# Summary statistics for every column, numeric and non-numeric alike.
iris_data.describe(include='all')
# Mean of each remaining column, computed separately for each species.
iris_data.groupby('Species').mean()
Finding the outliers
# Box plot of the setosa sepal lengths on a wide figure.
plt.figure(figsize=(16, 8))
# Pass the series as the `x` keyword: seaborn deprecated positional data
# vectors (see the FutureWarning in the captured output) and from 0.12 the
# only positional argument is `data`.
sns.boxplot(x=iris_setosa.SepalLengthCm)
/shared-libs/python3.7/py/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
FutureWarning
# Box plot of the setosa sepal widths on a wide figure.
plt.figure(figsize=(16, 8))
# Pass the series as the `x` keyword: seaborn deprecated positional data
# vectors (see the FutureWarning in the captured output) and from 0.12 the
# only positional argument is `data`.
sns.boxplot(x=iris_setosa.SepalWidthCm)
/shared-libs/python3.7/py/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
FutureWarning
# Box plot of the setosa petal widths on a wide figure.
plt.figure(figsize=(16, 8))
# Pass the series as the `x` keyword: seaborn deprecated positional data
# vectors (see the FutureWarning in the captured output) and from 0.12 the
# only positional argument is `data`.
sns.boxplot(x=iris_setosa.PetalWidthCm)
/shared-libs/python3.7/py/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
FutureWarning
# Box plot of the setosa petal lengths on a wide figure.
plt.figure(figsize=(16, 8))
# Pass the series as the `x` keyword: seaborn deprecated positional data
# vectors (see the FutureWarning in the captured output) and from 0.12 the
# only positional argument is `data`.
sns.boxplot(x=iris_setosa.PetalLengthCm)
/shared-libs/python3.7/py/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
FutureWarning
Visualizing the distributions of the different characteristics of the data
import plotly.express as px
# Sepal length against petal length, coloured by species, with a box plot
# in each margin.
fig1 = px.scatter(
    iris_data,
    x='PetalLengthCm',
    y='SepalLengthCm',
    color='Species',
    marginal_x='box',
    marginal_y='box',
)

# Sepal width against petal width, same colouring and marginal box plots.
fig2 = px.scatter(
    iris_data,
    x='PetalWidthCm',
    y='SepalWidthCm',
    color='Species',
    marginal_x='box',
    marginal_y='box',
)

# Petal length against petal width, coloured by species, no marginals.
fig3 = px.scatter(
    iris_data,
    x='PetalWidthCm',
    y='PetalLengthCm',
    color='Species',
)

# Render the three figures in creation order.
fig1.show()
fig2.show()
fig3.show()
Visualizing the correlations
# Correlation matrix over the numeric columns, skipping the first column
# (presumably the row Id -- confirm against the CSV header).
# The slice still contains the string 'Species' column; it is filtered out
# explicitly because DataFrame.corr() raises on non-numeric data in
# pandas >= 2.0, whereas older releases dropped such columns silently.
iris_corr = iris_data.iloc[:, 1:].select_dtypes(include='number').corr()

# Static heatmap of the matrix with seaborn.
plt.figure(figsize=(16, 8))
sns.heatmap(iris_corr)

# The same matrix as a plotly figure.
fig = px.imshow(iris_corr, title="Iris")
fig.show()
# Per-species correlation matrices, skipping the first column (presumably
# the row Id -- confirm against the CSV header).
# The string 'Species' column is filtered out explicitly because
# DataFrame.corr() raises on non-numeric data in pandas >= 2.0, whereas
# older releases dropped such columns silently.
# NOTE(review): 'encoded_species' is kept, as in the original; each of these
# frames was filtered to a single species, so that column holds one value.
setosa_corr = iris_setosa.iloc[:, 1:].select_dtypes(include='number').corr()
versicolor_corr = iris_versicolor.iloc[:, 1:].select_dtypes(include='number').corr()
virginica_corr = iris_virginica.iloc[:, 1:].select_dtypes(include='number').corr()

# One plotly heatmap per species.
fig1 = px.imshow(setosa_corr, title='Iris Setosa')
fig2 = px.imshow(versicolor_corr, title='Iris Versicolor')
fig3 = px.imshow(virginica_corr, title='Iris Virginica')
fig1.show()
fig2.show()
fig3.show()
Classification with Decision Trees
# Features: the two petal measurements. Target: the ordinal-encoded species.
X = iris_data[['PetalLengthCm', 'PetalWidthCm']]
y = iris_data['encoded_species']

# Display names for the tree export; only the last two feature names are
# passed to export_graphviz (feature_name[2:]).
feature_name = [
    'sepal length (cm)',
    'sepal width (cm)',
    'petal length (cm)',
    'petal width (cm)',
]
target_names = np.array(['setosa', 'versicolor', 'virginica'])

from sklearn.tree import DecisionTreeClassifier

# Depth-2 decision tree with a fixed random_state, fitted on the petal features.
tree_clf = DecisionTreeClassifier(random_state=42, max_depth=2)
tree_clf.fit(X, y)
from graphviz import Source
from sklearn.tree import export_graphviz
# Directory that receives the exported Graphviz file.
IMAGES_PATH = '/work/'

# Build the .dot path once and use it for both writing and reading.
iris_tree_dot = os.path.join(IMAGES_PATH, "iris_tree.dot")

# Write the fitted tree as Graphviz source, labelled with the petal feature
# names and the species names, with rounded, colour-filled nodes.
export_graphviz(
    tree_clf,
    out_file=iris_tree_dot,
    feature_names=feature_name[2:],
    class_names=target_names,
    rounded=True,
    filled=True,
)

# Load the file back as a graphviz Source object.
Source.from_file(iris_tree_dot)
from matplotlib.colors import ListedColormap
#Function from the Hands-on Machine Learning with Scikit-Learn, Keras, and TensorFlow by Aurélien Géron
def plot_decision_boundary(clf, X, y, axes=None, iris=True, legend=False, plot_training=True):
    """Draw the regions predicted by ``clf`` on the current matplotlib figure.

    The classifier is evaluated on a 100 x 100 grid spanning ``axes`` and the
    predictions are drawn as filled contours; the training points can be
    overlaid on top.

    Args:
        clf: Fitted classifier exposing ``predict`` for two-feature input.
        X: Training features; accessed with ``.iloc``, so a two-column
            DataFrame is expected (first column on x, second on y).
        y: Labels aligned with ``X``; rows with values 0, 1 and 2 are drawn
            with the setosa, versicolor and virginica markers respectively.
        axes: ``[x_min, x_max, y_min, y_max]`` of the grid and the plot
            limits. ``None`` (the default) means ``[0, 7.5, 0, 3]``.
        iris: If true, label the axes "Petal length" / "Petal width";
            otherwise draw contour lines as well and label the axes
            x_1 / x_2.
        legend: If true, add a legend in the lower-right corner.
        plot_training: If true, overlay the training points and apply
            ``axes`` as the plot limits.
    """
    # Use a None sentinel instead of a mutable list default.
    if axes is None:
        axes = [0, 7.5, 0, 3]

    # Regular grid over the requested window, flattened to (n_points, 2).
    x1s = np.linspace(axes[0], axes[1], 100)
    x2s = np.linspace(axes[2], axes[3], 100)
    x1, x2 = np.meshgrid(x1s, x2s)
    X_new = np.c_[x1.ravel(), x2.ravel()]

    # An estimator fitted on a DataFrame records its column names and warns
    # ("X does not have valid feature names") when asked to predict on a bare
    # array, so give the grid the same column names when they are available.
    fitted_names = getattr(clf, "feature_names_in_", None)
    if fitted_names is not None:
        X_new = pd.DataFrame(X_new, columns=fitted_names)

    y_pred = clf.predict(X_new).reshape(x1.shape)

    custom_cmap = ListedColormap(['#fafab0','#9898ff','#a0faa0'])
    plt.contourf(x1, x2, y_pred, alpha=0.3, cmap=custom_cmap)
    if not iris:
        # Darker contour lines on top of the filled regions.
        custom_cmap2 = ListedColormap(['#7d7d58','#4c4c7f','#507d50'])
        plt.contour(x1, x2, y_pred, cmap=custom_cmap2, alpha=0.8)
    if plot_training:
        plt.plot(X.iloc[:, 0][y==0], X.iloc[:, 1][y==0], "yo", label="Iris setosa")
        plt.plot(X.iloc[:, 0][y==1], X.iloc[:, 1][y==1], "bs", label="Iris versicolor")
        plt.plot(X.iloc[:, 0][y==2], X.iloc[:, 1][y==2], "g^", label="Iris virginica")
        plt.axis(axes)
    if iris:
        plt.xlabel("Petal length", fontsize=14)
        plt.ylabel("Petal width", fontsize=14)
    else:
        plt.xlabel(r"$x_1$", fontsize=18)
        plt.ylabel(r"$x_2$", fontsize=18, rotation=0)
    if legend:
        plt.legend(loc="lower right", fontsize=14)
# Decision regions of the fitted tree on an 8 x 4 figure.
plt.figure(figsize=(8, 4))
plot_decision_boundary(tree_clf, X, y)

# Hand-placed boundary segments as (xs, ys, line style); presumably they sit
# at the fitted tree's split thresholds -- confirm against the exported tree.
for seg_x, seg_y, seg_style in (
    ([2.45, 2.45], [0, 3], "k-"),
    ([2.45, 7.5], [1.75, 1.75], "k--"),
    ([4.95, 4.95], [0, 1.75], "k:"),
    ([4.85, 4.85], [1.75, 3], "k:"),
):
    plt.plot(seg_x, seg_y, seg_style, linewidth=2)

# Depth captions as (x, y, text, font size).
for text_x, text_y, caption, caption_size in (
    (1.40, 1.0, "Depth=0", 15),
    (3.2, 1.80, "Depth=1", 13),
    (4.05, 0.5, "(Depth=2)", 11),
):
    plt.text(text_x, text_y, caption, fontsize=caption_size)
/shared-libs/python3.7/py/lib/python3.7/site-packages/sklearn/base.py:446: UserWarning:
X does not have valid feature names, but DecisionTreeClassifier was fitted with feature names