# Week 13 Practice Notebook

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pandas import DataFrame
from sklearn.datasets import load_iris
from sklearn.neural_network import MLPClassifier

# Load the iris dataset and keep its raw feature matrix in `x`.
iris = load_iris()
x = iris['data']

# Build the DataFrame with named feature columns in one step.
df = DataFrame(
    x,
    columns=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
)

# Numeric class label for each row.
df['target'] = iris['target']

# Human-readable species name for each row, used as the hue when plotting.
df['species'] = df['target'].map(dict(enumerate(iris.target_names)))

# Pull the class labels out of the DataFrame as a standalone NumPy array
# (an independent copy, not a view onto the DataFrame column).
target = df['target'].to_numpy(copy=True)

# Split data into train and test sets using a 70-30 split.
from sklearn.model_selection import train_test_split

# random_state pins the shuffle so the reported accuracies are reproducible
# from run to run (the classifier is seeded with random_state=1 as well).
# stratify=target keeps the class proportions the same in both splits.
X_train, X_test, Y_train, Y_test = train_test_split(
    x,
    target,
    test_size=0.3,
    random_state=1,
    stratify=target,
)

# Neural networks are sensitive to feature scale, so standardise the inputs.
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training features only, then apply that same
# transformation to both the training and the test features.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Build the multi-layer perceptron classifier and train it on the scaled
# training data.
mlp = MLPClassifier(solver='adam', max_iter=300, random_state=1)
mlp.fit(X_train, Y_train)

# Print out percent accuracy on the training set, rounded to two decimals.
accuracy = round(mlp.score(X_train, Y_train) * 100, 2)
print(f'% Accuracy = {accuracy}%')

# Score the trained classifier on the held-out test set and report the
# result as a percentage rounded to two decimals.
accuracy = round(100 * mlp.score(X_test, Y_test), 2)
print(f'Accuracy = {accuracy}%')

# Scale the entire data X.
#
# Reuse the scaler that was fitted on the training split rather than
# refitting it on the full dataset: the classifier was trained on inputs
# standardised with the training mean/variance, so the data it predicts on
# must go through exactly the same transformation.
X = scaler.transform(x)

# Run the trained classifier over the whole (scaled) dataset.
pred = mlp.predict(X)

# Store the predicted class label next to the actual one ...
df['prediction'] = pred

# ... along with the matching species name, for use as a plot hue.
df['pred_species'] = df['prediction'].map(dict(enumerate(iris.target_names)))

# Plot petal length vs sepal length twice for comparison: first coloured by
# the actual species, then by the predicted species.
for hue_column in ('species', 'pred_species'):
    sns.relplot(data=df, x='petal_length', y='sepal_length', hue=hue_column)

# Plot petal width vs sepal width twice for comparison: first coloured by
# the actual species, then by the predicted species.
for hue_column in ('species', 'pred_species'):
    sns.relplot(data=df, x='petal_width', y='sepal_width', hue=hue_column)

# Observation: in the plots coloured by actual species, the data appear much
# more spread out than in the plots coloured by predicted species.