# Week 13 Practice Notebook

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pandas import DataFrame
from sklearn.datasets import load_iris
from sklearn.neural_network import MLPClassifier

# Load the iris dataset and keep the raw feature matrix for modelling.
iris = load_iris()
X = iris['data']

# Build a data frame of the four measurements with descriptive column names.
feature_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
df = DataFrame(X, columns=feature_columns)

# Attach the integer class label of every sample.
df['target'] = iris['target']

# Attach the species name matching each label, for use as a plot hue.
species_by_label = dict(enumerate(iris.target_names))
df['species'] = df['target'].map(species_by_label)

# Copy the class labels out of the data frame into a standalone NumPy array.
target_column = df['target']
target = np.array(target_column.values)

from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing; the remaining 70% are for training.
xTrain, xTest, yTrain, yTest = train_test_split(X, target, test_size=0.3)

from sklearn.preprocessing import StandardScaler

# Neural networks are sensitive to feature scale, so standardise the inputs.
# The scaler is fitted on the training split only; the same fitted scaler is
# then applied to both the training and the test split.
scaler = StandardScaler().fit(xTrain)
xTrain = scaler.transform(xTrain)
xTest = scaler.transform(xTest)

# Build the multi-layer perceptron and train it on the scaled training split.
mlp = MLPClassifier(max_iter=300, solver='adam', random_state=1).fit(xTrain, yTrain)

# Report the percent accuracy on the training set, rounded to two decimals.
train_score = mlp.score(xTrain, yTrain)
accuracy = round(train_score * 100, 2)
print(f'accuracy: {accuracy}')

# Report the percent accuracy on the held-out test set, rounded to two decimals.
test_score = mlp.score(xTest, yTest)
accuracy = round(test_score * 100, 2)
print(f'accuracy: {accuracy}')

# Scale the entire dataset X with the scaler that was fitted on the training
# split. The scaler is deliberately NOT refitted here: the classifier was
# trained on inputs standardised with the training-split statistics, so the
# data passed to it for prediction must go through the same transformation.
# Refitting on all of X would shift the mean/variance used and feed the model
# inputs on a different scale than it was trained on.
X = scaler.transform(X)

# Predict a class label for every sample in the dataset and store it in the
# data frame.
pred = mlp.predict(X)
df['prediction'] = pred

# Translate each predicted label into its species name for plotting.
predicted_species_by_label = dict(enumerate(iris.target_names))
df['pred_species'] = df['prediction'].map(predicted_species_by_label)

# Plot petal length vs sepal length twice for comparison: first coloured by
# the actual species, then coloured by the predicted species.
for hue_column in ('species', 'pred_species'):
    sns.relplot(data=df, x='petal_length', y='sepal_length', hue=hue_column)

# Plot petal width vs sepal width twice for comparison: first coloured by
# the actual species, then coloured by the predicted species.
for hue_column in ('species', 'pred_species'):
    sns.relplot(data=df, x='petal_width', y='sepal_width', hue=hue_column)

# Observation: the actual data are sorted into three species, whereas the predicted classifications sort the data into only two species.