# Week 13 Practice Notebooks

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pandas import DataFrame
from sklearn.datasets import load_iris
from sklearn.neural_network import MLPClassifier

# Load the iris dataset and assemble a DataFrame holding the feature columns,
# the integer class label, and a readable species name for plotting.
iris = load_iris()
X = iris.data

# Build the frame with the dataset's feature names as column headers.
df = DataFrame(X, columns=iris["feature_names"])

# Integer class label for each row.
df["target"] = iris["target"]

# Species name for each row, looked up by class index; used as the plot hue.
species_by_index = dict(enumerate(iris.target_names))
df["species"] = df["target"].map(species_by_index)

df.head()

# Class labels as a NumPy array, for use as the target vector.
y = df["target"].to_numpy()

# Hold out 30% of the samples for testing and train on the remaining 70%.
from sklearn.model_selection import train_test_split

# A fixed random_state makes the split (and therefore the accuracies reported
# later) repeatable between runs; stratify=y keeps the class proportions the
# same in the training and test subsets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1, stratify=y
)

# Report the size of each subset.
n_train = len(X_train)
n_test = len(X_test)
print(f"n_train = {n_train}")
print(f"n_test = {n_test}")

# Neural networks are sensitive to feature scale, so standardize the features.
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training data only, then apply that same
# transformation to both the training and the test subsets.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Build the multi-layer perceptron classifier and fit it to the training data.
mlp = MLPClassifier(solver="adam", max_iter=1000, random_state=1)
mlp.fit(X_train, y_train)

# Print the accuracy on the training set as a percentage, rounded to two
# decimal places.
train_score = mlp.score(X_train, y_train)
train_accuracy = round(train_score * 100, 2)
print(f" Train accuracy percentage = {train_accuracy}%")

# Print the accuracy on the held-out test set as a percentage, rounded to two
# decimal places.
test_score = mlp.score(X_test, y_test)
test_accuracy = round(test_score * 100, 2)
print(f" Test accuracy percentage = {test_accuracy}%")

# Scale the entire dataset with the scaler that was already fitted on the
# training data. The classifier was trained on features standardized with
# those statistics, so the scaler is deliberately NOT refitted here: refitting
# on all of X would shift the features relative to what the model learned.
#
# The transform starts from the raw iris.data rather than the current X so
# that re-running this cell does not scale already-scaled values a second time.
X = scaler.transform(iris.data)

# Predict a class for every sample and record both the numeric prediction and
# the corresponding species name in the DataFrame.
pred = mlp.predict(X)
df["prediction"] = pred

# Translate each predicted class index into its species name.
species_lookup = dict(enumerate(iris.target_names))
df["pred_species"] = df["prediction"].map(species_lookup)

# Plot petal length against sepal length twice -- once coloured by the actual
# species and once by the predicted species -- so the two can be compared.
# The x/y arguments must be the full column labels, including the "(cm)"
# unit; a shortened name such as "petal length" does not match any column.

# Actual data
actual_length_grid = sns.relplot(
    data=df,
    x="petal length (cm)",
    y="sepal length (cm)",
    hue="species",
)
actual_length_grid.set(title="Actual Data")

# Predicted data
predicted_length_grid = sns.relplot(
    data=df,
    x="petal length (cm)",
    y="sepal length (cm)",
    hue="pred_species",
)
predicted_length_grid.set(title="Predicted Data")

# Plot petal width against sepal width twice -- first coloured by the actual
# species, then by the predicted species -- so the two can be compared.
# Same plots as for the length columns, but using the width columns.
for hue_column, plot_title in (
    ("species", "Actual Data"),
    ("pred_species", "Predicted Data"),
):
    sns.relplot(
        data=df,
        x="petal width (cm)",
        y="sepal width (cm)",
        hue=hue_column,
    ).set(title=plot_title)

# Written observations from comparing the actual and predicted plots.
# Plain string literals are used because the messages contain no placeholders.
print("The actual classification has data that intermingles between zones more when compared to the predicted data.")
print("The predicted version of the data seems to have more definite lines/division between the species.")