import numpy as np
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import load_iris
from pandas import DataFrame
import pandas as pd
import seaborn as sns
# Load the iris dataset and keep the raw feature matrix for modelling
iris = load_iris()
X = iris.data
# Build the data frame directly with the feature names as column labels
df = DataFrame(X, columns=iris["feature_names"])
# Attach the integer class label of every sample
df["target"] = iris["target"]
# Translate each class label into its species name (used as the plot hue)
species_by_label = dict(enumerate(iris.target_names))
df["species"] = df["target"].map(species_by_label)
# Preview of the first rows; the result is only shown in an interactive session
df.head()
# Create NumPy target array (class labels, row-aligned with X)
y = df.target.values
# Split data into train and test sets using a 70-30 split
from sklearn.model_selection import train_test_split
# random_state pins the shuffle so the reported accuracies are reproducible
# from run to run (the classifier itself is already seeded); stratify=y keeps
# the class proportions the same in the train and test sets.
(X_train, X_test, y_train, y_test) = train_test_split(
    X, y, test_size=0.3, random_state=1, stratify=y
)
n_train = len(X_train)
n_test = len(X_test)
print(f"n_train = {n_train}")
print(f"n_test = {n_test}")
# Neural networks are sensitive to feature scale, so standardise the inputs
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Learn the scaling statistics from the training set only and apply them to it
X_train = scaler.fit_transform(X_train)
# Apply the same (training-set) statistics to the test set
X_test = scaler.transform(X_test)
# Build the multi-layer perceptron classifier and train it on the scaled training set
mlp = MLPClassifier(solver="adam", max_iter=1000, random_state=1)
mlp.fit(X_train, y_train)
# Report the accuracy on the training set as a percentage (two decimals)
train_score = mlp.score(X_train, y_train)
train_accuracy = round(train_score * 100, 2)
print(f" Train accuracy percentage = {train_accuracy}%")
# Report the accuracy on the held-out test set as a percentage (two decimals)
test_score = mlp.score(X_test, y_test)
test_accuracy = round(test_score * 100, 2)
print(f" Test accuracy percentage = {test_accuracy}%")
# Scale the entire data X with the scaler that was fitted on the training set.
# The scaler must NOT be re-fitted here: the network was trained on features
# standardised with the training-set mean/variance, so inputs at prediction
# time have to go through the very same transformation.
# The result goes into a new name so the raw feature matrix X stays intact.
X_scaled = scaler.transform(X)
# Predict the entire dataset and add columns to the data frame for the
# predicted class label and the predicted species name
pred = mlp.predict(X_scaled)
df["prediction"] = pred
df["pred_species"] = df["prediction"].map(dict(enumerate(iris.target_names)))
# Scatter plots of petal vs. sepal measurements, first for length and then for
# width. For each measurement one figure is coloured by the actual species and
# one by the predicted species so the two can be compared.
# The x/y column names must be the full labels including the "(cm)" suffix,
# otherwise the lookup in the data frame fails.
# NOTE(review): no plt.show() is called here; when this is run as a plain
# script rather than in a notebook the figures may not be displayed - confirm.
for measure in ("length", "width"):
    for hue_column, plot_title in (
        ("species", "Actual Data"),
        ("pred_species", "Predicted Data"),
    ):
        grid = sns.relplot(
            data=df,
            x=f"petal {measure} (cm)",
            y=f"sepal {measure} (cm)",
            hue=hue_column,
        )
        grid.set(title=plot_title)
# Written conclusions drawn from comparing the actual and predicted plots
print("The actual classification has data that intermingles between zones more when compared to the predicted data.")
print("The predicted version of the data seems to have more definite lines/division between the species.")