import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelEncoder
from tensorflow import keras
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
## Display/plot setup.
pd.set_option('display.max_columns', None)
sns.set_style('darkgrid')
## reading training data
# FIX: pd.read_csv() was called with no file path on both reads, which raises
# TypeError before anything else runs.
# TODO(review): replace the placeholder paths with the real dataset locations.
train_df = pd.read_csv('train.csv')
##reading test data
test_df = pd.read_csv('test.csv')
train_df.head()
test_df.head()
print("train data shape: ", train_df.shape)
print("test data shape: ", test_df.shape)
## train and test data
# NOTE(review): the last TWO columns are excluded from the features, but only
# the final column is used as the label — presumably the second-to-last column
# is a subject/id column; confirm against the dataset schema.
X_train = train_df.iloc[:, :-2]
y_train = train_df.iloc[:, -1]
X_test = test_df.iloc[:, :-2]
y_test = test_df.iloc[:, -1]
## unique classes
y_train.unique()
## value counts of unique classes
class_label = y_train.value_counts()
plt.figure(figsize=(10, 10))
plt.xticks(rotation=75)
# FIX: seaborn >= 0.12 made barplot's x/y keyword-only; positional data
# arguments raise a TypeError. Pass them explicitly.
sns.barplot(x=class_label.index, y=class_label.values);
## Standard Scaler
# Standardize features to zero mean / unit variance. The scaler is fitted on
# the training split only and the same fitted transform is applied to the test
# split, avoiding test-set leakage.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
## LabelEncoder
# Map the string activity labels to integers, then one-hot encode them for the
# categorical-crossentropy loss used below.
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_train = pd.get_dummies(y_train).values
# FIX: use transform (not fit_transform) on the test labels so the
# integer<->class mapping learned from the training set is reused; refitting
# on the test set could silently assign different integers to the classes.
y_test = label_encoder.transform(y_test)
# NOTE(review): get_dummies yields one column per class *present*; assumes the
# test split contains every training class — confirm for this dataset.
y_test = pd.get_dummies(y_test).values
## .values already returns numpy arrays, so no extra np.array() conversion
## is needed.
y_train
## PCA
# With n_components=None every principal component is kept, so this is a pure
# rotation of the (already standardized) feature space rather than a
# dimensionality reduction. Fitted on the training split only.
pca_model = PCA(n_components=None)
X_train = pca_model.fit_transform(X_train)
X_test = pca_model.transform(X_test)
X_train
## Model
# Simple fully-connected classifier: 64 -> 128 -> 64 ReLU layers followed by a
# softmax output over the activity classes.
model = keras.models.Sequential()
# Declare the input dimension explicitly so the model is built eagerly and
# model.summary() would work before fit().
model.add(keras.layers.Dense(units=64, activation='relu', input_shape=(X_train.shape[1],)))
model.add(keras.layers.Dense(units=128, activation='relu'))
model.add(keras.layers.Dense(units=64, activation='relu'))
# FIX/generalize: derive the output width from the one-hot label matrix
# instead of the hard-coded 6, so the script works for any number of classes.
model.add(keras.layers.Dense(units=y_train.shape[1], activation='softmax'))
# categorical_crossentropy matches the one-hot encoded targets built above.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(X_train, y_train, batch_size=128, epochs=15, validation_split=0.2)
## Loss Vs. Epochs
plt.figure(figsize=(10, 5))
# FIX: these curves come from validation_split, not a held-out test set, so
# label them "Validation"; the original label= kwargs said "Test ..." and were
# then overridden by the explicit legend list. With consistent labels the
# redundant legend argument can be dropped.
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend();
## Accuracy Vs. Epochs
plt.figure(figsize=(10, 5))
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend();
## prediction
pred = model.predict(X_test)
# Idiom/FIX: vectorized argmax over the class axis replaces the manual
# per-row loop — same result, one C-level call.
predic = np.argmax(pred, axis=1)
y_test.shape
y_test[0]
# Recover the integer labels from the one-hot test matrix. The original loop
# appended the index of each 1 entry, which silently SKIPPED any all-zero row
# and would desynchronize y_test_label from predic; argmax keeps the arrays
# aligned row-for-row (assumes each row is a valid one-hot vector, which
# pd.get_dummies guarantees).
y_test_label = np.argmax(y_test, axis=1)
y_test_label
predic
## Evaluation: per-class report, overall accuracy, and a confusion-matrix
## heatmap comparing decoded true labels against predicted labels.
report = classification_report(y_test_label, predic)
print("Classification Report: \n", report)
print("-" * 100)
print()
score = accuracy_score(y_test_label, predic)
print("Accuracy Score: ", score)
print("-" * 100)
print()
plt.figure(figsize=(10, 10))
cm = confusion_matrix(y_test_label, predic)
# fmt='g' prints the cell counts as plain integers instead of scientific
# notation.
sns.heatmap(cm, annot=True, fmt='g')