import keras
import numpy as np
import tensorflow as tf
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from keras.utils import np_utils
from sklearn.model_selection import train_test_split
import sklearn.metrics as metrics
# Load the EMNIST "balanced" split.
# The CSV files have no header row, so header=None is required: without it
# pandas uses the first sample of each file as column names and silently
# drops it (the recorded 112799 / 18799 row counts are each one short).
train = pd.read_csv("emnist-balanced-train.csv", delimiter=',', header=None)
test = pd.read_csv("emnist-balanced-test.csv", delimiter=',', header=None)
# The mapping file is read as space-separated pairs indexed by the first
# column. The single remaining column is collapsed to a Series with
# .squeeze("columns") because read_csv's squeeze= argument was removed in
# pandas 2.0.
mapping = pd.read_csv(
    "emnist-balanced-mapping.txt", delimiter=' ', index_col=0, header=None
).squeeze("columns")
print("Train: %s, Test: %s, Map: %s" % (train.shape, test.shape, mapping.shape))
Train: (112799, 785), Test: (18799, 785), Map: (47,)
# Split x and y
# Column 0 of each frame is taken as the label; every remaining column is a
# feature. The source frames are deleted once they have been split.
train_x, train_y = train.iloc[:, 1:], train.iloc[:, 0]
del train
test_x, test_y = test.iloc[:, 1:], test.iloc[:, 0]
del test
print(*(part.shape for part in (train_x, train_y, test_x, test_y)))
(112799, 784) (112799,) (18799, 784) (18799,)
# Constants
HEIGHT = 28
WIDTH = 28


# Rotate image
def rotate(image):
    """Reshape a flat pixel vector to (HEIGHT, WIDTH) and re-orient it.

    The reshaped image is mirrored left-to-right and then rotated by 90
    degrees with np.rot90.

    Args:
        image: array holding HEIGHT * WIDTH pixel values.

    Returns:
        The re-oriented 2-D image array.
    """
    grid = image.reshape((HEIGHT, WIDTH))
    return np.rot90(np.fliplr(grid))
# Convert both feature tables to arrays and run rotate() over every row
# (axis 1), turning each flat row into a re-oriented 2-D image.
train_x = np.apply_along_axis(rotate, 1, np.asarray(train_x))
test_x = np.apply_along_axis(rotate, 1, np.asarray(test_x))
train_x: (112799, 28, 28)
test_x: (18799, 28, 28)
# Normalize
# Cast to float32 and divide by 255 (presumably mapping 8-bit pixel
# intensities into the [0, 1] range).
train_x = train_x.astype('float32') / 255
test_x = test_x.astype('float32') / 255
# plot image
# Show training samples 100-108 in a 3x3 grid, each titled with the
# character its label maps to.
for slot, i in enumerate(range(100, 109), start=1):
    # Use the explicit (rows, cols, index) form: the three-digit shorthand
    # 330 + (i + 1) evaluated to 431..439 here, i.e. a 4x3 grid.
    plt.subplot(3, 3, slot)
    plt.imshow(train_x[i], cmap=plt.get_cmap('gray'))
    plt.title(chr(mapping[train_y[i]]))
# number of classes
# Counted from the distinct labels present in the training set.
num_classes = train_y.nunique()
# One hot encoding
# Both label vectors are expanded to num_classes columns.
train_y, test_y = [
    np_utils.to_categorical(labels, num_classes)
    for labels in (train_y, test_y)
]
train_y: (112799, 47)
test_y: (18799, 47)
# Reshape image
# Add a trailing axis of size 1 so each image has shape (HEIGHT, WIDTH, 1),
# matching the input_shape declared on the first Conv2D layer.
train_x = np.reshape(train_x, (-1, HEIGHT, WIDTH, 1))
test_x = np.reshape(test_x, (-1, HEIGHT, WIDTH, 1))
# split as train and val
# Hold out 10% of the training data for validation; the fixed random_state
# makes the split reproducible.
train_x, val_x, train_y, val_y = train_test_split(
    train_x,
    train_y,
    test_size=0.10,
    random_state=7,
)
# build model
# Two conv -> batch-norm -> max-pool -> dropout stages, then a flattened
# dense head ending in a softmax over num_classes.
model = Sequential([
    # Stage 1: 128 filters, 5x5 kernels.
    Conv2D(
        filters=128,
        kernel_size=(5, 5),
        padding='same',
        activation='relu',
        input_shape=(HEIGHT, WIDTH, 1),
    ),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Dropout(0.5),
    # Stage 2: 64 filters, 3x3 kernels.
    Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.5),
    # Classifier head.
    Flatten(),
    Dense(units=128, activation='relu'),
    Dropout(.5),
    Dense(units=num_classes, activation='softmax'),
])
model.summary()
Model: "sequential_6"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_15 (Conv2D) (None, 28, 28, 128) 3328
_________________________________________________________________
batch_normalization_14 (Batc (None, 28, 28, 128) 512
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 14, 14, 128) 0
_________________________________________________________________
dropout_8 (Dropout) (None, 14, 14, 128) 0
_________________________________________________________________
conv2d_16 (Conv2D) (None, 14, 14, 64) 73792
_________________________________________________________________
batch_normalization_15 (Batc (None, 14, 14, 64) 256
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 7, 7, 64) 0
_________________________________________________________________
dropout_9 (Dropout) (None, 7, 7, 64) 0
_________________________________________________________________
flatten_4 (Flatten) (None, 3136) 0
_________________________________________________________________
dense_8 (Dense) (None, 128) 401536
_________________________________________________________________
dropout_10 (Dropout) (None, 128) 0
_________________________________________________________________
dense_9 (Dense) (None, 47) 6063
=================================================================
Total params: 485,487
Trainable params: 485,103
Non-trainable params: 384
_________________________________________________________________
# Train with Adam on categorical cross-entropy, tracking accuracy, and
# evaluate on the held-out validation split after every epoch.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
history = model.fit(
    train_x,
    train_y,
    validation_data=(val_x, val_y),
    epochs=10,
    batch_size=512,
    verbose=1,
)
Epoch 1/10
199/199 [==============================] - 203s 1s/step - loss: 3.5977 - accuracy: 0.1164 - val_loss: 12.2032 - val_accuracy: 0.0633
Epoch 2/10
199/199 [==============================] - 196s 983ms/step - loss: 1.7477 - accuracy: 0.4644 - val_loss: 5.3876 - val_accuracy: 0.1426
Epoch 3/10
199/199 [==============================] - 193s 971ms/step - loss: 1.1731 - accuracy: 0.6261 - val_loss: 0.7805 - val_accuracy: 0.7526
Epoch 4/10
199/199 [==============================] - 193s 970ms/step - loss: 0.9407 - accuracy: 0.6990 - val_loss: 0.4891 - val_accuracy: 0.8318
Epoch 5/10
199/199 [==============================] - 194s 975ms/step - loss: 0.8342 - accuracy: 0.7313 - val_loss: 0.4519 - val_accuracy: 0.8473
Epoch 6/10
199/199 [==============================] - 199s 1s/step - loss: 0.7557 - accuracy: 0.7542 - val_loss: 0.4189 - val_accuracy: 0.8547
Epoch 7/10
199/199 [==============================] - 200s 1s/step - loss: 0.7100 - accuracy: 0.7690 - val_loss: 0.4106 - val_accuracy: 0.8543
Epoch 8/10
199/199 [==============================] - 213s 1s/step - loss: 0.6710 - accuracy: 0.7792 - val_loss: 0.3994 - val_accuracy: 0.8593
Epoch 9/10
199/199 [==============================] - 195s 981ms/step - loss: 0.6430 - accuracy: 0.7868 - val_loss: 0.4016 - val_accuracy: 0.8592
Epoch 10/10
199/199 [==============================] - 201s 1s/step - loss: 0.6165 - accuracy: 0.7962 - val_loss: 0.3825 - val_accuracy: 0.8616
def plotgraph(epochs, acc, val_acc, metric='Accuracy'):
    """Plot a training curve and its validation counterpart per epoch.

    Args:
        epochs: x-axis values (epoch numbers).
        acc: per-epoch values measured on the training data (blue line).
        val_acc: per-epoch values measured on the validation data (red line).
        metric: name shown in the title and on the y-axis. Defaults to
            'Accuracy', so existing three-argument calls render exactly as
            before.
    """
    plt.plot(epochs, acc, 'b')
    plt.plot(epochs, val_acc, 'r')
    plt.title('%s of Model' % metric)
    plt.ylabel(metric)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Val'], loc='upper left')
    plt.show()


# Per-epoch metrics recorded by model.fit().
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)
# Accuracy curve
plotgraph(epochs, acc, val_acc)
# Loss curve
# Pass the metric name so this figure is not labelled as accuracy.
plotgraph(epochs, loss, val_loss, metric='Loss')
# Softmax output: one row per test image, one probability per class.
y_prob = model.predict(test_x)
# The predicted class is the most probable one. Thresholding at 0.5 is a
# binary-classification idiom; with num_classes outputs it produces a boolean
# matrix whose rows are all False whenever no class exceeds 0.5.
y_pred = np.argmax(y_prob, axis=1)
print(y_pred)
# Loss and accuracy on the test set.
model.evaluate(test_x, test_y)
588/588 [==============================] - 12s 20ms/step - loss: 0.4014 - accuracy: 0.8643
# IPython/Jupyter shell escape (not plain Python syntax): runs pip to install
# the anvil-server package.
!pip install anvil-server