ASL project 

by Yuliia Nikolaenko, Dec 1, 2020
  1. Part 1: ASL Classification
    1. 1.1 ASL Dataset
    2. ResNet
      1. 1. Identity Block
      2. 2. Convolutional Block
      3. 3. Building our first ResNet model (50 layers)
      4. Visualization of the model
    3. 1.2 Neural Network for ASL Classification
      1. Evaluate accuracy on the test dataset
      2. Find the best hyperparameters for optimizer and learning rate
    4. 1.3 Convolutional Neural Network (CNN) for ASL Classification
      1. Train and test the CNN model
      2. Make predictions with the CNN model
    5. 1.4 Training the model 2.0
      1. Defining function for confusion matrix plot

Part 1: ASL Classification

We will build and train a convolutional neural network (CNN) for classification of ASL signs.

# Import TensorFlow 2.x
#%tensorflow_version 2.x  # magic command, only needed in Google Colab
import tensorflow as tf

!pip install mitdeeplearning
import mitdeeplearning as mdl

import matplotlib.pyplot as plt
import numpy as np
import random
from tqdm import tqdm
import cv2
import os
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
import time
import seaborn as sns
import pandas as pd
from sklearn.metrics import confusion_matrix

from keras import layers
from keras.layers import (Input, Add, Dense, Activation, ZeroPadding2D,
                          BatchNormalization, Flatten, Conv2D,
                          AveragePooling2D, MaxPooling2D)
from keras.models import Model, load_model
from keras.initializers import glorot_uniform
from keras.utils import plot_model
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
import keras.backend as K

1.1 ASL Dataset

We will load the dataset and display a sample image for each sign in the training set:

train_dir = "data/asl_alphabet_train/asl_alphabet_train/"
test_dir = "data/asl_alphabet_test/asl_alphabet_test/"
IMG_SIZE = 64

labels_map = {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6, 'H': 7,
              'I': 8, 'J': 9, 'K': 10, 'L': 11, 'M': 12, 'N': 13, 'O': 14,
              'P': 15, 'Q': 16, 'R': 17, 'S': 18, 'T': 19, 'U': 20, 'V': 21,
              'W': 22, 'X': 23, 'Y': 24, 'Z': 25, 'del': 26, 'nothing': 27,
              'space': 28}
classes = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
           'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
           'nothing', 'space', 'del']

plt.figure(figsize=(11, 11))
for i in range(0, 29):
    plt.subplot(7, 7, i + 1)
    plt.xticks([])
    plt.yticks([])
    path = train_dir + "/{0}/{0}1.jpg".format(classes[i])
    img = plt.imread(path)
    plt.imshow(img)
    plt.xlabel(classes[i])
#dict_characters = labels_map
#df = pd.DataFrame()
#df["labels"] = y_train
#lab = df['labels']
#dist = lab.value_counts()
#plt.figure(figsize=(12, 8))
#sns.countplot(lab)
#print(dict_characters)
def create_train_data():
    x_train = []
    y_train = []
    for folder_name in os.listdir(train_dir):
        label = labels_map[folder_name]
        for image_filename in tqdm(os.listdir(train_dir + folder_name)):
            path = os.path.join(train_dir, folder_name, image_filename)
            img = cv2.resize(cv2.imread(path, cv2.IMREAD_COLOR), (IMG_SIZE, IMG_SIZE))
            x_train.append(np.array(img))
            y_train.append(np.array(label))
    print("Done creating train data")
    return x_train, y_train
train_dir
files = [f for f in tqdm(os.listdir(train_dir + 'A'))]
len(files)
files[0]
'''def create_train_data():
    x_train = []
    y_train = []
    for folder_name in os.listdir(train_dir):
        label = labels_map[folder_name]
        files = [f for f in tqdm(os.listdir(train_dir + folder_name))]
        for i in range(0, 15):
            #for image_filename in tqdm(os.listdir(train_dir + folder_name)):
            path = os.path.join(train_dir, files[i])
            img = cv2.resize(cv2.imread(path, cv2.IMREAD_COLOR), (IMG_SIZE, IMG_SIZE))
            x_train.append(np.array(img))
            y_train.append(np.array(label))
    print("Done creating train data")
    return x_train, y_train
'''
def create_test_data():
    x_test = []
    y_test = []
    for folder_name in os.listdir(test_dir):
        label = labels_map[folder_name]
        for image_filename in tqdm(os.listdir(test_dir + folder_name)):
            path = os.path.join(test_dir, folder_name, image_filename)
            img = cv2.resize(cv2.imread(path, cv2.IMREAD_COLOR), (IMG_SIZE, IMG_SIZE))
            x_test.append(np.array(img))
            y_test.append(np.array(label))
    print("Done creating test data")
    return x_test, y_test
x_train, y_train = create_train_data()
x_test, y_test = create_test_data()

# convert the test lists to NumPy arrays so Keras can consume them directly
x_test, y_test = np.array(x_test), np.array(y_test)
#num_features = 2500 #num_classes = 29 #x_train, x_test = np.array(x_train, np.float32), np.array(x_test, np.float32) #x_train, x_test = x_train.reshape([-1, num_features]), x_test.reshape([-1, num_features]) #x_train, x_test = x_train / 255., x_test / 255.
# keep a 1,000-image subset for faster experimentation, as NumPy arrays
x_train = np.array(x_train[0:1000])
y_train = np.array(y_train[0:1000])
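Since the images were collected folder by folder, the first 1,000 entries may all come from the first letter folder or two. A quick check of the subset's label distribution (a minimal sketch using the already-imported pandas, not part of the original notebook):

# count how many images of each class ended up in the subset
print(pd.Series(y_train).value_counts())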
y_train[0]
def convert_to_one_hot(Y, C):
    Y = np.eye(C)[Y.reshape(-1)].T
    return Y
y_train_hot = convert_to_one_hot(np.array(y_train), 29).T
y_test_hot = convert_to_one_hot(np.array(y_test), 29).T
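As a quick illustration of what convert_to_one_hot produces (a toy sketch, not part of the original notebook): each integer label becomes a row with a single 1 at that label's index.

# one-hot encode the toy labels 0, 2, 1 over 3 classes
demo = convert_to_one_hot(np.array([0, 2, 1]), 3).T
print(demo)
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]]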

Our training set is made up of 64x64 color images of ASL signs (the images are loaded with cv2.IMREAD_COLOR and resized to IMG_SIZE = 64).
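A quick shape sanity check (a minimal sketch, assuming the cells above have been run):

print(x_train.shape)      # expected: (1000, 64, 64, 3)
print(y_train_hot.shape)  # expected: (1000, 29)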

'''x_train = (np.expand_dims(x_train, axis=-1)/255.).astype(np.float32)
x_test = (np.expand_dims(x_test, axis=-1)/255.).astype(np.float32)
y_train = np.array(y_train, dtype=np.int64)
y_test = np.array(y_test, dtype=np.int64)'''

ResNet

1. Identity Block

We'll implement an identity block, in which the skip connection "skips over" 3 hidden layers.

Arguments:

  • X - input tensor of shape (m, n_H_prev, n_W_prev, n_C_prev)
  • f - integer, specifying the shape of the middle CONV's window for the main path
  • filters - python list of integers, defining the number of filters in the CONV layers of the main path
  • stage - integer, used to name the layers, depending on their position in the network
  • block - string/character, used to name the layers, depending on their position in the network

Returns: X - output of the identity block, tensor of shape (n_H, n_W, n_C)

def identity_block(X, f, filters, stage, block):
    # defining name basis
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    # Retrieve Filters
    F1, F2, F3 = filters

    # Save the input value. We'll need this later to add back to the main path.
    X_shortcut = X

    # First component of main path
    X = Conv2D(filters=F1, kernel_size=(1, 1), strides=(1, 1), padding='valid',
               name=conv_name_base + '2a', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2a')(X)
    X = Activation('relu')(X)

    # Second component of main path
    X = Conv2D(filters=F2, kernel_size=(f, f), strides=(1, 1), padding='same',
               name=conv_name_base + '2b', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2b')(X)
    X = Activation('relu')(X)

    # Third component of main path
    X = Conv2D(filters=F3, kernel_size=(1, 1), strides=(1, 1), padding='valid',
               name=conv_name_base + '2c', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2c')(X)

    # Final step: Add shortcut value to main path, and pass it through a RELU activation
    X = Add()([X, X_shortcut])
    X = Activation('relu')(X)

    return X

Sanity check that the identity block works:

# use tf.compat.v1 since these graph-mode functions are deprecated in TF 2.x
tf.compat.v1.reset_default_graph()

with tf.compat.v1.Session() as test:
    A_prev = tf.compat.v1.placeholder("float", [3, 4, 4, 6])
    X = np.random.randn(3, 4, 4, 6)
    A = identity_block(A_prev, f=2, filters=[2, 4, 6], stage=1, block='a')
    test.run(tf.compat.v1.global_variables_initializer())
    out = test.run([A], feed_dict={A_prev: X, K.learning_phase(): 0})
    print("out = ", out[0][1][1][0])

2. Convolutional Block

Arguments:

  • X - input tensor of shape (m, n_H_prev, n_W_prev, n_C_prev)
  • f - integer, specifying the shape of the middle CONV's window for the main path
  • filters - python list of integers, defining the number of filters in the CONV layers of the main path
  • stage - integer, used to name the layers, depending on their position in the network
  • block - string/character, used to name the layers, depending on their position in the network
  • s - Integer, specifying the stride to be used

Returns: X - output of the convolutional block, tensor of shape (n_H, n_W, n_C)

def convolutional_block(X, f, filters, stage, block, s=2):
    # defining name basis
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    # Retrieve Filters
    F1, F2, F3 = filters

    # Save the input value
    X_shortcut = X

    ##### MAIN PATH #####
    # First component of main path
    X = Conv2D(F1, (1, 1), strides=(s, s), name=conv_name_base + '2a',
               kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2a')(X)
    X = Activation('relu')(X)

    # Second component of main path
    X = Conv2D(filters=F2, kernel_size=(f, f), strides=(1, 1), padding='same',
               name=conv_name_base + '2b', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2b')(X)
    X = Activation('relu')(X)

    # Third component of main path
    X = Conv2D(filters=F3, kernel_size=(1, 1), strides=(1, 1), padding='valid',
               name=conv_name_base + '2c', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2c')(X)

    ##### SHORTCUT PATH #####
    X_shortcut = Conv2D(F3, (1, 1), strides=(s, s), name=conv_name_base + '1',
                        kernel_initializer=glorot_uniform(seed=0))(X_shortcut)
    X_shortcut = BatchNormalization(axis=3, name=bn_name_base + '1')(X_shortcut)

    # Final step: Add shortcut value to main path, and pass it through a RELU activation
    X = Add()([X, X_shortcut])
    X = Activation('relu')(X)

    return X

Again, a sanity check that the convolutional block works:

tf.compat.v1.reset_default_graph()

with tf.compat.v1.Session() as test:
    A_prev = tf.compat.v1.placeholder("float", [3, 4, 4, 6])
    X = np.random.randn(3, 4, 4, 6)
    A = convolutional_block(A_prev, f=2, filters=[2, 4, 6], stage=1, block='a')
    test.run(tf.compat.v1.global_variables_initializer())
    out = test.run([A], feed_dict={A_prev: X, K.learning_phase(): 0})
    print("out = ", out[0][1][1][0])

3. Building our first ResNet model (50 layers)

Arguments:

  • input_shape - shape of the images of the dataset
  • classes - integer, number of classes

Returns: model - a Model() instance in Keras

def ResNet50(input_shape=(64, 64, 3), classes=29):
    # Define the input as a tensor with shape input_shape
    X_input = Input(input_shape)

    # Zero-Padding
    X = ZeroPadding2D((3, 3))(X_input)

    # Stage 1
    X = Conv2D(64, (7, 7), strides=(2, 2), name='conv1',
               kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name='bn_conv1')(X)
    X = Activation('relu')(X)
    X = MaxPooling2D((3, 3), strides=(2, 2))(X)

    # Stage 2
    X = convolutional_block(X, f=3, filters=[64, 64, 256], stage=2, block='a', s=1)
    X = identity_block(X, 3, [64, 64, 256], stage=2, block='b')
    X = identity_block(X, 3, [64, 64, 256], stage=2, block='c')

    # Stage 3
    X = convolutional_block(X, f=3, filters=[128, 128, 512], stage=3, block='a', s=2)
    X = identity_block(X, 3, [128, 128, 512], stage=3, block='b')
    X = identity_block(X, 3, [128, 128, 512], stage=3, block='c')
    X = identity_block(X, 3, [128, 128, 512], stage=3, block='d')

    # Stage 4
    X = convolutional_block(X, f=3, filters=[256, 256, 1024], stage=4, block='a', s=2)
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='b')
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='c')
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='d')
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='e')
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='f')

    # Stage 5
    X = convolutional_block(X, f=3, filters=[512, 512, 2048], stage=5, block='a', s=2)
    X = identity_block(X, 3, [512, 512, 2048], stage=5, block='b')
    X = identity_block(X, 3, [512, 512, 2048], stage=5, block='c')

    # AVGPOOL
    X = AveragePooling2D((2, 2), name='avg_pool')(X)

    # output layer
    X = Flatten()(X)
    X = Dense(classes, activation='softmax', name='fc' + str(classes),
              kernel_initializer=glorot_uniform(seed=0))(X)

    # Create model
    model = Model(inputs=X_input, outputs=X, name='ResNet50')

    return model
ROWS = 64
COLS = 64
CHANNELS = 3
CLASSES = 29
# build the model's graph
model = ResNet50(input_shape=(ROWS, COLS, CHANNELS), classes=CLASSES)
# Now we need to configure the learning process by compiling the model.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# The model is now ready to be trained. Run the following cell to train your
# model for 100 epochs with a batch size of 64:
model.fit(x_train, y_train_hot, epochs=100, batch_size=64)
# evaluate on the held-out test set with one-hot labels
preds = model.evaluate(x_test, y_test_hot)
print("Loss = " + str(preds[0]))
print("Test Accuracy = " + str(preds[1]))
model.summary()

Visualization of the model

# note: plot_model requires pydot and Graphviz to be installed
plot_model(model, to_file='model.png')
SVG(model_to_dot(model).create(prog='dot', format='svg'))

We will visualize one of the training images:

%matplotlib inline
import matplotlib.pyplot as plt

def display_image(num):
    label = y_train[num]
    plt.title('Label: %d' % (label))
    # the images are 64x64x3 color arrays, so they can be shown directly
    plt.imshow(x_train[num])
    plt.show()

# valid indices are 0-999, since x_train was truncated to 1,000 images
display_image(999)

1.2 Neural Network for ASL Classification

We'll first build a simple neural network consisting of two fully connected layers and apply it to the ASL sign classification task. Flattening each 64x64x3 image yields 12,288 input features, and our network will ultimately output a probability distribution over the 29 classes (0-28).

def build_fc_model():
    fc_model = tf.keras.Sequential([
        # First define a Flatten layer
        tf.keras.layers.Flatten(),
        # '''TODO: Define the activation function for the first fully connected (Dense) layer.'''
        tf.keras.layers.Dense(128, activation='relu'),
        # '''TODO: Define the second Dense layer to output the classification probabilities'''
        tf.keras.layers.Dense(29, activation='softmax')
    ])
    return fc_model

model_sgd = build_fc_model()
'''TODO: Experiment with different optimizers and learning rates. How do these affect
the accuracy of the trained model? Which optimizers and/or learning rates yield
the best performance?'''
model_sgd.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=1e-1),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
# Define the batch size and the number of epochs to use during training
BATCH_SIZE = 64
EPOCHS = 5
model_sgd.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS)
print(model_sgd.summary())

Evaluate accuracy on the test dataset

'''TODO: Use the evaluate method to test the model!'''
test_loss, test_acc = model_sgd.evaluate(x=x_test, y=y_test)
train_loss, train_acc = model_sgd.evaluate(x=x_train, y=y_train)

print('Test accuracy:', test_acc)
print('Train accuracy:', train_acc)

Find the best hyperparameters for optimizer and learning rate

def build_classifier(optimizer='adam', learning_rate=1e-1):
    classifier = build_fc_model()
    if optimizer == 'SGD':
        optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    if optimizer == 'RMSprop':
        optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)
    if optimizer == 'Adagrad':
        optimizer = tf.keras.optimizers.Adagrad(learning_rate=learning_rate)
    if optimizer == 'Adadelta':
        optimizer = tf.keras.optimizers.Adadelta(learning_rate=learning_rate)
    if optimizer == 'Adam':
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    if optimizer == 'Adamax':
        optimizer = tf.keras.optimizers.Adamax(learning_rate=learning_rate)
    if optimizer == 'Nadam':
        optimizer = tf.keras.optimizers.Nadam(learning_rate=learning_rate)
    classifier.compile(optimizer=optimizer,
                       loss='sparse_categorical_crossentropy',
                       metrics=['accuracy'])
    return classifier

optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
learning_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
param_grid = dict(optimizer=optimizer, learning_rate=learning_rate)

# create model
my_wrapper_model = KerasClassifier(build_fn=build_classifier, epochs=EPOCHS,
                                   batch_size=BATCH_SIZE, verbose=0)
grid = GridSearchCV(estimator=my_wrapper_model, param_grid=param_grid,
                    n_jobs=-1, cv=3, scoring='accuracy')

start_time = time.time()
grid_result = grid.fit(x_train, y_train)
print(f'--- {(time.time() - start_time)/60:.3f} minutes ---')
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
#for mean, stdev, param in zip(means, stds, params):
#    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.022437 using {'learning_rate': 0.001, 'optimizer': 'Nadam'}

Note that this best cross-validation score is below the chance level of 1/29 (about 3.4%), which suggests the fully connected model is barely learning from the raw, unnormalized pixels.

model_nadam = build_fc_model()
model_nadam.compile(optimizer=tf.keras.optimizers.Nadam(learning_rate=0.001),
                    loss='sparse_categorical_crossentropy',
                    metrics=['accuracy'])
# train on the training set with the best-found hyperparameters
model_nadam.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS)

'''TODO: Use the evaluate method to test the model!'''
test_loss, test_acc = model_nadam.evaluate(x=x_test, y=y_test)
print('Test accuracy:', test_acc)

1.3 Convolutional Neural Network (CNN) for ASL Classification

def build_cnn_model():
    cnn_model = tf.keras.Sequential([
        # First convolutional layer: 24 filters with 3x3 kernels;
        # the input images are 64x64 color (3 channels)
        tf.keras.layers.Conv2D(24, 3, input_shape=(IMG_SIZE, IMG_SIZE, 3), activation=tf.nn.relu),
        # First max pooling layer
        tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=2),
        # Second convolutional layer
        tf.keras.layers.Conv2D(36, 3, activation=tf.nn.relu),
        # Second max pooling layer
        tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation=tf.nn.relu),
        # Last Dense layer outputs the classification probabilities,
        # hence the softmax activation
        tf.keras.layers.Dense(29, activation='softmax')
    ])
    return cnn_model

cnn_model_nadam = build_cnn_model()
# Initialize the model by passing some data through
cnn_model_nadam.predict(x_train[[0]])
# Print the summary of the layers in the model.
print(cnn_model_nadam.summary())

Train and test the CNN model

Now, as before, we can define the loss function, optimizer, and metrics through the compile method. Compile the CNN model with an optimizer and learning rate of your choice:

'''TODO: Define the compile operation with your optimizer and learning rate of choice'''
cnn_model_nadam.compile(optimizer=tf.keras.optimizers.Nadam(learning_rate=0.001),
                        loss='sparse_categorical_crossentropy',
                        metrics=['accuracy'])

As was the case with the fully connected model, we can train our CNN using the fit method via the Keras API.

'''TODO: Use model.fit to train the CNN model, with the same batch_size and number of epochs previously used.'''
history = cnn_model_nadam.fit(x_train, y_train,
                              batch_size=BATCH_SIZE,
                              epochs=EPOCHS,
                              verbose=1,
                              validation_data=(x_test, y_test))

Visualizing the training performance

plt.figure(figsize=(12, 8))

plt.subplot(2, 2, 1)
plt.plot(history.history['loss'], label='Loss')
plt.plot(history.history['val_loss'], label='val_Loss')
plt.legend()
plt.grid()
plt.title('Loss evolution')

plt.subplot(2, 2, 2)
plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label='val_accuracy')
plt.legend()
plt.grid()
plt.title('Accuracy evolution')
# let's try another optimizer
cnn_model_sgd = build_cnn_model()
cnn_model_sgd.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.1),
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
cnn_model_sgd.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS)
'''TODO: Use the evaluate method to test the model!'''
test_loss, test_acc = cnn_model_nadam.evaluate(x=x_test, y=y_test)
print('Test accuracy Nadam lr=0.001:', test_acc)

test_loss, test_acc = cnn_model_sgd.evaluate(x=x_test, y=y_test)
print('Test accuracy SGD lr=0.1:', test_acc)

Make predictions with the CNN model

predictions = cnn_model_nadam.predict(x_test)
L = 5
W = 5
fig, axes = plt.subplots(L, W, figsize=(12, 12))
axes = axes.ravel()

for i in np.arange(0, L * W):
    axes[i].imshow(x_test[i])
    axes[i].set_title(f'Prediction Class = {np.argmax(predictions[i])}\n True Class = {y_test[i]}')
    axes[i].axis('off')

plt.subplots_adjust(wspace=0.5)
print(predictions[0])
'''TODO: identify the class with the highest confidence prediction for the first
image in the test dataset.'''
prediction = np.argmax(predictions[0])
print(prediction)
predictions_int = []
for y in range(0, len(predictions)):
    predictions_int.append(np.argmax(predictions[y]))
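To read these integer predictions as sign names, one option is to invert the labels_map dictionary defined earlier (a hypothetical helper, not part of the original notebook):

# invert labels_map so we can go from class index back to sign name
inv_labels_map = {v: k for k, v in labels_map.items()}
print([inv_labels_map[p] for p in predictions_int[:10]])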
print("Label of this digit is:", y_test[0]) plt.imshow(x_test[0,:,:,0], cmap=plt.cm.binary)
import mitdeeplearning as mdl_msk

image_index = 1
plt.subplot(1, 2, 1)
mdl_msk.lab2.plot_image_prediction(image_index, predictions, y_test, x_test)
#plt.subplot(1, 2, 2)
#mdl_msk.lab2.plot_value_prediction(image_index, predictions, y_test)

https://github.com/aamini/introtodeeplearning/blob/master/lab2/Part1_MNIST.ipynb

# Plots the first X test images, their predicted label, and the true label
# Color correct predictions in blue, incorrect predictions in red
num_rows = 5
num_cols = 4
num_images = num_rows * num_cols
plt.figure(figsize=(2 * 2 * num_cols, 2 * num_rows))
offset = 0
for i in range(num_images):
    plt.subplot(num_rows, 2 * num_cols, 2 * i + 1)
    mdl_msk.lab2.plot_image_prediction(i + offset, predictions, y_test, x_test)
    #plt.subplot(num_rows, 2*num_cols, 2*i+2)
    #mdl_msk.lab2.plot_value_prediction(i+offset, predictions, y_test)

1.4 Training the model 2.0

# Rebuild the CNN model
cnn_model = build_cnn_model()

batch_size = 12
loss_history = mdl_msk.util.LossHistory(smoothing_factor=0.95)  # to record the evolution of the loss
plotter = mdl_msk.util.PeriodicPlotter(sec=2, xlabel='Iterations', ylabel='Loss', scale='semilogy')
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)  # define our optimizer

if hasattr(tqdm, '_instances'):
    tqdm._instances.clear()  # clear if it exists

only_once = True
for idx in tqdm(range(0, x_train.shape[0], batch_size)):
    # First grab a batch of training data and convert the input images to tensors
    (images, labels) = (x_train[idx:idx+batch_size], y_train[idx:idx+batch_size])
    images = tf.convert_to_tensor(images, dtype=tf.float32)

    # GradientTape to record differentiation operations
    with tf.GradientTape() as tape:
        # feed the images into the model and obtain the predictions
        logits = cnn_model(images)
        if only_once:
            print('type of logits: ', type(logits))
            only_once = False
        # compute the sparse categorical cross entropy loss
        loss_value = tf.keras.backend.sparse_categorical_crossentropy(labels, logits)

    loss_history.append(loss_value.numpy().mean())  # append the loss to the loss_history record
    plotter.plot(loss_history.get())

    # Backpropagation: use the tape to compute the gradient against all
    # parameters in the CNN model (via cnn_model.trainable_variables)
    grads = tape.gradient(loss_value, cnn_model.trainable_variables)
    optimizer.apply_gradients(zip(grads, cnn_model.trainable_variables))
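Once the loop finishes, a quick way to score the hand-trained model on the test set (a minimal sketch using the arrays defined earlier; not part of the original notebook):

# run the hand-trained model on the test images and compare argmax
# predictions against the true labels
test_probs = cnn_model(tf.convert_to_tensor(x_test, dtype=tf.float32)).numpy()
test_preds = np.argmax(test_probs, axis=1)
print('Hand-trained CNN test accuracy:', np.mean(test_preds == y_test))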

Defining function for confusion matrix plot

def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    if not title: