import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
print(tf.__version__)
mnist = keras.datasets.mnist
(train_images_mnist,train_labels_mnist),(test_images_mnist,test_labels_mnist) = mnist.load_data()
# reshape to (N, 28, 28, 1) so the arrays can be fed to the flow method of a Keras ImageDataGenerator
train_images_mnist = np.reshape(train_images_mnist, (train_images_mnist.shape[0], 28, 28, 1))
test_images_mnist = np.reshape(test_images_mnist, (test_images_mnist.shape[0], 28, 28, 1))
az_data_path = '/datasets/handwritingdataset'
AZ_data = pd.read_csv(az_data_path + '/AZ_Handwritten_Data.csv', header=None)
# the first column holds the label (0-25 for A-Z); the remaining 784 columns are the flattened 28 x 28 image pixels
AZ_labels = AZ_data.values[:, 0]
AZ_images = AZ_data.values[:, 1:]
# reshape to (N, 28, 28, 1), matching the MNIST arrays above
AZ_images = np.reshape(AZ_images, (AZ_images.shape[0], 28, 28, 1))
# split the A-Z data into train and test sets, then join them with MNIST
from sklearn.model_selection import train_test_split
# use the same test fraction as MNIST (10000 / 60000 ≈ 0.167)
test_size = float(len(test_labels_mnist)) / len(train_labels_mnist)
print(f'test fraction: {test_size:.3f}')
train_images_AZ, test_images_AZ, train_labels_AZ, test_labels_AZ = train_test_split(AZ_images, AZ_labels, test_size=test_size)
# shift the MNIST labels past the 26 letter classes, so digits map to labels 26-35
train_labels_mnist = train_labels_mnist + max(AZ_labels) + 1
test_labels_mnist = test_labels_mnist + max(AZ_labels) + 1
# concatenate datasets
train_images = np.concatenate((train_images_AZ, train_images_mnist), axis=0)
train_labels = np.concatenate((train_labels_AZ, train_labels_mnist))
test_images = np.concatenate((test_images_AZ, test_images_mnist), axis=0)
test_labels = np.concatenate((test_labels_AZ, test_labels_mnist))
print('Data ready')
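# Quick sanity check (optional, not in the original code): confirm the merged
# shapes and the combined label range (A-Z -> 0-25, shifted digits -> 26-35).
print(train_images.shape, test_images.shape)
print(train_labels.min(), train_labels.max())  # expected: 0 and 35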
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop
model = tf.keras.models.Sequential([
    # input is a 28 x 28 grayscale image (single channel)
    tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(28, 28, 1)),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    # flatten the feature maps to feed into the dense layers
    tf.keras.layers.Flatten(),
    # 512-neuron hidden layer
    tf.keras.layers.Dense(512, activation='relu'),
    # one output neuron per class (26 letters + 10 digits), with softmax probabilities
    tf.keras.layers.Dense(len(np.unique(train_labels)), activation='softmax')
])
model.compile(optimizer=RMSprop(learning_rate=1e-4),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.summary()
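# Optional sanity check (an addition, not part of the original pipeline):
# push a dummy batch through the untrained model to confirm the output shape.
dummy = np.zeros((1, 28, 28, 1), dtype='float32')
print(model.predict(dummy).shape)  # expected: (1, 36) for 26 letters + 10 digits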
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# augmentation for training: mild rotations, shifts, shear and zoom;
# horizontal flips are disabled since letters and digits are not mirror-symmetric
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=False,
    fill_mode='nearest')
test_datagen = ImageDataGenerator(rescale=1./255)
# Flow training images in batches using generator
train_generator = train_datagen.flow(train_images, train_labels, batch_size=50, shuffle=True)
validation_generator = test_datagen.flow(test_images, test_labels, batch_size=50, shuffle=True)
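# Optional sanity check (an addition, not in the original code): pull one
# batch from the generator to verify shapes and the 1/255 rescaling.
batch_images, batch_labels = next(train_generator)
print(batch_images.shape)         # expected: (50, 28, 28, 1)
print(float(batch_images.max()))  # expected: <= 1.0 after rescaling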
history = model.fit(
    train_generator,
    steps_per_epoch=500,
    epochs=100,
    validation_data=validation_generator,
    validation_steps=50,
    verbose=2)
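# Optional (an addition, not in the original code): plot the training curves
# stored in the History object returned by model.fit.
import matplotlib.pyplot as plt
plt.plot(history.history['accuracy'], label='train accuracy')
plt.plot(history.history['val_accuracy'], label='validation accuracy')
plt.xlabel('epoch')
plt.legend()
plt.show()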
# save the trained model (TensorFlow SavedModel format when no extension is given)
model.save('model_v2')
from tensorflow.keras.models import load_model
import tensorflow as tf
import cv2
import matplotlib.pyplot as plt
import imutils
from imutils.contours import sort_contours
# loads the model with the keras load_model function
model_path = 'model_v2'
print("Loading NN model...")
model = load_model(model_path)
print("Done")
# loads the input image
image_path = 'handwriting_example1.png'
image = cv2.imread(image_path)
#from google.colab import files
#image = files.upload()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# crop off the top 120 rows of the example image before processing
cropped = gray[120:, :]
blurred = cv2.GaussianBlur(cropped, (5, 5), 0)
%matplotlib inline
from matplotlib import cm
fig = plt.figure(figsize=(16,4))
ax = plt.subplot(1,4,1)
# cv2 loads images as BGR; convert to RGB so matplotlib shows correct colors
ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
ax.set_axis_off()
ax.set_title('original image');
ax = plt.subplot(1,4,2)
ax.imshow(gray,cmap=cm.binary_r)
ax.set_axis_off()
ax.set_title('grayscale image');
ax = plt.subplot(1,4,3)
ax.imshow(cropped,cmap=cm.binary_r)
ax.set_axis_off()
ax.set_title('cropped image');
ax = plt.subplot(1,4,4)
ax.imshow(blurred,cmap=cm.binary_r)
ax.set_axis_off()
ax.set_title('blurred image');
# perform edge detection, find contours in the edge map, and sort the
# resulting contours from left-to-right
edged = cv2.Canny(blurred, 30, 250) #low_threshold, high_threshold
cnts = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
cnts = sort_contours(cnts, method="left-to-right")[0]
figure = plt.figure(figsize=(7,7))
plt.axis('off');
plt.imshow(edged,cmap=cm.binary_r);
chars = []
# loop over the contours
for c in cnts:
    # compute the bounding box of the contour and isolate the ROI
    (x, y, w, h) = cv2.boundingRect(c)
    roi = cropped[y:y + h, x:x + w]
    # binarize the ROI; Otsu's method picks the threshold automatically
    thresh = cv2.threshold(roi, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
    # resize the largest dimension to the model's input size
    (tH, tW) = thresh.shape
    if tW > tH:
        thresh = imutils.resize(thresh, width=28)
    # otherwise, resize along the height
    else:
        thresh = imutils.resize(thresh, height=28)
    # find how much padding is needed on each side
    (tH, tW) = thresh.shape
    dX = int(max(0, 28 - tW) / 2.0)
    dY = int(max(0, 28 - tH) / 2.0)
    # pad the image and force 28 x 28 dimensions
    padded = cv2.copyMakeBorder(thresh, top=dY, bottom=dY,
                                left=dX, right=dX, borderType=cv2.BORDER_CONSTANT,
                                value=(0, 0, 0))
    padded = cv2.resize(padded, (28, 28))
    # rescale and add a channel axis so the image matches the model input
    padded = padded.astype("float32") / 255.0
    padded = np.expand_dims(padded, axis=-1)
    # store the image together with its bounding box
    chars.append((padded, (x, y, w, h)))
# plot isolated characters
n_cols = 10
# subplot indices must be integers, so cast the row count explicitly
n_rows = int(np.floor(len(chars) / n_cols)) + 1
fig = plt.figure(figsize=(1.5*n_cols, 1.5*n_rows))
for i, char in enumerate(chars):
    ax = plt.subplot(n_rows, n_cols, i+1)
    ax.imshow(char[0][:,:,0], cmap=cm.binary, aspect='auto')
plt.tight_layout()
boxes = [b[1] for b in chars]
chars = np.array([c[0] for c in chars], dtype="float32")
# OCR the characters using our handwriting recognition model
preds = model.predict(chars)
# define the list of label names
labelNames = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
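# Sanity check (optional, not in the original code): 26 letters + 10 digits
# should match the model's 36-way softmax output.
assert len(labelNames) == preds.shape[1]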
# reload a clean color copy of the image, cropped the same way, for drawing
image = cv2.imread(image_path)
cropped = image[120:, :]
for (pred, (x, y, w, h)) in zip(preds, boxes):
    # find the index of the label with the largest probability,
    # then extract the probability and label
    i = np.argmax(pred)
    prob = pred[i]
    label = labelNames[i]
    # draw the prediction and its probability on the image
    label_text = f"{label},{prob * 100:.1f}%"
    cv2.rectangle(cropped, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.putText(cropped, label_text, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
# show the annotated image (convert BGR to RGB for matplotlib)
plt.figure(figsize=(15,10))
plt.imshow(cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB))
plt.axis('off')