Training
!pip install mxnet==1.6.0
import numpy as np
import mxnet as mx
from mxnet import nd
def transform(data, label): # define a function to transform the data
    return (nd.floor(data/128)).astype(np.float32), label.astype(np.float32)
# floor(pixel/128) maps 0-127 to 0 and 128-255 to 1, so a value of 1 means the pixel is on.
# Divide the dataset into two sets: one for training and one for testing
mnist_train = mx.gluon.data.vision.MNIST(train=True, transform=transform)
mnist_test = mx.gluon.data.vision.MNIST(train=False, transform=transform)
import matplotlib.pyplot as plt
%matplotlib inline
image_index=8888 # Pick one to take a look. Any integer <60,000
# 8888 is digit with label 3, as printed below
print(mnist_train[image_index][1]) #image stored in 0; label in 1
plt.imshow(mnist_train[image_index][0].reshape((28, 28)).\
asnumpy(), cmap='Greys') #image pixel: 28 by 28
# Initialize arrays for counts for computing p(y), p(xi|y)
# We initialize all numbers with a count of 1 to avoid
# division by zero, known as Laplace smoothing.
ycount = nd.ones(shape=(10)) #10 possible digits
xcount = nd.ones(shape=(784, 10)) #784 (= 28*28) variables
# Aggregate the count of each label in the training dataset
# and the number of its corresponding pixels being on (value = 1)
for data, label in mnist_train: # loop over the dataset
    x = data.reshape((784,))
    y = int(label)     # get the digit
    ycount[y] += 1     # add 1 to the (digit)th entry
    xcount[:, y] += x  # add the image data to the (digit)th column
# compute the probabilities p(xi|y) (divide the per-pixel counts
# by the total count of each label in the training dataset)
for i in range(10):
    xcount[:, i] = xcount[:, i] / ycount[i]
# Compute the probability p(y)
py = ycount / nd.sum(ycount)
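Before classifying new images, one can sanity-check the counts on the mnist_test split loaded above (it is not used elsewhere in this notebook). The following is a minimal sketch, not part of the original code; it assumes the xcount, py and mnist_test objects defined above and applies the same Bernoulli log-likelihood used in the testing cells below.
# Optional sanity check: score a subset of the held-out test images
logp_on = nd.log(xcount)       # log p(pixel on  | digit)
logp_off = nd.log(1 - xcount)  # log p(pixel off | digit)
logprior = nd.log(py)          # log p(digit)
correct, n_eval = 0, 1000      # evaluate a subset to keep it quick
for k in range(n_eval):
    data, label = mnist_test[k]
    x = data.reshape((784,))
    # log p(y) + sum_i [ x_i log p(x_i=1|y) + (1 - x_i) log p(x_i=0|y) ]
    score = logprior + nd.dot(x, logp_on) + nd.dot(1 - x, logp_off)
    if int(nd.argmax(score, axis=0).asscalar()) == int(label):
        correct += 1
print('accuracy on the first', n_eval, 'test images:', correct / n_eval)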
Testing
from PIL import Image, ImageOps
from numpy import asarray
from matplotlib import pyplot as plt
logxcount = nd.log(xcount) # pre-computations
logxcountneg = nd.log(1-xcount)
logpy = nd.log(py)
fig, figarr = plt.subplots(2, 2, figsize=(15, 3))
# test and show the two hand-written images loaded below
ctr = 0              # column index for the plots
y = []               # true labels
pxm = np.array([])   # maximum posterior probability per image
xi = ()              # predicted digits
# import the hand-written test images
im1 = nd.array(ImageOps.invert(Image.open('image6.jpg').resize((28,28)).convert('L')))
im2 = nd.array(ImageOps.invert(Image.open('image4.jpg').resize((28,28)).convert('L')))
im1 = (nd.floor(im1/160)).astype(np.float32)
im2 = (nd.floor(im2/160)).astype(np.float32)
truelabel = [6, 4]  # ground-truth digits for image6.jpg and image4.jpg
imgdatas = (im1, im2)
for j in range(2):
    data = imgdatas[j]
    label = float(truelabel[j])
    x = data.reshape((784,))
    y.append(int(label))
    # Incorporate the prior probability p(y) since p(y|x) is
    # proportional to p(x|y) p(y)
    logpx = logpy.copy()
    for i in range(10):
        # compute the log probability for this digit
        logpx[i] += nd.dot(logxcount[:, i], x) + nd.dot(logxcountneg[:, i], 1 - x)
    # normalize to prevent overflow or underflow by subtracting
    # the largest value
    logpx -= nd.max(logpx)
    # and compute the softmax using logpx
    px = nd.exp(logpx).asnumpy()  # proportional to p(y|x); the prior is already in logpx
    px /= np.sum(px)
    pxm = np.append(pxm, max(px))      # keep the maximum posterior probability
    xi = np.append(xi, np.argmax(px))  # keep the digit with the maximum posterior
    # bar chart and image of the digit
    figarr[1, ctr].bar(range(10), px)
    figarr[1, ctr].axes.get_yaxis().set_visible(False)
    figarr[0, ctr].imshow(x.reshape((28, 28)).asnumpy(), cmap='gray')
    figarr[0, ctr].axes.get_xaxis().set_visible(False)
    figarr[0, ctr].axes.get_yaxis().set_visible(False)
    ctr += 1
np.set_printoptions(formatter={'float': '{: 0.0f}'.format})
plt.show()
print('True label: ',y)
xi = np.array(xi)
print('Predicted digits:',xi)
print('Correct?',np.equal(y,xi))
np.set_printoptions(formatter={'float': '{: 0.1f}'.format})
print('Maximum probability:',pxm)
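As an aside (not part of the original code), the image loading above could be wrapped in a small helper so further test images can be prepared the same way; the filenames and the threshold of 160 are the same assumptions used above.
def load_digit(path, threshold=160):
    # resize to 28x28, convert to grayscale, invert so the digit is bright,
    # then binarize with the same threshold used above
    img = ImageOps.invert(Image.open(path).resize((28, 28)).convert('L'))
    return (nd.floor(nd.array(img) / threshold)).astype(np.float32)
# usage: imgdatas = (load_digit('image6.jpg'), load_digit('image4.jpg'))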
Testing HW code with Cross Entropy instead of Binary Cross Entropy
from PIL import Image, ImageOps
from numpy import asarray
from matplotlib import pyplot as plt
logxcount = nd.log(xcount) # pre-computations
logxcountneg = nd.log(1-xcount)
logpy = nd.log(py)
fig, figarr = plt.subplots(2, 2, figsize=(15, 3))
# test and show the two hand-written images loaded below
ctr = 0              # column index for the plots
y = []               # true labels
pxm = np.array([])   # maximum posterior probability per image
xi = ()              # predicted digits
# import the hand-written test images
im1 = nd.array(ImageOps.invert(Image.open('image6.jpg').resize((28,28)).convert('L')))
im2 = nd.array(ImageOps.invert(Image.open('image4.jpg').resize((28,28)).convert('L')))
im1 = (nd.floor(im1/160)).astype(np.float32)
im2 = (nd.floor(im2/160)).astype(np.float32)
truelabel = [6, 4]  # ground-truth digits for image6.jpg and image4.jpg
imgdatas = (im1, im2)
for j in range(2):
    data = imgdatas[j]
    label = float(truelabel[j])
    x = data.reshape((784,))
    y.append(int(label))
    # Incorporate the prior probability p(y) since p(y|x) is
    # proportional to p(x|y) p(y)
    logpx = logpy.copy()
    for i in range(10):
        # compute the log probability for this digit,
        # scoring only the pixels that are on (cross-entropy variant)
        logpx[i] += nd.dot(logxcount[:, i], x)
    # normalize to prevent overflow or underflow by subtracting
    # the largest value
    logpx -= nd.max(logpx)
    # and compute the softmax using logpx
    px = nd.exp(logpx).asnumpy()  # proportional to p(y|x); the prior is already in logpx
    px /= np.sum(px)
    pxm = np.append(pxm, max(px))      # keep the maximum posterior probability
    xi = np.append(xi, np.argmax(px))  # keep the digit with the maximum posterior
    # bar chart and image of the digit
    figarr[1, ctr].bar(range(10), px)
    figarr[1, ctr].axes.get_yaxis().set_visible(False)
    figarr[0, ctr].imshow(x.reshape((28, 28)).asnumpy(), cmap='gray')
    figarr[0, ctr].axes.get_xaxis().set_visible(False)
    figarr[0, ctr].axes.get_yaxis().set_visible(False)
    ctr += 1
np.set_printoptions(formatter={'float': '{: 0.0f}'.format})
plt.show()
print('True label: ',y)
xi = np.array(xi)
print('Predicted digits:',xi)
print('Correct?',np.equal(y,xi))
np.set_printoptions(formatter={'float': '{: 0.1f}'.format})
print('Maximum probability:',pxm)
Shifting the likelihood from binary cross entropy (the full Bernoulli model, which scores both the pixels that are on and the pixels that are off) to cross entropy (which scores only the pixels that are on) reduces the accuracy of the model. The binary cross entropy term also penalizes digits whose expected pixels are missing from the image, so it magnifies the discrepancy between candidate digits more than the on-pixels-only score does; a toy example after the results below illustrates this.
Binary Cross Entropy = 2 correct predictions
Cross-Entropy = 2 incorrect predictions
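A toy illustration of this point with made-up pixel probabilities (not values from this notebook): the Bernoulli score separates two digits that the on-pixels-only score leaves tied.
# Toy example (made-up numbers): theta[d, i] = p(pixel i is on | digit d)
import numpy as np
theta = np.array([[0.9, 0.9, 0.1],   # digit A expects only the first two pixels on
                  [0.9, 0.9, 0.9]])  # digit B expects all three pixels on
x = np.array([1.0, 1.0, 0.0])        # observed image: the third pixel is off

# binary cross entropy (Bernoulli) log-likelihood scores on AND off pixels
bernoulli = np.log(theta) @ x + np.log(1 - theta) @ (1 - x)
# cross-entropy-style score uses only the pixels that are on
on_only = np.log(theta) @ x

print('Bernoulli scores:', bernoulli)  # digit A is clearly preferred
print('on-pixels only  :', on_only)    # digits A and B are tied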