Importing the libraries and data
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
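A quick check of what fetch_openml returned (optional; with as_frame=False both fields are plain NumPy arrays):

print(mnist.data.shape)    # (70000, 784): 70000 flattened 28x28 images
print(mnist.target.shape)  # (70000,): labels stored as strings such as '5'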
Preparing and normalizing the data
X, y = mnist["data"], mnist["target"]
train_images, test_images, train_labels, test_labels = X[:60000], X[60000:], y[:60000], y[60000:]  # standard MNIST split: first 60000 train, last 10000 test
X_train = train_images.reshape(60000, 784)  # already flat 784-value vectors with as_frame=False; the reshape just makes the shape explicit
mean = np.mean(X_train)
stddev = np.std(X_train)
X_train = (X_train - mean) / stddev  # standardize to zero mean and unit standard deviation
X_test = test_images.reshape(10000, 784)
X_test = (X_test - mean) / stddev  # reuse the training-set statistics so both sets share one scale
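After this step the training pixels have (approximately) zero mean and unit standard deviation. A quick sanity check (optional sketch):

print(np.mean(X_train), np.std(X_train))  # approximately 0.0 and 1.0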
y_train = np.zeros((60000, 10))
y_test = np.zeros((10000, 10))
for i, label in enumerate(train_labels):  # one-hot encode the training labels
    y_train[i][int(label)] = 1
for i, label in enumerate(test_labels):  # one-hot encode the test labels
    y_test[i][int(label)] = 1
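The loops above build one-hot targets: each row is all zeros except for a 1 at the digit's class index. For reference, a vectorized sketch that produces the same matrices (not used below):

y_train_onehot = np.eye(10)[train_labels.astype(int)]  # row k of the identity matrix is the one-hot vector for class k
y_test_onehot = np.eye(10)[test_labels.astype(int)]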
index_list = list(range(len(X_train)))  # training-example indices, reshuffled each epoch
Creating the neural network
Defining the learning rate and number of epochs
np.random.seed(7)
LEARNING_RATE = 0.01  # step size used by gradient descent when updating the weights of each neuron
EPOCHS = 20  # number of full passes over the training set
Initializing the weights
#We initialize the weights randomly, drawn uniformly from the interval [-0.1, 0.1]
#Never initialize all the weights to 0: every neuron in a layer would then compute the same output
#and receive the same gradient, so gradient descent could never break the symmetry
def layer_weights_init(number_of_neurons, number_of_inputs):
    w_matrix = np.zeros((number_of_neurons, number_of_inputs+1))  # the +1 is for the bias weight
    for i in range(number_of_neurons):
        for j in range(1, number_of_inputs+1):  # column 0 (the bias weight) stays at 0
            w_matrix[i][j] = np.random.uniform(-0.1, 0.1)
    return w_matrix
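For reference, the same initialization without the nested loops (a sketch; the distribution is identical, but the exact random draws differ because the generator is called once per matrix rather than once per weight):

def layer_weights_init_vec(number_of_neurons, number_of_inputs):
    w_matrix = np.random.uniform(-0.1, 0.1, (number_of_neurons, number_of_inputs+1))
    w_matrix[:, 0] = 0.0  # bias weights (column 0) start at 0
    return w_matrix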
W_1 = layer_weights_init(25, 784)   # hidden layer: 25 neurons, 784 inputs
R_1 = np.zeros(25)                  # hidden-layer outputs
Error_1 = np.zeros(25)              # hidden-layer error terms
W_Out = layer_weights_init(10, 25)  # output layer: 10 neurons, 25 inputs
R_Out = np.zeros(10)                # output-layer outputs
Error_Out = np.zeros(10)            # output-layer error terms
def feedforward(x):  # forward propagation: apply the activation functions to the weighted sums of the inputs
    global R_1
    global R_Out
    for i, w in enumerate(W_1):
        z = np.dot(w, x)
        R_1[i] = np.tanh(z)  # tanh activation for the hidden layer
    R_1_array = np.concatenate((np.array([1.0]), R_1))  # prepend the bias input
    for i, w in enumerate(W_Out):
        z = np.dot(w, R_1_array)
        R_Out[i] = 1.0 / (1.0 + np.exp(-z))  # sigmoid activation for the output layer
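The two loops compute R_1 = tanh(W_1 x) for the hidden layer and R_Out = sigmoid(W_Out [1; R_1]) for the output layer. A vectorized sketch of the same pass (hypothetical feedforward_vec, shown for clarity and not used by the training loop below):

def feedforward_vec(x):
    global R_1, R_Out
    R_1 = np.tanh(np.matmul(W_1, x))                    # hidden layer: tanh of the weighted sums
    R_1_array = np.concatenate((np.array([1.0]), R_1))  # prepend the bias input
    R_Out = 1.0 / (1.0 + np.exp(-np.matmul(W_Out, R_1_array)))  # output layer: sigmoid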
def backpropagation(y_real):  # propagate the loss backward to compute each neuron's error term
    global Error_1
    global Error_Out
    for i, y in enumerate(R_Out):
        loss_derivative = -(y_real[i] - y)  # derivative of the squared-error loss
        sigmoid_prime = y * (1.0 - y)  # derivative of the sigmoid: sigmoid * (1 - sigmoid)
        Error_Out[i] = loss_derivative * sigmoid_prime
    for i, y in enumerate(R_1):
        error_weights = W_Out[:, i+1]  # weights from hidden neuron i to every output neuron (column 0 is the bias)
        tanh_derivative = 1.0 - y**2  # derivative of tanh: 1 - tanh**2
        weighted_error = np.dot(error_weights, Error_Out)
        Error_1[i] = weighted_error * tanh_derivative
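In matrix form, Error_Out = (R_Out - y_real) * R_Out * (1 - R_Out) and Error_1 = (W_Out[:, 1:].T @ Error_Out) * (1 - R_1**2); the bias column of W_Out is skipped because the bias input is constant and carries no error back. A vectorized sketch (hypothetical backpropagation_vec, equivalent to the loops above):

def backpropagation_vec(y_real):
    global Error_1, Error_Out
    Error_Out = (R_Out - y_real) * R_Out * (1.0 - R_Out)             # output-layer error terms
    Error_1 = np.matmul(W_Out[:, 1:].T, Error_Out) * (1.0 - R_1**2)  # hidden-layer error terms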
def adjust_weights(x):  # stochastic gradient descent step on both weight matrices
    global W_Out
    global W_1
    for i, error in enumerate(Error_1):
        W_1[i] -= (x * LEARNING_RATE * error)
    R_1_array = np.concatenate((np.array([1.0]), R_1))  # prepend the bias input
    for i, error in enumerate(Error_Out):
        W_Out[i] -= (R_1_array * LEARNING_RATE * error)
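Each loop subtracts LEARNING_RATE times the outer product of a layer's error terms and its inputs, one row at a time. A vectorized sketch (hypothetical adjust_weights_vec, the same update in two lines per layer):

def adjust_weights_vec(x):
    global W_1, W_Out
    W_1 -= LEARNING_RATE * np.outer(Error_1, x)               # shape (25, 785), matching W_1
    R_1_array = np.concatenate((np.array([1.0]), R_1))        # prepend the bias input
    W_Out -= LEARNING_RATE * np.outer(Error_Out, R_1_array)   # shape (10, 26), matching W_Out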
chart_x = []
chart_y_train = []
chart_y_test = []
def show_learning(epoch_no, train_acc, test_acc):
    global chart_x
    global chart_y_train
    global chart_y_test
    print('epoch no:', epoch_no, ', train_acc: ',
          '%6.4f' % train_acc,
          ', test_acc: ', '%6.4f' % test_acc)
    chart_x.append(epoch_no + 1)
    chart_y_train.append(1.0 - train_acc)  # store error = 1 - accuracy
    chart_y_test.append(1.0 - test_acc)
def plot_learning():
    plt.plot(chart_x, chart_y_train, 'b-', label='training error')
    plt.plot(chart_x, chart_y_test, 'g-', label='test error')
    plt.axis([0, len(chart_x), 0.0, 1.0])
    plt.xlabel('training epochs')
    plt.ylabel('error')
    plt.legend()
    plt.show()
# Network training loop.
for i in range(EPOCHS):  # Train EPOCHS iterations
    np.random.shuffle(index_list)  # Randomize order
    correct_training_results = 0
    for j in index_list:  # Train on all examples
        x = np.concatenate((np.array([1.0]), X_train[j]))  # prepend the bias input
        feedforward(x)
        if R_Out.argmax() == y_train[j].argmax():
            correct_training_results += 1
        backpropagation(y_train[j])
        adjust_weights(x)
    correct_test_results = 0
    for j in range(len(X_test)):  # Evaluate network
        x = np.concatenate((np.array([1.0]), X_test[j]))
        feedforward(x)
        if R_Out.argmax() == y_test[j].argmax():
            correct_test_results += 1
    show_learning(i, correct_training_results/len(X_train),
                  correct_test_results/len(X_test))
plot_learning()
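Once training finishes, the same feedforward pass can classify individual images. A minimal sketch, assuming the trained globals above are still in scope:

x = np.concatenate((np.array([1.0]), X_test[0]))  # prepend the bias input to one test image
feedforward(x)
print('predicted digit:', R_Out.argmax(), '| true digit:', y_test[0].argmax())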