Importing the libraries and data
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
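A quick check of what fetch_openml returned (optional; with as_frame=False both fields are plain NumPy arrays):

print(mnist.data.shape)    # (70000, 784): 70000 flattened 28x28 images
print(mnist.target.shape)  # (70000,): labels stored as strings such as '5'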
Preparing and normalizing the data
X, y = mnist["data"], mnist["target"]
train_images, test_images, train_labels, test_labels = X[:60000], X[60000:], y[:60000], y[60000:]  # standard MNIST split: first 60000 train, last 10000 test
X_train = train_images.reshape(60000, 784)  # already flat 784-value vectors with as_frame=False; the reshape just makes the shape explicit
mean = np.mean(X_train)
stddev = np.std(X_train)
X_train = (X_train - mean) / stddev  # standardize to zero mean and unit standard deviation
X_test = test_images.reshape(10000, 784)
X_test = (X_test - mean) / stddev  # reuse the training-set statistics so both sets share one scale
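After this step the training pixels have (approximately) zero mean and unit standard deviation. A quick sanity check (optional sketch):

print(np.mean(X_train), np.std(X_train))  # approximately 0.0 and 1.0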
y_train = np.zeros((60000, 10))
y_test = np.zeros((10000, 10))
for i, label in enumerate(train_labels):  # one-hot encode the training labels
    y_train[i][int(label)] = 1
for i, label in enumerate(test_labels):  # one-hot encode the test labels
    y_test[i][int(label)] = 1
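The loops above build one-hot targets: each row is all zeros except for a 1 at the digit's class index. For reference, a vectorized sketch that produces the same matrices (not used below):

y_train_onehot = np.eye(10)[train_labels.astype(int)]  # row k of the identity matrix is the one-hot vector for class k
y_test_onehot = np.eye(10)[test_labels.astype(int)]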
index_list = list(range(len(X_train)))  # training-example indices, reshuffled each epoch
Creating the neural network
Defining the learning rate and number of epochs
np.random.seed(7)
LEARNING_RATE = 0.01  # step size used by gradient descent when updating the weights of each neuron
EPOCHS = 20  # number of full passes over the training set
Initializing the weights
#We initialize the weights randomly, drawn uniformly from the interval [-0.1, 0.1]
#Never initialize all the weights to 0: every neuron in a layer would then compute the same output
#and receive the same gradient, so gradient descent could never break the symmetry
def layer_weights_init(number_of_neurons, number_of_inputs):
    w_matrix = np.zeros((number_of_neurons, number_of_inputs+1))  # the +1 is for the bias weight
    for i in range(number_of_neurons):
        for j in range(1, number_of_inputs+1):  # column 0 (the bias weight) stays at 0
            w_matrix[i][j] = np.random.uniform(-0.1, 0.1)
    return w_matrix
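For reference, the same initialization without the nested loops (a sketch; the distribution is identical, but the exact random draws differ because the generator is called once per matrix rather than once per weight):

def layer_weights_init_vec(number_of_neurons, number_of_inputs):
    w_matrix = np.random.uniform(-0.1, 0.1, (number_of_neurons, number_of_inputs+1))
    w_matrix[:, 0] = 0.0  # bias weights (column 0) start at 0
    return w_matrix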
W_1 = layer_weights_init(25, 784)   # hidden layer: 25 neurons, 784 inputs
R_1 = np.zeros(25)                  # hidden-layer outputs
Error_1 = np.zeros(25)              # hidden-layer error terms
W_Out = layer_weights_init(10, 25)  # output layer: 10 neurons, 25 inputs
R_Out = np.zeros(10)                # output-layer outputs
Error_Out = np.zeros(10)            # output-layer error terms
def feedforward(x):  # forward propagation: apply the activation functions to the weighted sums of the inputs
    global R_1
    global R_Out
    for i, w in enumerate(W_1):
        z = np.dot(w, x)
        R_1[i] = np.tanh(z)  # tanh activation for the hidden layer
    R_1_array = np.concatenate((np.array([1.0]), R_1))  # prepend the bias input
    for i, w in enumerate(W_Out):
        z = np.dot(w, R_1_array)
        R_Out[i] = 1.0 / (1.0 + np.exp(-z))  # sigmoid activation for the output layer
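The two loops compute R_1 = tanh(W_1 x) for the hidden layer and R_Out = sigmoid(W_Out [1; R_1]) for the output layer. A vectorized sketch of the same pass (hypothetical feedforward_vec, shown for clarity and not used by the training loop below):

def feedforward_vec(x):
    global R_1, R_Out
    R_1 = np.tanh(np.matmul(W_1, x))                    # hidden layer: tanh of the weighted sums
    R_1_array = np.concatenate((np.array([1.0]), R_1))  # prepend the bias input
    R_Out = 1.0 / (1.0 + np.exp(-np.matmul(W_Out, R_1_array)))  # output layer: sigmoid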
def backpropagation(y_real):  # propagate the loss backward to compute each neuron's error term
    global Error_1
    global Error_Out
    for i, y in enumerate(R_Out):
        loss_derivative = -(y_real[i] - y)  # derivative of the squared-error loss
        sigmoid_prime = y * (1.0 - y)  # derivative of the sigmoid: sigmoid * (1 - sigmoid)
        Error_Out[i] = loss_derivative * sigmoid_prime
    for i, y in enumerate(R_1):
        error_weights = W_Out[:, i+1]  # weights from hidden neuron i to every output neuron (column 0 is the bias)
        tanh_derivative = 1.0 - y**2  # derivative of tanh: 1 - tanh**2
        weighted_error = np.dot(error_weights, Error_Out)
        Error_1[i] = weighted_error * tanh_derivative
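In matrix form, Error_Out = (R_Out - y_real) * R_Out * (1 - R_Out) and Error_1 = (W_Out[:, 1:].T @ Error_Out) * (1 - R_1**2); the bias column of W_Out is skipped because the bias input is constant and carries no error back. A vectorized sketch (hypothetical backpropagation_vec, equivalent to the loops above):

def backpropagation_vec(y_real):
    global Error_1, Error_Out
    Error_Out = (R_Out - y_real) * R_Out * (1.0 - R_Out)             # output-layer error terms
    Error_1 = np.matmul(W_Out[:, 1:].T, Error_Out) * (1.0 - R_1**2)  # hidden-layer error terms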
def adjust_weights(x):  # stochastic gradient descent step on both weight matrices
    global W_Out
    global W_1
    for i, error in enumerate(Error_1):
        W_1[i] -= (x * LEARNING_RATE * error)
    R_1_array = np.concatenate((np.array([1.0]), R_1))  # prepend the bias input
    for i, error in enumerate(Error_Out):
        W_Out[i] -= (R_1_array * LEARNING_RATE * error)
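Each loop subtracts LEARNING_RATE times the outer product of a layer's error terms and its inputs, one row at a time. A vectorized sketch (hypothetical adjust_weights_vec, the same update in two lines per layer):

def adjust_weights_vec(x):
    global W_1, W_Out
    W_1 -= LEARNING_RATE * np.outer(Error_1, x)               # shape (25, 785), matching W_1
    R_1_array = np.concatenate((np.array([1.0]), R_1))        # prepend the bias input
    W_Out -= LEARNING_RATE * np.outer(Error_Out, R_1_array)   # shape (10, 26), matching W_Out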
chart_x = []
chart_y_train = []
chart_y_test = []
def show_learning(epoch_no, train_acc, test_acc):
    global chart_x
    global chart_y_train
    global chart_y_test
    print('epoch no:', epoch_no, ', train_acc: ',
          '%6.4f' % train_acc,
          ', test_acc: ', '%6.4f' % test_acc)
    chart_x.append(epoch_no + 1)
    chart_y_train.append(1.0 - train_acc)  # store error = 1 - accuracy
    chart_y_test.append(1.0 - test_acc)
def plot_learning():
    plt.plot(chart_x, chart_y_train, 'b-', label='training error')
    plt.plot(chart_x, chart_y_test, 'g-', label='test error')
    plt.axis([0, len(chart_x), 0.0, 1.0])
    plt.xlabel('training epochs')
    plt.ylabel('error')
    plt.legend()
    plt.show()
# Network training loop.
for i in range(EPOCHS):  # Train EPOCHS iterations
    np.random.shuffle(index_list)  # Randomize order
    correct_training_results = 0
    for j in index_list:  # Train on all examples
        x = np.concatenate((np.array([1.0]), X_train[j]))  # prepend the bias input
        feedforward(x)
        if R_Out.argmax() == y_train[j].argmax():
            correct_training_results += 1
        backpropagation(y_train[j])
        adjust_weights(x)
    correct_test_results = 0
    for j in range(len(X_test)):  # Evaluate network
        x = np.concatenate((np.array([1.0]), X_test[j]))
        feedforward(x)
        if R_Out.argmax() == y_test[j].argmax():
            correct_test_results += 1
    show_learning(i, correct_training_results/len(X_train),
                  correct_test_results/len(X_test))
plot_learning()
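Once training finishes, the same feedforward pass can classify individual images. A minimal sketch, assuming the trained globals above are still in scope:

x = np.concatenate((np.array([1.0]), X_test[0]))  # prepend the bias input to one test image
feedforward(x)
print('predicted digit:', R_Out.argmax(), '| true digit:', y_test[0].argmax())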