import torch
import torch.nn as nn
import torch.nn.functional as F
import os
import numpy as np
class SingleRNN(nn.Module):
    """A minimal vanilla RNN unrolled by hand over two timesteps.

    Args:
        n_inputs:  number of features per input vector.
        n_neurons: number of hidden units (also the output width).

    forward(X0, X1) returns the activations (Y0, Y1) for the two timesteps.
    """

    def __init__(self, n_inputs, n_neurons):
        super(SingleRNN, self).__init__()
        # Weights: random initial values that training will later refine.
        # Wrapped in nn.Parameter so they are registered on the module --
        # the original used bare tensors, leaving model.parameters() empty
        # and making the model untrainable with any optimizer.
        self.Wx = nn.Parameter(torch.randn(n_inputs, n_neurons))   # n_inputs X n_neurons
        self.Wy = nn.Parameter(torch.randn(n_neurons, n_neurons))  # n_neurons X n_neurons
        # Bias: initialized to zeros (1 X n_neurons).
        self.b = nn.Parameter(torch.zeros(1, n_neurons))

    def forward(self, X0, X1):
        # t = 0: tanh of the input projected through Wx, plus the bias.
        self.Y0 = torch.tanh(torch.mm(X0, self.Wx) + self.b)
        # t = 1: the previous activation is fed back through the recurrent
        # weights Wy and added to the projected second input, plus the bias.
        self.Y1 = torch.tanh(torch.mm(self.Y0, self.Wy) +
                             torch.mm(X1, self.Wx) + self.b)
        return self.Y0, self.Y1
# Toy configuration: 4-dimensional inputs, a single neuron.
N_INPUT = 4
N_NEURONS = 1

# One 4 X 4 batch of features per timestep.
X0_batch = torch.tensor(
    [[0, 1, 2, 0], [3, 4, 5, 0], [6, 7, 8, 0], [9, 0, 1, 0]],
    dtype=torch.float,
)  # t = 0
X1_batch = torch.tensor(
    [[9, 8, 7, 0], [0, 0, 0, 0], [6, 5, 4, 0], [3, 2, 1, 0]],
    dtype=torch.float,
)  # t = 1

model = SingleRNN(N_INPUT, N_NEURONS)

# Run both timesteps through the model.
Y0_val, Y1_val = model(X0_batch, X1_batch)

# Outputs per timestep (weights are random, so these are not optimized yet).
print(Y0_val)
print(Y1_val)
# Example output (random initialization, so exact values will vary):
# tensor([[ 0.8484],
#         [ 0.9860],
#         [ 0.9988],
#         [-0.1968]])
# tensor([[ 0.9892],
#         [ 0.2069],
#         [ 0.8886],
#         [-0.0658]])
# Here the code doesn't really change; only the parameters change in the next block.
class BasicRNN(nn.Module):
    """Same two-timestep vanilla RNN as above, sized by its constructor args."""

    def __init__(self, n_inputs, n_neurons):
        super(BasicRNN, self).__init__()
        # Input-to-hidden weights: n_inputs X n_neurons.
        self.Wx = torch.randn(n_inputs, n_neurons)
        # Hidden-to-hidden (recurrent) weights: n_neurons X n_neurons.
        self.Wy = torch.randn(n_neurons, n_neurons)
        # Bias starts at zero: 1 X n_neurons.
        self.b = torch.zeros(1, n_neurons)

    def forward(self, X0, X1):
        # t = 0: tanh(X0 . Wx + b).
        projected_first = torch.mm(X0, self.Wx)
        self.Y0 = torch.tanh(projected_first + self.b)
        # t = 1: tanh(Y0 . Wy + X1 . Wx + b) -- the previous activation
        # is carried forward through the recurrent weights.
        recurrent_term = torch.mm(self.Y0, self.Wy)
        projected_second = torch.mm(X1, self.Wx)
        self.Y1 = torch.tanh(recurrent_term + projected_second + self.b)
        return self.Y0, self.Y1
# For this question we want 4 inputs and 5 neurons.
N_INPUT = 4
N_NEURONS = 5

# Feature tensors: one 4 X 4 batch per timestep.
X0_batch = torch.tensor(
    [[0, 1, 2, 0], [3, 4, 5, 0], [6, 7, 8, 0], [9, 0, 1, 0]],
    dtype=torch.float,
)  # t = 0
X1_batch = torch.tensor(
    [[9, 8, 7, 0], [0, 0, 0, 0], [6, 5, 4, 0], [3, 2, 1, 0]],
    dtype=torch.float,
)  # t = 1

model = BasicRNN(N_INPUT, N_NEURONS)
# Feed the features through the model.
Y0_val, Y1_val = model(X0_batch, X1_batch)

# The results are the following:
print(Y0_val)
print(Y1_val)
# Example output:
# tensor([[ 0.2443,  0.7840,  0.8778, -0.9996,  0.9070],
#         [-0.9999,  0.9938, -0.4803, -1.0000,  1.0000],
#         [-1.0000,  0.9998, -0.9841, -1.0000,  1.0000],
#         [-1.0000,  0.9999, -1.0000,  1.0000,  0.5333]])
# tensor([[-1.0000,  0.5178, -1.0000, -1.0000,  1.0000],
#         [-0.8933, -0.9949,  0.9877, -0.7445,  0.5170],
#         [-1.0000, -0.3789, -0.9807, -0.9999,  1.0000],
#         [-0.9999,  0.1996, -0.9677, -0.9977,  0.9996]])
# Shapes of the two timestep outputs: (batch, n_neurons) = (4, 5).
for timestep_output in (Y0_val, Y1_val):
    print(timestep_output.shape)
# Output:
# torch.Size([4, 5])
# torch.Size([4, 5])
# A built-in RNN cell: 3 input features mapped to 5 hidden units.
rnn = nn.RNNCell(3, 5)  # n_input X n_neurons

# Both timesteps stacked into one tensor: (steps, batch, features) = (2, 4, 3).
X_batch = torch.tensor(
    [[[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]],
     [[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]]],
    dtype=torch.float,
)  # X0 and X1

# Random initial hidden state, one row per sample in the batch of 4.
hx = torch.randn(4, 5)

output = []
# Unroll the cell over the two timesteps, carrying the hidden state forward.
for step in range(2):
    hx = rnn(X_batch[step], hx)
    output.append(hx)

print(output)
# Example output:
# [tensor([[ 0.8830,  0.7922, -0.4431,  0.6012, -0.0894],
#          [ 0.9626,  0.9902, -0.9350,  0.7653,  0.8687],
#          [ 1.0000,  0.9998, -0.9993,  0.7274,  0.9894],
#          [-0.7742,  0.8852, -0.8363,  0.9938,  0.9964]], grad_fn=<TanhBackward>),
#  tensor([[ 0.9978,  0.9999, -0.9993,  0.9978,  0.9965],
#          [ 0.7137,  0.4797, -0.0521,  0.2943, -0.7326],
#          [ 0.9793,  0.9964, -0.9892,  0.9866,  0.9549],
#          [ 0.5320,  0.9152, -0.6547,  0.9437,  0.8390]], grad_fn=<TanhBackward>)]
class CleanBasicRNN(nn.Module):
    """Wrap nn.RNNCell and unroll it over two timesteps.

    Args:
        batch_size: number of samples per batch (sizes the hidden state).
        n_inputs:   features per timestep.
        n_neurons:  hidden-state width.

    forward(X) expects X of shape (2, batch_size, n_inputs) and returns
    (outputs_per_timestep, final_hidden_state).
    """

    def __init__(self, batch_size, n_inputs, n_neurons):
        super(CleanBasicRNN, self).__init__()
        # Store the cell on self. The original assigned it to a local
        # variable `rnn`, so forward() silently resolved a *global* `rnn`
        # and the cell's parameters were never registered on this module.
        self.rnn = nn.RNNCell(n_inputs, n_neurons)
        # Random initial hidden state, one row per sample in the batch.
        self.hx = torch.randn(batch_size, n_neurons)

    def forward(self, X):
        output = []
        # Unroll over the two timesteps, carrying the hidden state forward.
        for i in range(2):
            self.hx = self.rnn(X[i], self.hx)
            output.append(self.hx)
        return output, self.hx
FIXED_BATCH_SIZE = 4  # our batch size is fixed for now
N_INPUT = 3
N_NEURONS = 5

# Inputs for both timesteps stacked together: shape (2, 4, 3).
X_batch = torch.tensor(
    [[[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]],
     [[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]]],
    dtype=torch.float,
)  # X0 and X1

model = CleanBasicRNN(FIXED_BATCH_SIZE, N_INPUT, N_NEURONS)
output_val, states_val = model(X_batch)

print(output_val)  # all outputs for all timesteps
print(states_val)  # final state, i.e. the values at the last timestep t=1
# Example output:
# [tensor([[ 0.9913,  0.8704, -0.3220, -0.3551, -0.9046],
#          [ 0.9993,  0.9968, -0.9770,  0.2204,  0.7289],
#          [ 0.9988,  0.9999, -0.9978,  0.9851,  0.9933],
#          [-0.9253,  0.4208, -0.7829,  0.9647,  0.9280]], grad_fn=<TanhBackward>),
#  tensor([[ 0.9960,  0.9999, -0.9995,  0.9982,  0.9973],
#          [ 0.6172,  0.4531, -0.2126,  0.3779, -0.6614],
#          [ 0.9821,  0.9966, -0.9870,  0.9853,  0.9457],
#          [ 0.4642,  0.8953, -0.6693,  0.9302,  0.8592]], grad_fn=<TanhBackward>)]
# tensor([[ 0.9960,  0.9999, -0.9995,  0.9982,  0.9973],
#         [ 0.6172,  0.4531, -0.2126,  0.3779, -0.6614],
#         [ 0.9821,  0.9966, -0.9870,  0.9853,  0.9457],
#         [ 0.4642,  0.8953, -0.6693,  0.9302,  0.8592]], grad_fn=<TanhBackward>)
# Note: nn.RNNCell sizes its weight matrices automatically from the
# n_inputs/n_neurons arguments, so CleanBasicRNN does not hard-code any
# layer sizes; only the initial hidden state is created by hand.
import torchvision
import torchvision.transforms as transforms
from ipywidgets import IntProgress
BATCH_SIZE = 64

# List all transformations applied to every MNIST image (just tensor conversion).
transform = transforms.Compose(
    [transforms.ToTensor()])

# Download and load the training dataset.
trainset = torchvision.datasets.MNIST(root="./data", train=True,
                                      download=True, transform=transform)
# shuffle=True: training batches should be drawn in a fresh random order each
# epoch. The original passed shuffle=False, which feeds the dataset in a fixed
# order every epoch and generally hurts SGD-style training.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE,
                                          shuffle=True, num_workers=2)
# Example warning emitted by torchvision's MNIST loader:
# /shared-libs/python3.7/py/lib/python3.7/site-packages/torchvision/datasets/mnist.py:498: UserWarning: The given NumPy array is not writeable, and PyTorch does not support non-writeable tensors. This means you can write to the underlying (supposedly non-writeable) NumPy array using the tensor. You may want to copy the array to protect its data or make it writeable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:180.)
#   return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)
import matplotlib.pyplot as plt
import numpy as np
# Helper to display a (C, H, W) image tensor with matplotlib.
def imshow(img):
    # img = img / 2 + 0.5  # unnormalize (dataset is not normalized here)
    as_numpy = img.numpy()
    # matplotlib expects (H, W, C), so move the channel axis last.
    plt.imshow(np.transpose(as_numpy, (1, 2, 0)))
# Get one batch of training images and display them as a grid.
dataiter = iter(trainloader)
# next(iterator) is the supported way to advance a Python iterator; the
# original's dataiter.next() is not an iterator method in Python 3 and was
# removed from DataLoader's iterator in recent PyTorch releases.
images, labels = next(dataiter)
# Show the images.
imshow(torchvision.utils.make_grid(images))
# MNIST-as-sequence geometry: each 28 x 28 image is read as 28 timesteps
# (rows) of 28 features (pixels per row).
N_STEPS = 28
N_INPUTS = 28
# Width of the recurrent hidden state.
N_NEURONS = 150
# One output per digit class (0-9).
N_OUTPUTS = 10
# Number of full passes over the training set.
N_EPOCHS = 20
class ImageRNN(nn.Module):
    """Classify MNIST digits by reading each image row-by-row with an RNN.

    The forward pass expects X of shape (batch, n_steps, n_inputs) and
    returns raw class scores of shape (batch, n_outputs).
    """

    def __init__(self, batch_size, n_steps, n_inputs, n_neurons, n_outputs):
        super(ImageRNN, self).__init__()
        # Model geometry.
        self.n_neurons = n_neurons    # hidden-state width
        self.batch_size = batch_size  # samples per training iteration
        self.n_steps = n_steps        # timesteps (image rows)
        self.n_inputs = n_inputs      # features per timestep (pixels per row)
        self.n_outputs = n_outputs    # number of classes
        # Recurrent layer over the row sequence.
        self.basic_rnn = nn.RNN(self.n_inputs, self.n_neurons)
        # Fully connected read-out from the final hidden state to class scores.
        self.FC = nn.Linear(self.n_neurons, n_outputs)

    def init_hidden(self,):
        # Fresh all-zero hidden state: (num_layers, batch_size, n_neurons).
        return (torch.zeros(1, self.batch_size, self.n_neurons))

    def forward(self, X):
        # Reorder to (n_steps, batch_size, n_inputs), which nn.RNN expects.
        X = X.permute(1, 0, 2)
        # Track the actual batch size (the final batch may be smaller).
        self.batch_size = X.size(1)
        # Zeroed hidden state for this batch.
        self.hidden = self.init_hidden()
        # rnn_out holds the hidden state at every timestep;
        # self.hidden is the final state (1, batch_size, n_neurons).
        rnn_out, self.hidden = self.basic_rnn(X, self.hidden)
        # Class scores computed from the final hidden state only.
        scores = self.FC(self.hidden)
        return scores.view(-1, self.n_outputs)  # batch_size X n_outputs
# Sanity-check the untrained model on one raw batch.
dataiter = iter(trainloader)
# Use next(iterator); dataiter.next() is not a Python 3 iterator method and
# was removed from DataLoader's iterator in recent PyTorch releases.
images, labels = next(dataiter)
model = ImageRNN(BATCH_SIZE, N_STEPS, N_INPUTS, N_NEURONS, N_OUTPUTS)
# Flatten each image to a (28, 28) row sequence before the forward pass.
logits = model(images.view(-1, 28, 28))
print(logits[0:10])
# Example output:
# tensor([[ 2.9169e-02, -1.1149e-02,  8.8625e-03, -7.6763e-03, -5.2061e-02,
#          -8.5447e-02,  3.0201e-02, -6.3254e-02, -2.7549e-02, -1.2261e-02],
#         [ 1.6453e-02, -9.8487e-03,  1.7722e-02,  4.0623e-03, -5.7787e-03,
#          -7.8731e-02,  2.4465e-02, -4.6391e-02, -3.2010e-02, -1.2370e-02],
#         [ 2.2632e-02, -1.0352e-02,  1.3139e-02,  1.0243e-02, -3.8938e-03,
#          -8.9555e-02,  4.3167e-02, -3.3890e-02, -2.9852e-02, -9.8020e-03],
#         [ 2.3405e-02, -1.0217e-02,  1.2529e-02,  3.7194e-03, -3.4595e-03,
#          -9.3282e-02,  4.0217e-02, -4.3580e-02, -3.2037e-02, -1.2479e-02],
#         [ 5.4785e-03, -2.6665e-02,  2.8148e-02, -1.3603e-03,  7.7648e-03,
#          -7.8847e-02,  1.7233e-02, -5.0675e-02, -7.0606e-02,  5.7491e-03],
#         [ 1.3987e-02, -9.3011e-03,  1.9609e-02,  7.6693e-03,  1.2730e-03,
#          -8.0291e-02,  3.1268e-02, -4.6846e-02, -3.8248e-02, -3.3365e-03],
#         [ 1.3639e-02, -7.7102e-03,  1.9107e-02,  7.4628e-03, -1.0117e-02,
#          -7.9374e-02,  3.5674e-02, -3.8784e-02, -3.9599e-02,  1.5560e-03],
#         [ 3.0162e-02, -2.1072e-02,  1.6318e-02, -2.1225e-03, -1.7863e-02,
#          -8.2486e-02,  3.9402e-02, -4.7962e-02, -3.3162e-02, -7.7961e-03],
#         [ 1.3820e-02, -1.6137e-03,  1.4968e-02,  1.1088e-02,  3.4086e-07,
#          -7.9248e-02,  3.1823e-02, -4.1902e-02, -2.5628e-02,  5.9558e-03],
#         [ 1.1564e-02, -6.5383e-03,  1.8769e-02,  2.8186e-03, -2.0096e-03,
#          -7.6285e-02,  3.1500e-02, -4.4276e-02, -3.0441e-02, -6.4211e-03]],
#        grad_fn=<SliceBackward>)
import torch.optim as optim
# Device: use the first GPU when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Model instance.
# NOTE(review): neither the model nor the batches are ever moved to `device`
# (no .to(device) calls below), so training runs on the CPU regardless --
# confirm whether GPU use was intended.
model = ImageRNN(BATCH_SIZE, N_STEPS, N_INPUTS, N_NEURONS, N_OUTPUTS)
# Cross-entropy loss over the class scores (expects raw, unsoftmaxed logits).
criterion = nn.CrossEntropyLoss()
# Adam optimizer over all parameters registered on the model.
optimizer = optim.Adam(model.parameters(), lr=0.001)
def get_accuracy(logit, target, batch_size):
    """Return the accuracy (percent) of `logit` predictions against `target`.

    The predicted class is the argmax over dim 1 of `logit`. The denominator
    is taken from `target` itself rather than the nominal `batch_size`: the
    original divided by `batch_size`, which under-counted accuracy on a
    smaller final batch. `batch_size` is kept for interface compatibility.
    """
    corrects = (torch.max(logit, 1)[1].view(target.size()).data == target.data).sum()
    accuracy = 100.0 * corrects / target.size(0)
    return accuracy.item()
for epoch in range(N_EPOCHS):  # loop over the dataset multiple times
    train_running_loss = 0.0
    train_acc = 0.0
    model.train()

    # Training round
    for i, data in enumerate(trainloader):
        # zero the parameter gradients
        optimizer.zero_grad()
        # reset hidden states (forward() also rebuilds them, but be explicit)
        model.hidden = model.init_hidden()
        # get the inputs and flatten each image into a 28-step row sequence
        inputs, labels = data
        inputs = inputs.view(-1, 28, 28)

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_running_loss += loss.detach().item()
        train_acc += get_accuracy(outputs, labels, BATCH_SIZE)

    model.eval()
    # enumerate is 0-based, so i + 1 batches were processed; the original
    # divided by i, slightly inflating both per-epoch averages.
    print('Epoch: %d | Loss: %.4f | Train Accuracy: %.2f'
          % (epoch, train_running_loss / (i + 1), train_acc / (i + 1)))
# Example training log:
# Epoch: 0 | Loss: 0.8452 | Train Accuracy: 71.56
# Epoch: 1 | Loss: 0.3590 | Train Accuracy: 89.47
# Epoch: 2 | Loss: 0.2619 | Train Accuracy: 92.59
# Epoch: 3 | Loss: 0.2192 | Train Accuracy: 93.83
# Epoch: 4 | Loss: 0.1998 | Train Accuracy: 94.37
# Epoch: 5 | Loss: 0.1771 | Train Accuracy: 95.07
# Epoch: 6 | Loss: 0.1558 | Train Accuracy: 95.62
# Epoch: 7 | Loss: 0.1445 | Train Accuracy: 95.96
# Epoch: 8 | Loss: 0.1433 | Train Accuracy: 95.94
# Epoch: 9 | Loss: 0.1286 | Train Accuracy: 96.39
# Epoch: 10 | Loss: 0.1230 | Train Accuracy: 96.55
# Epoch: 11 | Loss: 0.1193 | Train Accuracy: 96.70
# Epoch: 12 | Loss: 0.1202 | Train Accuracy: 96.65
# Epoch: 13 | Loss: 0.1196 | Train Accuracy: 96.74
# Epoch: 14 | Loss: 0.1107 | Train Accuracy: 96.95
# Epoch: 15 | Loss: 0.1006 | Train Accuracy: 97.22
# Epoch: 16 | Loss: 0.1138 | Train Accuracy: 96.77
# Epoch: 17 | Loss: 0.1095 | Train Accuracy: 97.01
# Epoch: 18 | Loss: 0.0977 | Train Accuracy: 97.32
# Epoch: 19 | Loss: 0.0945 | Train Accuracy: 97.31
# download and load test dataset
testset = torchvision.datasets.MNIST(root="./data", train=False,
                                     download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE,
                                         shuffle=False, num_workers=2)

test_running_loss = 0
test_acc = 0

# Evaluation round. The original called loss.backward() on every test batch,
# which needlessly accumulated gradients into the trained model; evaluation
# needs no gradients at all, so run the whole loop under torch.no_grad().
# (The original also peeked one batch with the removed dataiter.next() API.)
model.eval()
with torch.no_grad():
    for i, data in enumerate(testloader):
        # get the inputs and flatten each image into a 28-step row sequence
        inputs, labels = data
        inputs = inputs.view(-1, 28, 28)
        # forward pass only
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        test_running_loss += loss.item()
        test_acc += get_accuracy(outputs, labels, BATCH_SIZE)

# enumerate is 0-based, so i + 1 batches were processed.
print("Test accuracy : " + str(test_acc / (i + 1)))
print("Test loss : " + str(test_running_loss / (i + 1)))
# Example output:
# Test accuracy : 97.28565705128206
# Test loss : 0.10994659437580058