import torch
import torch.nn as nn
import torch.nn.functional as F
import os
import numpy as np
class SingleRNN(nn.Module):
def __init__(self, n_inputs, n_neurons):
super(SingleRNN, self).__init__()
        # Weights: we initialize the weights to random values and
        # then optimize them to find good values
self.Wx = torch.randn(n_inputs, n_neurons) # 4 X 1
self.Wy = torch.randn(n_neurons, n_neurons) # 1 X 1
        # Bias: we initialize it to zeros
        self.b = torch.zeros(1, n_neurons) # 1 X 1
def forward(self, X0, X1):
        # The activation function is a hyperbolic tangent.
        # Its argument is the matrix product of the first input (X0)
        # and the input weights (Wx), plus a bias.
        self.Y0 = torch.tanh(torch.mm(X0, self.Wx) + self.b) # 4 X 1
        # The second timestep uses the same activation function.
        # Its argument is the matrix product of the previous output (Y0)
        # and the recurrent weights (Wy), plus the matrix product of the
        # second input (X1) and the input weights (Wx), plus the bias.
        self.Y1 = torch.tanh(torch.mm(self.Y0, self.Wy) +
                             torch.mm(X1, self.Wx) + self.b) # 4 X 1
return self.Y0, self.Y1
N_INPUT = 4
N_NEURONS = 1
# Create features (tensors)
X0_batch = torch.tensor([[0,1,2,0], [3,4,5,0],
[6,7,8,0], [9,0,1,0]],
dtype = torch.float) #t=0 =>4 X 4
X1_batch = torch.tensor([[9,8,7,0], [0,0,0,0],
[6,5,4,0], [3,2,1,0]],
dtype = torch.float) #t=1 =>4 X 4
model = SingleRNN(N_INPUT, N_NEURONS)
# Feed our features through the model (a forward pass, no training yet)
Y0_val, Y1_val = model(X0_batch, X1_batch)
# Here are the outputs for each timestep
print(Y0_val)
print(Y1_val)
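As a quick sanity check (an addition of mine, not output from the original run), the shapes confirm that each timestep produces a batch_size X n_neurons matrix:
print(Y0_val.shape) # torch.Size([4, 1])
print(Y1_val.shape) # torch.Size([4, 1])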
# As we can see, the values have changed, but they are surely not optimized
# The code below doesn't really change; we only change the parameters
class BasicRNN(nn.Module):
def __init__(self, n_inputs, n_neurons):
super(BasicRNN, self).__init__()
self.Wx = torch.randn(n_inputs, n_neurons) # 4 X N
self.Wy = torch.randn(n_neurons, n_neurons) # N X N
self.b = torch.zeros(1,n_neurons) # 1 X N
def forward(self, X0, X1):
        # Same two-timestep recurrence as in SingleRNN
        self.Y0 = torch.tanh(torch.mm(X0, self.Wx) + self.b) # 4 X N
        self.Y1 = torch.tanh(torch.mm(self.Y0, self.Wy) +
                             torch.mm(X1, self.Wx) + self.b) # 4 X N
return self.Y0, self.Y1
# For this question we want 4 inputs and 5 neurons
N_INPUT = 4
N_NEURONS = 5
# Create features (tensors)
X0_batch = torch.tensor([[0,1,2,0], [3,4,5,0],
[6,7,8,0], [9,0,1,0]],
dtype = torch.float) #t=0 =>4 X 4
X1_batch = torch.tensor([[9,8,7,0], [0,0,0,0],
[6,5,4,0], [3,2,1,0]],
dtype = torch.float) #t=1 =>4 X 4
model = BasicRNN(N_INPUT, N_NEURONS)
# Feed our features through the model (a forward pass, no training yet)
Y0_val, Y1_val = model(X0_batch, X1_batch)
# The results are the following
print(Y0_val)
print(Y1_val)
# Here are the sizes of the outputs
print(Y0_val.shape)
print(Y1_val.shape)
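This hand-rolled recurrence is exactly what nn.RNNCell computes: h' = tanh(x @ W_ih.T + b_ih + h @ W_hh.T + b_hh). As a hedged sanity check (my sketch, not part of the original notebook), copying our weights into a cell should reproduce Y0 and Y1 up to floating-point error:
cell = nn.RNNCell(N_INPUT, N_NEURONS)
with torch.no_grad():
    cell.weight_ih.copy_(model.Wx.t()) # (n_neurons, n_inputs)
    cell.weight_hh.copy_(model.Wy.t()) # (n_neurons, n_neurons)
    cell.bias_ih.copy_(model.b.squeeze(0))
    cell.bias_hh.zero_()
h0 = cell(X0_batch, torch.zeros(4, N_NEURONS)) # BasicRNN's Y0 assumes a zero initial state
h1 = cell(X1_batch, h0)
print(torch.allclose(h0, Y0_val, atol=1e-6), torch.allclose(h1, Y1_val, atol=1e-6))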
rnn = nn.RNNCell(3,5) # n_input X n_neurons
X_batch = torch.tensor([[[0,1,2], [3,4,5],
[6,7,8], [9,0,1]],
[[9,8,7], [0,0,0],
[6,5,4], [3,2,1]]
], dtype = torch.float) # X0 and X1
hx = torch.randn(4, 5) # initial hidden state: batch_size X n_neurons
output = []
# for each time step
for i in range(2):
hx = rnn(X_batch[i], hx)
output.append(hx)
print(output)
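If a single tensor is more convenient than a Python list, the per-step hidden states can be stacked (a small addition of mine):
output_tensor = torch.stack(output) # n_steps X batch_size X n_neurons
print(output_tensor.shape) # torch.Size([2, 4, 5])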
class CleanBasicRNN(nn.Module):
def __init__(self, batch_size, n_inputs, n_neurons):
super(CleanBasicRNN, self).__init__()
        self.rnn = nn.RNNCell(n_inputs, n_neurons)
        self.hx = torch.randn(batch_size, n_neurons)
def forward(self, X):
output = []
        # for each time step (X has shape n_steps X batch_size X n_inputs)
        for i in range(X.size(0)):
            self.hx = self.rnn(X[i], self.hx)
output.append(self.hx)
return output, self.hx
FIXED_BATCH_SIZE = 4 # our batch size is fixed for now
N_INPUT = 3
N_NEURONS = 5
X_batch = torch.tensor([[[0,1,2], [3,4,5],
[6,7,8], [9,0,1]],
[[9,8,7], [0,0,0],
[6,5,4], [3,2,1]]
], dtype = torch.float) # X0 and X1
model = CleanBasicRNN(FIXED_BATCH_SIZE, N_INPUT, N_NEURONS)
output_val, states_val = model(X_batch)
print(output_val) # contains the outputs for all timesteps
print(states_val) # contains the final state, i.e., the hidden state at t=1
CleanBasicRNN wraps nn.RNNCell, so the cell's weight matrices are sized automatically from n_inputs and n_neurons, and the hidden state is kept inside the model instead of being threaded through by the caller.
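Note that self.hx persists across forward calls, so hidden state from one batch leaks into the next. A minimal variant (my sketch, assuming a fresh state per batch is what we want) initializes the hidden state inside forward and loops over however many timesteps the input has:
class CleanBasicRNNv2(nn.Module):
    def __init__(self, n_inputs, n_neurons):
        super(CleanBasicRNNv2, self).__init__()
        self.n_neurons = n_neurons
        self.rnn = nn.RNNCell(n_inputs, n_neurons)
    def forward(self, X):
        # X: n_steps X batch_size X n_inputs
        hx = torch.zeros(X.size(1), self.n_neurons) # fresh zero state per call
        output = []
        for x_t in X: # one slice per timestep
            hx = self.rnn(x_t, hx)
            output.append(hx)
        return output, hx
output_val, states_val = CleanBasicRNNv2(N_INPUT, N_NEURONS)(X_batch)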
import torchvision
import torchvision.transforms as transforms
from ipywidgets import IntProgress # progress-bar widget used while downloading MNIST in a notebook
BATCH_SIZE = 64
# list all transformations
transform = transforms.Compose(
[transforms.ToTensor()])
# download and load training dataset
trainset = torchvision.datasets.MNIST(root="./data", train=True,
download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE,
                                          shuffle=True, num_workers=2) # shuffle the training data each epoch
import matplotlib.pyplot as plt
import numpy as np
# functions to show an image
def imshow(img):
# img = img / 2 + 0.5 # unnormalize
npimg = img.numpy()
plt.imshow(np.transpose(npimg, (1,2,0)))
#get some random training images
dataiter = iter(trainloader)
images, labels = next(dataiter) # DataLoader iterators have no .next() method
#show images
imshow(torchvision.utils.make_grid(images))
N_STEPS = 28
N_INPUTS = 28
N_NEURONS = 150
N_OUTPUTS = 10
N_EPOCHS = 20
class ImageRNN(nn.Module):
def __init__(self, batch_size, n_steps, n_inputs, n_neurons, n_outputs):
super(ImageRNN, self).__init__()
# Set all the parameters of the model
# Number of neurons
self.n_neurons = n_neurons
        # Size of the batch (number of training examples used in a single iteration)
self.batch_size = batch_size
        # Number of timesteps in the sequence
self.n_steps = n_steps
# number of inputs and outputs
self.n_inputs = n_inputs
self.n_outputs = n_outputs
        # A basic (vanilla) RNN layer
self.basic_rnn = nn.RNN(self.n_inputs, self.n_neurons)
        # Fully connected layer: maps the final hidden state to the output logits
self.FC = nn.Linear(self.n_neurons, n_outputs)
    def init_hidden(self):
        # (num_layers, batch_size, n_neurons)
        # Initialize the hidden state to a
        # 1 X batch_size X n_neurons tensor full of zeros, on the model's device
        return torch.zeros(1, self.batch_size, self.n_neurons,
                           device=self.FC.weight.device)
def forward(self, X):
        # transform X to dimensions: n_steps X batch_size X n_inputs
        # (nn.RNN expects the time dimension first by default)
        X = X.permute(1, 0, 2)
        # Update the batch size from the batch dimension of X (dim 1 after the permute)
        self.batch_size = X.size(1)
        # Initialize the hidden state to a
        # 1 X batch_size X n_neurons tensor full of zeros
        self.hidden = self.init_hidden()
        # rnn_out => n_steps, batch_size, n_neurons (hidden state for each timestep)
        # self.hidden => 1, batch_size, n_neurons (final hidden state only)
        # Run the basic_rnn on the input sequence and the initial hidden state
        # (this is a plain RNN, not an LSTM, so there is no cell state)
        rnn_out, self.hidden = self.basic_rnn(X, self.hidden)
        # Pass the final hidden state through the fully connected layer
        out = self.FC(self.hidden)
        # Return the output
        return out.view(-1, self.n_outputs) # batch_size X n_outputs
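To make the permute explicit (a sketch of mine, not output from the original notebook): each 28 X 28 MNIST image is read as 28 timesteps of 28 features, and nn.RNN expects the time dimension first by default:
dummy = torch.zeros(BATCH_SIZE, 28, 28) # batch_size X rows X columns
print(dummy.permute(1, 0, 2).shape) # torch.Size([28, 64, 28]) -> n_steps X batch_size X n_inputs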
dataiter = iter(trainloader)
images, labels = next(dataiter)
model = ImageRNN(BATCH_SIZE, N_STEPS, N_INPUTS, N_NEURONS, N_OUTPUTS)
logits = model(images.view(-1,28,28))
print(logits[0:10])
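A quick shape check (my addition): the model maps every image in the batch to 10 class logits.
print(logits.shape) # torch.Size([64, 10]) -> batch_size X n_outputs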
import torch.optim as optim
#Device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Model instance
model = ImageRNN(BATCH_SIZE, N_STEPS, N_INPUTS, N_NEURONS, N_OUTPUTS).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
def get_accuracy(logit, target, batch_size):
''' Obtain accuracy for training round '''
    corrects = (torch.max(logit, 1)[1].view(target.size()).data == target.data).sum()
    accuracy = 100.0 * corrects / batch_size
return accuracy.item()
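To see get_accuracy at work on a toy example (mine, not from the original): the row-wise argmax of the logits is compared to the targets, so this hand-made batch is 50% correct.
toy_logits = torch.tensor([[0.9, 0.1], [0.2, 0.8]]) # predicts classes 0 and 1
toy_targets = torch.tensor([0, 0])
print(get_accuracy(toy_logits, toy_targets, batch_size=2)) # 50.0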
for epoch in range(N_EPOCHS): # loop over the dataset multiple times
train_running_loss = 0.0
train_acc = 0.0
model.train()
#Training Round
for i,data in enumerate(trainloader):
#zero the parameter gradients
optimizer.zero_grad()
#reset hidden states
model.hidden = model.init_hidden()
        # get the inputs and move them to the same device as the model
        inputs, labels = data
        inputs = inputs.view(-1, 28, 28).to(device)
        labels = labels.to(device)
        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_running_loss += loss.detach().item()
        train_acc += get_accuracy(outputs, labels, BATCH_SIZE)
model.eval()
    print('Epoch: %d | Loss: %.4f | Train Accuracy: %.2f'
          % (epoch, train_running_loss / (i + 1), train_acc / (i + 1)))
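Optionally, the trained weights can be saved so the test round can be rerun later without retraining (the file name here is my assumption, not from the original notebook):
torch.save(model.state_dict(), "./image_rnn_mnist.pt") # hypothetical checkpoint path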
# download and load test dataset
testset = torchvision.datasets.MNIST(root="./data", train=False,
download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
dataiter = iter(testloader)
images, labels = next(dataiter)
logits = model(images.view(-1, 28, 28).to(device))
test_running_loss = 0
test_acc = 0
# Testing round: gradients are not needed, so we switch to eval mode,
# disable autograd, and drop the backward() call
model.eval()
with torch.no_grad():
    for i, data in enumerate(testloader):
        # get the inputs
        inputs, labels = data
        inputs = inputs.view(-1, 28, 28).to(device)
        labels = labels.to(device)
        # forward pass only
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        test_running_loss += loss.item()
        test_acc += get_accuracy(outputs, labels, BATCH_SIZE)
print("Test accuracy : " + str(test_acc / (i + 1)))
print("Test loss : " + str(test_running_loss / (i + 1)))