import torch
import torch.nn as nn
import torch.nn.functional as F
import os
import numpy as np
class SingleRNN(nn.Module):
    """A minimal vanilla RNN unrolled by hand over two timesteps.

    Args:
        n_inputs:  number of features per input vector.
        n_neurons: number of hidden units (also the output width).

    forward(X0, X1) returns the activations (Y0, Y1) for the two timesteps.
    """

    def __init__(self, n_inputs, n_neurons):
        super(SingleRNN, self).__init__()
        # Weights: random initial values that training will later refine.
        # Wrapped in nn.Parameter so they are registered on the module --
        # the original used bare tensors, leaving model.parameters() empty
        # and making the model untrainable with any optimizer.
        self.Wx = nn.Parameter(torch.randn(n_inputs, n_neurons))   # n_inputs X n_neurons
        self.Wy = nn.Parameter(torch.randn(n_neurons, n_neurons))  # n_neurons X n_neurons
        # Bias: initialized to zeros (1 X n_neurons).
        self.b = nn.Parameter(torch.zeros(1, n_neurons))

    def forward(self, X0, X1):
        # t = 0: tanh of the input projected through Wx, plus the bias.
        self.Y0 = torch.tanh(torch.mm(X0, self.Wx) + self.b)
        # t = 1: the previous activation is fed back through the recurrent
        # weights Wy and added to the projected second input, plus the bias.
        self.Y1 = torch.tanh(torch.mm(self.Y0, self.Wy) +
                             torch.mm(X1, self.Wx) + self.b)
        return self.Y0, self.Y1
# Toy configuration: 4-dimensional inputs, a single neuron.
N_INPUT = 4
N_NEURONS = 1

# One 4 X 4 batch of features per timestep.
X0_batch = torch.tensor(
    [[0, 1, 2, 0], [3, 4, 5, 0], [6, 7, 8, 0], [9, 0, 1, 0]],
    dtype=torch.float,
)  # t = 0
X1_batch = torch.tensor(
    [[9, 8, 7, 0], [0, 0, 0, 0], [6, 5, 4, 0], [3, 2, 1, 0]],
    dtype=torch.float,
)  # t = 1

model = SingleRNN(N_INPUT, N_NEURONS)

# Run both timesteps through the model.
Y0_val, Y1_val = model(X0_batch, X1_batch)

# Outputs per timestep (weights are random, so these are not optimized yet).
print(Y0_val)
print(Y1_val)
# Example output (random initialization, so exact values will vary):
# tensor([[ 0.8484],
#         [ 0.9860],
#         [ 0.9988],
#         [-0.1968]])
# tensor([[ 0.9892],
#         [ 0.2069],
#         [ 0.8886],
#         [-0.0658]])
# Here the code doesn't really change; only the parameters change in the next block.
class BasicRNN(nn.Module):
    """Same two-timestep vanilla RNN as above, sized by its constructor args."""

    def __init__(self, n_inputs, n_neurons):
        super(BasicRNN, self).__init__()
        # Input-to-hidden weights: n_inputs X n_neurons.
        self.Wx = torch.randn(n_inputs, n_neurons)
        # Hidden-to-hidden (recurrent) weights: n_neurons X n_neurons.
        self.Wy = torch.randn(n_neurons, n_neurons)
        # Bias starts at zero: 1 X n_neurons.
        self.b = torch.zeros(1, n_neurons)

    def forward(self, X0, X1):
        # t = 0: tanh(X0 . Wx + b).
        projected_first = torch.mm(X0, self.Wx)
        self.Y0 = torch.tanh(projected_first + self.b)
        # t = 1: tanh(Y0 . Wy + X1 . Wx + b) -- the previous activation
        # is carried forward through the recurrent weights.
        recurrent_term = torch.mm(self.Y0, self.Wy)
        projected_second = torch.mm(X1, self.Wx)
        self.Y1 = torch.tanh(recurrent_term + projected_second + self.b)
        return self.Y0, self.Y1
# For this question we want 4 inputs and 5 neurons.
N_INPUT = 4
N_NEURONS = 5

# Feature tensors: one 4 X 4 batch per timestep.
X0_batch = torch.tensor(
    [[0, 1, 2, 0], [3, 4, 5, 0], [6, 7, 8, 0], [9, 0, 1, 0]],
    dtype=torch.float,
)  # t = 0
X1_batch = torch.tensor(
    [[9, 8, 7, 0], [0, 0, 0, 0], [6, 5, 4, 0], [3, 2, 1, 0]],
    dtype=torch.float,
)  # t = 1

model = BasicRNN(N_INPUT, N_NEURONS)
# Feed the features through the model.
Y0_val, Y1_val = model(X0_batch, X1_batch)

# The results are the following:
print(Y0_val)
print(Y1_val)
# Example output:
# tensor([[ 0.2443,  0.7840,  0.8778, -0.9996,  0.9070],
#         [-0.9999,  0.9938, -0.4803, -1.0000,  1.0000],
#         [-1.0000,  0.9998, -0.9841, -1.0000,  1.0000],
#         [-1.0000,  0.9999, -1.0000,  1.0000,  0.5333]])
# tensor([[-1.0000,  0.5178, -1.0000, -1.0000,  1.0000],
#         [-0.8933, -0.9949,  0.9877, -0.7445,  0.5170],
#         [-1.0000, -0.3789, -0.9807, -0.9999,  1.0000],
#         [-0.9999,  0.1996, -0.9677, -0.9977,  0.9996]])
# Shapes of the two timestep outputs: (batch, n_neurons) = (4, 5).
for timestep_output in (Y0_val, Y1_val):
    print(timestep_output.shape)
# Output:
# torch.Size([4, 5])
# torch.Size([4, 5])
# A built-in RNN cell: 3 input features mapped to 5 hidden units.
rnn = nn.RNNCell(3, 5)  # n_input X n_neurons

# Both timesteps stacked into one tensor: (steps, batch, features) = (2, 4, 3).
X_batch = torch.tensor(
    [[[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]],
     [[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]]],
    dtype=torch.float,
)  # X0 and X1

# Random initial hidden state, one row per sample in the batch of 4.
hx = torch.randn(4, 5)

output = []
# Unroll the cell over the two timesteps, carrying the hidden state forward.
for step in range(2):
    hx = rnn(X_batch[step], hx)
    output.append(hx)

print(output)
# Example output:
# [tensor([[ 0.8830,  0.7922, -0.4431,  0.6012, -0.0894],
#          [ 0.9626,  0.9902, -0.9350,  0.7653,  0.8687],
#          [ 1.0000,  0.9998, -0.9993,  0.7274,  0.9894],
#          [-0.7742,  0.8852, -0.8363,  0.9938,  0.9964]], grad_fn=<TanhBackward>),
#  tensor([[ 0.9978,  0.9999, -0.9993,  0.9978,  0.9965],
#          [ 0.7137,  0.4797, -0.0521,  0.2943, -0.7326],
#          [ 0.9793,  0.9964, -0.9892,  0.9866,  0.9549],
#          [ 0.5320,  0.9152, -0.6547,  0.9437,  0.8390]], grad_fn=<TanhBackward>)]
class CleanBasicRNN(nn.Module):
    """Wrap nn.RNNCell and unroll it over two timesteps.

    Args:
        batch_size: number of samples per batch (sizes the hidden state).
        n_inputs:   features per timestep.
        n_neurons:  hidden-state width.

    forward(X) expects X of shape (2, batch_size, n_inputs) and returns
    (outputs_per_timestep, final_hidden_state).
    """

    def __init__(self, batch_size, n_inputs, n_neurons):
        super(CleanBasicRNN, self).__init__()
        # Store the cell on self. The original assigned it to a local
        # variable `rnn`, so forward() silently resolved a *global* `rnn`
        # and the cell's parameters were never registered on this module.
        self.rnn = nn.RNNCell(n_inputs, n_neurons)
        # Random initial hidden state, one row per sample in the batch.
        self.hx = torch.randn(batch_size, n_neurons)

    def forward(self, X):
        output = []
        # Unroll over the two timesteps, carrying the hidden state forward.
        for i in range(2):
            self.hx = self.rnn(X[i], self.hx)
            output.append(self.hx)
        return output, self.hx
FIXED_BATCH_SIZE = 4  # our batch size is fixed for now
N_INPUT = 3
N_NEURONS = 5

# Inputs for both timesteps stacked together: shape (2, 4, 3).
X_batch = torch.tensor(
    [[[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]],
     [[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]]],
    dtype=torch.float,
)  # X0 and X1

model = CleanBasicRNN(FIXED_BATCH_SIZE, N_INPUT, N_NEURONS)
output_val, states_val = model(X_batch)

print(output_val)  # all outputs for all timesteps
print(states_val)  # final state, i.e. the values at the last timestep t=1
# Example output:
# [tensor([[ 0.9913,  0.8704, -0.3220, -0.3551, -0.9046],
#          [ 0.9993,  0.9968, -0.9770,  0.2204,  0.7289],
#          [ 0.9988,  0.9999, -0.9978,  0.9851,  0.9933],
#          [-0.9253,  0.4208, -0.7829,  0.9647,  0.9280]], grad_fn=<TanhBackward>),
#  tensor([[ 0.9960,  0.9999, -0.9995,  0.9982,  0.9973],
#          [ 0.6172,  0.4531, -0.2126,  0.3779, -0.6614],
#          [ 0.9821,  0.9966, -0.9870,  0.9853,  0.9457],
#          [ 0.4642,  0.8953, -0.6693,  0.9302,  0.8592]], grad_fn=<TanhBackward>)]
# tensor([[ 0.9960,  0.9999, -0.9995,  0.9982,  0.9973],
#         [ 0.6172,  0.4531, -0.2126,  0.3779, -0.6614],
#         [ 0.9821,  0.9966, -0.9870,  0.9853,  0.9457],
#         [ 0.4642,  0.8953, -0.6693,  0.9302,  0.8592]], grad_fn=<TanhBackward>)
# Note: nn.RNNCell sizes its weight matrices automatically from the
# n_inputs/n_neurons arguments, so CleanBasicRNN does not hard-code any
# layer sizes; only the initial hidden state is created by hand.
import torchvision
import torchvision.transforms as transforms
from ipywidgets import IntProgress
BATCH_SIZE = 64

# List all transformations applied to every MNIST image (just tensor conversion).
transform = transforms.Compose(
    [transforms.ToTensor()])

# Download and load the training dataset.
trainset = torchvision.datasets.MNIST(root="./data", train=True,
                                      download=True, transform=transform)
# shuffle=True: training batches should be drawn in a fresh random order each
# epoch. The original passed shuffle=False, which feeds the dataset in a fixed
# order every epoch and generally hurts SGD-style training.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE,
                                          shuffle=True, num_workers=2)
# Example warning emitted by torchvision's MNIST loader:
# /shared-libs/python3.7/py/lib/python3.7/site-packages/torchvision/datasets/mnist.py:498: UserWarning: The given NumPy array is not writeable, and PyTorch does not support non-writeable tensors. This means you can write to the underlying (supposedly non-writeable) NumPy array using the tensor. You may want to copy the array to protect its data or make it writeable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:180.)
#   return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)
import matplotlib.pyplot as plt
import numpy as np
# Helper to display a (C, H, W) image tensor with matplotlib.
def imshow(img):
    # img = img / 2 + 0.5  # unnormalize (dataset is not normalized here)
    as_numpy = img.numpy()
    # matplotlib expects (H, W, C), so move the channel axis last.
    plt.imshow(np.transpose(as_numpy, (1, 2, 0)))
# Get one batch of training images and display them as a grid.
dataiter = iter(trainloader)
# next(iterator) is the supported way to advance a Python iterator; the
# original's dataiter.next() is not an iterator method in Python 3 and was
# removed from DataLoader's iterator in recent PyTorch releases.
images, labels = next(dataiter)
# Show the images.
imshow(torchvision.utils.make_grid(images))
# MNIST-as-sequence geometry: each 28 x 28 image is read as 28 timesteps
# (rows) of 28 features (pixels per row).
N_STEPS = 28
N_INPUTS = 28
# Width of the recurrent hidden state.
N_NEURONS = 150
# One output per digit class (0-9).
N_OUTPUTS = 10
# Number of full passes over the training set.
N_EPOCHS = 20
class ImageRNN(nn.Module):
    """Classify MNIST digits by reading each image row-by-row with an RNN.

    The forward pass expects X of shape (batch, n_steps, n_inputs) and
    returns raw class scores of shape (batch, n_outputs).
    """

    def __init__(self, batch_size, n_steps, n_inputs, n_neurons, n_outputs):
        super(ImageRNN, self).__init__()
        # Model geometry.
        self.n_neurons = n_neurons    # hidden-state width
        self.batch_size = batch_size  # samples per training iteration
        self.n_steps = n_steps        # timesteps (image rows)
        self.n_inputs = n_inputs      # features per timestep (pixels per row)
        self.n_outputs = n_outputs    # number of classes
        # Recurrent layer over the row sequence.
        self.basic_rnn = nn.RNN(self.n_inputs, self.n_neurons)
        # Fully connected read-out from the final hidden state to class scores.
        self.FC = nn.Linear(self.n_neurons, n_outputs)

    def init_hidden(self,):
        # Fresh all-zero hidden state: (num_layers, batch_size, n_neurons).
        return (torch.zeros(1, self.batch_size, self.n_neurons))

    def forward(self, X):
        # Reorder to (n_steps, batch_size, n_inputs), which nn.RNN expects.
        X = X.permute(1, 0, 2)
        # Track the actual batch size (the final batch may be smaller).
        self.batch_size = X.size(1)
        # Zeroed hidden state for this batch.
        self.hidden = self.init_hidden()
        # rnn_out holds the hidden state at every timestep;
        # self.hidden is the final state (1, batch_size, n_neurons).
        rnn_out, self.hidden = self.basic_rnn(X, self.hidden)
        # Class scores computed from the final hidden state only.
        scores = self.FC(self.hidden)
        return scores.view(-1, self.n_outputs)  # batch_size X n_outputs
# Sanity-check the untrained model on one raw batch.
dataiter = iter(trainloader)
# Use next(iterator); dataiter.next() is not a Python 3 iterator method and
# was removed from DataLoader's iterator in recent PyTorch releases.
images, labels = next(dataiter)
model = ImageRNN(BATCH_SIZE, N_STEPS, N_INPUTS, N_NEURONS, N_OUTPUTS)
# Flatten each image to a (28, 28) row sequence before the forward pass.
logits = model(images.view(-1, 28, 28))
print(logits[0:10])
# Example output:
# tensor([[ 2.9169e-02, -1.1149e-02,  8.8625e-03, -7.6763e-03, -5.2061e-02,
#          -8.5447e-02,  3.0201e-02, -6.3254e-02, -2.7549e-02, -1.2261e-02],
#         [ 1.6453e-02, -9.8487e-03,  1.7722e-02,  4.0623e-03, -5.7787e-03,
#          -7.8731e-02,  2.4465e-02, -4.6391e-02, -3.2010e-02, -1.2370e-02],
#         [ 2.2632e-02, -1.0352e-02,  1.3139e-02,  1.0243e-02, -3.8938e-03,
#          -8.9555e-02,  4.3167e-02, -3.3890e-02, -2.9852e-02, -9.8020e-03],
#         [ 2.3405e-02, -1.0217e-02,  1.2529e-02,  3.7194e-03, -3.4595e-03,
#          -9.3282e-02,  4.0217e-02, -4.3580e-02, -3.2037e-02, -1.2479e-02],
#         [ 5.4785e-03, -2.6665e-02,  2.8148e-02, -1.3603e-03,  7.7648e-03,
#          -7.8847e-02,  1.7233e-02, -5.0675e-02, -7.0606e-02,  5.7491e-03],
#         [ 1.3987e-02, -9.3011e-03,  1.9609e-02,  7.6693e-03,  1.2730e-03,
#          -8.0291e-02,  3.1268e-02, -4.6846e-02, -3.8248e-02, -3.3365e-03],
#         [ 1.3639e-02, -7.7102e-03,  1.9107e-02,  7.4628e-03, -1.0117e-02,
#          -7.9374e-02,  3.5674e-02, -3.8784e-02, -3.9599e-02,  1.5560e-03],
#         [ 3.0162e-02, -2.1072e-02,  1.6318e-02, -2.1225e-03, -1.7863e-02,
#          -8.2486e-02,  3.9402e-02, -4.7962e-02, -3.3162e-02, -7.7961e-03],
#         [ 1.3820e-02, -1.6137e-03,  1.4968e-02,  1.1088e-02,  3.4086e-07,
#          -7.9248e-02,  3.1823e-02, -4.1902e-02, -2.5628e-02,  5.9558e-03],
#         [ 1.1564e-02, -6.5383e-03,  1.8769e-02,  2.8186e-03, -2.0096e-03,
#          -7.6285e-02,  3.1500e-02, -4.4276e-02, -3.0441e-02, -6.4211e-03]],
#        grad_fn=<SliceBackward>)
import torch.optim as optim
# Device: use the first GPU when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Model instance.
# NOTE(review): neither the model nor the batches are ever moved to `device`
# (no .to(device) calls below), so training runs on the CPU regardless --
# confirm whether GPU use was intended.
model = ImageRNN(BATCH_SIZE, N_STEPS, N_INPUTS, N_NEURONS, N_OUTPUTS)
# Cross-entropy loss over the class scores (expects raw, unsoftmaxed logits).
criterion = nn.CrossEntropyLoss()
# Adam optimizer over all parameters registered on the model.
optimizer = optim.Adam(model.parameters(), lr=0.001)
def get_accuracy(logit, target, batch_size):
    """Return the accuracy (percent) of `logit` predictions against `target`.

    The predicted class is the argmax over dim 1 of `logit`. The denominator
    is taken from `target` itself rather than the nominal `batch_size`: the
    original divided by `batch_size`, which under-counted accuracy on a
    smaller final batch. `batch_size` is kept for interface compatibility.
    """
    corrects = (torch.max(logit, 1)[1].view(target.size()).data == target.data).sum()
    accuracy = 100.0 * corrects / target.size(0)
    return accuracy.item()
for epoch in range(N_EPOCHS):  # loop over the dataset multiple times
    train_running_loss = 0.0
    train_acc = 0.0
    model.train()

    # Training round
    for i, data in enumerate(trainloader):
        # zero the parameter gradients
        optimizer.zero_grad()
        # reset hidden states (forward() also rebuilds them, but be explicit)
        model.hidden = model.init_hidden()
        # get the inputs and flatten each image into a 28-step row sequence
        inputs, labels = data
        inputs = inputs.view(-1, 28, 28)

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_running_loss += loss.detach().item()
        train_acc += get_accuracy(outputs, labels, BATCH_SIZE)

    model.eval()
    # enumerate is 0-based, so i + 1 batches were processed; the original
    # divided by i, slightly inflating both per-epoch averages.
    print('Epoch: %d | Loss: %.4f | Train Accuracy: %.2f'
          % (epoch, train_running_loss / (i + 1), train_acc / (i + 1)))
# Example training log:
# Epoch: 0 | Loss: 0.8452 | Train Accuracy: 71.56
# Epoch: 1 | Loss: 0.3590 | Train Accuracy: 89.47
# Epoch: 2 | Loss: 0.2619 | Train Accuracy: 92.59
# Epoch: 3 | Loss: 0.2192 | Train Accuracy: 93.83
# Epoch: 4 | Loss: 0.1998 | Train Accuracy: 94.37
# Epoch: 5 | Loss: 0.1771 | Train Accuracy: 95.07
# Epoch: 6 | Loss: 0.1558 | Train Accuracy: 95.62
# Epoch: 7 | Loss: 0.1445 | Train Accuracy: 95.96
# Epoch: 8 | Loss: 0.1433 | Train Accuracy: 95.94
# Epoch: 9 | Loss: 0.1286 | Train Accuracy: 96.39
# Epoch: 10 | Loss: 0.1230 | Train Accuracy: 96.55
# Epoch: 11 | Loss: 0.1193 | Train Accuracy: 96.70
# Epoch: 12 | Loss: 0.1202 | Train Accuracy: 96.65
# Epoch: 13 | Loss: 0.1196 | Train Accuracy: 96.74
# Epoch: 14 | Loss: 0.1107 | Train Accuracy: 96.95
# Epoch: 15 | Loss: 0.1006 | Train Accuracy: 97.22
# Epoch: 16 | Loss: 0.1138 | Train Accuracy: 96.77
# Epoch: 17 | Loss: 0.1095 | Train Accuracy: 97.01
# Epoch: 18 | Loss: 0.0977 | Train Accuracy: 97.32
# Epoch: 19 | Loss: 0.0945 | Train Accuracy: 97.31
# download and load test dataset
testset = torchvision.datasets.MNIST(root="./data", train=False,
                                     download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE,
                                         shuffle=False, num_workers=2)

test_running_loss = 0
test_acc = 0

# Evaluation round. The original called loss.backward() on every test batch,
# which needlessly accumulated gradients into the trained model; evaluation
# needs no gradients at all, so run the whole loop under torch.no_grad().
# (The original also peeked one batch with the removed dataiter.next() API.)
model.eval()
with torch.no_grad():
    for i, data in enumerate(testloader):
        # get the inputs and flatten each image into a 28-step row sequence
        inputs, labels = data
        inputs = inputs.view(-1, 28, 28)
        # forward pass only
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        test_running_loss += loss.item()
        test_acc += get_accuracy(outputs, labels, BATCH_SIZE)

# enumerate is 0-based, so i + 1 batches were processed.
print("Test accuracy : " + str(test_acc / (i + 1)))
print("Test loss : " + str(test_running_loss / (i + 1)))
# Example output:
# Test accuracy : 97.28565705128206
# Test loss : 0.10994659437580058