import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.distributions as D
from torch.distributions.multivariate_normal import MultivariateNormal
import matplotlib.pyplot as plt
import numpy as np
class MyNetwork(nn.Module):
    def __init__(self, num_input_nodes, num_hidden_nodes, num_output_nodes, activation_function):
        super().__init__()
        # Initialize parameters and create a forward function here:
        self.W = nn.Parameter(0.1 * torch.randn(num_input_nodes, num_hidden_nodes))
        self.b = nn.Parameter(0.1 * torch.randn(num_hidden_nodes))
        self.c = nn.Parameter(0.1 * torch.randn(num_hidden_nodes, num_output_nodes))
        self.activation_function = activation_function

    def forward(self, input):
        hidden = self.activation_function(input @ self.W + self.b)
        output = hidden @ self.c
        return output
m = MyNetwork(2, 10, 1, F.relu)
input = torch.randn((5, 2))
m(input)
#list(m.parameters())
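# For reference, the same two-layer architecture can be expressed with nn.Linear
# modules instead of raw nn.Parameter tensors. This is only an illustrative sketch
# (MyLinearNetwork is not part of the exercise and is not used below); note that
# nn.Linear uses its own default initialization rather than 0.1 * randn.
class MyLinearNetwork(nn.Module):
    def __init__(self, num_input_nodes, num_hidden_nodes, num_output_nodes, activation_function):
        super().__init__()
        self.hidden = nn.Linear(num_input_nodes, num_hidden_nodes)               # plays the role of W and b
        self.output = nn.Linear(num_hidden_nodes, num_output_nodes, bias=False)  # plays the role of c
        self.activation_function = activation_function

    def forward(self, input):
        return self.output(self.activation_function(self.hidden(input)))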
# Visualizing our target function
def target_function(x, y):
    return np.sin(x) + np.sin(y)

x = np.linspace(-np.pi, np.pi, 100)
y = np.linspace(-np.pi, np.pi, 100)
X, Y = np.meshgrid(x, y)
Z = target_function(X, Y)
fig, ax = plt.subplots()
ax.contourf(X, Y, Z, levels=np.linspace(-2, 2, 10))
fig.show()
# Set size of dataset and relative size of train set & test set
num_examples = 5000
train_proportion = 0.8
# Sample uniformly from our area of interest
x = np.random.uniform(-np.pi, np.pi, [num_examples, 2])
y = target_function(x[:,0], x[:,1])
# Convert to tensors
x = torch.tensor(x).float()
y = torch.tensor(y).float()
# Make the train/test split
num_train = int(num_examples * train_proportion)  # number of training examples
num_test = num_examples - num_train
random_indices = torch.randperm(num_examples) #want training and test to be representative of dataset
train_index = random_indices[0:num_train]
test_index = random_indices[num_train:]
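# An alternative to the manual index bookkeeping used below is torch.utils.data.
# A minimal sketch, not used by the training loops in this script (train_ds and
# train_loader are illustrative names only); iterating over train_loader yields
# shuffled (x_batch, y_batch) minibatches.
from torch.utils.data import TensorDataset, DataLoader
train_ds = TensorDataset(x[train_index], y[train_index])
train_loader = DataLoader(train_ds, batch_size=16, shuffle=True)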
# Implement your learning algorithm here (plain minibatch gradient descent below;
# a torch.optim.Adam variant is sketched after the sigmoid run).

# Sigmoid Activation
m = MyNetwork(num_input_nodes=2, num_hidden_nodes=50, num_output_nodes=1, activation_function=torch.sigmoid)
batch_size = 1        # hyperparameter worth experimenting with (e.g. 16 datapoints per batch)
num_epochs = 100      # number of passes through the training set
learning_rate = 1e-2
num_train_batch = (num_train + batch_size - 1) // batch_size
num_test_batch = (num_test + batch_size - 1) // batch_size
all_train_loss = []  # store per-epoch training loss
all_test_loss = []   # store per-epoch test loss
for epoch in range(num_epochs):  # reshuffle the training points every epoch
    # A random 80% of the data is the training set; each epoch we take it in minibatch-sized chunks.
    train_order = train_index[torch.randperm(num_train)]
    epoch_train_loss = 0  # accumulated over the batches of this epoch
    for batch in range(num_train_batch):
        # Train the network on one minibatch
        indices = train_order[batch * batch_size:(batch + 1) * batch_size]
        x_batch = x[indices]  # inputs and targets to train on
        y_batch = y[indices]
        y_pred = m(x_batch).squeeze(-1)  # squeeze so the shape matches y_batch and broadcasting stays elementwise
        loss = ((y_pred - y_batch) ** 2).mean()  # mean squared error over the batch
        m.zero_grad()  # gradients accumulate, so zero them before each backward pass
        loss.backward()
        epoch_train_loss += loss.item()  # .item() gives a plain Python float
        for p in m.parameters():  # manual gradient-descent update on every parameter
            with torch.no_grad():
                p.copy_(p - learning_rate * p.grad)  # update the leaf tensor in place
    # Compute loss on the test data set
    epoch_test_loss = 0
    for batch in range(num_test_batch):
        test_indices = test_index[batch * batch_size:(batch + 1) * batch_size]
        x_batch = x[test_indices]
        y_batch = y[test_indices]
        with torch.no_grad():
            y_pred = m(x_batch).squeeze(-1)
            loss = ((y_pred - y_batch) ** 2).mean()
            epoch_test_loss += loss.item()
    # Record and plot train and test loss
    all_train_loss.append(epoch_train_loss / num_train_batch)
    all_test_loss.append(epoch_test_loss / num_test_batch)
    if epoch % 25 == 0:
        plt.plot(all_train_loss, label='Training loss (Sigmoid)')
        plt.plot(all_test_loss, label='Test loss (Sigmoid)')
        plt.title("Sigmoid Activation Epoch Loss")
        plt.ylabel('Epoch Loss')
        plt.xlabel('Epoch')
        plt.legend()
        plt.show()
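# As noted at the top of this section, the manual update loop could be replaced by a
# torch.optim optimizer. A minimal sketch of the Adam variant (not run here; Adam's
# adaptive step sizes typically pair with a smaller learning rate such as 1e-3):
#   optimizer = torch.optim.Adam(m.parameters(), lr=1e-3)
#   # then, inside the batch loop, instead of zero_grad / backward / manual copy_:
#   optimizer.zero_grad()
#   loss.backward()
#   optimizer.step()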
# ReLU activation
m = MyNetwork(num_input_nodes=2, num_hidden_nodes=50, num_output_nodes=1, activation_function=F.relu)
batch_size = 1        # hyperparameter worth experimenting with (e.g. 16 datapoints per batch)
num_epochs = 100      # number of passes through the training set
learning_rate = 1e-2
num_train_batch = (num_train + batch_size - 1) // batch_size
num_test_batch = (num_test + batch_size - 1) // batch_size
all_train_loss = []  # store per-epoch training loss
all_test_loss = []   # store per-epoch test loss
for epoch in range(num_epochs):  # reshuffle the training points every epoch
    # A random 80% of the data is the training set; each epoch we take it in minibatch-sized chunks.
    train_order = train_index[torch.randperm(num_train)]
    epoch_train_loss = 0  # accumulated over the batches of this epoch
    for batch in range(num_train_batch):
        # Train the network on one minibatch
        indices = train_order[batch * batch_size:(batch + 1) * batch_size]
        x_batch = x[indices]  # inputs and targets to train on
        y_batch = y[indices]
        y_pred = m(x_batch).squeeze(-1)  # squeeze so the shape matches y_batch and broadcasting stays elementwise
        loss = ((y_pred - y_batch) ** 2).mean()  # mean squared error over the batch
        m.zero_grad()  # gradients accumulate, so zero them before each backward pass
        loss.backward()
        epoch_train_loss += loss.item()  # .item() gives a plain Python float
        for p in m.parameters():  # manual gradient-descent update on every parameter
            with torch.no_grad():
                p.copy_(p - learning_rate * p.grad)  # update the leaf tensor in place
    # Compute loss on the test data set
    epoch_test_loss = 0
    for batch in range(num_test_batch):
        test_indices = test_index[batch * batch_size:(batch + 1) * batch_size]
        x_batch = x[test_indices]
        y_batch = y[test_indices]
        with torch.no_grad():
            y_pred = m(x_batch).squeeze(-1)
            loss = ((y_pred - y_batch) ** 2).mean()
            epoch_test_loss += loss.item()
    # Record and plot train and test loss
    all_train_loss.append(epoch_train_loss / num_train_batch)
    all_test_loss.append(epoch_test_loss / num_test_batch)
    if epoch % 25 == 0:
        plt.plot(all_train_loss, label='Training loss (ReLU)')
        plt.plot(all_test_loss, label='Test loss (ReLU)')
        plt.title("ReLU Activation Epoch Loss")
        plt.ylabel('Epoch Loss')
        plt.xlabel('Epoch')
        plt.legend()
        plt.show()
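# To compare the two activations on a single plot, the loss histories could be kept
# in separately named lists (e.g. sigmoid_train_loss / relu_train_loss, which are
# hypothetical names not used above) and plotted together after both runs:
#   plt.plot(sigmoid_train_loss, label='Training loss (Sigmoid)')
#   plt.plot(relu_train_loss, label='Training loss (ReLU)')
#   plt.legend()
#   plt.show()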
# Generate a 2D contour plot of your best solution here:
x = torch.linspace(-np.pi, np.pi, 100)
y = torch.linspace(-np.pi, np.pi, 100)
X, Y = torch.meshgrid(x, y, indexing='ij')
inputs = torch.stack((torch.flatten(X), torch.flatten(Y))).T
with torch.no_grad():
    predZ = m(inputs)
Z = np.reshape(predZ.numpy(), [100, 100])
fig, ax = plt.subplots()
ax.contourf(X.numpy(), Y.numpy(), Z, levels=np.linspace(-2, 2, 10))
fig.show()
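# Optionally, check the fit against the target by plotting the absolute error surface;
# a short sketch reusing X, Y, Z from above and the target_function defined earlier
# (fig2/ax2 are illustrative names, not part of the original exercise):
target_Z = target_function(X.numpy(), Y.numpy())
fig2, ax2 = plt.subplots()
ax2.contourf(X.numpy(), Y.numpy(), np.abs(Z - target_Z))
ax2.set_title("Absolute error of the learned function")
fig2.show()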