import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.distributions as D
from torch.distributions.multivariate_normal import MultivariateNormal
import matplotlib.pyplot as plt
import numpy as np
class MyNetwork(nn.Module):
    def __init__(self, num_input_nodes, num_hidden_nodes, num_output_nodes, activation_function):
        super().__init__()
        # Initialize parameters and create a forward function here:
        self.W = nn.Parameter(0.1 * torch.randn(num_input_nodes, num_hidden_nodes))
        self.b = nn.Parameter(0.1 * torch.randn(num_hidden_nodes))
        self.c = nn.Parameter(0.1 * torch.randn(num_hidden_nodes, num_output_nodes))
        self.activation_function = activation_function

    def forward(self, input):
        hidden = self.activation_function(input @ self.W + self.b)
        output = hidden @ self.c
        return output
m = MyNetwork(2, 10, 1, F.relu)
input = torch.randn((5, 2))
m(input)
#list(m.parameters())
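# For reference, the same two-layer architecture can be expressed with nn.Linear
# modules instead of raw nn.Parameter tensors. This is only an illustrative sketch
# (MyLinearNetwork is not part of the exercise and is not used below); note that
# nn.Linear uses its own default initialization rather than 0.1 * randn.
class MyLinearNetwork(nn.Module):
    def __init__(self, num_input_nodes, num_hidden_nodes, num_output_nodes, activation_function):
        super().__init__()
        self.hidden = nn.Linear(num_input_nodes, num_hidden_nodes)               # plays the role of W and b
        self.output = nn.Linear(num_hidden_nodes, num_output_nodes, bias=False)  # plays the role of c
        self.activation_function = activation_function

    def forward(self, input):
        return self.output(self.activation_function(self.hidden(input)))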
# Visualizing our target function
def target_function(x, y):
    return np.sin(x) + np.sin(y)

x = np.linspace(-np.pi, np.pi, 100)
y = np.linspace(-np.pi, np.pi, 100)
X, Y = np.meshgrid(x, y)
Z = target_function(X, Y)
fig, ax = plt.subplots()
ax.contourf(X, Y, Z, levels=np.linspace(-2, 2, 10))
fig.show()
# Set size of dataset and relative size of train set & test set
num_examples = 5000
train_proportion = 0.8
# Sample uniformly from our area of interest
x = np.random.uniform(-np.pi, np.pi, [num_examples, 2])
y = target_function(x[:,0], x[:,1])
# Convert to tensors
x = torch.tensor(x).float()
y = torch.tensor(y).float()
# Make the train/test split
num_train = int(num_examples * train_proportion)  # number of training examples
num_test = num_examples - num_train
random_indices = torch.randperm(num_examples) #want training and test to be representative of dataset
train_index = random_indices[0:num_train]
test_index = random_indices[num_train:]
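# An alternative to the manual index bookkeeping used below is torch.utils.data.
# A minimal sketch, not used by the training loops in this script (train_ds and
# train_loader are illustrative names only); iterating over train_loader yields
# shuffled (x_batch, y_batch) minibatches.
from torch.utils.data import TensorDataset, DataLoader
train_ds = TensorDataset(x[train_index], y[train_index])
train_loader = DataLoader(train_ds, batch_size=16, shuffle=True)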
# Implement your learning algorithm here (plain minibatch gradient descent below;
# a torch.optim.Adam variant is sketched after the sigmoid run).

# Sigmoid Activation
m = MyNetwork(num_input_nodes=2, num_hidden_nodes=50, num_output_nodes=1, activation_function=torch.sigmoid)
batch_size = 1        # hyperparameter worth experimenting with (e.g. 16 datapoints per batch)
num_epochs = 100      # number of passes through the training set
learning_rate = 1e-2
num_train_batch = (num_train + batch_size - 1) // batch_size
num_test_batch = (num_test + batch_size - 1) // batch_size
all_train_loss = []  # store per-epoch training loss
all_test_loss = []   # store per-epoch test loss
for epoch in range(num_epochs):  # reshuffle the training points every epoch
    # A random 80% of the data is the training set; each epoch we take it in minibatch-sized chunks.
    train_order = train_index[torch.randperm(num_train)]
    epoch_train_loss = 0  # accumulated over the batches of this epoch
    for batch in range(num_train_batch):
        # Train the network on one minibatch
        indices = train_order[batch * batch_size:(batch + 1) * batch_size]
        x_batch = x[indices]  # inputs and targets to train on
        y_batch = y[indices]
        y_pred = m(x_batch).squeeze(-1)  # squeeze so the shape matches y_batch and broadcasting stays elementwise
        loss = ((y_pred - y_batch) ** 2).mean()  # mean squared error over the batch
        m.zero_grad()  # gradients accumulate, so zero them before each backward pass
        loss.backward()
        epoch_train_loss += loss.item()  # .item() gives a plain Python float
        for p in m.parameters():  # manual gradient-descent update on every parameter
            with torch.no_grad():
                p.copy_(p - learning_rate * p.grad)  # update the leaf tensor in place
    # Compute loss on the test data set
    epoch_test_loss = 0
    for batch in range(num_test_batch):
        test_indices = test_index[batch * batch_size:(batch + 1) * batch_size]
        x_batch = x[test_indices]
        y_batch = y[test_indices]
        with torch.no_grad():
            y_pred = m(x_batch).squeeze(-1)
            loss = ((y_pred - y_batch) ** 2).mean()
            epoch_test_loss += loss.item()
    # Record and plot train and test loss
    all_train_loss.append(epoch_train_loss / num_train_batch)
    all_test_loss.append(epoch_test_loss / num_test_batch)
    if epoch % 25 == 0:
        plt.plot(all_train_loss, label='Training loss (Sigmoid)')
        plt.plot(all_test_loss, label='Test loss (Sigmoid)')
        plt.title("Sigmoid Activation Epoch Loss")
        plt.ylabel('Epoch Loss')
        plt.xlabel('Epoch')
        plt.legend()
        plt.show()
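# As noted at the top of this section, the manual update loop could be replaced by a
# torch.optim optimizer. A minimal sketch of the Adam variant (not run here; Adam's
# adaptive step sizes typically pair with a smaller learning rate such as 1e-3):
#   optimizer = torch.optim.Adam(m.parameters(), lr=1e-3)
#   # then, inside the batch loop, instead of zero_grad / backward / manual copy_:
#   optimizer.zero_grad()
#   loss.backward()
#   optimizer.step()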
# ReLU activation
m = MyNetwork(num_input_nodes=2, num_hidden_nodes=50, num_output_nodes=1, activation_function=F.relu)
batch_size = 1        # hyperparameter worth experimenting with (e.g. 16 datapoints per batch)
num_epochs = 100      # number of passes through the training set
learning_rate = 1e-2
num_train_batch = (num_train + batch_size - 1) // batch_size
num_test_batch = (num_test + batch_size - 1) // batch_size
all_train_loss = []  # store per-epoch training loss
all_test_loss = []   # store per-epoch test loss
for epoch in range(num_epochs):  # reshuffle the training points every epoch
    # A random 80% of the data is the training set; each epoch we take it in minibatch-sized chunks.
    train_order = train_index[torch.randperm(num_train)]
    epoch_train_loss = 0  # accumulated over the batches of this epoch
    for batch in range(num_train_batch):
        # Train the network on one minibatch
        indices = train_order[batch * batch_size:(batch + 1) * batch_size]
        x_batch = x[indices]  # inputs and targets to train on
        y_batch = y[indices]
        y_pred = m(x_batch).squeeze(-1)  # squeeze so the shape matches y_batch and broadcasting stays elementwise
        loss = ((y_pred - y_batch) ** 2).mean()  # mean squared error over the batch
        m.zero_grad()  # gradients accumulate, so zero them before each backward pass
        loss.backward()
        epoch_train_loss += loss.item()  # .item() gives a plain Python float
        for p in m.parameters():  # manual gradient-descent update on every parameter
            with torch.no_grad():
                p.copy_(p - learning_rate * p.grad)  # update the leaf tensor in place
    # Compute loss on the test data set
    epoch_test_loss = 0
    for batch in range(num_test_batch):
        test_indices = test_index[batch * batch_size:(batch + 1) * batch_size]
        x_batch = x[test_indices]
        y_batch = y[test_indices]
        with torch.no_grad():
            y_pred = m(x_batch).squeeze(-1)
            loss = ((y_pred - y_batch) ** 2).mean()
            epoch_test_loss += loss.item()
    # Record and plot train and test loss
    all_train_loss.append(epoch_train_loss / num_train_batch)
    all_test_loss.append(epoch_test_loss / num_test_batch)
    if epoch % 25 == 0:
        plt.plot(all_train_loss, label='Training loss (ReLU)')
        plt.plot(all_test_loss, label='Test loss (ReLU)')
        plt.title("ReLU Activation Epoch Loss")
        plt.ylabel('Epoch Loss')
        plt.xlabel('Epoch')
        plt.legend()
        plt.show()
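# To compare the two activations on a single plot, the loss histories could be kept
# in separately named lists (e.g. sigmoid_train_loss / relu_train_loss, which are
# hypothetical names not used above) and plotted together after both runs:
#   plt.plot(sigmoid_train_loss, label='Training loss (Sigmoid)')
#   plt.plot(relu_train_loss, label='Training loss (ReLU)')
#   plt.legend()
#   plt.show()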
# Generate a 2D contour plot of your best solution here:
x = torch.linspace(-np.pi, np.pi, 100)
y = torch.linspace(-np.pi, np.pi, 100)
X, Y = torch.meshgrid(x, y, indexing='ij')
inputs = torch.stack((torch.flatten(X), torch.flatten(Y))).T
with torch.no_grad():
    predZ = m(inputs)
Z = np.reshape(predZ.numpy(), [100, 100])
fig, ax = plt.subplots()
ax.contourf(X.numpy(), Y.numpy(), Z, levels=np.linspace(-2, 2, 10))
fig.show()
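# Optionally, check the fit against the target by plotting the absolute error surface;
# a short sketch reusing X, Y, Z from above and the target_function defined earlier
# (fig2/ax2 are illustrative names, not part of the original exercise):
target_Z = target_function(X.numpy(), Y.numpy())
fig2, ax2 = plt.subplots()
ax2.contourf(X.numpy(), Y.numpy(), np.abs(Z - target_Z))
ax2.set_title("Absolute error of the learned function")
fig2.show()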