import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.distributions as D
from torch.distributions.multivariate_normal import MultivariateNormal
import matplotlib.pyplot as plt
import numpy as np
class MyNetwork(nn.Module):
    # A one-hidden-layer network: initialize parameters and define the forward pass
    def __init__(self, num_input_nodes, num_hidden_nodes, num_output_nodes, activation_function):
        super().__init__()
        # Small random initialization keeps the starting outputs near zero
        self.W = nn.Parameter(0.1 * torch.randn(num_input_nodes, num_hidden_nodes))
        self.b = nn.Parameter(0.1 * torch.randn(num_hidden_nodes))
        self.c = nn.Parameter(0.1 * torch.randn(num_hidden_nodes, num_output_nodes))
        self.activation_function = activation_function

    def forward(self, input):
        # hidden = activation(x W + b); output = hidden c (no output bias)
        hidden = self.activation_function(input @ self.W + self.b)
        output = hidden @ self.c
        return output
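# Quick sanity check (illustrative, not part of the original exercise):
# a forward pass on a dummy batch should yield shape (batch, num_output_nodes).
_check_net = MyNetwork(num_input_nodes=2, num_hidden_nodes=4, num_output_nodes=1, activation_function=F.relu)
assert _check_net(torch.randn(8, 2)).shape == (8, 1)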
# Visualizing our target function
def target_function(x, y):
    return np.sin(x) + np.sin(y)
x = np.linspace(-np.pi,np.pi,100)
y = np.linspace(-np.pi,np.pi,100)
X, Y = np.meshgrid(x, y)
Z = target_function(X,Y)
fig, ax = plt.subplots()
ax.contourf(X,Y, Z, levels = np.linspace(-2, 2, 10))
fig.show()
# Set size of dataset and relative size of train set & test set
num_examples = 5000
train_proportion = 0.8
# Sample uniformly from our area of interest
x = np.random.uniform(-np.pi, np.pi, [num_examples, 2])
y = target_function(x[:,0], x[:,1])
# Convert to tensors
x = torch.tensor(x).float()
y = torch.tensor(y).float()
# Make the train/test split
num_train = int(num_examples * train_proportion)
num_test = num_examples - num_train
random_indices = torch.randperm(num_examples)
train_index = random_indices[0:num_train]
test_index = random_indices[num_train:]
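# Optional sanity check (added for illustration, not in the original):
# the shuffled split should cover every example exactly once with no overlap.
assert len(set(train_index.tolist()) & set(test_index.tolist())) == 0
assert len(train_index) + len(test_index) == num_examples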
# Define our network
m = MyNetwork(num_input_nodes=2, num_hidden_nodes=50, num_output_nodes=1, activation_function=F.relu)
# Hyperparameters
batch_size = 1
num_epochs = 10
learning_rate = 0.01
num_train_batch = (num_train + batch_size - 1) // batch_size
num_test_batch = (num_test + batch_size - 1) // batch_size
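# Worked example of the ceiling division above (illustrative numbers):
# with num_train = 4000 and batch_size = 64, (4000 + 64 - 1) // 64 = 63,
# i.e. 62 full batches plus one final partial batch of 32 examples.
assert (4000 + 64 - 1) // 64 == 63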
all_train_loss = []
all_test_loss = []
for epoch in range(num_epochs):
    print('epoch ' + str(epoch))
    # Shuffle the order of the training examples each epoch
    train_order = train_index[torch.randperm(num_train)]
    epoch_train_loss = 0
    epoch_test_loss = 0
    for batch in range(num_train_batch):
        # Select data for the mini-batch
        indices = train_order[batch * batch_size:(batch + 1) * batch_size]
        x_batch = x[indices]
        y_batch = y[indices]
        # Predict values, compute the mean squared error, and backpropagate
        y_pred = m(x_batch)
        loss = ((y_pred.squeeze(-1) - y_batch) ** 2).mean()
        m.zero_grad()
        loss.backward()
        # Optimize with SGD
        for p in m.parameters():
            with torch.no_grad():
                p.copy_(p - learning_rate * p.grad)
        # Accumulate the loss
        epoch_train_loss += loss.item()
    all_train_loss.append(epoch_train_loss / num_train_batch)
    for batch in range(num_test_batch):
        # Select data for the mini-batch
        test_indices = test_index[batch * batch_size:(batch + 1) * batch_size]
        x_batch = x[test_indices]
        y_batch = y[test_indices]
        with torch.no_grad():
            y_pred = m(x_batch)
            loss = ((y_pred.squeeze(-1) - y_batch) ** 2).mean()
        # Accumulate the loss
        epoch_test_loss += loss.item()
    all_test_loss.append(epoch_test_loss / num_test_batch)
# Plot train and test loss
plt.plot(np.arange(len(all_train_loss)), all_train_loss, label='train loss')
plt.plot(np.arange(len(all_test_loss)), all_test_loss, label='test loss')
plt.xlabel('epoch')
plt.ylabel('mean squared error')
plt.legend()
plt.show()
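# For reference, the same update can be written with PyTorch's built-in
# optimizer and loss instead of the manual parameter loop above. This is a
# sketch of the alternative, not the method used in this notebook:
#
#   optimizer = torch.optim.SGD(m.parameters(), lr=learning_rate)
#   criterion = nn.MSELoss()
#   ...then, inside the batch loop:
#   optimizer.zero_grad()
#   loss = criterion(m(x_batch).squeeze(-1), y_batch)
#   loss.backward()
#   optimizer.step()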
# Generate a 2D contour plot of your best solution here:
x = torch.linspace(-np.pi,np.pi,100)
y = torch.linspace(-np.pi,np.pi,100)
X, Y = torch.meshgrid(x, y, indexing='xy')
input = torch.stack((torch.flatten(X), torch.flatten(Y))).T
with torch.no_grad():
    predZ = m(input)
# Reshape the network's predictions back onto the grid (do not overwrite
# with the target function, since we want to plot the learned solution)
Z = predZ.numpy().reshape(100, 100)
fig, ax = plt.subplots()
ax.contour(X.numpy(), Y.numpy(), Z, levels=np.linspace(-2, 2, 100))
fig.show()
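# Optional final check (added, not in the original): with batch_size = 1 the
# full-test-set MSE computed in one pass should equal the last entry of
# all_test_loss.
with torch.no_grad():
    final_test_mse = ((m(x[test_index]).squeeze(-1) - y[test_index]) ** 2).mean().item()
print('final test MSE:', final_test_mse)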