import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.distributions as D
from torch.distributions.multivariate_normal import MultivariateNormal
import matplotlib.pyplot as plt
import numpy as np
from torch.nn import Parameter as Params
class MyNetwork(nn.Module):
    def __init__(self, num_input_nodes, num_hidden_nodes, num_output_nodes, activation_function):
        super().__init__()
        # Initialize parameters here: input-to-hidden weights W, hidden bias b,
        # and hidden-to-output weights c (all initialized to ones)
        self.W = Params(torch.ones(num_input_nodes, num_hidden_nodes), requires_grad=True)
        self.b = Params(torch.ones(num_hidden_nodes), requires_grad=True)
        self.c = Params(torch.ones(num_hidden_nodes, num_output_nodes), requires_grad=True)
        self.activation_function = activation_function

    def forward(self, x):
        # Hidden layer: affine transform followed by the chosen activation
        hidden_activation = self.activation_function(x @ self.W + self.b)
        # Linear readout layer
        return hidden_activation @ self.c
# Quick smoke test: a network with 2 inputs, 2 hidden nodes, and 1 output on a random batch
NN = MyNetwork(2, 2, 1, F.relu)
input_foo = torch.randn((5, 2))
NN(input_foo)
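# (Illustrative check, not part of the original code:) the forward pass should
# map a batch of 5 two-dimensional inputs to a (5, 1) output.
print(NN(input_foo).shape)  # expected: torch.Size([5, 1])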
# Visualizing our target function
def target_function(x, y):
    return np.sin(x) + np.sin(y)
x = np.linspace(-np.pi, np.pi, 100)
y = np.linspace(-np.pi, np.pi, 100)
X, Y = np.meshgrid(x, y)
Z = target_function(X, Y)
fig, ax = plt.subplots()
ax.contourf(X, Y, Z, levels=np.linspace(-2, 2, 10))
fig.show()
# Set size of dataset and relative size of train set & test set
num_examples = 5000
train_proportion = 0.8
# Sample uniformly from our area of interest
x = np.random.uniform(-np.pi, np.pi, [num_examples, 2])
y = target_function(x[:,0], x[:,1])
# Convert to tensors
x = torch.tensor(x)
y = torch.tensor(y)
# Make the train/test split
num_train = int(num_examples * train_proportion)
num_test = num_examples - num_train
random_indices = torch.randperm(num_examples)
train_index = random_indices[0:num_train]
test_index = random_indices[num_train:]
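# (Illustrative check, not in the original:) the two index sets should together
# cover the whole dataset.
assert len(train_index) + len(test_index) == num_examples
print(len(train_index), "training examples,", len(test_index), "test examples")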
# Implement your learning algorithm here:
NN = MyNetwork(2, 1, 1, F.relu)
lr = 0.01
n_batch = len(train_index)  # number of batches per pass (so each batch holds a single example)
n_epochs = 3
all_loss = []
for epoch in range(n_epochs):
    print("epoch: ", epoch)
    # shuffle the training indices themselves (not just their positions) each epoch
    train_index_shuffled = train_index[torch.randperm(len(train_index))]
    # number of entries per batch
    batch_count = int(len(train_index_shuffled) / n_batch)
    for step in range(10):  # several passes over the shuffled training data per epoch
        index_before = 0
        for batch in range(n_batch):
            # get the current training indices
            train_index_batch = train_index_shuffled[index_before:index_before + batch_count]
            # training data
            x_train = x[train_index_batch]
            y_train = y[train_index_batch]
            preds = NN(x_train.float())
            # mean squared error between predictions and targets
            loss = ((preds.flatten() - y_train)**2).mean()
            all_loss.append(loss.item())
            NN.zero_grad()   # zero the gradients so they can be recomputed for this batch
            loss.backward()  # compute gradients of the loss w.r.t. the parameters
            # manual gradient-descent step on each parameter
            for param in NN.parameters():
                with torch.no_grad():
                    param.copy_(param - lr * param.grad)
            index_before += batch_count
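# (Optional sketch, added for illustration:) plot the per-update training loss
# collected in all_loss to check that optimization is making progress.
fig, ax = plt.subplots()
ax.plot(all_loss)
ax.set_xlabel("update step")
ax.set_ylabel("training MSE")
fig.show()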
# Generate a 2D contour plot of your best solution here:
# Visualizing the learned function on the same grid as the target
x = np.linspace(-np.pi, np.pi, 100)
y = np.linspace(-np.pi, np.pi, 100)
X, Y = np.meshgrid(x, y)
# generate the matrix Z the least efficient way I know how
Z = np.zeros((len(y), len(x)))
with torch.no_grad():
    for i in range(len(y)):
        for j in range(len(x)):
            # with meshgrid's default 'xy' indexing, Z[i, j] is the value at (x[j], y[i])
            Z[i, j] = NN(torch.tensor([[float(x[j]), float(y[i])]])).item()
fig, ax = plt.subplots()
ax.contourf(X, Y, Z, levels=np.linspace(-2, 2, 10))
fig.show()
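# (Illustrative sketch, not in the original:) evaluate the trained network on the
# held-out test set, which was split off above but not otherwise used.
with torch.no_grad():
    x_test = x[test_index].float()
    y_test = y[test_index]
    test_preds = NN(x_test).flatten()
    test_mse = ((test_preds - y_test)**2).mean()
print("test MSE:", test_mse.item())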