import numpy as np
import torch
import matplotlib.pyplot as plt
from IPython.display import clear_output
import sklearn.datasets as datasets
fig, ax = plt.subplots()
# This is the simplest way to create a plot with Pyplot. The 'fig' object represents the entire figure, and
# the 'ax' object represents the axes you will plot on. By passing a number or a (rows, columns) tuple to
# subplots(), you can create several axes or a grid of axes that all belong to the same figure. Here we
# create a single set of axes by leaving the arguments blank.
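# As a hedged aside, a minimal sketch of a grid of axes (demo_fig and demo_axs are just illustrative names;
# the demo figure is closed right away so only the main figure below is shown):
demo_fig, demo_axs = plt.subplots(2, 2)   # demo_axs is a 2x2 array of axes
demo_axs[0, 1].plot([0, 1], [0, 1])       # index into the array to pick a panel
plt.close(demo_fig)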
#Let's generate some data to plot:
x = np.linspace(0,10,100)
y = (x-3)**3
#The plot function plots the data on the axes as a graph
ax.plot(x,y)
#We also ought to adorn our axes with the appropriate titles and legends:
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_title('A Simple Function')
ax.legend(['The Simple Function']) # Legends take a list of labels, in case you've plotted more than one curve on the axes
#Though it's not necessary here, we can also adjust the axis limits:
#ax.set_xlim(-10,10)
#ax.set_ylim(0,100)
#When the plot is ready, use fig.show()
fig.show()
#If necessary, you can easily save a figure you've generated with fig.savefig() (plt.savefig() also works on the current figure)
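# For example (the filename here is just an illustration):
# fig.savefig('simple_function.png', dpi=150)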
# You can also plot on 3D axes with Pyplot
fig = plt.figure() # subplots() doesn't accept the projection argument directly, so we build the figure and 3D axes in two steps
ax = fig.add_subplot(projection = '3d')
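# A hedged alternative: subplots() can also build 3D axes in one call via its subplot_kw argument, e.g.
# fig, ax = plt.subplots(subplot_kw={'projection': '3d'})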
# To plot a surface, create a 'meshgrid' of points in the x,y plane:
x = np.linspace(0,10,100)
y = np.linspace(0,10,100)
X,Y = np.meshgrid(x,y)
# You can then evaluate a 2D function on the meshgrid coordinates, then plot the surface
Z = np.sin(X)+np.cos(Y)
ax.plot_surface(X,Y,Z, cmap = 'coolwarm') # Trying out different colormaps is fun!
# We can also add axis labels and such:
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
ax.set_title('My Lumpy Function')
# For surfaces, it isn't trivial to set up a legend, but you can do so easily for scatter plots and such
ax.set_zlim(-1.1,1.1)
ax.view_init(20,30) # This sets the perspective angle from which we view the graph
#Defining a PyTorch Tensor
x = torch.tensor([1.0, 2.0, 3.0])
print(x)
print(x.device)
print(x.type())
#If we had a CUDA capable GPU, we could define our tensors on a GPU
# x = torch.tensor([1.0, 2.0, 3.0], device="cuda:0")
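# A minimal, hedged sketch of device-agnostic code: use the GPU only if one is actually available
# (x_dev is just an illustrative name).
device = "cuda:0" if torch.cuda.is_available() else "cpu"
x_dev = torch.tensor([1.0, 2.0, 3.0], device=device)
print(x_dev.device)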
#PyTorch recycles a lot of the syntax from NumPy so we can carry out our familiar NumPy operations on PyTorch tensors
x = torch.tensor([1.0, 2.0, 3.0])
x += 5
print(x)
print(x.size())
x = torch.tensor(([1, 2], [3, 4]))
print(x.T)
y = torch.tensor(([1, 3], [5, 6]))
print(x @ y) #What does this do?
print(x * y) #How is this different from above?
x = torch.ones((6))
x.unsqueeze(0) # unsqueeze is not in-place, so this result is discarded
print(x) # x is unchanged
x = x.unsqueeze(0) # assign the result to actually add the new leading dimension
print(x)
print(x.shape)
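# Conversely, squeeze(0) removes the size-one dimension again (a quick sanity check):
print(x.squeeze(0).shape)  # expected: torch.Size([6])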
def f(x):
    return x**2
x = torch.linspace(-1,1, 100, requires_grad = True)
y = torch.sum(f(x)) # We use the sum function to require backward() to compute the gradient with respect to each element of x
y.backward()
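# A quick hedged sanity check: since f(x) = x**2, the analytic gradient is 2x, and x.grad should match it.
print(torch.allclose(x.grad, 2 * x.detach()))  # expected: True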
fig, ax = plt.subplots()
ax.plot(x.detach().numpy(), f(x).detach().numpy())
ax.plot(x.detach().numpy(), x.grad.detach().numpy())
ax.set_title('Exercise 1')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.legend(['Function', 'Gradient'])
fig.show()
# We could also do it this way
x = torch.linspace(-1,1, 100, requires_grad = True)
y = f(x) # We don't have to use the sum function here, but...
y.backward(torch.ones(100)) # backward() on a non-scalar output needs a weight tensor; a tensor of ones weights each element's gradient equally
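# A hedged aside: the tensor passed to backward() acts as per-element weights (a vector-Jacobian product).
# For instance, doubling the weights doubles every entry of the gradient (x_w is just an illustrative name):
x_w = torch.linspace(-1, 1, 100, requires_grad=True)
f(x_w).backward(2 * torch.ones(100))
print(torch.allclose(x_w.grad, 2 * (2 * x_w.detach())))  # expected: True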
fig, ax = plt.subplots()
ax.plot(x.detach().numpy(), f(x).detach().numpy())
ax.plot(x.detach().numpy(), x.grad.detach().numpy())
ax.set_title('Exercise 1')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.legend(['Function', 'Gradient'])
fig.show()
# Generate some linearly separable data
np.random.seed(3) # Set a fixed RNG seed so everyone's data looks the same
n_data = 100 # How many data points we want in each cluster
a_data = np.random.multivariate_normal([1.0,1.0], 0.2*np.eye(2), [n_data]) # Generate the data
b_data = np.random.multivariate_normal([-1.0,-1.0], 0.2*np.eye(2), [n_data])
# Plot the data (using Pyplot's scatter function)
fig, ax = plt.subplots()
ax.scatter(a_data[:,0], a_data[:,1], color = 'blue')
ax.scatter(b_data[:,0], b_data[:,1], color = 'red')
ax.set_title("Linearly Separable Data")
ax.set_xlabel('x')
ax.set_ylabel('y')
fig.show()
#Solve the normal equations
data = np.concatenate((a_data,b_data),axis = 0)
augmented_data = np.concatenate((data, np.ones([2*n_data,1])), axis = 1)
targets = np.concatenate((np.zeros(n_data), np.ones(n_data)), axis = 0)
params = np.linalg.lstsq(augmented_data, targets, rcond=-1)[0]
print("The final parameters are: " + str(params))
#Plot the 2D decision boundary
def apply_regression_to_grid_data(x,y,params):
    # dtype=object lets the array hold the full X and Y grids alongside the scalar 1,
    # so the matmul below expands to params[0]*x + params[1]*y + params[2]
    augmented_data = np.array([x,y,1], dtype = object)
    return np.matmul(params, augmented_data)
fig, ax = plt.subplots()
ax.scatter(a_data[:,0], a_data[:,1], color = 'red')
ax.scatter(b_data[:,0], b_data[:,1], color = 'blue')
x = np.linspace(-2.5,2.5,100)
y = np.linspace(-2.5,2.5,100)
X,Y = np.meshgrid(x,y)
Z = apply_regression_to_grid_data(X,Y,params)
contour = ax.contour(X,Y,Z, levels = [0.5], colors = 'black')
ax.clabel(contour)
ax.legend(['A data', 'B data', 'Decision Boundary'])
ax.set_title('Normal Equation Solution')
ax.set_xlabel('x')
ax.set_ylabel('y')
fig.show()
#Plot the 3D regression
fig = plt.figure()
ax = fig.add_subplot(projection = '3d')
ax.scatter(a_data[:,0], a_data[:,1], 0, color = 'red')
ax.scatter(b_data[:,0], b_data[:,1], 1, color = 'blue')
ax.plot_surface(X,Y,Z, cmap = 'coolwarm_r', alpha = 0.4)
ax.set_title('Normal Equation Solution')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')
ax.legend(['A data', 'B data',])
fig.show()
#Return the classification accuracy
a_data_augmented = augmented_data[:n_data]
b_data_augmented = augmented_data[n_data:]
accuracy = (np.shape(np.where(np.matmul(a_data_augmented, params) < 0.5))[1] + np.shape(np.where(np.matmul(b_data_augmented, params) > 0.5))[1])/(2*n_data)
print("Classification Accuracy: " + str(accuracy))
#Create a Pytorch module to apply the regression:
class Linear_Regression(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.w = torch.nn.Parameter(torch.ones(3)) # weights for x, y, and the bias term
    def forward(self, x):
        x_augmented = torch.cat((x, torch.ones(len(x),1)), axis = 1) # append a column of ones for the bias
        return x_augmented @ self.w
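# A quick hedged sanity check of the module: an (N, 2) batch should map to N scalar predictions
# (_demo is just an illustrative name).
_demo = Linear_Regression()
print(_demo(torch.zeros(5, 2)).shape)  # expected: torch.Size([5])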
#Ready the data
data = torch.Tensor(data)
targets = torch.Tensor(targets)
model = Linear_Regression()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)
loss_history = []
#Apply the optimization
for step in range(1000):
    predictions = model(data)
    loss = torch.sum(torch.square((predictions - targets)))
    loss_history.append(loss.item()) # store a plain float so the history can be plotted later
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
params = list(model.parameters())[0]
params = params.detach().numpy()
print("The final parameters are: " + str(params))
#Plot the loss history
fig, ax = plt.subplots()
ax.plot(loss_history)
ax.set_xlabel('Training Epoch')
ax.set_ylabel('Sum of Squares Loss')
ax.set_title('Sum of Squares Loss Over Training Time')
ax.legend(['Model Loss']) # legend() expects a sequence of labels; a bare string would be split into single characters
fig.show()
#Plot the 2D decision boundary
fig, ax = plt.subplots()
ax.scatter(a_data[:,0], a_data[:,1], color = 'red')
ax.scatter(b_data[:,0], b_data[:,1], color = 'blue')
x = np.linspace(-2.5,2.5,100)
y = np.linspace(-2.5,2.5,100)
X,Y = np.meshgrid(x,y)
Z = apply_regression_to_grid_data(X,Y,params)
contour = ax.contour(X,Y,Z, levels = [0.5], colors = 'black')
ax.clabel(contour)
ax.legend(['A data', 'B data', 'Decision Boundary'])
ax.set_title('Gradient Descent Solution')
ax.set_xlabel('x')
ax.set_ylabel('y')
fig.show()
#Plot the 3D regression
fig = plt.figure()
ax = fig.add_subplot(projection = '3d')
ax.scatter(a_data[:,0], a_data[:,1], 0, color = 'red')
ax.scatter(b_data[:,0], b_data[:,1], 1, color = 'blue')
ax.plot_surface(X,Y,Z, cmap = 'coolwarm_r', alpha = 0.4)
ax.set_title('Gradient Descent Solution')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')
ax.legend(['A data', 'B data',])
fig.show()
#Return the classification accuracy
accuracy = (np.shape(np.where(np.matmul(a_data_augmented, params) < 0.5))[1] + np.shape(np.where(np.matmul(b_data_augmented, params) > 0.5))[1])/(2*n_data)
print("Classification Accuracy: " + str(accuracy))
# Generate Two-Moons data
np.random.seed(4) # Set a fixed RNG seed so everyone's data looks the same
n_data = 100 # How many data points we want in each cluster
data, targets = datasets.make_moons((n_data,n_data), shuffle = False, noise = 0.05) # Create the data
b_data = data[:n_data] # Label the data
a_data = data[n_data:]
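# A hedged sanity check: with shuffle = False, make_moons puts all label-0 points first and all
# label-1 points second, which is exactly what the slicing above relies on.
print(np.unique(targets[:n_data]), np.unique(targets[n_data:]))  # expected: [0] [1]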
fig, ax = plt.subplots() # Plot the data
ax.scatter(a_data[:,0], a_data[:,1], color = 'blue')
ax.scatter(b_data[:,0], b_data[:,1], color = 'red')
ax.set_title("Data that is NOT Linearly Separable")
ax.set_xlabel('x')
ax.set_ylabel('y')
fig.show()
# Apply the normal equations here
### This is the exact same code from Exercise 2, with a slight adjustment to axis limits
data = np.concatenate((a_data,b_data),axis = 0)
augmented_data = np.concatenate((data, np.ones([2*n_data,1])), axis = 1)
targets = np.concatenate((np.zeros(n_data), np.ones(n_data)), axis = 0)
params = np.linalg.lstsq(augmented_data, targets, rcond=-1)[0]
print("The final parameters are: " + str(params))
#Plot the 2D decision boundary
def apply_regression_to_grid_data(x,y,params):
    augmented_data = np.array([x,y,1], dtype = object)
    return np.matmul(params, augmented_data)
fig, ax = plt.subplots()
ax.scatter(a_data[:,0], a_data[:,1], color = 'red')
ax.scatter(b_data[:,0], b_data[:,1], color = 'blue')
x = np.linspace(-1.5,2.5,100)
y = np.linspace(-1,1.5,100)
X,Y = np.meshgrid(x,y)
Z = apply_regression_to_grid_data(X,Y,params)
contour = ax.contour(X,Y,Z, levels = [0.5], colors = 'black')
ax.clabel(contour)
ax.legend(['A data', 'B data', 'Decision Boundary'])
ax.set_title('Normal Equation Solution')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_xlim(-1.5, 2.5)
ax.set_ylim(-1, 1.5)
fig.show()
#Plot the 3D regression
fig = plt.figure()
ax = fig.add_subplot(projection = '3d')
ax.scatter(a_data[:,0], a_data[:,1], 0, color = 'red')
ax.scatter(b_data[:,0], b_data[:,1], 1, color = 'blue')
ax.plot_surface(X,Y,Z, cmap = 'coolwarm_r', alpha = 0.4)
ax.set_title('Normal Equation Solution')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')
ax.set_xlim(-1.5, 2.5)
ax.set_ylim(-1, 1.5)
ax.legend(['A data', 'B data',])
fig.show()
#Return the classification accuracy
a_data_augmented = augmented_data[:n_data]
b_data_augmented = augmented_data[n_data:]
accuracy = (np.shape(np.where(np.matmul(a_data_augmented, params) < 0.5))[1] + np.shape(np.where(np.matmul(b_data_augmented, params) > 0.5))[1])/(2*n_data)
print("Classification Accuracy: " + str(accuracy))
# Apply gradient descent here
# Again, this is mostly the same code from Exercise 3.
#Ready the data
data = torch.Tensor(data)
targets = torch.Tensor(targets)
model = Linear_Regression()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)
loss_history = []
#Apply the optimization
for step in range(1000):
    predictions = model(data)
    loss = torch.sum(torch.square((predictions - targets)))
    loss_history.append(loss.item()) # store a plain float so the history can be plotted later
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
params = list(model.parameters())[0]
params = params.detach().numpy()
print("The final parameters are: " + str(params))
#Plot the loss history
fig, ax = plt.subplots()
ax.plot(loss_history)
ax.set_xlabel('Training Epoch')
ax.set_ylabel('Sum of Squares Loss')
ax.set_title('Sum of Squares Loss Over Training Time')
ax.legend(['Model Loss'])
fig.show()
#Plot the 2D decision boundary
def apply_regression_to_grid_data(x,y,params):
    augmented_data = np.array([x,y,1], dtype = object)
    return np.matmul(params, augmented_data)
fig, ax = plt.subplots()
ax.scatter(a_data[:,0], a_data[:,1], color = 'red')
ax.scatter(b_data[:,0], b_data[:,1], color = 'blue')
x = np.linspace(-1.5,2.5,100)
y = np.linspace(-1,1.5,100)
X,Y = np.meshgrid(x,y)
Z = apply_regression_to_grid_data(X,Y,params)
contour = ax.contour(X,Y,Z, levels = [0.5], colors = 'black')
ax.clabel(contour)
ax.legend(['A data', 'B data', 'Decision Boundary'])
ax.set_title('Gradient Descent Solution')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_xlim(-1.5, 2.5)
ax.set_ylim(-1, 1.5)
fig.show()
#Plot the 3D regression
fig = plt.figure()
ax = fig.add_subplot(projection = '3d')
ax.scatter(a_data[:,0], a_data[:,1], 0, color = 'red')
ax.scatter(b_data[:,0], b_data[:,1], 1, color = 'blue')
ax.plot_surface(X,Y,Z, cmap = 'coolwarm_r', alpha = 0.4)
ax.set_title('Gradient Descent Solution')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')
ax.set_xlim(-1.5, 2.5)
ax.set_ylim(-1, 1.5)
ax.legend(['A data', 'B data',])
fig.show()
#Return the classification accuracy
a_data_augmented = augmented_data[:n_data]
b_data_augmented = augmented_data[n_data:]
accuracy = (np.shape(np.where(np.matmul(a_data_augmented, params) < 0.5))[1] + np.shape(np.where(np.matmul(b_data_augmented, params) > 0.5))[1])/(2*n_data)
print("Classification Accuracy: " + str(accuracy))
# We have to create a new Pytorch module. I chose a cubic basis.
class Polynomial_Logistic_Regression(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.w = torch.nn.Parameter(torch.zeros(2,4)) # one row of cubic coefficients for each input coordinate
    def forward(self, x):
        x_augmented = torch.stack((torch.ones(len(x)), x[:,0], x[:,0]**2, x[:,0]**3), axis = 1)
        y_augmented = torch.stack((torch.ones(len(x)), x[:,1], x[:,1]**2, x[:,1]**3), axis = 1)
        return torch.sigmoid(x_augmented @ self.w[0] + y_augmented @ self.w[1])
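# A quick hedged sanity check: the sigmoid keeps every output strictly between 0 and 1
# (_poly_demo is just an illustrative name; with the zero-initialised weights every output is 0.5).
_poly_demo = Polynomial_Logistic_Regression()
print(_poly_demo(torch.randn(5, 2)))  # expected: five values of 0.5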
#Ready the data
data = torch.Tensor(data)
targets = torch.Tensor(targets)
model = Polynomial_Logistic_Regression()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)
loss_history = []
#Apply the optimization
for step in range(4000):
    predictions = model(data)
    loss = torch.nn.BCELoss(reduction="sum")(predictions, targets)
    # loss = torch.sum(-targets*torch.log(predictions) - (1-targets)*torch.log(1-predictions)) # Maximum Likelihood Loss this time
    loss_history.append(loss.item()) # store a plain float so the history can be plotted later
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
params = list(model.parameters())[0]
params = params.detach().numpy()
print("The final parameters are: " + str(params))
#Plot the loss history
fig, ax = plt.subplots()
ax.plot(loss_history)
ax.set_xlabel('Training Epoch')
ax.set_ylabel('Maximum Likelihood Loss')
ax.set_title('Maximum Likelihood Loss Over Training Time')
ax.legend(['Model Loss'])
fig.show()
#Plot the 2D decision boundary
def apply_nonlinear_regression_to_grid_data(x,y,params):
    x_augmented = np.array([1, x, x**2, x**3], dtype = object)
    y_augmented = np.array([1, y, y**2, y**3], dtype = object)
    log_odds = np.matmul(params[0], x_augmented) + np.matmul(params[1], y_augmented)
    return (1/(1+np.exp(-log_odds)))
fig, ax = plt.subplots()
ax.scatter(a_data[:,0], a_data[:,1], color = 'red')
ax.scatter(b_data[:,0], b_data[:,1], color = 'blue')
x = np.linspace(-1.5,2.5,100)
y = np.linspace(-1,1.5,100)
X,Y = np.meshgrid(x,y)
Z = apply_nonlinear_regression_to_grid_data(X,Y,params)
contour = ax.contour(X,Y,Z, levels = [0.5], colors = 'black')
ax.clabel(contour)
ax.legend(['A data', 'B data', 'Decision Boundary'])
ax.set_title('Nonlinear Logistic Regression Solution')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_xlim(-1.5, 2.5)
ax.set_ylim(-1, 1.5)
fig.show()
#Plot the 3D regression
fig = plt.figure()
ax = fig.add_subplot(projection = '3d')
ax.scatter(a_data[:,0], a_data[:,1], 0, color = 'red')
ax.scatter(b_data[:,0], b_data[:,1], 1, color = 'blue')
ax.plot_surface(X,Y,Z, cmap = 'coolwarm_r', alpha = 0.4)
ax.set_title('Nonlinear Logistic Regression Solution')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')
ax.set_xlim(-1.5, 2.5)
ax.set_ylim(-1, 1.5)
ax.legend(['A data', 'B data',])
fig.show()
#Return the classification accuracy (We can do this a little bit differently now that our outputs are themselves between 0 and 1)
predictions = model(data).detach() # detach: no gradients are needed for evaluation
accuracy = (np.shape(np.where(predictions[:n_data] < 0.5))[1] + np.shape(np.where(predictions[n_data:] > 0.5))[1])/(2*n_data)
print("Classification Accuracy: " + str(accuracy))