import numpy as np
import matplotlib.pyplot as plt
# target function
def f(x):
    return np.sin(x) * np.exp(x / 5)
xx = np.linspace(0, 10, 100)
yy = f(xx)
plt.plot(xx, yy)
# gather some data points which we will fit to
x = np.random.random(10) * 10  # 10 random sample locations in [0, 10)
y = f(x)
plt.plot(xx, yy, ':')
plt.plot(x, y, 'ro')
# define a Gaussian kernel
sig = 0.75  # length scale
def kernel(x1, x2):
    return np.exp(-(x1 - x2)**2 / (2 * sig**2))
N = len(x)
K = np.zeros((N,N))
# https://mccormickml.com/2014/02/26/kernel-regression/
# Now apply the formulas from the lecture to fill in the kernel matrix and
# compute fitting coefficients. Use the function numpy.linalg.lstsq() to solve the linear problem K@c=y
# (look up its documentation, and don't forget that it returns 4 things, but you only need the solution vector)
for i in range(N):
    for j in range(N):
        # fill the kernel matrix over all pairs of data points; entries are
        # near 1 for nearby points and decay towards 0 with distance
        K[i, j] = kernel(x[i], x[j])
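# The double loop can also be written as a single broadcasted call, since
# kernel() works elementwise on arrays (a sketch; the result is identical):
K_vec = kernel(x[:, None], x[None, :])
assert np.allclose(K, K_vec)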
# lstsq solves a @ x = b; for us that is K @ c = y
c = np.linalg.lstsq(K, y, rcond=None)  # rcond=None avoids the FutureWarning
cpred = c[0]  # keep just the solution vector out of the 4 returned items
ypred = np.zeros(len(xx))  # predictions on the dense grid
for i in range(len(xx)):  # loop over all x-values we want predictions at
    summing = 0  # accumulates the kernel expansion for this x-value
    for j in range(N):  # loop over the basis functions (one per data point)
        summing += kernel(xx[i], x[j]) * cpred[j]
    ypred[i] = summing
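# Broadcasting collapses this double loop into one matrix product
# (a sketch; it reproduces ypred exactly):
ypred_vec = kernel(xx[:, None], x[None, :]) @ cpred
assert np.allclose(ypred, ypred_vec)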
lam = 1  # strength of the regulariser
# Same fit, now with regularisation: solve (K + lam*I) @ c = y
c_reg = np.linalg.lstsq(K + lam * np.identity(N), y, rcond=None)
cpred_reg = c_reg[0]
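# Since K + lam*I is square and, for lam > 0, well conditioned, a plain
# linear solve gives the same coefficients (a sketch of the alternative):
cpred_reg_solve = np.linalg.solve(K + lam * np.identity(N), y)
assert np.allclose(cpred_reg, cpred_reg_solve)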
ypred_reg = np.zeros(len(xx))
for i in range(len(xx)):
    summing_reg = 0
    for j in range(N):
        summing_reg += kernel(xx[i], x[j]) * cpred_reg[j]
    ypred_reg[i] = summing_reg
# now use your coefficients to predict the function on the xx array
# you should get something like the picture below:
plt.plot(xx, yy, ':')
plt.plot(x, y, 'ro')
plt.plot(xx, ypred, 'b')
plt.plot(xx, ypred_reg, 'c')
plt.title("lam = 1")
import pandas
sol = pandas.read_csv("curated-solubility-dataset.csv")
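# The loops below use X (an 11 x n feature matrix), Y (targets), Y2 (training
# targets) and sigma (per-feature length scales), none of which are built
# anywhere above. A minimal sketch, assuming 11 numeric descriptor columns of
# the curated solubility dataset (this column choice is an assumption), the
# 'Solubility' column as target, and 9982 rows total:
props = ['MolWt', 'MolLogP', 'MolMR', 'HeavyAtomCount', 'NumHAcceptors',
         'NumHDonors', 'NumHeteroatoms', 'NumRotatableBonds',
         'NumValenceElectrons', 'NumAromaticRings', 'RingCount']
X = sol[props].to_numpy().T          # shape (11, 9982)
Y = sol['Solubility'].to_numpy()     # log-solubility targets
Y2 = Y[:8000]                        # first 8000 samples for training
sigma = X[:, :8000].std(axis=1)      # one length scale per feature (an assumption)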
K2 = np.zeros((8000, 8000))  # kernel matrix over the 8000 training samples
lam = 0.3  # strength of the regulariser
# split the kernel so per-feature exponents can be summed before exponentiating
def kernelinner(x1, x2, sig):
    return (x1 - x2)**2 / (2 * sig**2)
def kernelouter(inner):
    return np.exp(-inner)
for i in range(8000):
    for j in range(8000):
        multisum = 0  # sum of per-feature exponents for this pair of samples
        for k in range(11):
            multisum += kernelinner(X[k, i], X[k, j], sigma[k])
        # a product of per-feature Gaussians is the exp of the summed exponents
        K2[i, j] = kernelouter(multisum)
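# The pure-Python triple loop above needs ~7e8 kernel evaluations and is very
# slow. A vectorized sketch of the same matrix (assuming X is (11, n) and
# sigma is (11,)):
Xs = X / sigma[:, None]               # scale each feature by its length scale
sq = np.sum(Xs[:, :8000]**2, axis=0)  # squared norms of the training columns
D = sq[:, None] + sq[None, :] - 2 * Xs[:, :8000].T @ Xs[:, :8000]
K2_vec = np.exp(-0.5 * np.maximum(D, 0))  # matches the K2 built by the loop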
c2 = np.linalg.lstsq(K2 + lam * np.identity(8000), Y2, rcond=None)
c2_pred = c2[0]
Ypred = np.zeros(1982)  # predictions for the 1982 held-out test samples
index = 0
for i in range(8000, 9982):  # loop over the test samples
    summingkern = 0  # accumulates the kernel expansion for test sample i
    for j in range(8000):  # loop over the training set
        summing2 = 0
        for k in range(11):
            summing2 += kernelinner(X[k, i], X[k, j], sigma[k])
        # note: do not name this variable `kernel`, or it shadows the
        # kernel() function defined earlier
        summingkern += kernelouter(summing2) * c2_pred[j]
    Ypred[index] = summingkern
    index += 1
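# Vectorized sketch of the same predictions, reusing Xs and sq from the
# earlier sketch: cross kernel between test and training columns, then one
# matrix product with the coefficients:
sq_test = np.sum(Xs[:, 8000:]**2, axis=0)
D_test = sq_test[:, None] + sq[None, :] - 2 * Xs[:, 8000:].T @ Xs[:, :8000]
Ypred_vec = np.exp(-0.5 * np.maximum(D_test, 0)) @ c2_pred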
plt.scatter(Y[8000:], Ypred)
plt.plot([-50, 50], [-50, 50], 'k--')  # parity line
plt.xlim(-12, 6)
plt.ylim(-12, 6)
plt.ylabel('predicted')
plt.xlabel('target')
plt.gca().set_aspect('equal')
# root-mean-square error on the held-out test set
rmse = np.sqrt(np.sum((Y[8000:] - Ypred)**2) / len(Ypred))
print("test RMSE:", rmse)