import numpy as np
from matplotlib import pyplot
from scipy import optimize
from scipy.io import loadmat
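# ex5data1.mat contains a training set (X, y), a cross-validation set
# (Xval, yval), and a test set (Xtest, ytest)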
data = loadmat('ex5data1.mat')
X, y = data['X'], data['y'][:, 0]
Xtest, ytest = data['Xtest'], data['ytest'][:, 0]
Xval, yval = data['Xval'], data['yval'][:, 0]
m = y.size
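# Visualize the training data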
pyplot.plot(X, y, 'ro', ms=10, mec='k', mew=1)
pyplot.xlabel('Change in water level (x)')
pyplot.ylabel('Water flowing out of the dam (y)');
def CostGrad(X, y, theta, lambda_=0):
    # Regularized linear regression cost and gradient; theta[0] (bias) is not regularized.
    # Note: the original 1/2*m was an operator-precedence bug (it computes m/2, not 1/(2m)).
    m = len(y)
    error = X.dot(theta) - y
    J = 1 / (2 * m) * error.T.dot(error) + lambda_ / (2 * m) * np.sum(theta[1:] ** 2)
    grad = 1 / m * X.T.dot(error)
    grad[1:] = grad[1:] + lambda_ / m * theta[1:]
    return J.item(), grad
def LinearGradReg(X, y, theta, learning_rate, epoch, lambda_=0):
    # Batch gradient descent; records the cost at every iteration
    m = len(y)
    errors = []
    for i in range(epoch):
        J, grad = CostGrad(X, y, theta, lambda_=lambda_)
        errors.append(J)
        theta = theta - learning_rate * grad
    return theta, errors
# Add a bias column of ones to X and reshape y into a column vector
X_ones = np.concatenate([np.ones((len(X), 1)), X], axis=1)
y = y.reshape((len(y), 1))
theta = np.zeros((X_ones.shape[1], 1))
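# Hedged sanity check (assumes the standard ex5data1.mat shipped with the
# course exercise): the ex5 write-up reports a cost of about 303.993 at
# theta = [1, 1] with lambda_ = 1.
J_check, _ = CostGrad(X_ones, y, np.ones((2, 1)), lambda_=1)
print('cost at theta=[1, 1], lambda_=1:', J_check)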
learning_rate = 0.001
epoch = 10000
# Train the linear model with gradient descent
theta_found, errors = LinearGradReg(X_ones, y, theta, learning_rate, epoch)
print(theta_found)
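# Alternative fit (a sketch): scipy.optimize is already imported above, so the
# same parameters can be found without tuning a learning rate by minimizing the
# cost directly; 'TNC' is one reasonable solver choice here, not the only one.
res = optimize.minimize(
    lambda t: CostGrad(X_ones, y, t.reshape(-1, 1))[0],
    x0=np.zeros(X_ones.shape[1]),
    jac=lambda t: CostGrad(X_ones, y, t.reshape(-1, 1))[1].ravel(),
    method='TNC',
)
print(res.x)  # should land close to theta_found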
pyplot.plot(range(epoch), errors)
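# Label the gradient-descent convergence plot
pyplot.xlabel('Iteration')
pyplot.ylabel('Cost J')
# Plot the learned linear fit over the training data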
x = np.linspace(-40, 40, 100).reshape((100, 1))
x_ones = np.concatenate([np.ones((len(x), 1)), x], axis=1)
y_ = x_ones.dot(theta_found)
pyplot.plot(X, y, 'ro', ms=10, mec='k', mew=1)
pyplot.plot(x, y_, 'b--', lw=2)
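# Reuse the axis labels from the first plot
pyplot.xlabel('Change in water level (x)')
pyplot.ylabel('Water flowing out of the dam (y)')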
# Learning curves: train on increasingly many examples and compare
# training error against cross-validation error
error_train = []
error_val = []
Xval_ones = np.concatenate([np.ones((len(Xval),1)), Xval], axis=1)
yval = yval.reshape((len(yval), 1))
for i in range(1, len(X_ones) + 1):  # +1 so the full training set is included
    theta = np.zeros((X_ones.shape[1], 1))
    learning_rate = 0.001
    epoch = 10000
    # train the model on the first i training examples
    theta, _ = LinearGradReg(X_ones[:i, :], y[:i, :], theta, learning_rate, epoch)
    err_train, _ = CostGrad(X_ones[:i, :], y[:i, :], theta, lambda_=0)
    error_train.append(err_train)
    # get the errors on the validation set
    err_val, _ = CostGrad(Xval_ones, yval, theta, lambda_=0)
    error_val.append(err_val)
pyplot.plot(range(1, len(error_train) + 1), np.sqrt(error_train))
pyplot.plot(range(1, len(error_val) + 1), np.sqrt(error_val))
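# Label the learning curves (errors are plotted on a square-root scale above)
pyplot.xlabel('Number of training examples')
pyplot.ylabel('Error')
pyplot.legend(['Train', 'Cross-validation'])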
# polynomial regression
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
model = LinearRegression()
scaler = StandardScaler()
poly = PolynomialFeatures(12)
X_poly = poly.fit_transform(X)
scaler.fit(X_poly)
X_poly_scaled = scaler.transform(X_poly)
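# StandardScaler maps the constant bias column to zeros (zero variance),
# so restore the column of ones after scaling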
X_poly_scaled[:, :1] = np.ones((len(X_poly_scaled), 1))
theta = np.zeros((X_poly_scaled.shape[1], 1))
learning_rate = 0.001
epoch = 100000
theta_found, errors = LinearGradReg(X_poly_scaled, y, theta, learning_rate, epoch)
#model.fit(X_poly_scaled, y)
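# (Alternative sketch: uncomment model.fit above and read the fit from
# model.coef_ / model.intercept_ instead of running gradient descent.)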
x = np.linspace(-40, 40, 100).reshape((-1, 1))  # fine grid for a smooth curve
x_ones_poly = poly.transform(x)  # poly is already fitted; just transform
x_ones_poly_scaled = scaler.transform(x_ones_poly)
x_ones_poly_scaled[:, :1] = np.ones((len(x_ones_poly_scaled), 1))
y_ = x_ones_poly_scaled.dot(theta_found)
pyplot.plot(X, y, 'ro', ms=10, mec='k', mew=1)
pyplot.plot(x, y_, 'b--', lw=2)
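# Reuse the axis labels from the earlier plots
pyplot.xlabel('Change in water level (x)')
pyplot.ylabel('Water flowing out of the dam (y)')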
print(theta_found)  # learned polynomial coefficients