import numpy as np
from matplotlib import pyplot
from scipy import optimize
from scipy.io import loadmat
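# ex5data1.mat contains a training set (X, y), a cross-validation set
# (Xval, yval), and a test set (Xtest, ytest)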
data = loadmat('ex5data1.mat')
X, y = data['X'], data['y'][:, 0]
Xtest, ytest = data['Xtest'], data['ytest'][:, 0]
Xval, yval = data['Xval'], data['yval'][:, 0]
m = y.size
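# Visualize the training data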
pyplot.plot(X, y, 'ro', ms=10, mec='k', mew=1)
pyplot.xlabel('Change in water level (x)')
pyplot.ylabel('Water flowing out of the dam (y)');
def CostGrad(X, y, theta, lambda_=0):
    # Regularized linear regression cost and gradient; theta[0] (bias) is not regularized.
    # Note: the original 1/2*m was an operator-precedence bug (it computes m/2, not 1/(2m)).
    m = len(y)
    error = X.dot(theta) - y
    J = 1 / (2 * m) * error.T.dot(error) + lambda_ / (2 * m) * np.sum(theta[1:] ** 2)
    grad = 1 / m * X.T.dot(error)
    grad[1:] = grad[1:] + lambda_ / m * theta[1:]
    return J.item(), grad
def LinearGradReg(X, y, theta, learning_rate, epoch, lambda_=0):
    # Batch gradient descent; records the cost at every iteration
    m = len(y)
    errors = []
    for i in range(epoch):
        J, grad = CostGrad(X, y, theta, lambda_=lambda_)
        errors.append(J)
        theta = theta - learning_rate * grad
    return theta, errors
# Add a bias column of ones to X and reshape y into a column vector
X_ones = np.concatenate([np.ones((len(X), 1)), X], axis=1)
y = y.reshape((len(y), 1))
theta = np.zeros((X_ones.shape[1], 1))
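# Hedged sanity check (assumes the standard ex5data1.mat shipped with the
# course exercise): the ex5 write-up reports a cost of about 303.993 at
# theta = [1, 1] with lambda_ = 1.
J_check, _ = CostGrad(X_ones, y, np.ones((2, 1)), lambda_=1)
print('cost at theta=[1, 1], lambda_=1:', J_check)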
learning_rate = 0.001
epoch = 10000
# Train the linear model with gradient descent
theta_found, errors = LinearGradReg(X_ones, y, theta, learning_rate, epoch)
print(theta_found)
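# Alternative fit (a sketch): scipy.optimize is already imported above, so the
# same parameters can be found without tuning a learning rate by minimizing the
# cost directly; 'TNC' is one reasonable solver choice here, not the only one.
res = optimize.minimize(
    lambda t: CostGrad(X_ones, y, t.reshape(-1, 1))[0],
    x0=np.zeros(X_ones.shape[1]),
    jac=lambda t: CostGrad(X_ones, y, t.reshape(-1, 1))[1].ravel(),
    method='TNC',
)
print(res.x)  # should land close to theta_found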
pyplot.plot(range(epoch), errors)
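# Label the gradient-descent convergence plot
pyplot.xlabel('Iteration')
pyplot.ylabel('Cost J')
# Plot the learned linear fit over the training data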
x = np.linspace(-40, 40, 100).reshape((100, 1))
x_ones = np.concatenate([np.ones((len(x), 1)), x], axis=1)
y_ = x_ones.dot(theta_found)
pyplot.plot(X, y, 'ro', ms=10, mec='k', mew=1)
pyplot.plot(x, y_, 'b--', lw=2)
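# Reuse the axis labels from the first plot
pyplot.xlabel('Change in water level (x)')
pyplot.ylabel('Water flowing out of the dam (y)')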
# Learning curves: train on increasingly many examples and compare
# training error against cross-validation error
error_train = []
error_val = []
Xval_ones = np.concatenate([np.ones((len(Xval),1)), Xval], axis=1)
yval = yval.reshape((len(yval), 1))
for i in range(1, len(X_ones) + 1):  # +1 so the full training set is included
    theta = np.zeros((X_ones.shape[1], 1))
    learning_rate = 0.001
    epoch = 10000
    # train the model on the first i training examples
    theta, _ = LinearGradReg(X_ones[:i, :], y[:i, :], theta, learning_rate, epoch)
    err_train, _ = CostGrad(X_ones[:i, :], y[:i, :], theta, lambda_=0)
    error_train.append(err_train)
    # get the errors on the validation set
    err_val, _ = CostGrad(Xval_ones, yval, theta, lambda_=0)
    error_val.append(err_val)
pyplot.plot(range(1, len(error_train) + 1), np.sqrt(error_train))
pyplot.plot(range(1, len(error_val) + 1), np.sqrt(error_val))
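# Label the learning curves (errors are plotted on a square-root scale above)
pyplot.xlabel('Number of training examples')
pyplot.ylabel('Error')
pyplot.legend(['Train', 'Cross-validation'])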
# polynomial regression
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
model = LinearRegression()
scaler = StandardScaler()
poly = PolynomialFeatures(12)
X_poly = poly.fit_transform(X)
scaler.fit(X_poly)
X_poly_scaled = scaler.transform(X_poly)
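# StandardScaler maps the constant bias column to zeros (zero variance),
# so restore the column of ones after scaling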
X_poly_scaled[:, :1] = np.ones((len(X_poly_scaled), 1))
theta = np.zeros((X_poly_scaled.shape[1], 1))
learning_rate = 0.001
epoch = 100000
theta_found, errors = LinearGradReg(X_poly_scaled, y, theta, learning_rate, epoch)
#model.fit(X_poly_scaled, y)
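# (Alternative sketch: uncomment model.fit above and read the fit from
# model.coef_ / model.intercept_ instead of running gradient descent.)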
x = np.linspace(-40, 40, 100).reshape((-1, 1))  # fine grid for a smooth curve
x_ones_poly = poly.transform(x)  # poly is already fitted; just transform
x_ones_poly_scaled = scaler.transform(x_ones_poly)
x_ones_poly_scaled[:, :1] = np.ones((len(x_ones_poly_scaled), 1))
y_ = x_ones_poly_scaled.dot(theta_found)
pyplot.plot(X, y, 'ro', ms=10, mec='k', mew=1)
pyplot.plot(x, y_, 'b--', lw=2)
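# Reuse the axis labels from the earlier plots
pyplot.xlabel('Change in water level (x)')
pyplot.ylabel('Water flowing out of the dam (y)')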
print(theta_found)  # learned polynomial coefficients