# Performing linear regression using scikit-learn
import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
x = 2 * np.random.rand(100, 1)
y = 4 + 3 * x + np.random.randn(100, 1)  # Gaussian noise around the line y = 4 + 3x
lin_reg = LinearRegression()
lin_reg.fit(x, y)
print(lin_reg.intercept_, lin_reg.coef_)  # should be close to 4 and 3
plt.plot(x, y, 'b.')
plt.plot(x, lin_reg.predict(x), 'r-')
plt.title('Linear Regression')
plt.xlabel('x values')
plt.ylabel('y values')
plt.show()
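# A quick sanity check (a minimal sketch, not part of the original notebook):
# predictions at new inputs should track the generating line y = 4 + 3x.
x_check = np.array([[0.0], [2.0]])  # hypothetical query points at the data range's endpoints
print(lin_reg.predict(x_check))  # roughly [[4.], [10.]]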
# Performing linear regression using batch Gradient Descent
%matplotlib inline
learning_rate = 0.1
n_iterations = 1000
m = 100  # number of training instances
theta = np.random.randn(2, 1)  # random initialization of [intercept, slope]
print('initial theta:', theta)
# Generating some linear-looking data
np.random.seed(0)
x = 2 * np.random.rand(100, 1)
y = 4 + 3 * x + np.random.randn(100, 1)  # Gaussian noise
# print(x)
# print(y)
x_b = np.c_[np.ones((100, 1)), x]  # add x0 = 1 (bias term) to each instance
# print(x_b)
for iteration in range(n_iterations):
    gradients = 2 / m * x_b.T.dot(x_b.dot(theta) - y)  # gradient of the MSE cost
    theta = theta - learning_rate * gradients
print('theta after gradient descent: {}'.format(theta))
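# Optional: tracking the training MSE per iteration makes the convergence of
# gradient descent visible. A minimal sketch reusing x_b, y, and the
# hyperparameters above (theta_gd and mse_history are names introduced here):
theta_gd = np.random.randn(2, 1)
mse_history = []
for _ in range(n_iterations):
    error = x_b.dot(theta_gd) - y
    mse_history.append(float((error ** 2).mean()))
    theta_gd = theta_gd - learning_rate * (2 / m) * x_b.T.dot(error)
plt.plot(mse_history)
plt.xlabel('iteration')
plt.ylabel('training MSE')
plt.title('Gradient descent convergence')
plt.show()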
# Performing linear regression using the Normal Equation
# Generating some linear-looking data
np.random.seed(0)
x = 2 * np.random.rand(100,1)
y = 4 + 3 * x + np.random.randn(100, 1)  # Gaussian noise
# print(x)
# print(y)
x_b = np.c_[np.ones((100, 1)), x]  # add x0 = 1 (bias term) to each instance
theta_best = np.linalg.inv(x_b.T.dot(x_b)).dot(x_b.T).dot(y)  # Normal Equation: (X^T X)^-1 X^T y
print('The value of theta that minimizes the cost function is: {}'.format(theta_best))
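# np.linalg.pinv (the Moore-Penrose pseudoinverse, computed via SVD) solves the
# same least-squares problem and is numerically safer when x_b.T.dot(x_b) is
# close to singular; a minimal alternative sketch:
theta_pinv = np.linalg.pinv(x_b).dot(y)
print('theta via pinv: {}'.format(theta_pinv))  # should match theta_best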
# Making predictions with theta_best
x_new = np.array([[0], [1], [2]])
x_new_b = np.c_[np.ones((3, 1)), x_new]
y_predict = x_new_b.dot(theta_best)
#plotting the model's predictions
plt.plot(x_new, y_predict, 'g-')
plt.plot(x, y, 'r.')
plt.xlabel('x')
plt.ylabel('y')
plt.title("Model's predictions")
plt.axis([0, 2, 0, 15])
plt.show()
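# Cross-check (a minimal sketch): sklearn's LinearRegression solves the same
# least-squares problem, so its parameters should agree with theta_best.
check_reg = LinearRegression()
check_reg.fit(x, y)
print('sklearn intercept and coef:', check_reg.intercept_, check_reg.coef_)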
# Performing polynomial regression using scikit-learn
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
m = 100
x_values = 6 * np.random.rand(m, 1) - 3
y_values = 0.5 * x_values ** 2 + x_values + 2 + np.random.randn(m, 1)
# y = theta2 * x^2 + theta1 * x + theta0, with theta2 = 0.5, theta1 = 1, theta0 = 2
poly_reg = PolynomialFeatures(degree=2, include_bias=False)  # adds an x^2 column; LinearRegression fits the intercept itself
X_poly = poly_reg.fit_transform(x_values)  # X_poly has columns [x, x^2]
# print(x_values[0])
# print(X_poly[0])
pol_reg = LinearRegression()
pol_reg.fit(X_poly, y_values)
print(pol_reg.intercept_, pol_reg.coef_)  # should be close to 2 and [1, 0.5]
# Visualizing the polynomial regression results
def viz_polynomial():
    plt.scatter(x_values, y_values, color='red')
    # sort x so the prediction curve is drawn left to right, not in data order
    x_sorted = np.sort(x_values, axis=0)
    plt.plot(x_sorted, pol_reg.predict(poly_reg.transform(x_sorted)), color='blue')
    plt.title('Polynomial Regression')
    plt.xlabel('x_values')
    plt.ylabel('y_values')
    plt.show()
viz_polynomial()
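# The transform-then-fit pattern above can be packaged into a single estimator
# with sklearn's Pipeline; a minimal sketch of the same degree-2 model
# (poly_pipeline is a name introduced here):
from sklearn.pipeline import Pipeline
poly_pipeline = Pipeline([
    ('poly', PolynomialFeatures(degree=2, include_bias=False)),
    ('lin', LinearRegression()),
])
poly_pipeline.fit(x_values, y_values)
print(poly_pipeline.predict([[0.0]]))  # should be near the true intercept, 2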
# Below we create fresh estimators for the learning-curve experiments;
# PolynomialFeatures() keeps its default degree of 2.
lin_reg_1 = LinearRegression()
poly_reg_1 = PolynomialFeatures()  # default degree is 2
# Both learning curves below are trained on the quadratic data (x_values, y_values) generated above
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
def plot_learning_curves_linear(model, x, y, colors):
    x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2)
    train_errors, val_errors = [], []
    for m in range(1, len(x_train)):
        model.fit(x_train[:m], y_train[:m])
        y_train_predict = model.predict(x_train[:m])
        y_val_predict = model.predict(x_val)  # always evaluate on the full validation set
        train_errors.append(mean_squared_error(y_train[:m], y_train_predict))
        val_errors.append(mean_squared_error(y_val, y_val_predict))
    plt.plot(np.sqrt(train_errors), colors[0], linewidth=2, label='train')
    plt.plot(np.sqrt(val_errors), colors[1], linewidth=2, label='val')
def plot_learning_curves_poly(model, x, y, colors):
    x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2)
    train_errors, val_errors = [], []
    x_val_poly = model.fit_transform(x_val)  # transform the validation set once
    for m in range(1, len(x_train)):
        X_poly = model.fit_transform(x_train[:m])
        lin_reg_1.fit(X_poly, y_train[:m])
        y_train_predict = lin_reg_1.predict(X_poly)
        y_val_predict = lin_reg_1.predict(x_val_poly)  # always evaluate on the full validation set
        train_errors.append(mean_squared_error(y_train[:m], y_train_predict))
        val_errors.append(mean_squared_error(y_val, y_val_predict))
    plt.plot(np.sqrt(train_errors), colors[0], linewidth=2, label='train')
    plt.plot(np.sqrt(val_errors), colors[1], linewidth=2, label='val')
plt.title('Learning Curves using Linear Regression')
plot_learning_curves_linear(lin_reg_1, x_values, y_values, colors=['b--', 'r-+'])
plt.legend(['train', 'val'])
plt.ylabel("RMSE")
plt.xlabel("Training Set Size")
plt.show()
plt.title('Learning Curves using Polynomial Regression')
plot_learning_curves_poly(poly_reg_1, x_values, y_values, colors=['#3c3c3c', 'g-+'])
plt.legend(['train', 'val'])
plt.ylabel("RMSE")
plt.xlabel("Training Set Size")
plt.show()
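# Reading the curves: if both RMSE curves plateau close together at a high value,
# the model is underfitting; a persistent gap between a low training curve and a
# higher validation curve indicates overfitting.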
# Computing regression metrics with scikit-learn on a small toy example
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
Y_actual = [5, -1, 2, 10]
Y_predic = [3.5, -0.9, 2, 9.9]
print('R Squared =', r2_score(Y_actual, Y_predic))  # the closer to 1, the better the fit
print('MAE =', mean_absolute_error(Y_actual, Y_predic))
print('MSE =', mean_squared_error(Y_actual, Y_predic))
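# RMSE, the metric plotted in the learning curves above, is just the square
# root of the MSE:
print('RMSE =', np.sqrt(mean_squared_error(Y_actual, Y_predic)))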