# Performing linear regression using scikit-learn
import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
x = 2 * np.random.rand(100, 1)          # 100 points drawn uniformly from [0, 2)
y = 4 + 3 * x + np.random.rand(100, 1)  # y = 4 + 3x plus uniform noise in [0, 1)
lin_reg = LinearRegression()
lin_reg.fit(x, y)
print(lin_reg.intercept_, lin_reg.coef_)  # learned intercept and slope
plt.plot(x, y, 'b.')
plt.plot(x, lin_reg.predict(x), 'r-')
plt.title('Linear Regression')
plt.xlabel('x values')
plt.ylabel('y values')
plt.show()
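# A quick sanity check of the fitted model (a minimal sketch; x_new here is an
# illustrative input, not part of the original code):
x_new = np.array([[0], [2]])
print(lin_reg.predict(x_new))  # roughly [[4.5], [10.5]], since the uniform noise averages 0.5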
# Performing linear regression using gradient descent
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
learning_rate = 0.1
n_iterations = 1000
m = 100                        # number of training instances
theta = np.random.randn(2, 1)  # random initialization of [intercept, slope]
print(theta)
# Generating some linear-looking data
np.random.seed(0)
x = 2 * np.random.rand(100,1)
y = 4 + 3 * x + np.random.rand(100,1)
print(x)
print(y)
x_b = np.c_[np.ones((100, 1)), x]  # add x0 = 1 to each instance
print(x_b)
# Batch gradient descent: the MSE gradient is (2/m) * X^T.(X.theta - y)
for iteration in range(n_iterations):
    gradients = 2/m * x_b.T.dot(x_b.dot(theta) - y)
    theta = theta - learning_rate * gradients
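# After the loop, theta should land close to the normal-equation solution
# computed further below (roughly intercept 4.5 and slope 3.0, since the
# uniform noise averages 0.5):
print(theta)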
# Outputs (abridged): the earlier print(theta) shows the 2x1 random
# initialization, e.g.
# [[ 0.69115545]
#  [-0.32253251]]
# print(x), print(y), and print(x_b) each dump a 100-row array and are omitted
# here; x_b is simply x with a leading column of ones.
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Generating some linear-looking data
np.random.seed(0)
x = 2 * np.random.rand(100,1)
y = 4 + 3 * x + np.random.rand(100,1)
# print(x)
# print(y)
x_b = np.c_[np.ones((100, 1)), x]  # add x0 = 1 to each instance
theta_best = np.linalg.inv(x_b.T.dot(x_b)).dot(x_b.T).dot(y)  # normal equation: theta = (X^T.X)^(-1).X^T.y
print('The value of theta that minimizes the cost function is: {}'.format(theta_best))
# Making predictions using theta_best
x_new = np.array([[0],[1],[2]])
x_new_b = np.c_[np.ones((3,1)), x_new]
y_predict = x_new_b.dot(theta_best)
# Plotting the model's predictions
plt.plot(x_new, y_predict, 'g-')
plt.plot(x, y, 'r.')
plt.xlabel('x')
plt.ylabel('y')
plt.title("Model's predictions")
plt.axis([0, 2, 0, 15])
plt.show()
The value of theta that minimizes the cost function is: [[4.55808002]
[2.96827553]]
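# Cross-checking theta_best (a sketch; np.linalg.pinv computes the Moore-Penrose
# pseudoinverse, which avoids explicitly inverting X^T.X and is more numerically
# stable):
theta_pinv = np.linalg.pinv(x_b).dot(y)
print(theta_pinv)  # should match theta_best above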
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
m = 100
x_values = 6 * np.random.rand(m, 1) - 3  # 100 points drawn uniformly from [-3, 3)
y_values = 0.5 * x_values ** 2 + x_values + 2 + np.random.randn(m, 1)
# y = theta_2 * x^2 + theta_1 * x + theta_0 + noise, with theta_2 = 0.5, theta_1 = 1, theta_0 = 2
poly_reg = PolynomialFeatures(degree=2)
# print(poly_reg)
X_poly = poly_reg.fit_transform(x_values)
# print(x_values[0])
# print(X_poly[0])
pol_reg = LinearRegression()
pol_reg.fit(X_poly, y_values)
print(pol_reg.intercept_, pol_reg.coef_)  # roughly intercept ~ 2 and coef ~ [0, 1, 0.5]; the first coefficient belongs to the bias column PolynomialFeatures adds
# Visualizing the polynomial regression results
def viz_polynomial():
    plt.scatter(x_values, y_values, color='red')
    # sort x so the fitted curve is drawn left to right rather than as a scribble
    x_sorted = np.sort(x_values, axis=0)
    plt.plot(x_sorted, pol_reg.predict(poly_reg.transform(x_sorted)), color='blue')
    plt.title('Polynomial Regression')
    plt.xlabel('x_values')
    plt.ylabel('y_values')
    plt.show()
viz_polynomial()
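# The transform-then-fit pair above can also be wrapped into a single estimator
# (a minimal sketch using scikit-learn's Pipeline; not used in the rest of this
# section):
from sklearn.pipeline import make_pipeline
poly_pipeline = make_pipeline(PolynomialFeatures(degree=2), LinearRegression())
poly_pipeline.fit(x_values, y_values)
print(poly_pipeline.predict([[1.5]]))  # one call now handles both steps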
# Below we just instantiate fresh estimators without passing any hyperparameters
# (PolynomialFeatures then defaults to degree=2)
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
lin_reg_1 = LinearRegression()
poly_reg_1 = PolynomialFeatures()
# Here we reuse the x_values/y_values data generated for the polynomial regression above
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
def plot_learning_curves_linear(model, x, y, colors):
    x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2)
    train_errors, val_errors = [], []
    for m in range(1, len(x_train)):
        model.fit(x_train[:m], y_train[:m])
        y_train_predict = model.predict(x_train[:m])
        y_val_predict = model.predict(x_val)  # evaluate on the full validation set
        train_errors.append(mean_squared_error(y_train[:m], y_train_predict))
        val_errors.append(mean_squared_error(y_val, y_val_predict))
    plt.plot(np.sqrt(train_errors), colors[0], linewidth=2, label='train')
    plt.plot(np.sqrt(val_errors), colors[1], linewidth=2, label='val')
def plot_learning_curves_poly(model, x, y, colors):
    x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2)
    train_errors, val_errors = [], []
    for m in range(1, len(x_train)):
        X_poly = model.fit_transform(x_train[:m])
        lin_reg_1.fit(X_poly, y_train[:m])
        y_train_predict = lin_reg_1.predict(X_poly)
        y_val_predict = lin_reg_1.predict(model.transform(x_val))  # full validation set
        train_errors.append(mean_squared_error(y_train[:m], y_train_predict))
        val_errors.append(mean_squared_error(y_val, y_val_predict))
    plt.plot(np.sqrt(train_errors), colors[0], linewidth=2, label='train')
    plt.plot(np.sqrt(val_errors), colors[1], linewidth=2, label='val')
plt.title('Learning Curves using Linear Regression')
plot_learning_curves_linear(lin_reg_1, x_values, y_values, colors=['b--', 'r-+'])
plt.legend(['train', 'val'])
plt.ylabel("RMSE")
plt.xlabel("Training Set Size")
plt.show()
plt.title('Learning Curves Using Polynomial Regression')
plot_learning_curves_poly(poly_reg_1, x_values, y_values, colors=['#3c3c3c', 'g-+'])
plt.legend(['train', 'val'])
plt.ylabel("RMSE")
plt.xlabel("Training Set Size")
plt.show()
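# Reading the curves: the linear model's train and validation RMSE should both
# plateau at a relatively high level (it underfits the quadratic data), while
# the degree-2 model's curves should converge at a noticeably lower error.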
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
Y_actual = [5, -1, 2, 10]
Y_predic = [3.5, -0.9, 2, 9.9]
print('R Squared =', r2_score(Y_actual, Y_predic))  # the closer to 1, the better the model
print('MAE =', mean_absolute_error(Y_actual, Y_predic))
print('MSE =', mean_squared_error(Y_actual, Y_predic))
R Squared = 0.9656060606060606
MAE = 0.42499999999999993
MSE = 0.5674999999999999
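# RMSE follows directly from the MSE above:
print('RMSE =', np.sqrt(mean_squared_error(Y_actual, Y_predic)))  # ~0.7533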