# Performing linear regression using scikit-learn
import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
x = 2 * np.random.rand(100, 1)
y = 4 + 3 * x + np.random.randn(100, 1)  # Gaussian noise around the line y = 4 + 3x
lin_reg = LinearRegression()
lin_reg.fit(x, y)
print(lin_reg.intercept_, lin_reg.coef_)  # should be close to 4 and 3
plt.plot(x, y, 'b.')
plt.plot(x, lin_reg.predict(x), 'r-')
plt.title('Linear Regression')
plt.xlabel('x values')
plt.ylabel('y values')
plt.show()
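# A quick sanity check (a minimal sketch, not part of the original notebook):
# predictions at new inputs should track the generating line y = 4 + 3x.
x_check = np.array([[0.0], [2.0]])  # hypothetical query points at the data range's endpoints
print(lin_reg.predict(x_check))  # roughly [[4.], [10.]]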
# Performing linear regression using batch Gradient Descent
%matplotlib inline
learning_rate = 0.1
n_iterations = 1000
m = 100  # number of training instances
theta = np.random.randn(2, 1)  # random initialization of [intercept, slope]
print('initial theta:', theta)
# Generating some linear-looking data
np.random.seed(0)
x = 2 * np.random.rand(100, 1)
y = 4 + 3 * x + np.random.randn(100, 1)  # Gaussian noise
# print(x)
# print(y)
x_b = np.c_[np.ones((100, 1)), x]  # add x0 = 1 (bias term) to each instance
# print(x_b)
for iteration in range(n_iterations):
    gradients = 2 / m * x_b.T.dot(x_b.dot(theta) - y)  # gradient of the MSE cost
    theta = theta - learning_rate * gradients
print('theta after gradient descent: {}'.format(theta))
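# Optional: tracking the training MSE per iteration makes the convergence of
# gradient descent visible. A minimal sketch reusing x_b, y, and the
# hyperparameters above (theta_gd and mse_history are names introduced here):
theta_gd = np.random.randn(2, 1)
mse_history = []
for _ in range(n_iterations):
    error = x_b.dot(theta_gd) - y
    mse_history.append(float((error ** 2).mean()))
    theta_gd = theta_gd - learning_rate * (2 / m) * x_b.T.dot(error)
plt.plot(mse_history)
plt.xlabel('iteration')
plt.ylabel('training MSE')
plt.title('Gradient descent convergence')
plt.show()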
# Performing linear regression using the Normal Equation
# Generating some linear-looking data
np.random.seed(0)
x = 2 * np.random.rand(100,1)
y = 4 + 3 * x + np.random.randn(100, 1)  # Gaussian noise
# print(x)
# print(y)
x_b = np.c_[np.ones((100, 1)), x]  # add x0 = 1 (bias term) to each instance
theta_best = np.linalg.inv(x_b.T.dot(x_b)).dot(x_b.T).dot(y)  # Normal Equation: (X^T X)^-1 X^T y
print('The value of theta that minimizes the cost function is: {}'.format(theta_best))
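# np.linalg.pinv (the Moore-Penrose pseudoinverse, computed via SVD) solves the
# same least-squares problem and is numerically safer when x_b.T.dot(x_b) is
# close to singular; a minimal alternative sketch:
theta_pinv = np.linalg.pinv(x_b).dot(y)
print('theta via pinv: {}'.format(theta_pinv))  # should match theta_best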
# Making predictions with theta_best
x_new = np.array([[0], [1], [2]])
x_new_b = np.c_[np.ones((3, 1)), x_new]
y_predict = x_new_b.dot(theta_best)
#plotting the model's predictions
plt.plot(x_new, y_predict, 'g-')
plt.plot(x, y, 'r.')
plt.xlabel('x')
plt.ylabel('y')
plt.title("Model's predictions")
plt.axis([0, 2, 0, 15])
plt.show()
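# Cross-check (a minimal sketch): sklearn's LinearRegression solves the same
# least-squares problem, so its parameters should agree with theta_best.
check_reg = LinearRegression()
check_reg.fit(x, y)
print('sklearn intercept and coef:', check_reg.intercept_, check_reg.coef_)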
# Performing polynomial regression using scikit-learn
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
m = 100
x_values = 6 * np.random.rand(m, 1) - 3
y_values = 0.5 * x_values ** 2 + x_values + 2 + np.random.randn(m, 1)
# y = theta2 * x^2 + theta1 * x + theta0, with theta2 = 0.5, theta1 = 1, theta0 = 2
poly_reg = PolynomialFeatures(degree=2, include_bias=False)  # adds an x^2 column; LinearRegression fits the intercept itself
X_poly = poly_reg.fit_transform(x_values)  # X_poly has columns [x, x^2]
# print(x_values[0])
# print(X_poly[0])
pol_reg = LinearRegression()
pol_reg.fit(X_poly, y_values)
print(pol_reg.intercept_, pol_reg.coef_)  # should be close to 2 and [1, 0.5]
# Visualizing the polynomial regression results
def viz_polynomial():
    plt.scatter(x_values, y_values, color='red')
    # sort x so the prediction curve is drawn left to right, not in data order
    x_sorted = np.sort(x_values, axis=0)
    plt.plot(x_sorted, pol_reg.predict(poly_reg.transform(x_sorted)), color='blue')
    plt.title('Polynomial Regression')
    plt.xlabel('x_values')
    plt.ylabel('y_values')
    plt.show()
viz_polynomial()
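# The transform-then-fit pattern above can be packaged into a single estimator
# with sklearn's Pipeline; a minimal sketch of the same degree-2 model
# (poly_pipeline is a name introduced here):
from sklearn.pipeline import Pipeline
poly_pipeline = Pipeline([
    ('poly', PolynomialFeatures(degree=2, include_bias=False)),
    ('lin', LinearRegression()),
])
poly_pipeline.fit(x_values, y_values)
print(poly_pipeline.predict([[0.0]]))  # should be near the true intercept, 2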
# Below we create fresh estimators for the learning-curve experiments;
# PolynomialFeatures() keeps its default degree of 2.
lin_reg_1 = LinearRegression()
poly_reg_1 = PolynomialFeatures()  # default degree is 2
# Both learning curves below are trained on the quadratic data (x_values, y_values) generated above
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
def plot_learning_curves_linear(model, x, y, colors):
    x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2)
    train_errors, val_errors = [], []
    for m in range(1, len(x_train)):
        model.fit(x_train[:m], y_train[:m])
        y_train_predict = model.predict(x_train[:m])
        y_val_predict = model.predict(x_val)  # always evaluate on the full validation set
        train_errors.append(mean_squared_error(y_train[:m], y_train_predict))
        val_errors.append(mean_squared_error(y_val, y_val_predict))
    plt.plot(np.sqrt(train_errors), colors[0], linewidth=2, label='train')
    plt.plot(np.sqrt(val_errors), colors[1], linewidth=2, label='val')
def plot_learning_curves_poly(model, x, y, colors):
    x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2)
    train_errors, val_errors = [], []
    x_val_poly = model.fit_transform(x_val)  # transform the validation set once
    for m in range(1, len(x_train)):
        X_poly = model.fit_transform(x_train[:m])
        lin_reg_1.fit(X_poly, y_train[:m])
        y_train_predict = lin_reg_1.predict(X_poly)
        y_val_predict = lin_reg_1.predict(x_val_poly)  # always evaluate on the full validation set
        train_errors.append(mean_squared_error(y_train[:m], y_train_predict))
        val_errors.append(mean_squared_error(y_val, y_val_predict))
    plt.plot(np.sqrt(train_errors), colors[0], linewidth=2, label='train')
    plt.plot(np.sqrt(val_errors), colors[1], linewidth=2, label='val')
plt.title('Learning Curves using Linear Regression')
plot_learning_curves_linear(lin_reg_1, x_values, y_values, colors=['b--', 'r-+'])
plt.legend(['train', 'val'])
plt.ylabel("RMSE")
plt.xlabel("Training Set Size")
plt.show()
plt.title('Learning Curves using Polynomial Regression')
plot_learning_curves_poly(poly_reg_1, x_values, y_values, colors=['#3c3c3c', 'g-+'])
plt.legend(['train', 'val'])
plt.ylabel("RMSE")
plt.xlabel("Training Set Size")
plt.show()
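# Reading the curves: if both RMSE curves plateau close together at a high value,
# the model is underfitting; a persistent gap between a low training curve and a
# higher validation curve indicates overfitting.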
# Computing regression metrics with scikit-learn on a small toy example
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
Y_actual = [5, -1, 2, 10]
Y_predic = [3.5, -0.9, 2, 9.9]
print('R Squared =', r2_score(Y_actual, Y_predic))  # the closer to 1, the better the fit
print('MAE =', mean_absolute_error(Y_actual, Y_predic))
print('MSE =', mean_squared_error(Y_actual, Y_predic))
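# RMSE, the metric plotted in the learning curves above, is just the square
# root of the MSE:
print('RMSE =', np.sqrt(mean_squared_error(Y_actual, Y_predic)))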