Task 1
Suppose we have the following two lists of values in Python: x = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20] and y = [4, 7, 11, 16, 22, 29, 38, 49, 63, 80]. We would like to estimate the y-value associated with a new x-value of 13.
Approach 1 - Python implementation
import numpy as np
import matplotlib.pyplot as plt
def estimate_coefficient(x, y):
    """Fit a simple least-squares line ``y = intercept + slope * x``.

    Args:
        x: Sequence or array of predictor values.
        y: Sequence or array of response values, one per entry of ``x``.

    Returns:
        Tuple ``(intercept, slope)`` of the fitted line.

    Raises:
        ValueError: If ``x`` and ``y`` differ in size, hold fewer than two
            points, or all ``x`` values are identical (slope undefined).
    """
    # Convert up front so plain Python lists work and integer inputs cannot
    # overflow in the products below.
    x = np.asarray(x, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()
    if x.size != y.size:
        raise ValueError("x and y must contain the same number of values")
    if x.size < 2:
        raise ValueError("at least two data points are required")

    m_x = np.mean(x)
    m_y = np.mean(y)
    # Centered sums are algebraically equal to sum(x*y) - n*m_x*m_y and
    # sum(x*x) - n*m_x**2, but lose less precision for large values.
    dev_x = x - m_x
    SS_xy = np.sum(dev_x * (y - m_y))
    SS_xx = np.sum(dev_x * dev_x)
    if SS_xx == 0:
        raise ValueError("cannot fit a line when all x values are identical")

    slope = SS_xy / SS_xx
    intercept = m_y - slope * m_x
    return (intercept, slope)
# Sample observations: x runs over the even numbers 2..20.
x = np.array(range(2, 21, 2))
y = np.array([4, 7, 11, 16, 22, 29, 38, 49, 63, 80])
# estimate_coefficient returns the pair as (intercept, slope).
intercept, slope = estimate_coefficient(x, y)
def predict(x_val, slope, intercept):
    """Evaluate the line ``slope * x_val + intercept`` and round the result.

    Args:
        x_val: Point at which to evaluate the line.
        slope: Slope of the line.
        intercept: Intercept of the line.

    Returns:
        The line's value at ``x_val`` passed through the built-in ``round``.
    """
    estimate = slope * x_val + intercept
    return round(estimate)
# Estimate y at the new x-value of 13 using the fitted slope and intercept.
y_pred = predict(x_val=13, slope=slope, intercept=intercept)
print(y_pred)
Approach 2 - scipy library
import matplotlib.pyplot as plt
from scipy import stats
# Sample data as plain Python lists; x is the even numbers 2..20.
x = list(range(2, 21, 2))
y = [4, 7, 11, 16, 22, 29, 38, 49, 63, 80]
# Unpack the five values produced by linregress for this data.
slope, intercept, r, p, std_err = stats.linregress(x=x, y=y)
def model(x_val):
    """Return ``slope * x_val + intercept`` for the fitted line.

    ``slope`` and ``intercept`` are read from module scope at call time, so
    they must be assigned before this function is called.

    Args:
        x_val: Point at which to evaluate the line.

    Returns:
        The unrounded value of the line at ``x_val``.
    """
    return slope * x_val + intercept
# Value of the fitted line at every observed x, used to draw the line.
mymodel = [model(x_val) for x_val in x]
# Value of the fitted line at the new x-value of 13, passed through round().
y_pred2 = round(model(13))
# Observations as scattered points with the fitted line drawn over them.
plt.scatter(x, y)
plt.plot(x, mymodel)
plt.show()
print(y_pred2)
Metrics
# Bare expression: displays `r` when run as a notebook cell.
r
# Rounded model output for each observed x.
y_predicted_list = list(map(round, map(model, x)))
y_predicted_list
import pandas as pd
# Side-by-side table of observed values and rounded predictions.
data = {
    "actual": y,
    "predicted": y_predicted_list,
}
df = pd.DataFrame(data=data)
df.head(n=10)
from sklearn.metrics import mean_squared_error,r2_score, mean_absolute_error
from math import sqrt
# Error metrics comparing the observations with the rounded predictions.
# NOTE(review): the metrics are computed on rounded predictions and the RMSE
# is itself rounded to a whole number -- confirm this loss of precision is
# intended.
mse = mean_squared_error(y_true=y, y_pred=y_predicted_list)
r2 = r2_score(y_true=y, y_pred=y_predicted_list)
rmse = round(sqrt(mse))
mse, rmse, r2
Approach 3 - sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
# Reshape both series into single-column 2-D arrays before passing them to
# scikit-learn.
x = np.array([2, 4, 6, 8, 10, 12, 14, 16, 18, 20]).reshape(-1, 1)
y = np.array([4, 7, 11, 16, 22, 29, 38, 49, 63, 80]).reshape(-1, 1)
# Hold out 20% of the points for testing. A fixed seed keeps the split, and
# therefore the fitted line and the prediction for x = 13, reproducible
# from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.20, random_state=42
)
regr = LinearRegression()
regr.fit(X_train, y_train)
def calc(slope, intercept, x_val):
    """Evaluate the line ``slope * x_val + intercept``.

    Args:
        slope: Slope of the line.
        intercept: Intercept of the line.
        x_val: Point at which to evaluate the line.

    Returns:
        The unrounded value of the line at ``x_val``.
    """
    scaled = slope * x_val
    return scaled + intercept
# Evaluate the fitted line at x = 13 using the learned coefficients.
score = calc(slope=regr.coef_, intercept=regr.intercept_, x_val=13)
# Both targets and features were fitted as 2-D arrays, so pull out the single
# element of the result.
print(score[0, 0])
# Held-out observations as blue points, model predictions as a black line.
y_pred_list = regr.predict(X_test)
plt.scatter(X_test, y_test, color="b")
plt.plot(X_test, y_pred_list, color="k")
plt.show()