# Linear Regression: fit y = m*x + b by least squares, then compare with scipy
# modules
import numpy as np
import matplotlib.pyplot as plt
def get_dataset(m, b, size, seed=None):
    """Generate a noisy linear dataset y = m*x + b.

    Parameters
    ----------
    m : float
        Slope of the underlying line.
    b : float
        Intercept of the underlying line.
    size : int
        Number of points to generate.
    seed : int, optional
        Seed for the noise generator; pass a value for reproducible
        output (default: nondeterministic).

    Returns
    -------
    x, y : np.ndarray
        x is `size` evenly spaced points in [1, size]; y is the line
        evaluated at x plus Gaussian noise scaled to the data.
    """
    x = np.linspace(1, size, size)  # start, end, number of elements
    y = m * x + b
    # Zero-mean Gaussian noise. The original drew from
    # normal(min(-m, m)*size/10, ...), i.e. mean = -|m|*size/10, which
    # shifts every y down by that amount and makes the fitted intercept
    # wildly wrong; demo noise must be centered on 0.
    rng = np.random.default_rng(seed)
    noise = rng.normal(0.0, abs(m) * size / 10, y.shape)
    return x, y + noise
# getting the linear function with <size> elements and plot it
# (module-level demo: x and y are reused by the fitting code below)
x, y = get_dataset(m=10, b=5, size=1000)
# scatter the raw noisy points; the fitted line is overlaid later
plt.scatter(x, y)
def least_squares(x, y):
    """Fit a line y = m*x + b by the method of ordinary least squares.

    Parameters
    ----------
    x, y : array-like
        Paired observations of the independent and dependent variables.

    Returns
    -------
    m, b : float
        Slope and intercept minimizing the sum of squared residuals.
    """
    # means of x and y (the fitted line always passes through them)
    x_mean = np.mean(x)
    y_mean = np.mean(y)
    # closed-form OLS slope: m = cov(x, y) / var(x)
    m = np.sum((x - x_mean) * (y - y_mean)) / np.sum((x - x_mean) ** 2)
    # intercept from the point of means
    b = y_mean - x_mean * m
    return m, b
# fit the noisy data generated above and report the recovered parameters
m, b = least_squares(x,y)
print(m,b)
# evaluate the fitted line at the observed x values
y_pred = m*x + b
# original points
plt.scatter(x, y)
# add predicted line
plt.plot(x, y_pred, color = 'r')
# adding tags to axis [optional]
plt.xlabel('Year (Independent)')
plt.ylabel('Cost (Dependent)')
plt.show()
# We can also do this with the "scipy" module
# scipy implementation
from scipy import stats
# linregress returns slope, intercept, the correlation coefficient r,
# the two-sided p-value for a slope-of-zero hypothesis test, and the
# standard error of the slope estimate
slope, intercept, r, p, std_err = stats.linregress(x, y)
# slope/intercept should match the hand-rolled least_squares() result above
print(slope, intercept, r, p, std_err)