# Linear Regression: fit y = m*x + b by least squares, then compare with scipy
# modules
import numpy as np
import matplotlib.pyplot as plt
def get_dataset(m, b, size, seed=None):
    """Generate a noisy linear dataset y = m*x + b.

    Parameters
    ----------
    m : float
        Slope of the underlying line.
    b : float
        Intercept of the underlying line.
    size : int
        Number of points to generate.
    seed : int, optional
        Seed for the noise generator; pass a value for reproducible
        output (default: nondeterministic).

    Returns
    -------
    x, y : np.ndarray
        x is `size` evenly spaced points in [1, size]; y is the line
        evaluated at x plus Gaussian noise scaled to the data.
    """
    x = np.linspace(1, size, size)  # start, end, number of elements
    y = m * x + b
    # Zero-mean Gaussian noise. The original drew from
    # normal(min(-m, m)*size/10, ...), i.e. mean = -|m|*size/10, which
    # shifts every y down by that amount and makes the fitted intercept
    # wildly wrong; demo noise must be centered on 0.
    rng = np.random.default_rng(seed)
    noise = rng.normal(0.0, abs(m) * size / 10, y.shape)
    return x, y + noise
# getting the linear function with <size> elements and plot it
# (module-level demo: x and y are reused by the fitting code below)
x, y = get_dataset(m=10, b=5, size=1000)
# scatter the raw noisy points; the fitted line is overlaid later
plt.scatter(x, y)
def least_squares(x, y):
    """Fit a line y = m*x + b by the method of ordinary least squares.

    Parameters
    ----------
    x, y : array-like
        Paired observations of the independent and dependent variables.

    Returns
    -------
    m, b : float
        Slope and intercept minimizing the sum of squared residuals.
    """
    # means of x and y (the fitted line always passes through them)
    x_mean = np.mean(x)
    y_mean = np.mean(y)
    # closed-form OLS slope: m = cov(x, y) / var(x)
    m = np.sum((x - x_mean) * (y - y_mean)) / np.sum((x - x_mean) ** 2)
    # intercept from the point of means
    b = y_mean - x_mean * m
    return m, b
# fit the noisy data generated above and report the recovered parameters
m, b = least_squares(x,y)
print(m,b)
# evaluate the fitted line at the observed x values
y_pred = m*x + b
# original points
plt.scatter(x, y)
# add predicted line
plt.plot(x, y_pred, color = 'r')
# adding tags to axis [optional]
plt.xlabel('Year (Independent)')
plt.ylabel('Cost (Dependent)')
plt.show()
# We can also do this with the "scipy" module
# scipy implementation
from scipy import stats
# linregress returns slope, intercept, the correlation coefficient r,
# the two-sided p-value for a slope-of-zero hypothesis test, and the
# standard error of the slope estimate
slope, intercept, r, p, std_err = stats.linregress(x, y)
# slope/intercept should match the hand-rolled least_squares() result above
print(slope, intercept, r, p, std_err)