# Starter Project 👋

# Import libraries
import matplotlib.pyplot as plt
import numpy as np

# Unpack data: read both comma-separated data files into arrays
# (data_1 feeds Tasks A/B, data_2 feeds Tasks C/D).
data_1, data_2 = (
    np.loadtxt(file_name, delimiter=',')
    for file_name in ('ex1data1.txt', 'ex1data2.txt')
)

# Task A
# Training data for Task A: first column is the feature (population),
# second column is the target (profit).
task_A_X_population, task_A_Y_profit = data_1[:, 0], data_1[:, 1]

# Scatter plot of the raw training data.
plt.scatter(task_A_X_population, task_A_Y_profit)
plt.xlabel("Population in city (in 10,000s)")
plt.ylabel("Profit of a food truck (in $10,000s)")
plt.show()
""" For sure, this graph is not readable for any information. There is no obvious trends and lines that could represent something informative """

# Setting up the training environment
learning_rate = 0.01
epoch = 1500
m = len(task_A_X_population)
thethas = [0, 0]
cost_function_result = []

# What is the learning rate?
#   The learning rate is the step coefficient that controls how fast the
#   thetas increase/decrease on each update.
# What if I set smaller or larger values for alpha?
#   With a smaller learning rate the calculations and convergence go slowly.
#   On the other hand, if the learning rate is too big, the cost function
#   J(thetas) may never converge.
# What is the epoch?
#   It is the number of iterations in which gradient descent is used to
#   figure out the best thetas for our hypothesis.
# What if I set too large or too small values for epoch?
#   Too many epochs give us inefficient computation that does not improve
#   the result. However, when the number of epochs is too small, you may
#   not reach the desired result.

# Batch gradient descent for the hypothesis h(x) = theta0 + theta1 * x.
for _ in range(epoch):
    Y_pred = thethas[0] + thethas[1] * task_A_X_population
    # Compute the residual once; the cost and both gradients reuse it.
    residual = task_A_Y_profit - Y_pred

    # The cost is recorded *before* this epoch's parameter update.
    cost_function = (1 / (2 * m)) * np.sum(np.square(residual))
    cost_function_result.append(cost_function)

    gradient_0 = (-1 / m) * np.sum(residual)
    gradient_1 = (-1 / m) * np.sum(residual * task_A_X_population)

    thethas[0] = thethas[0] - (learning_rate * gradient_0)
    thethas[1] = thethas[1] - (learning_rate * gradient_1)

# Parameters after the last update.
print(thethas)

# Learning curve: cost per epoch.
plt.plot(range(epoch), cost_function_result)
plt.xlabel("epochs")
plt.ylabel("J(thetha0, thetha1)")
plt.show()

# Fitted line over the training data. The endpoints are evaluated from the
# final thetas instead of pairing min/max of the last Y_pred: Y_pred is one
# update behind the final thetas, and min/max pairing would draw a rising
# line even when the fitted slope is negative.
line_x = np.array([task_A_X_population.min(), task_A_X_population.max()])
line_y = thethas[0] + thethas[1] * line_x

plt.scatter(task_A_X_population, task_A_Y_profit)
plt.plot(line_x, line_y, color='red')
plt.xlabel("Population in city (in 10,000s)")
plt.ylabel("Prediction of profit of a food truck (in $10,000s)")
plt.show()

# Task B
# Same regression as Task A, expressed with matrix operations.
number_of_rows = len(task_A_X_population)
task_A_Y_profit = task_A_Y_profit.reshape((number_of_rows, 1))
""" Here we add a column ones to the beginning because of the theta0 that must be single calculated in hypothesis We just always multiply it by 1. """
# Design matrix: a leading column of ones followed by the feature column.
task_A_X_population = np.column_stack(
    (np.ones(number_of_rows), task_A_X_population)
)
# One parameter per design-matrix column, stored as a column vector.
thethas = np.zeros((task_A_X_population.shape[1], 1))

for matrix in (task_A_X_population, task_A_Y_profit, thethas):
    print(matrix.shape)

for _ in range(epoch):
    residuals = np.dot(task_A_X_population, thethas) - task_A_Y_profit
    gradient = np.dot(task_A_X_population.T, residuals) * (1/m)
    thethas = thethas - (learning_rate * gradient)
print(thethas)
""" Dimension of X is number of features + 1 (because ones are added) Dimension of y is the target numbers that is one in this case Dimension of theta is the number is one column to the number of features + 1 I got a close same result as in Task A """

# Task C
# Target is the third column of the second dataset.
y = data_2[:, 2]

# Pointer for Standard Scaler
# Standardise every feature column (all columns except the last one) in
# place: subtract the column mean and divide by the column standard deviation.
feature_columns = data_2[:, :-1]
data_2[:, :-1] = (
    (feature_columns - feature_columns.mean(axis=0))
    / feature_columns.std(axis=0)
)
# Feature normalization is used for scaling, because most of the time the
# features are not in the same range. Some could be in the thousands while
# others could be digits from 1 to 10, so feature normalization is used to
# bring them to one scale for better usage and plotting.

x1 = data_2[:, 0]
x2 = data_2[:, 1]

# The sample count must come from *this* dataset; the earlier value of m was
# computed from the Task A data and would mis-scale the cost and gradients.
m = len(y)

thetas = [0, 0, 0]
cost_function_result = []

# Batch gradient descent for h(x) = theta0 + theta1 * x1 + theta2 * x2.
for _ in range(epoch):
    Y_pred = thetas[0] + thetas[1] * x1 + thetas[2] * x2
    # Compute the residual once; the cost and all gradients reuse it.
    residual = y - Y_pred

    # The cost is recorded *before* this epoch's parameter update.
    cost_function = (1 / (2 * m)) * np.sum(np.square(residual))
    cost_function_result.append(cost_function)

    gradient_0 = (-1 / m) * np.sum(residual)
    gradient_1 = (-1 / m) * np.sum(residual * x1)
    gradient_2 = (-1 / m) * np.sum(residual * x2)

    thetas[0] = thetas[0] - (learning_rate * gradient_0)
    thetas[1] = thetas[1] - (learning_rate * gradient_1)
    thetas[2] = thetas[2] - (learning_rate * gradient_2)

# Parameters after the last update.
print(thetas)

# Learning curve: cost per epoch (the model has three parameters).
plt.plot(range(epoch), cost_function_result)
plt.xlabel("epochs")
plt.ylabel("J(thetha0, thetha1, thetha2)")
plt.show()
# It's obvious that our cost function decreases during the calculations, so
# in the latest epochs (tries) the cost function is getting close to zero.

# Task D
# Same regression as Task C, expressed with matrix operations.
X = data_2[:, :2]
y = y.reshape((len(data_2), 1))
# Feature normalization was already applied to data_2 in Task C
# (see the Standard Scaler pointer there), so X is used as-is.

# Design matrix: a leading column of ones followed by the feature columns.
X = np.column_stack((np.ones(len(X)), X))

# The sample count must come from *this* dataset rather than from the value
# left over from Task A, otherwise the gradient is mis-scaled.
m = len(y)

# One parameter per design-matrix column, stored as a column vector.
tetas = np.zeros((X.shape[1], 1))
for _ in range(epoch):
    G = X.T.dot(X.dot(tetas) - y) * (1 / m)
    tetas = tetas - (learning_rate * G)
print(tetas)
# They are close to the same (compared with the Task C result).

# Normal equation: obtain the parameters in closed form by solving
# (X^T X) * tetas = X^T y. Solving the linear system avoids forming an
# explicit matrix inverse, which is less accurate numerically.
tetas = np.linalg.solve(X.T.dot(X), X.T.dot(y))
print(tetas)
# There is a little difference, however the tetas are close to the same.