# Simple Linear Regression Program ----------------------------
# Import statements
import numpy as np
from matplotlib import pyplot as plt
def calcCost(X, W, Y):
    # Sum of squared errors between the predictions X.W and the labels Y
    pred = np.dot(X, W)
    costArray = (pred - Y) ** 2
    costs = np.sum(costArray)
    return costs
def calcGradient(X, Y, W):
    # Gradient of the summed squared error, averaged over the samples.
    # The constant factor of 2 from d/dW (pred - Y)^2 is dropped here;
    # it is simply absorbed into the learning rate.
    pred = np.dot(X, W)
    pred_Y = pred - Y
    grad_sum = np.dot(pred_Y, X)
    return grad_sum / len(X)
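# Optional sanity check: compare the analytic gradient with a central
# finite-difference estimate of the cost on a tiny toy problem (the data
# below is made up purely for illustration). Because calcGradient drops the
# factor of 2 and divides by the sample count, the numeric estimate is
# rescaled by 1 / (2 * m) before comparing. Uncomment the call to run it.
def _checkGradient():
    Xt = np.array([[1.0, 0.5], [1.0, -0.3], [1.0, 0.8]])
    Yt = np.array([1.0, 2.0, 0.5])
    Wt = np.array([0.1, -0.2])
    eps = 1e-6
    numeric = np.zeros_like(Wt)
    for j in range(len(Wt)):
        Wp, Wm = Wt.copy(), Wt.copy()
        Wp[j] += eps
        Wm[j] -= eps
        numeric[j] = (calcCost(Xt, Wp, Yt) - calcCost(Xt, Wm, Yt)) / (2 * eps)
    numeric /= 2 * len(Xt)
    print("analytic gradient:", calcGradient(Xt, Yt, Wt))
    print("numeric gradient: ", numeric)
# _checkGradient()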
fileName = 'murdersunemployment.csv'
print("fileName: ", fileName)
# loadtxt defaults to floats - skip the header row; passing the file name
# directly lets numpy open and close the file for us
data = np.loadtxt(fileName, usecols=(2, 3, 4), skiprows=1, delimiter=",")
means = np.mean(data, axis=0)
stddev = np.std(data, axis = 0)
maxVal = np.max(data, axis = 0)
minVal = np.min(data, axis = 0)
print("means:", means)
print("std devs:", stddev)
print("max values:", maxVal)
print("min values:", minVal)
# Scale each feature with mean normalisation: (x - mean) / (max - min)
max_min = maxVal[0] - minVal[0]
X1 = (data[:, 0] - means[0]) / max_min
max_min = maxVal[1] - minVal[1]
X2 = (data[:, 1] - means[1]) / max_min
# Design matrix: a column of ones (for the bias weight) plus the two features
X = np.c_[np.ones(len(X1)), X1, X2]
Y = data[:,2]
lr = 0.01  # learning rate
# Set the initial weights
W = np.array([20.0, 10.0, 10.0])
initW = W.copy()
costArray = []
costArray.append(calcCost(X, W, Y))
finished = False
count = 0
while not finished:
    gradient = calcGradient(X, Y, W)
    W = W - lr * gradient
    cost = calcCost(X, W, Y)
    costArray.append(cost)
    # Stop once the gradient is essentially zero (the cost curve has flattened)
    lengthOfGradientVector = np.linalg.norm(gradient)
    if lengthOfGradientVector < .0001:
        finished = True
    count += 1
print("Learning Rate used:", lr)
print("Initial weights:", initW)
print("Final weights:", W)
print("Initial cost",costArray[0])
print("Final cost",costArray[-1] )
fig = plt.figure()
ax = fig.add_axes([0.1,0.1,0.8,0.8])# [left, bottom, width, height]
ax.plot(np.arange(len(costArray)), costArray, "ro", label = "cost")
ax.set_title("Cost as weights are changed")
ax.set_xlabel("iteration")
ax.set_ylabel("Costs")
ax.legend()
plt.show()
# Linear Regression Analysis ****************************************
Ypredict = np.dot(X, W)
print("Actual Values:", Y)
print("Predicted values:", Ypredict)
# Relative error in percent (assumes no label is exactly zero)
error = (Y - Ypredict) / Y * 100
print("Relative Error (%):", error)
#********* Scatter Plot of Predicted vs Actual Label values ******************************
plt.scatter(Ypredict, Y)
plt.title("Predicted vs Actual")
plt.xlabel("Y prediction")
plt.ylabel("Y")
plt.show()
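# Optional fit summary: the coefficient of determination R^2 reports how much
# of the variance in Y the fitted model explains (1.0 is a perfect fit).
ss_res = np.sum((Y - Ypredict) ** 2)
ss_tot = np.sum((Y - np.mean(Y)) ** 2)
print("R^2:", 1 - ss_res / ss_tot)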