# Simple Linear Regression Program ----------------------------
# Import statements
import numpy as np
from matplotlib import pyplot as plt
def calcCost(X, W, Y):
    # Sum of squared errors between the predictions X.W and the labels Y
    pred = np.dot(X, W)
    costArray = (pred - Y) ** 2
    costs = np.sum(costArray)
    return costs
def calcGradient(X, Y, W):
    # Gradient of the summed squared error, averaged over the samples.
    # The constant factor of 2 from d/dW (pred - Y)^2 is dropped here;
    # it is simply absorbed into the learning rate.
    pred = np.dot(X, W)
    pred_Y = pred - Y
    grad_sum = np.dot(pred_Y, X)
    return grad_sum / len(X)
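# Optional sanity check: compare the analytic gradient with a central
# finite-difference estimate of the cost on a tiny toy problem (the data
# below is made up purely for illustration). Because calcGradient drops the
# factor of 2 and divides by the sample count, the numeric estimate is
# rescaled by 1 / (2 * m) before comparing. Uncomment the call to run it.
def _checkGradient():
    Xt = np.array([[1.0, 0.5], [1.0, -0.3], [1.0, 0.8]])
    Yt = np.array([1.0, 2.0, 0.5])
    Wt = np.array([0.1, -0.2])
    eps = 1e-6
    numeric = np.zeros_like(Wt)
    for j in range(len(Wt)):
        Wp, Wm = Wt.copy(), Wt.copy()
        Wp[j] += eps
        Wm[j] -= eps
        numeric[j] = (calcCost(Xt, Wp, Yt) - calcCost(Xt, Wm, Yt)) / (2 * eps)
    numeric /= 2 * len(Xt)
    print("analytic gradient:", calcGradient(Xt, Yt, Wt))
    print("numeric gradient: ", numeric)
# _checkGradient()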
fileName = 'murdersunemployment.csv'
print("fileName: ", fileName)
# loadtxt defaults to floats - skip the header row; passing the file name
# directly lets numpy open and close the file for us
data = np.loadtxt(fileName, usecols=(2, 3, 4), skiprows=1, delimiter=",")
means = np.mean(data, axis=0)
stddev = np.std(data, axis = 0)
maxVal = np.max(data, axis = 0)
minVal = np.min(data, axis = 0)
print("means:", means)
print("std devs:", stddev)
print("max values:", maxVal)
print("min values:", minVal)
# Scale each feature with mean normalisation: (x - mean) / (max - min)
max_min = maxVal[0] - minVal[0]
X1 = (data[:, 0] - means[0]) / max_min
max_min = maxVal[1] - minVal[1]
X2 = (data[:, 1] - means[1]) / max_min
# Design matrix: a column of ones (for the bias weight) plus the two features
X = np.c_[np.ones(len(X1)), X1, X2]
Y = data[:,2]
lr = 0.01  # learning rate
# Set the initial weights
W = np.array([20.0, 10.0, 10.0])
initW = W.copy()
costArray = []
costArray.append(calcCost(X, W, Y))
finished = False
count = 0
while not finished:
    gradient = calcGradient(X, Y, W)
    W = W - lr * gradient
    cost = calcCost(X, W, Y)
    costArray.append(cost)
    # Stop once the gradient is essentially zero (the cost curve has flattened)
    lengthOfGradientVector = np.linalg.norm(gradient)
    if lengthOfGradientVector < .0001:
        finished = True
    count += 1
print("Learning Rate used:", lr)
print("Initial weights:", initW)
print("Final weights:", W)
print("Initial cost",costArray[0])
print("Final cost",costArray[-1] )
fig = plt.figure()
ax = fig.add_axes([0.1,0.1,0.8,0.8])# [left, bottom, width, height]
ax.plot(np.arange(len(costArray)), costArray, "ro", label = "cost")
ax.set_title("Cost as weights are changed")
ax.set_xlabel("iteration")
ax.set_ylabel("Costs")
ax.legend()
plt.show()
# Linear Regression Analysis ****************************************
Ypredict = np.dot(X, W)
print("Actual Values:", Y)
print("Predicted values:", Ypredict)
# Relative error in percent (assumes no label is exactly zero)
error = (Y - Ypredict) / Y * 100
print("Relative Error (%):", error)
#********* Scatter Plot of Predicted vs Actual Label values ******************************
plt.scatter(Ypredict, Y)
plt.title("Predicted vs Actual")
plt.xlabel("Y prediction")
plt.ylabel("Y")
plt.show()
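# Optional fit summary: the coefficient of determination R^2 reports how much
# of the variance in Y the fitted model explains (1.0 is a perfect fit).
ss_res = np.sum((Y - Ypredict) ** 2)
ss_tot = np.sum((Y - np.mean(Y)) ** 2)
print("R^2:", 1 - ss_res / ss_tot)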