import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn import metrics
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
# Load dataset
housing_data = pd.read_csv('data/data_assignment2.csv')
# Define keys for accessing wanted columns of data
LIVING_AREA_KEY = "Living_area"
SELLING_PRICE_KEY = "Selling_price"
# Fit a linear regression of selling price on living area
X = housing_data[LIVING_AREA_KEY].values.reshape(-1,1)
Y = housing_data[SELLING_PRICE_KEY].values.reshape(-1,1)
linear_regressor = LinearRegression()
lin_regression = linear_regressor.fit(X, Y)
Y_pred = lin_regression.predict(X)
# Plot regression line together with scatter plot
plt.xlabel("Living area in square meters")
plt.ylabel("Selling price in million SEK")
plt.scatter(X, Y)
plt.plot(X, Y_pred, color='red')
plt.show()
# get intercept of regression line
intercept = lin_regression.intercept_
# get slope of regression line
slope = lin_regression.coef_
print("Slope:",slope)
print("Intercept:", intercept)
# Output:
# Slope: [[19370.13854733]]
# Intercept: [2220603.24335587]
# values to predict for (100, 150, 200)
predict_values = np.array([100,150,200]).reshape(-1,1)
# calculate and print the prediction
print(lin_regression.predict(predict_values))
# Output:
# [[4157617.09808903]
#  [5126124.02545561]
#  [6094630.95282218]]
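# Sanity check (an added sketch, not part of the original assignment): each prediction
# should equal intercept + slope * living_area. For 100 square meters this gives
# roughly 2220603.24 + 19370.14 * 100, which matches the first value printed above.
manual_prediction = intercept + slope * 100
print("Manual prediction for 100 m^2:", manual_prediction)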
residuals = Y - Y_pred
plt.scatter(X, residuals)
plt.xlabel("Living area in square meters")
plt.ylabel("Residual value in million SEK")
plt.axhline(y=0, color='r', linestyle="dashed")
plt.show()
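# Optional goodness-of-fit check (a minimal sketch, not part of the original code):
# quantify how well the regression line fits the data using R^2 and the mean squared
# error from sklearn.metrics, which is already imported above.
r_squared = metrics.r2_score(Y, Y_pred)
mse = metrics.mean_squared_error(Y, Y_pred)
print("R^2:", r_squared)
print("MSE:", mse)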
from helper import plot_iris_cm
# Load the data set
iris_data = load_iris()
# Split the data set into train data and test data
x_train, x_test, y_train, y_test = train_test_split(iris_data.data, iris_data.target, test_size=0.25, random_state=0)
# Train a logistic regression model with the given train data
logisticRegr = LogisticRegression(multi_class='ovr', solver='liblinear')
logisticRegr.fit(x_train, y_train)
# Use the model to predict the test data and calculate the model's accuracy score
predictions = logisticRegr.predict(x_test)
score = logisticRegr.score(x_test, y_test)
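# Optional diagnostic (a sketch added here, not in the original notebook): a per-class
# precision/recall breakdown complements the single accuracy number.
print(metrics.classification_report(y_test, predictions, target_names=iris_data.target_names))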
# Plot a confusion matrix
labels = iris_data.target_names
cm = metrics.confusion_matrix(y_test, predictions)
plot_iris_cm(cm, labels, score)
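# Note: plot_iris_cm is imported from a local helper module that is not included here.
# As an assumption about what it does, a comparable helper could be sketched with
# sklearn's ConfusionMatrixDisplay (named differently so it does not shadow the import):
def plot_iris_cm_sketch(cm, labels, score, ax=None, k=None, weights=None):
    # Draw the confusion matrix with the iris class names on both axes
    disp = metrics.ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
    disp.plot(ax=ax, colorbar=False)
    # Title the plot with the accuracy score and, if given, the kNN settings
    title = f"Accuracy: {score:.3f}"
    if k is not None:
        title += f" (k={k}, weights={weights})"
    (ax if ax is not None else disp.ax_).set_title(title)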
import math
k_values = [3, 6, 25, 50, 100]
uniform_accuracies = []
distance_accuracies = []
fig, axs = plt.subplots(len(k_values), 2, figsize=(16, 8*len(k_values)))
for i, ax in enumerate(axs.ravel()):
    # Pick the k-value and weight type for this subplot
    k = k_values[math.floor(i / 2)]
    weight = 'uniform' if i % 2 == 0 else 'distance'
    # Fit the model, then compute its accuracy and confusion matrix
    neigh = KNeighborsClassifier(n_neighbors=k, weights=weight)
    neigh.fit(x_train, y_train)
    predictions = neigh.predict(x_test)
    score = neigh.score(x_test, y_test)
    cm = metrics.confusion_matrix(y_test, predictions)
    # Plot the confusion matrix for the classifications of iris flowers
    plot_iris_cm(cm, labels, score, ax=ax, k=k, weights=weight)
    # Add the score to the corresponding accuracy list
    if i % 2 == 0:
        uniform_accuracies.append(score)
    else:
        distance_accuracies.append(score)
accuracies = {
    'K': k_values,
    'Uniform': uniform_accuracies,
    'Distance': distance_accuracies
}
pd.DataFrame(data=accuracies)
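# A small follow-up sketch (not in the original notebook): identify which k gave the
# highest test accuracy for each weighting scheme in the table above.
accuracy_df = pd.DataFrame(data=accuracies)
print("Best k (uniform weights):", accuracy_df.loc[accuracy_df['Uniform'].idxmax(), 'K'])
print("Best k (distance weights):", accuracy_df.loc[accuracy_df['Distance'].idxmax(), 'K'])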