# Load the housing data and give the columns readable, unit-annotated names.
df = pd.read_csv('data_assignment2.csv')
df = df.rename(columns={
    'Selling_price': 'Selling price (SEK)',
    'Land_size': 'Land size (m²)',
    'Living_area': 'Living area (m²)',
})

# Display-only copy with thousands separators in the price column; `df`
# itself keeps the numeric prices used by the plots and the model below.
# The column is formatted directly rather than via a row-wise
# `apply(axis=1)`: building a Series per row is slow and can coerce the
# price to the row's common dtype (e.g. int -> float) before formatting.
df_readable = df.copy()
df_readable['Selling price (SEK)'] = df_readable['Selling price (SEK)'].map("{:,}".format)
df_readable.head()
# Selling price against living area and land size, coloured by room count.
# Each group of plots gets its own explicit figure and axes so nothing is
# drawn onto whatever axes happened to be current.
scatter_fig, (living_ax, land_ax) = plt.subplots(1, 2)
sns.scatterplot(x='Selling price (SEK)', y='Living area (m²)', hue='Rooms', data=df, ax=living_ax)
sns.scatterplot(x='Selling price (SEK)', y='Land size (m²)', hue='Rooms', data=df, ax=land_ax)
plt.show()

# Distribution of each of the three numeric columns, side by side.
violin_fig, violin_axes = plt.subplots(1, 3)
violin_columns = ('Selling price (SEK)', 'Living area (m²)', 'Land size (m²)')
for violin_ax, column in zip(violin_axes, violin_columns):
    sns.violinplot(x=column, data=df, ax=violin_ax)
# The original never showed this figure, so later implicit plotting calls
# would land on its last subplot.
plt.show()
from sklearn.linear_model import LinearRegression

# Simple linear regression: selling price explained by living area.
# Both are kept as single-column DataFrames (2-D), as scikit-learn expects for X.
y = pd.DataFrame(df['Selling price (SEK)'])
X = pd.DataFrame(df['Living area (m²)'])
model = LinearRegression()
model.fit(X, y)

xfit = X.values
# Predict from the DataFrame the model was fitted on; predicting from the
# bare ndarray makes scikit-learn warn about missing feature names.
yfit = model.predict(X)

plt.rcParams['figure.figsize'] = (20,14)
# The figure size must be set before the figure is created.
regression_fig, regression_ax = plt.subplots()
# Draw the regression line first so the legend seaborn builds for the
# scatter plot also picks up its label; previously the label was passed
# after the legend had been created and never appeared.
regression_ax.plot(xfit, yfit, label='Fitted regression model')
scattered = sns.scatterplot(y='Selling price (SEK)', x='Living area (m²)', hue='Rooms', data=df, ax=regression_ax)
plt.show()
# Residuals of the fitted regression, stored on `df` and plotted against
# the observed selling price.
fittedPrices = model.predict(X)
# Residual = observed - fitted (the conventional sign; the original had it
# reversed). `ravel()` flattens the prediction so the result is a plain
# Series aligned with `df`, whether predict() returned 1-D or (n, 1).
residuals = df['Selling price (SEK)'] - fittedPrices.ravel()
df['Residuals'] = residuals

plt.rcParams['figure.figsize'] = (20,14)
# Fresh figure so the points are not drawn over a previous plot.
plt.figure()
sns.scatterplot(data=df, x='Selling price (SEK)', y='Residuals')
plt.show()
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression

# Iris classification with logistic regression, summarised as a confusion matrix.
iris = load_iris()
print(iris.feature_names)
X = iris.data
y = iris.target

plt.rcParams['figure.figsize'] = (12,10)
# Fixed random_state keeps the 80/20 split reproducible.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)
model = LogisticRegression(max_iter = 100)
model.fit(x_train, y_train)  # Training the model

# Evaluate on the held-out split. The original predicted on the training
# data and never used x_test / y_test, so the matrix only showed how well
# the model reproduces samples it was fitted on.
predictions = model.predict(x_test)
conf_mat = confusion_matrix(y_test, predictions)

# Fresh figure so the heatmap is not drawn over a previous plot.
plt.figure()
plt.title("Confusion Matrix")
sns.heatmap(conf_mat, annot=True, fmt='d', cmap='YlGnBu')
plt.ylabel("Actual Values")
plt.xlabel("Predicted Values")
plt.savefig('confusion_matrix 2_a.png')
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
import matplotlib.pyplot as plt

# Sweep k = 1..maxNeighbours for a k-nearest-neighbours classifier and
# compare test accuracy for uniform and distance weighting.
plt.rcParams['figure.figsize'] = (16,8)

maxNeighbours = 100
# Row 0: uniform weights, row 1: distance weights.
# Column i holds the accuracy obtained with k = i + 1.
accuracy = np.zeros(shape=[2, maxNeighbours])
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# The k value belonging to each column of `accuracy`.
xAxisData = np.arange(1, maxNeighbours + 1)

for row, weighting in enumerate(('uniform', 'distance')):
    for k in range(1, maxNeighbours + 1):
        knn = KNeighborsClassifier(n_neighbors=k, weights=weighting)
        knn.fit(x_train, y_train)
        y_pred = knn.predict(x_test)
        # Store at k - 1 so columns line up with xAxisData. The original
        # stored at column k, which plotted every score one step to the
        # right, left column 0 at zero and never evaluated k = maxNeighbours.
        accuracy[row, k - 1] = metrics.accuracy_score(y_test, y_pred)

# Fresh figure so the curves are not drawn over a previous plot.
plt.figure()
plt.plot(xAxisData, accuracy[0, :], label='uniform')
plt.plot(xAxisData, accuracy[1, :], label='distance')
plt.legend()
plt.xlabel('number of k neighbours')
plt.ylabel('Accuracy')
plt.show()

# argmax returns the first column with the highest accuracy, i.e. the
# smallest best-performing k for each weighting.
bestKDistance = int(xAxisData[accuracy[1, :].argmax()])
bestKUniformity = int(xAxisData[accuracy[0, :].argmax()])
from sklearn.metrics import confusion_matrix

# Confusion matrices for the best k found for each weighting scheme.
# NOTE: this draws a fresh, unseeded random split, so it differs from the
# split the best k values were selected on and varies between runs.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# (k, weighting, printed description, output image) for each variant.
# Order matters: the uniform variant runs last, as in the original script.
for k, weighting, description, image_path in (
    (bestKDistance, 'distance', ' distance weighted\n', 'confusion_matrix 2_c_distance.png'),
    (bestKUniformity, 'uniform', ' uniform weighted \n', 'confusion_matrix 2_c_uniform.png'),
):
    knn = KNeighborsClassifier(n_neighbors=k, weights=weighting)
    knn.fit(x_train, y_train)
    y_pred = knn.predict(x_test)
    confusionMatrix = confusion_matrix(y_test, y_pred)
    print('k =', k, description)

    # One figure per heatmap. Previously the second heatmap was drawn on
    # the axes of the first, so the saved image was overlaid and had two
    # colorbars.
    plt.figure()
    plt.title("Confusion Matrix")
    sns.heatmap(confusionMatrix, annot=True, fmt='d', cmap='YlGnBu')
    plt.ylabel("Actual Values")
    plt.xlabel("Predicted Values")
    plt.savefig(image_path)