K-Nearest Neighbors (KNN)
A brief overview of the KNN algorithm and its applications in machine learning.
Visualization example
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import datasets, neighbors
from mlxtend.plotting import plot_decision_regions
def knn_comparison(data, k):
    """Fit a k-nearest-neighbours classifier and plot its decision regions.

    The ``'X'`` and ``'Y'`` columns of *data* are used as the two input
    features and the ``'class'`` column, cast to ``int``, as the target.
    The classifier is trained on every row of *data*, the decision regions
    are drawn together with the samples, and the figure is displayed via
    ``plt.show()``.

    Args:
        data: Table indexable by the column labels ``'X'``, ``'Y'`` and
            ``'class'`` (the caller in this file passes the result of
            ``pd.read_csv``).
        k: Value passed as ``n_neighbors`` to ``KNeighborsClassifier``;
            also shown in the plot title.

    Returns:
        None. The function is called for its plotting side effect.
    """
    features = data[['X', 'Y']].values
    labels = data['class'].astype(int).values

    # scikit-learn KNN classifier; fit() hands back the fitted estimator.
    model = neighbors.KNeighborsClassifier(n_neighbors=k).fit(features, labels)

    # Decision regions of the fitted model, drawn over the training samples.
    plot_decision_regions(features, labels, clf=model, legend=2)

    # Axis annotations and title, then display the figure.
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.title(f'Knn with K={k!s}')
    plt.show()
# Load the sample data set and draw one decision-region plot for each
# neighbourhood size, from k=1 up to k=80.
data1 = pd.read_csv('ushape.csv')
for i in (1, 5, 20, 30, 40, 80):
    knn_comparison(data1, i)
from sklearn.neighbors import NearestNeighbors, KNeighborsClassifier, KNeighborsRegressor
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
# Sweep the number of neighbours and plot the cross-validated error of a
# KNN regressor fitted to the (integer) class labels.
data1 = pd.read_csv('ushape.csv')
X = data1[['X', 'Y']].values
y = data1['class'].astype(int).values

# A single range drives both the sweep and the x axis of the plot, so the
# two cannot get out of step when the upper bound is changed.
k_values = range(1, 51)

# error[j] is the mean squared error between y and the 5-fold out-of-fold
# predictions obtained with k_values[j] neighbours.
error = []
for k in k_values:
    knn = KNeighborsRegressor(n_neighbors=k)
    y_pred = cross_val_predict(knn, X, y, cv=5)
    error.append(mean_squared_error(y, y_pred))

# Error as a function of k, to compare the different k values.
plt.plot(k_values, error)