import numpy as np
import pandas as pd
# Load the Iris table from the working directory.
data = pd.read_csv(r"Iris.csv")

# Replace the species names with integer class codes (the column keeps its
# position in the frame).
data = data.assign(
    Species=data["Species"].map(
        {"Iris-setosa": 0, "Iris-virginica": 1, "Iris-versicolor": 2}
    )
)

# The "Id" column is dropped first, then rows that are exact duplicates of an
# earlier row are removed.
data = data.drop(columns="Id").drop_duplicates()
class KNN:
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    ``fit`` memorises the training samples; ``predict`` labels every query
    row with the most frequent class among its ``k`` closest training rows.
    Labels may be any values ``np.unique`` can sort (integers, strings, ...).
    When several classes tie for the most votes, the smallest label (in
    sorted order) wins.
    """

    def __init__(self, k):
        """Store the number of neighbours consulted for each prediction.

        Args:
            k: Number of nearest neighbours that vote. If it exceeds the
                number of training rows, every training row votes.

        Raises:
            ValueError: If ``k`` is smaller than 1, because no neighbour
                could vote.
        """
        if k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k

    def fit(self, X, y):
        """Memorise the training data.

        Args:
            X: Array-like of feature rows, one row per sample.
            y: One-dimensional array-like with one label per row of ``X``.
        """
        self.X = np.asarray(X)
        self.y = np.asarray(y)
        # Encode labels as 0..n_classes-1 so np.bincount can count votes for
        # any label type. np.unique sorts the classes, so ties still resolve
        # to the smallest label.
        self._classes, self._codes = np.unique(self.y, return_inverse=True)

    def predict(self, X):
        """Predict a label for every row of ``X``.

        Args:
            X: Array-like of feature rows with the same number of columns as
                the training data.

        Returns:
            A list holding one predicted label per row of ``X``.
        """
        X = np.asarray(X)
        result = []
        for x in X:
            # Euclidean distance from this query row to every training row.
            distance = np.sqrt(np.sum((x - self.X) ** 2, axis=1))
            nearest = distance.argsort()[: self.k]
            # Majority vote among the k closest training samples.
            votes = np.bincount(self._codes[nearest])
            result.append(self._classes[votes.argmax()])
        return result
# Separate the samples by class code so each class is shuffled and split
# independently of the others.
t0 = data.loc[data["Species"].eq(0)]
t1 = data.loc[data["Species"].eq(1)]
t2 = data.loc[data["Species"].eq(2)]

# Shuffle every class completely; the fixed seed makes the split repeatable.
t0 = t0.sample(n=len(t0), random_state=0)
t1 = t1.sample(n=len(t1), random_state=0)
t2 = t2.sample(n=len(t2), random_state=0)

# The first 40 shuffled rows of each class form the training set and the
# remaining rows the test set. Features are all columns but the last; the
# last column is used as the label.
train_X = pd.concat([part.iloc[:40, :-1] for part in (t0, t1, t2)])
train_y = pd.concat([part.iloc[:40, -1] for part in (t0, t1, t2)])
test_X = pd.concat([part.iloc[40:, :-1] for part in (t0, t1, t2)])
test_y = pd.concat([part.iloc[40:, -1] for part in (t0, t1, t2)])

# Fit a 5-nearest-neighbour classifier and label the held-out rows.
knn = KNN(5)
knn.fit(train_X, train_y)
test = knn.predict(test_X)
# display(test)

# Accuracy: fraction of held-out rows whose prediction equals the true label.
display((test_y == test).sum() / len(test_y))
data
import matplotlib as mpt
import matplotlib.pyplot as plt
#mpt.rcParams["font.family"] = "SimHei"
#mpt.rcParams["axes.unicode_minus"] = False
# Split the test rows into correctly and incorrectly classified samples by
# comparing the predictions element-wise with the true labels.
right = test_X[test == test_y]
wrong = test_X[test != test_y]

plt.figure(figsize=(10, 10))

# All samples of each class (training and test rows alike), coloured by class.
plt.scatter(t0["SepalLengthCm"], t0["PetalLengthCm"], c="r", label="Iris-setosa")
plt.scatter(t1["SepalLengthCm"], t1["PetalLengthCm"], c="g", label="Iris-virginica")
plt.scatter(t2["SepalLengthCm"], t2["PetalLengthCm"], c="b", label="Iris-versicolor")

# Overlay the test rows: "x" marks a correct prediction, ">" a wrong one.
plt.scatter(right["SepalLengthCm"], right["PetalLengthCm"], c="c", marker="x")
plt.scatter(wrong["SepalLengthCm"], wrong["PetalLengthCm"], c="m", marker=">")

# xlabel/ylabel are functions and must be called. Assigning a string to them
# leaves the axes unlabelled and replaces the pyplot functions for the rest
# of the session.
plt.xlabel("SepalLengthCm")
plt.ylabel("PetalLengthCm")
plt.legend(loc="best")
plt.show()