import matplotlib.pyplot as plt
from sklearn import datasets, neighbors
from sklearn.model_selection import train_test_split
# Load the digits data set (8x8 grayscale images of handwritten digits 0-9)
digits = datasets.load_digits()

# Visualize an example digit image
plt.gray()
plt.matshow(digits.images[0])  # Use the first digit in the data set
plt.show()

# Extract the input data, force values to be between 0.0 and 1.0 (NORMALIZE
# the data): pixel intensities are integers, so dividing by the global max
# rescales every feature into [0.0, 1.0]
X_digits = digits.data / digits.data.max()
# Extract the true values for each sample (each a digit between 0-9)
y_digits = digits.target

# Print the first 20 target values. NOTE: the original bare expression
# `y_digits[:20]` only echoes in a notebook; print() works as a script too.
print(y_digits[:20])
# Training set will be 90% of available samples,
# testing set will be remaining 10%.
# random_state pins the shuffle so the split (and the score below) is
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X_digits, y_digits, train_size=0.9, random_state=42
)

# Default K-Nearest Neighbors classifier.
# n_neighbors=5: each test sample is classified by a majority vote of its
# 5 nearest training samples.
knn = neighbors.KNeighborsClassifier(n_neighbors=5)

# Train the classifier
knn.fit(X_train, y_train)

# Compute the score (mean accuracy) on test set
score = knn.score(X_test, y_test)
print('KNN score: %f' % score)
# Example output (one notebook run): KNN score: 0.988889
# (this line was pasted cell output in the original file and was a SyntaxError)
# Create a new knn model with a different k value (k=3 instead of 5)
knn_3 = neighbors.KNeighborsClassifier(n_neighbors=3)
# Train the classifier
knn_3.fit(X_train, y_train)
# Compute the score (mean accuracy) on test set
score_3 = knn_3.score(X_test, y_test)
print('KNN (k=3) score: %f' % score_3)

# Print out the y (true labels) for the test set
print(y_test)

# Create the y-hat (predictions) for the test set and print
y_pred = knn_3.predict(X_test)
print(y_pred)
# Example output from one notebook run (pasted cell output in the original
# file, which made the script a SyntaxError — kept here as a comment):
# [7 6 6 7 6 1 0 9 4 4 0 9 6 5 1 3 8 0 2 1 3 7 1 6 6 4 8 5 5 5 6 3 4 1 7 4 8
#  7 6 8 2 0 1 7 7 7 0 0 4 7 6 2 3 0 3 7 9 5 1 5 1 0 5 5 1 7 2 1 7 2 4 0 2 3
#  6 9 3 8 6 0 8 7 4 9 1 2 5 5 5 0 8 3 8 1 8 2 9 9 7 2 3 0 4 2 1 1 5 2 7 0 6
#  6 3 3 7 5 7 6 0 8 7 0 1 9 2 2 9 0 1 5 1 3 5 4 4 0 2 0 1 7 1 9 8 5 6 9 7 0
#  6 7 5 8 2 1 7 3 4 8 7 3 9 8 3 0 4 3 9 7 9 3 8 4 1 7 0 0 8 1 2 3]