import matplotlib.pyplot as plt
from sklearn import datasets, neighbors
from sklearn.model_selection import train_test_split
# Load the digits data set
digits = datasets.load_digits()
# Visualize an example digit image
plt.gray()
plt.matshow(digits.images[0]) # Use the first digit in the data set
plt.show()
# Extract the input data, force values to be between 0.0 and 1.0 (NORMALIZE the data)
X_digits = digits.data / digits.data.max()
# Extract the true values for each sample (each a digit between 0-9)
y_digits = digits.target
# Print the first 20 target values
print(y_digits[:20])
# Training set will be 90% of available samples,
# testing set will be remaining 10%
X_train, X_test, y_train, y_test = train_test_split(X_digits, y_digits, train_size=0.9)
# Create the default K-Nearest Neighbors classifier
# What does the 5 represent in the line of code below?
knn = neighbors.KNeighborsClassifier(n_neighbors=5)
# Train the classifier
knn.fit(X_train, y_train)
# Compute the score (mean accuracy) on test set
score = knn.score(X_test, y_test)
print('KNN score: %f' % score)
# Create a new knn model with a different k value
# Train the classifier
# Compute the score (mean accuracy) on the test set
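# A minimal sketch of the three steps above; k=3 and the name knn3 are example
# choices (not specified in the original), any other k value would work the same way.
knn3 = neighbors.KNeighborsClassifier(n_neighbors=3)
knn3.fit(X_train, y_train)
score3 = knn3.score(X_test, y_test)
print('KNN (k=3) score: %f' % score3)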
# Print out the true labels (y) for the test set
print(y_test)
# Create the y-hat (predictions) for the test set and print
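# A minimal sketch of the prediction step; the variable name y_pred is an
# example choice, not from the original.
y_pred = knn.predict(X_test)
print(y_pred)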