import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K
from sklearn.metrics import accuracy_score
from tensorflow.keras import layers
from tensorflow.keras import metrics
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Model
def preprocess(array):
    """
    Scales pixel values into [0, 1] and appends a trailing channel axis.

    The input is cast to float32, divided by 255, and reshaped to
    (number of samples, 28, 28, 1).
    """
    scaled = array.astype('float32') / 255.
    sample_count = len(scaled)
    return np.reshape(scaled, (sample_count, 28, 28, 1))
def display_pairs(images, labels, correct=None):
    """
    Shows up to the first ten image pairs as two rows of thumbnails.

    Args:
    - images: An array of image pairs; `images[:, 0]` is drawn on the top
    row and `images[:, 1]` directly below it.
    - labels: An array holding one single-element label per pair (0 if the
    two images are different, 1 if they are the same).
    - correct (optional): An array of single-element truthy/falsy values.
    When given, the annotation reads "Prediction" instead of "Label" and
    is colored green for a truthy entry and red for a falsy one.
    """
    n = 10
    plt.figure(figsize=(20, 6))
    shown_labels = labels[:n]

    pair_columns = zip(images[:n, 0], images[:n, 1])
    for column, (top_image, bottom_image) in enumerate(pair_columns):
        label = int(shown_labels[column][0])

        # Without correctness information this is a plain ground-truth
        # label; otherwise the box color reports whether the prediction hit.
        if correct is None:
            text, color = "Label", "silver"
        elif correct[:n][column][0]:
            text, color = "Prediction", "mediumseagreen"
        else:
            text, color = "Prediction", "indianred"

        # Top row: first image of the pair plus the annotation box.
        top_axes = plt.subplot(3, n, column + 1)
        top_axes.text(
            1,
            -3,
            f"{text}: {label}",
            style="italic",
            bbox={"facecolor": color, "pad": 4},
        )
        plt.imshow(top_image.reshape(28, 28))
        plt.gray()
        top_axes.get_xaxis().set_visible(False)
        top_axes.get_yaxis().set_visible(False)

        # Second row: the other image of the pair, in the same column.
        bottom_axes = plt.subplot(3, n, column + 1 + n)
        plt.imshow(bottom_image.reshape(28, 28))
        plt.gray()
        bottom_axes.get_xaxis().set_visible(False)
        bottom_axes.get_yaxis().set_visible(False)

    plt.show()
def plot_history(history):
    """
    Draws the training and validation loss curves on a single chart.

    `history` is a mapping with 'loss' and 'val_loss' entries, each holding
    one value per epoch.
    """
    for series in ('loss', 'val_loss'):
        plt.plot(history[series])

    plt.title('Training and Validation Loss')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    # Legend entries follow the order in which the curves were plotted.
    plt.legend(['train', 'val'], loc='upper right')
    plt.show()
def generate_pairs(images, labels):
    """
    Creates a shuffled collection of positive and negative image pairs from
    the supplied array of images.

    For every image two pairs are produced: a positive pair (label 1) with a
    randomly chosen image of the same class, and a negative pair (label 0)
    with a randomly chosen image of a different class. The partner of a
    positive pair may be the anchor image itself.

    Args:
    - images: Array-like of images; the first axis indexes the samples.
    - labels: Array-like with one class label per image.

    Returns:
    A tuple `(x_pairs, y_pairs)` where `x_pairs` has shape
    `(2 * len(images), 2, *image_shape)` and `y_pairs` has shape
    `(2 * len(images), 1)`. Both are shuffled with the same permutation.

    Raises:
    ValueError: if `images` and `labels` differ in length, or if the labels
    contain a single class so that no negative pair can be formed.
    """
    images = np.asarray(images)
    labels = np.asarray(labels)
    count = len(images)
    if len(labels) != count:
        raise ValueError(
            f"images and labels must have the same length "
            f"(got {count} and {len(labels)})"
        )

    # The candidate partners depend only on the class, so they are computed
    # once per class instead of rescanning every label for every image.
    candidates = {}
    partners = np.empty(2 * count, dtype=np.intp)
    for i, label in enumerate(labels):
        key = label.item()
        if key not in candidates:
            same = np.where(labels == label)[0]
            different = np.where(labels != label)[0]
            if different.size == 0:
                raise ValueError(
                    "generate_pairs needs at least two distinct classes to "
                    "build negative pairs"
                )
            candidates[key] = (same, different)
        same, different = candidates[key]
        # Draw order (positive first, then negative) is kept so that results
        # are reproducible for a given NumPy seed.
        partners[2 * i] = np.random.choice(same)
        partners[2 * i + 1] = np.random.choice(different)

    # Each anchor appears twice: once in its positive and once in its
    # negative pair, hence the alternating 1/0 targets.
    anchors = np.repeat(np.arange(count), 2)
    targets = np.tile(np.array([[1], [0]]), (count, 1))

    order = np.arange(2 * count)
    np.random.shuffle(order)

    # Gather the images straight into the output buffer in shuffled order,
    # avoiding an intermediate Python list of per-pair arrays.
    x_pairs = np.empty((2 * count, 2) + images.shape[1:], dtype=images.dtype)
    x_pairs[:, 0] = images[anchors[order]]
    x_pairs[:, 1] = images[partners[order]]
    return x_pairs, targets[order]
# Load MNIST and carve the first 20% of the training split off as a
# validation set; the remainder stays as training data.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

VALIDATION_SIZE = int(len(x_train) * 0.2)
x_val, x_train = x_train[:VALIDATION_SIZE], x_train[VALIDATION_SIZE:]
y_val, y_train = y_train[:VALIDATION_SIZE], y_train[VALIDATION_SIZE:]

# Normalize and reshape every split the same way.
x_train, x_val, x_test = (
    preprocess(split) for split in (x_train, x_val, x_test)
)

print(f"Train: {len(x_train)}")
print(f"Validation: {len(x_val)}")
print(f"Test: {len(x_test)}")

# Build the labelled image pairs for each split (train, then validation,
# then test) and preview a few of the training pairs.
x_pairs_train, y_pairs_train = generate_pairs(x_train, y_train)
x_pairs_val, y_pairs_val = generate_pairs(x_val, y_val)
x_pairs_test, y_pairs_test = generate_pairs(x_test, y_test)

display_pairs(x_pairs_train, y_pairs_train)
def norm(features):
    """
    Computes the euclidean distance between the two feature vectors
    generated by the twins of the Siamese network.

    Args:
    - features: A sequence whose first two items are the embeddings to
    compare; the reduction runs over axis 1.

    Returns:
    A tensor with the distance of each pair, keeping the reduced axis
    (`keepdims=True`).
    """
    difference = features[0] - features[1]
    squared_distance = tf.reduce_sum(
        tf.square(difference), axis=1, keepdims=True
    )
    # The derivative of a square root is unbounded at zero, so two identical
    # embeddings would yield NaN gradients. Clamping the squared distance to
    # a tiny positive value keeps training numerically stable while leaving
    # every non-degenerate distance unchanged.
    return tf.sqrt(tf.maximum(squared_distance, K.epsilon()))
def accuracy(y_true, y_pred):
    """
    Computes the binary accuracy of the predicted distances.
    """
    # `y_true` is 1 for a pair of matching images and 0 otherwise, whereas
    # `y_pred` is a distance: the closer it is to 0, the more likely the two
    # images match. Subtracting the distance from 1 flips it so that both
    # vectors point in the same direction before they are compared.
    similarity = 1 - y_pred
    return metrics.binary_accuracy(y_true, similarity)
def contrastive_loss(y_true, y_pred):
    """
    Computes the contrastive loss introduced by Yann LeCun et al. in the paper
    "Dimensionality Reduction by Learning an Invariant Mapping," 2005.

    `y_pred` is the distance between the two embeddings and `y_true` is 1 for
    a matching pair and 0 otherwise.
    """
    margin = 1
    y_true = tf.cast(y_true, y_pred.dtype)

    # The paper defines Y as 0 for similar pairs and 1 for dissimilar ones.
    # Here `y_true` uses the opposite convention, so the two terms of the
    # formula are swapped: matching pairs are penalized by their squared
    # distance, non-matching pairs by how far they fall inside the margin.
    matching_term = y_true / 2 * K.square(y_pred)
    margin_shortfall = K.maximum(0.0, margin - y_pred)
    non_matching_term = (1 - y_true) / 2 * K.square(margin_shortfall)
    return matching_term + non_matching_term
def siamese_twin():
    """
    Builds the subnetwork shared by both branches of the Siamese network.

    It maps a (28, 28, 1) image to a 128-dimensional embedding.
    """
    image = layers.Input((28, 28, 1))

    # Three identical stages (convolution, pooling, dropout) that differ
    # only in the number of convolution filters.
    features = image
    for filters in (128, 128, 64):
        features = layers.Conv2D(filters, (2, 2), activation="relu")(features)
        features = layers.MaxPooling2D((2, 2))(features)
        features = layers.Dropout(0.4)(features)

    pooled = layers.GlobalAveragePooling2D()(features)

    # The final layer deliberately has no activation: its raw output is the
    # embedding of the input image.
    embedding = layers.Dense(128, activation=None)(pooled)
    return Model(image, embedding)
def siamese_network():
    """
    Builds the complete Siamese Network model.

    The model takes two (28, 28, 1) images, the anchor and the image it is
    compared against, and outputs the euclidean distance between their
    embeddings.
    """
    anchor_input = layers.Input(shape=(28, 28, 1))
    other_input = layers.Input(shape=(28, 28, 1))

    # A single twin instance processes both inputs, so the two branches
    # share their weights.
    twin = siamese_twin()
    anchor_embedding = twin(anchor_input)
    other_embedding = twin(other_input)

    # A Lambda layer wraps `norm` to turn the two embeddings into a distance.
    distance = layers.Lambda(norm)([anchor_embedding, other_embedding])

    return Model(inputs=[anchor_input, other_input], outputs=distance)
# Build and compile the Siamese network with the contrastive loss and the
# distance-aware accuracy metric.
model = siamese_network()
model.compile(optimizer="adam", loss=contrastive_loss, metrics=[accuracy])
model.summary()

# Each pair array is split into its two halves, one per model input.
train_inputs = [x_pairs_train[:, 0], x_pairs_train[:, 1]]
val_inputs = [x_pairs_val[:, 0], x_pairs_val[:, 1]]

history = model.fit(
    x=train_inputs,
    y=y_pairs_train[:],
    validation_data=(val_inputs, y_pairs_val[:]),
    epochs=15,
    batch_size=64,
)

plot_history(history.history)
# The model outputs a distance per pair. A pair is predicted as "same" (1)
# when its distance is below 0.5, the same cut-off that thresholding
# `1 - distance` at 0.5 produces. An explicit comparison is used instead of
# rounding `1 - distance`, because rounding yields -1, -2, ... once a distance
# exceeds 1.5, which made well-separated negative pairs show up as wrong.
distances = model.predict([x_pairs_test[:, 0], x_pairs_test[:, 1]])
predictions = (distances < 0.5).astype("float32")

display_pairs(x_pairs_test, predictions, predictions == y_pairs_test)

# Named `test_accuracy` so the module-level `accuracy` metric function is
# not overwritten by the metric object.
test_accuracy = metrics.BinaryAccuracy()
test_accuracy.update_state(y_pairs_test, predictions)
print(f"\nAccuracy: {test_accuracy.result().numpy()}")