import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K
from sklearn.metrics import accuracy_score
from tensorflow.keras import layers
from tensorflow.keras import metrics
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Model
def preprocess(array):
    """
    Scale pixel values into [0, 1] and reshape the array into the
    (samples, 28, 28, 1) format expected by the convolutional layers.
    """
    scaled = array.astype("float32") / 255.0
    return np.reshape(scaled, (len(scaled), 28, 28, 1))
def display_pairs(images, labels, correct=None):
    """
    Displays the first ten pairs from the supplied array.
    Args:
        - images: An array containing the pair of images.
        - labels: An array containing the corresponding label (0 if both
            pairs are different, and 1 if both pairs are the same.)
        - correct (optional): An array of boolean values indicating whether
            the supplied labels correctly represent the image pairs.
    """
    # Number of pairs to display; one column per pair.
    n = 10
    plt.figure(figsize=(20, 6))
    # images[:, 0] holds the first image of each pair, images[:, 1] the second.
    for i, (image1, image2) in enumerate(zip(images[:n, 0], images[:n, 1])):
        label = int(labels[:n][i][0])
        text = "Label"
        color = "silver"
        # If we know whether the supplied labels are correct, let's change the
        # text and the face color of the annotation on the chart.
        if correct is not None:
            text = "Prediction"
            color = "mediumseagreen" if correct[:n][i][0] else "indianred"
        # Top row: the first image of the pair, annotated with the label.
        ax = plt.subplot(3, n, i + 1)
        ax.text(1, -3 ,f"{text}: {label}", style="italic", bbox={
            "facecolor": color,
            "pad": 4
        })
        plt.imshow(image1.reshape(28, 28))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        # Second row: the second image of the pair, directly below the first.
        ax = plt.subplot(3, n, i + 1 + n)
        plt.imshow(image2.reshape(28, 28))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    plt.show()
def plot_history(history):
    """
    Plot the training and validation loss curves from a Keras history dict.
    """
    for series in ("loss", "val_loss"):
        plt.plot(history[series])
    plt.title('Training and Validation Loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper right')
    plt.show()
def generate_pairs(images, labels):
    """
    Creates a collection of positive and negative image pairs from the supplied
    array of images.

    A positive pair contains two images of the same digit. A negative pair
    contains two images representing different digits.

    Args:
        - images: Array of images, indexed in lockstep with `labels`.
        - labels: Array of integer class labels, one per image.

    Returns:
        A tuple `(x_pairs, y_pairs)` where `x_pairs` contains
        `2 * len(images)` image pairs and `y_pairs` holds `[1]` for
        positive pairs and `[0]` for negative pairs, shuffled in unison.
    """
    x_pairs = []
    y_pairs = []
    for i, label in enumerate(labels):
        # Positive pair: pick a *different* image with the same label. The
        # previous implementation could select i itself, producing a
        # degenerate pair of an image with itself. Fall back to i only when
        # this is the sole image with its label.
        same = np.where(labels == label)[0]
        candidates = same[same != i]
        j = np.random.choice(candidates) if len(candidates) > 0 else i
        x_pairs.append([images[i], images[j]])
        y_pairs.append([1])

        # Negative pair: any image carrying a different label.
        k = np.random.choice(np.where(labels != label)[0])
        x_pairs.append([images[i], images[k]])
        y_pairs.append([0])

    # Shuffle both arrays in unison so positives and negatives interleave.
    indices = np.arange(len(x_pairs))
    np.random.shuffle(indices)
    return np.array(x_pairs)[indices], np.array(y_pairs)[indices]
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Hold out the first 20% of the training images for validation.
VALIDATION_SIZE = int(len(x_train) * 0.2)
x_train, x_val = x_train[VALIDATION_SIZE:], x_train[:VALIDATION_SIZE]
y_train, y_val = y_train[VALIDATION_SIZE:], y_train[:VALIDATION_SIZE]

# Normalize and reshape every split into (samples, 28, 28, 1).
x_train = preprocess(x_train)
x_val = preprocess(x_val)
x_test = preprocess(x_test)

for split_name, split in (("Train", x_train), ("Validation", x_val), ("Test", x_test)):
    print(f"{split_name}: {len(split)}")
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
11493376/11490434 [==============================] - 0s 0us/step
Train: 48000
Validation: 12000
Test: 10000
# Build positive/negative pairs for every split, then preview the first
# few training pairs together with their labels.
x_pairs_train, y_pairs_train = generate_pairs(x_train, y_train)
x_pairs_val, y_pairs_val = generate_pairs(x_val, y_val)
x_pairs_test, y_pairs_test = generate_pairs(x_test, y_test)
display_pairs(x_pairs_train, y_pairs_train)
def norm(features):
    """
    Computes the euclidean norm (L2 distance) between the two feature
    vectors produced by the twins of the Siamese network.
    """
    anchor, other = features
    return tf.norm(anchor - other, axis=1, keepdims=True)
def accuracy(y_true, y_pred):
    """
    Computes the accuracy of the predictions.
    """
    # `y_pred` is a distance: values near 0 mean the images are likely the
    # same, while `y_true` uses 1 for "same". Flipping the prediction with
    # `1 - y_pred` puts both on the same scale before comparing.
    similarity = 1 - y_pred
    return metrics.binary_accuracy(y_true, similarity)
def contrastive_loss(y_true, y_pred):
    """
    Computes the contrastive loss introduced by Yann LeCun et al. in the paper
    "Dimensionality Reduction by Learning an Invariant Mapping," 2005.
    """
    margin = 1
    y_true = tf.cast(y_true, y_pred.dtype)
    # LeCun's original formulation uses Y = 0 for similar pairs and 1 for
    # dissimilar ones; our labels are inverted, so the two terms below swap
    # roles relative to the paper.
    similar_term = y_true / 2 * K.square(y_pred)
    dissimilar_term = (1 - y_true) / 2 * K.square(K.maximum(0.0, margin - y_pred))
    return similar_term + dissimilar_term
def siamese_twin():
    """
    Creates the subnetwork that represents each one of the twins of the
    Siamese network.
    """
    inputs = layers.Input((28, 28, 1))
    x = inputs
    # Three identical conv -> pool -> dropout stages, narrowing at the end.
    for filters in (128, 128, 64):
        x = layers.Conv2D(filters, (2, 2), activation="relu")(x)
        x = layers.MaxPooling2D((2, 2))(x)
        x = layers.Dropout(0.4)(x)
    x = layers.GlobalAveragePooling2D()(x)
    # We don't want any activation function on the final layer. This layer
    # will contain the embedding for the input image.
    outputs = layers.Dense(128, activation=None)(x)
    return Model(inputs, outputs)
def siamese_network():
    """
    Creates the Siamese Network model.

    The model takes the two images of a pair as inputs, runs both through
    the same (weight-sharing) twin subnetwork, and outputs the euclidean
    distance between the resulting embeddings.
    """
    first_image = layers.Input(shape=(28, 28, 1))
    second_image = layers.Input(shape=(28, 28, 1))

    # A single twin instance is applied to both inputs, so the two branches
    # share weights.
    twin = siamese_twin()
    embeddings = [twin(first_image), twin(second_image)]

    # A Lambda layer computes the euclidean distance between the embeddings.
    distance = layers.Lambda(norm)(embeddings)

    return Model(inputs=[first_image, second_image], outputs=distance)
# Compile with the custom contrastive loss; the custom `accuracy` metric
# converts the predicted distance into a same/different score before
# comparing it against the binary label.
model = siamese_network()
model.compile(
    loss=contrastive_loss,
    optimizer="adam",
    metrics=[accuracy]
)
model.summary()
Model: "model_1"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) [(None, 28, 28, 1)] 0
__________________________________________________________________________________________________
input_2 (InputLayer) [(None, 28, 28, 1)] 0
__________________________________________________________________________________________________
model (Functional) (None, 128) 107456 input_1[0][0]
input_2[0][0]
__________________________________________________________________________________________________
lambda (Lambda) (None, 1) 0 model[0][0]
model[1][0]
==================================================================================================
Total params: 107,456
Trainable params: 107,456
Non-trainable params: 0
__________________________________________________________________________________________________
# The first column of each pair feeds one twin, the second column the other.
history = model.fit(
    x=[x_pairs_train[:, 0], x_pairs_train[:, 1]],
    y=y_pairs_train[:],
    validation_data=([x_pairs_val[:, 0], x_pairs_val[:, 1]], y_pairs_val[:]),
    batch_size=64,
    epochs=15
)
Epoch 1/15
1500/1500 [==============================] - 205s 136ms/step - loss: 0.0937 - accuracy: 0.7088 - val_loss: 0.0686 - val_accuracy: 0.8029
Epoch 2/15
1500/1500 [==============================] - 205s 137ms/step - loss: 0.0732 - accuracy: 0.7987 - val_loss: 0.0550 - val_accuracy: 0.8651
Epoch 3/15
1500/1500 [==============================] - 207s 138ms/step - loss: 0.0655 - accuracy: 0.8335 - val_loss: 0.0473 - val_accuracy: 0.8920
Epoch 4/15
1500/1500 [==============================] - 207s 138ms/step - loss: 0.0621 - accuracy: 0.8451 - val_loss: 0.0488 - val_accuracy: 0.8850
Epoch 5/15
1500/1500 [==============================] - 207s 138ms/step - loss: 0.0601 - accuracy: 0.8543 - val_loss: 0.0448 - val_accuracy: 0.8982
Epoch 6/15
1500/1500 [==============================] - 206s 138ms/step - loss: 0.0587 - accuracy: 0.8596 - val_loss: 0.0447 - val_accuracy: 0.9000
Epoch 7/15
1500/1500 [==============================] - 206s 137ms/step - loss: 0.0576 - accuracy: 0.8631 - val_loss: 0.0415 - val_accuracy: 0.9116
Epoch 8/15
1500/1500 [==============================] - 208s 139ms/step - loss: 0.0566 - accuracy: 0.8673 - val_loss: 0.0414 - val_accuracy: 0.9115
Epoch 9/15
1500/1500 [==============================] - 207s 138ms/step - loss: 0.0560 - accuracy: 0.8702 - val_loss: 0.0405 - val_accuracy: 0.9145
Epoch 10/15
1500/1500 [==============================] - 205s 137ms/step - loss: 0.0552 - accuracy: 0.8735 - val_loss: 0.0415 - val_accuracy: 0.9081
Epoch 11/15
1500/1500 [==============================] - 206s 137ms/step - loss: 0.0547 - accuracy: 0.8745 - val_loss: 0.0405 - val_accuracy: 0.9122
Epoch 12/15
1500/1500 [==============================] - 200s 133ms/step - loss: 0.0541 - accuracy: 0.8770 - val_loss: 0.0403 - val_accuracy: 0.9134
Epoch 13/15
1500/1500 [==============================] - 206s 137ms/step - loss: 0.0540 - accuracy: 0.8773 - val_loss: 0.0387 - val_accuracy: 0.9183
Epoch 14/15
1500/1500 [==============================] - 207s 138ms/step - loss: 0.0539 - accuracy: 0.8771 - val_loss: 0.0382 - val_accuracy: 0.9214
Epoch 15/15
1500/1500 [==============================] - 207s 138ms/step - loss: 0.0532 - accuracy: 0.8804 - val_loss: 0.0395 - val_accuracy: 0.9135
plot_history(history.history)

# The model outputs a distance (near 0 means "same digit"), so flip it with
# `1 - d` and round to obtain hard 0/1 predictions comparable to the labels.
predictions = np.round(1 - model.predict([x_pairs_test[:, 0], x_pairs_test[:, 1]]))
display_pairs(x_pairs_test, predictions, predictions == y_pairs_test)

# Use a distinct name for the metric object: the original code rebound
# `accuracy`, shadowing the custom metric function defined earlier, which
# breaks `model.compile` if the notebook cells are re-run out of order.
test_accuracy = metrics.BinaryAccuracy()
test_accuracy.update_state(y_pairs_test, predictions)
print(f"\nAccuracy: {test_accuracy.result().numpy()}")
Accuracy: 0.9210500121116638