# Training Siamese Network for MNIST
Example from: https://github.com/keras-team/keras/blob/master/examples/mnist_siamese.py

In [47]:
import numpy as np
import random

import tensorflow as tf

from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Flatten, Dropout, Dense, Lambda
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras import backend as K

In [48]:
nClasses = 10   # number of digit classes; used to index the per-class sample lists
nEpochs  = 20   # number of epochs passed to model.fit

# Seed Python's RNG so the randomly chosen negative pairs are identical on every
# "Restart Kernel -> Run All". This only covers the `random` module; it does not
# make the network weight initialisation deterministic.
SEED = 42
random.seed( SEED )

In [49]:
def euclidean_distance( vects ):
    '''Euclidean distance between two batches of vectors

    `vects` is a pair of tensors; the squared differences are summed along
    axis 1 (kept as a size-1 axis) and the square root is returned.
    '''
    left, right = vects
    squared_norm = K.sum( K.square(left - right), axis = 1, keepdims = True )
    # Clamp from below by epsilon before taking the square root
    clamped = K.maximum( squared_norm, K.epsilon() )
    return K.sqrt( clamped )

In [50]:
def eucl_dist_out_shape( shapes ):
    '''Output shape of the distance layer

    Takes the pair of input shapes and returns (first dimension of the first
    shape, 1).
    '''
    first_shape, _ = shapes
    leading_dim = first_shape[0]
    return (leading_dim, 1)

In [51]:
def contrastive_loss( y_true, y_pred, margin = 1 ):
    '''Contrastive loss on predicted distances

    Arguments:
        y_true: pair labels, 1 for a similar pair and 0 for a dissimilar pair.
        y_pred: predicted distance for each pair.
        margin: dissimilar pairs whose distance is at least `margin`
            contribute zero loss (default 1).

    Returns:
        Mean over all elements of
        y_true * y_pred^2 + (1 - y_true) * max(margin - y_pred, 0)^2.
    '''
    # Labels may arrive as integers; cast them so the arithmetic with the
    # floating-point distances is well-defined.
    y_true = K.cast( y_true, y_pred.dtype )
    square_pred = K.square( y_pred )
    margin_square = K.square( K.maximum(margin-y_pred,0) )
    return K.mean( y_true*square_pred + (1-y_true)*margin_square )

In [52]:
def create_pairs( x, digit_indices ):
    '''Positive and negative pair creation
    Alternates between positive and negative pairs

    Arguments:
        x: indexable collection of samples (e.g. an array of images).
        digit_indices: sequence with one entry per class; entry d holds the
            indices into `x` of the samples that belong to class d.

    Returns:
        (pairs, labels) as NumPy arrays. Each element of `pairs` holds two
        samples; the matching label is 1 for a same-class pair and 0 for a
        pair drawn from two different classes.

    Raises:
        ValueError: if fewer than two classes are supplied (no negative pair
            can be formed).
    '''
    # Use the number of classes actually supplied rather than a notebook global
    n_classes = len( digit_indices )
    if n_classes < 2:
        raise ValueError( "create_pairs needs at least two classes to build negative pairs" )

    pairs  = []
    labels = []
    # Every class contributes the same number of pairs, bounded by the smallest
    # class (minus one because positive pairs use samples i and i+1)
    n = min( len(indices) for indices in digit_indices ) - 1
    for d in range( n_classes ):
        for i in range( n ):
            # Positive pair: two consecutive samples of class d
            z1, z2 = digit_indices[d][i], digit_indices[d][i+1]
            pairs.append( [x[z1],x[z2]] )
            # Negative pair: sample i of class d with sample i of a randomly
            # chosen other class (an offset in 1..n_classes-1 never maps back to d)
            inc = random.randrange( 1, n_classes )
            dn  = (d+inc) % n_classes
            z1, z2 = digit_indices[d][i], digit_indices[dn][i]
            pairs.append( [x[z1],x[z2]] )
            labels += [ 1, 0 ]
    return np.array( pairs ), np.array( labels )

In [76]:
def create_base_network( input_shape ):
    '''Base network to be shared

    Builds a model that flattens its input, applies three 128-unit ReLU dense
    layers (with 10% dropout after the first two) and ends in a 10-unit
    sigmoid dense layer.
    '''
    inputs = Input( shape = input_shape )

    hidden = Flatten()( inputs )
    hidden = Dense( 128, activation = 'relu' )( hidden )
    hidden = Dropout( 0.1 )( hidden )
    hidden = Dense( 128, activation = 'relu' )( hidden )
    hidden = Dropout( 0.1 )( hidden )
    hidden = Dense( 128, activation = 'relu' )( hidden )

    embedding = Dense( 10, activation = 'sigmoid' )( hidden )
    return Model( inputs, embedding )

In [77]:
def compute_accuracy( y_true, y_pred, threshold = 0.5 ):
    '''Compute classification accuracy with a fixed threshold on distances

    Arguments:
        y_true: pair labels (1 = similar, 0 = dissimilar); any shape, flattened
            before comparison.
        y_pred: predicted distances; any shape, flattened before comparison.
        threshold: a pair is predicted similar when its distance is strictly
            below this value (default 0.5).

    Returns:
        Fraction of pairs whose thresholded prediction equals the label.
    '''
    pred   = np.asarray( y_pred ).ravel() < threshold
    # Flatten the labels too: comparing an (N,) prediction with an (N,1) label
    # array would broadcast to an (N,N) matrix and give a wrong accuracy.
    target = np.asarray( y_true ).ravel()
    return np.mean( pred == target )

In [78]:
def accuracy( y_true, y_pred ):
    '''Backend (tensor) accuracy with a fixed 0.5 threshold on distances

    A distance below 0.5 is turned into 1 and anything else into 0 (in the
    dtype of y_true); the result is the mean agreement with y_true.
    '''
    predicted_label = K.cast( y_pred < 0.5, y_true.dtype )
    matches = K.equal( y_true, predicted_label )
    return K.mean( matches )

Load the data, already split into training and test samples

In [79]:
# Load the dataset as (samples, labels) tuples for the training and test splits
(x_train,y_train), (x_test,y_test) = mnist.load_data()

# Convert to float32, divide by 255 (presumably mapping 8-bit pixel values
# into [0, 1]) and reshape to 28x28 with a trailing size-1 axis
x_train = ( x_train.astype('float32') / 255 ).reshape( -1, 28, 28, 1 )
x_test  = ( x_test.astype('float32') / 255 ).reshape( -1, 28, 28, 1 )

# Per-sample shape: everything except the leading sample axis
input_shape = x_train.shape[1:]

Create positive and negative pairs for the training and test sets

In [80]:
# For each class, collect the positions of its samples, then build the pairs.
# Training pairs are created before test pairs.
digit_indices_train = [ np.flatnonzero(y_train==i) for i in range(nClasses) ]
tr_pairs, tr_y = create_pairs( x_train, digit_indices_train )

digit_indices_test = [ np.flatnonzero(y_test==i) for i in range(nClasses) ]
te_pairs, te_y = create_pairs( x_test, digit_indices_test )

Network definition

In [81]:
# Single base network instance; both branches of the siamese model call it
base_network = create_base_network( input_shape )

# One input tensor per element of a pair
input_a = Input( shape = input_shape )
input_b = Input( shape = input_shape )

Because we reuse the same instance of the base network, its weights are shared across the two branches

In [82]:
# Pass both inputs through the same base_network instance
processed_a = base_network( input_a )
processed_b = base_network( input_b )

# The model output is the Euclidean distance between the two branch outputs
distance = Lambda( euclidean_distance, output_shape=eucl_dist_out_shape )( [processed_a,processed_b] )
model    = Model( [input_a,input_b], distance )

Training

In [83]:
# Optimise the contrastive loss with RMSprop (default settings) and track the
# thresholded accuracy metric
rms = RMSprop()
model.compile( optimizer = rms, loss = contrastive_loss, metrics = [ accuracy ] )

# Each pair is split into its two elements, one per model input
model.fit(
    x = [ tr_pairs[:,0], tr_pairs[:,1] ],
    y = tr_y,
    batch_size = 128,
    epochs = nEpochs,
    validation_data = ( [ te_pairs[:,0], te_pairs[:,1] ], te_y ),
)

Train on 108400 samples, validate on 17820 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fcc2b307cf8>

Accuracy on the training and test sets

In [84]:
# Predicted distances and thresholded accuracy for the training pairs
y_pred_tr = model.predict( x = [ tr_pairs[:,0], tr_pairs[:,1] ] )
tr_acc = compute_accuracy( tr_y, y_pred_tr )

# Same for the test pairs
y_pred_te = model.predict( x = [ te_pairs[:,0], te_pairs[:,1] ] )
te_acc = compute_accuracy( te_y, y_pred_te )

print( "Accuracy on training set = %0.2f%%" % ( 100*tr_acc, ) )
print( "Accuracy on test set     = %0.2f%%" % ( 100*te_acc, ) )

Accuracy on training set = 99.82%
Accuracy on test set     = 97.91%


In [85]:
# Model that maps a single input to the base network's output for branch a
embeddingModel = Model( inputs = input_a, outputs = processed_a )

In [86]:
# Print the mean embedding of each class over its training samples
for l, class_indices in enumerate( digit_indices_train ):
    y = embeddingModel.predict( x_train[class_indices] )
    y_bar = y.mean( axis = 0 )
    # Zero out components below 0.01 so the printout is easier to read
    y_bar[y_bar<0.01] = 0.0
    print( "l = ", l, "    r = ", y_bar )

l =  0     r =  [0.         0.         0.9995201  0.         0.99577284 0.999553
 0.         0.99950933 0.         0.9989072 ]
l =  1     r =  [0.         0.59213316 0.78118354 0.43414107 0.99561214 0.9995576
 0.38781098 0.9992588  0.46998414 0.48810837]
l =  2     r =  [0.        0.9981816 0.9988401 0.        0.9956107 0.9997263 0.
 0.9995349 0.        0.       ]
l =  3     r =  [0.         0.9962082  0.14696868 0.         0.99562085 0.99947196
 0.         0.99872065 0.         0.48909113]
l =  4     r =  [0.         0.4674495  0.99985677 0.         0.99561256 0.35803995
 0.5850248  0.9999345  0.         0.        ]
l =  5     r =  [0.         0.         0.13891643 0.         0.99561185 0.9997191
 0.         0.99996895 0.         0.49527392]
l =  6     r =  [0.         0.         0.9992276  0.         0.99560887 0.9999397
 0.         0.9998335  0.         0.        ]
l =  7     r =  [0.         0.9989631  0.99959594 0.         0.9956139  0.9991925
 0.         0.99997914 0.         0.9