In [None]:
import os 
import numpy as np
from PIL import Image
from itertools import combinations
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Defining function for generating array of Similar (Real) Pairs from given dataset and reducing image size:

def similar_pairs(LIST,dataset):
    """Build every pair of genuine signatures for each signer, labelled 1.0.

    Parameters
    ----------
    LIST : iterable of str
        Signer folder names found under ``Signatures_2/<dataset>/``.
    dataset : str
        Name of the dataset sub-folder (e.g. ``'Train_set'``).

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)``. ``X`` holds the image pairs divided by 255; when at least
        one pair exists its shape is ``(n_pairs, 2, 100, 100, 3)``. ``y`` holds
        one ``1.0`` per pair.
    """
    base_dir = 'Signatures_2/' + dataset + '/'
    pair_images = []
    pair_labels = []

    def load_scaled(path):
        # Decode the file, force three colour channels and shrink to 100x100.
        return np.asarray(Image.open(path).convert('RGB').resize((100,100)))

    for number in LIST:
        print(number)
        real_dir = base_dir + number + '/Real/'

        # '.DS_Store' is macOS Finder metadata, not a signature image.
        real_files = [name for name in os.listdir(real_dir) if name != '.DS_Store']

        # Every unordered combination of two genuine signatures of this signer.
        for first_name, second_name in combinations(real_files, 2):
            pair_images.append([load_scaled(real_dir + first_name),
                                load_scaled(real_dir + second_name)])
            pair_labels.append(1.0)

    return np.array(pair_images) / 255 , np.array(pair_labels)

In [None]:
def dissimilar_pair(LIST,dataset):
    """Build (genuine, forged) signature pairs for each signer, labelled 0.0.

    Every genuine image of a signer is paired with every forged image of the
    same signer. Earlier revisions unintentionally discarded the first genuine
    image of each signer (a leftover '.DS_Store' work-around sliced the list
    after the file had already been removed); all genuine images are used now.

    Parameters
    ----------
    LIST : iterable of str
        Signer folder names found under ``Signatures_2/<dataset>/``.
    dataset : str
        Name of the dataset sub-folder (e.g. ``'Train_set'``).

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)``. ``X`` holds the image pairs divided by 255; when at least
        one pair exists its shape is ``(n_pairs, 2, 100, 100, 3)`` with the
        genuine image at index 0 and the forged one at index 1. ``y`` holds
        one ``0.0`` per pair.
    """
    X = []
    y = []
    location = 'Signatures_2/' + dataset + '/'

    def load_scaled(path):
        # Decode the file, force three colour channels and shrink to 100x100.
        return np.asarray(Image.open(path).convert('RGB').resize((100,100)))

    def list_images(folder):
        # os.listdir order is arbitrary: sort for reproducible pair order and
        # drop macOS Finder metadata, which is not an image.
        return sorted(name for name in os.listdir(folder) if name != '.DS_Store')

    for number in LIST:
        print(number)
        real_dir = location + number + '/Real/'
        forged_dir = location + number + '/Forged/'

        real_files = list_images(real_dir)
        forged_files = list_images(forged_dir)

        # Decode each forged image once instead of once per pair.
        forged_images = {name: load_scaled(forged_dir + name) for name in forged_files}

        for real_name in real_files:
            # Keep the original guard: skip a forged file carrying the very
            # same file name as the genuine one.
            partners = [name for name in forged_files if name != real_name]
            if not partners:
                continue

            real_image = load_scaled(real_dir + real_name)
            for forged_name in partners:
                X.append([real_image, forged_images[forged_name]])
                y.append(0.0)

    return np.array(X) / 255 , np.array(y)

In [None]:
# Generating Lists of Training and Testing Set Input Folders:
# Each list holds the entry names found directly under the dataset folder; these
# names are later passed as LIST to similar_pairs / dissimilar_pair.
# os.listdir returns entries in arbitrary order and may include hidden files such as '.DS_Store'.
training_set = os.listdir('./Signatures_2/Train_set')
testing_set = os.listdir('./Signatures_2/Test_set')

In [None]:
# Dropping '.DS_Store' (macOS Finder metadata) from the lists.
# Filtering instead of list.remove() avoids a ValueError when the file is absent
# (e.g. on a non-macOS checkout) or when this cell is executed a second time.
training_set = [entry for entry in training_set if entry != '.DS_Store']
testing_set = [entry for entry in testing_set if entry != '.DS_Store']

In [None]:
# Generating Similar pairs of Real Signature from the Training Set
# Returns the pair array (scaled by 1/255) and a matching array of 1.0 labels.
train_sp, train_sp_label = similar_pairs(training_set,'Train_set')
# Optional cache: uncomment to persist the arrays to disk.
# np.save('Train_sp_100.npy', train_sp)
# np.save('Train_sp_label.npy', train_sp_label)

In [None]:
# Generating Similar pairs of Real Signature from the Testing Set
# Returns the pair array (scaled by 1/255) and a matching array of 1.0 labels.
test_sp, test_sp_label = similar_pairs(testing_set,'Test_set')
# Optional cache: uncomment to persist the arrays to disk.
# np.save('Test_sp_100.npy', test_sp)
# np.save('Test_sp_label.npy', test_sp_label)

In [None]:
# Generating Dissimilar pairs of Real and Forged Signatures from the Training Set
# Returns the pair array (scaled by 1/255) and a matching array of 0.0 labels.
train_fp, train_fp_label = dissimilar_pair(training_set,'Train_set')
# Optional cache: uncomment to persist the arrays to disk.
# np.save('Train_fp_100.npy', train_fp)
# np.save('Train_fp_label.npy', train_fp_label)

In [None]:
# Generating Dissimilar pairs of Real and Forged Signatures from the Testing Set
# Returns the pair array (scaled by 1/255) and a matching array of 0.0 labels.
test_fp, test_fp_label = dissimilar_pair(testing_set,'Test_set')
# Optional cache: uncomment to persist the arrays to disk.
# np.save('Test_fp_100.npy', test_fp)
# np.save('Test_fp_label.npy', test_fp_label)

In [None]:
# train_sp = np.load('Train_sp_100.npy')
# train_sp_label = np.load('Train_sp_label.npy')
# train_fp = np.load('Train_fp_100.npy')
# train_fp_label = np.load('Train_fp_label.npy')
# test_sp = np.load('Test_sp_100.npy')
# test_sp_label = np.load('Test_sp_label.npy')
# test_fp =  np.load('Test_fp_100.npy')
# test_fp_label = np.load('Test_fp_label.npy')

In [None]:
# Report how many pairs each array holds (length of its first axis) so the
# balance between similar and dissimilar pairs can be checked at a glance.
print('Similar Training Pairs:',train_sp.shape[0], '\n')
print('Dissimilar Training Pairs:',train_fp.shape[0] , '\n')
print('Similar Testing Pairs:',test_sp.shape[0] , '\n')
print('Dissimilar Testing Pairs:',test_fp.shape[0])

In [None]:
# Combine Similar and Dissimilar pairs of Training and Testing Sets, respectively.
# Similar pairs are stacked first, dissimilar pairs second; the label arrays must be
# concatenated in the same order. The rows are not shuffled here - that happens in a later cell.
Training_pairs = np.vstack((train_sp, train_fp ))
Testing_pairs = np.vstack((test_sp, test_fp))

In [None]:
# Persist the combined pair arrays so they can be reloaded without re-reading the images.
# NOTE(review): this cell runs before the shuffle cells, and the label arrays are not saved here.
np.save('Training_pairs.npy', Training_pairs)
np.save('Testing_pairs.npy', Testing_pairs)

In [None]:
# Training_pairs = np.load('Training_pairs.npy')
# Testing_pairs = np.load('Testing_pairs.npy')

In [None]:
# Concatenate the label vectors in the same order used for the pair arrays
# (similar first, then dissimilar) so that row i of the pairs matches label i.
Training_labels = np.hstack((train_sp_label, train_fp_label))
Testing_labels = np.hstack((test_sp_label, test_fp_label))

In [None]:
# Shuffle the training pairs and their labels in unison: the global RNG state is
# captured before the first in-place shuffle and restored before the second, so
# both calls consume the same random stream.
# NOTE(review): no seed is set in the visible cells, so the order differs between runs.
random_state = np.random.get_state()
np.random.shuffle(Training_pairs)
np.random.set_state(random_state)
np.random.shuffle(Training_labels)

In [None]:
# Shuffle the testing pairs and their labels in unison: the global RNG state is
# captured before the first in-place shuffle and restored before the second, so
# both calls consume the same random stream.
random_state = np.random.get_state()
np.random.shuffle(Testing_pairs)
np.random.set_state(random_state)
np.random.shuffle(Testing_labels)

In [None]:
from keras.layers import Input, Conv2D, Lambda, merge, Dense, Flatten, AveragePooling2D
from keras.models import Model, Sequential
from keras.regularizers import l2
from keras import backend as K
from keras.optimizers import SGD,Adam
from keras.losses import binary_crossentropy
from keras import initializers
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.utils import shuffle
from sklearn.model_selection import cross_val_score
from sklearn.datasets import make_classification
from keras import layers

In [None]:
# Shape of one signature image: 100 x 100 pixels, 3 colour channels.
input_shape = (100,100,3)
# One symbolic input per image of a pair; the left tensor is created first.
left_input, right_input = Input(input_shape), Input(input_shape)

In [None]:
# Building Siamese Neural Network:

def get_siamese_model(left_input, right_input):
    """Build the Siamese network that scores a pair of signature images.

    Both inputs go through one shared convolutional encoder (the same
    ``Sequential`` instance is applied to each, so the weights are shared).
    The element-wise absolute difference of the two 400-unit encodings is then
    fed to a single sigmoid unit that outputs the similarity score.

    Parameters
    ----------
    left_input, right_input : Keras tensors
        Symbolic inputs (e.g. created with ``Input(shape)``) for the two images
        of a pair. The encoder's input shape is read from ``left_input``, so
        the function no longer depends on a notebook-level ``input_shape``.

    Returns
    -------
    keras.models.Model
        Uncompiled model mapping ``[left_input, right_input]`` to one sigmoid score.
    """
    # Per-image shape taken from the tensor itself (batch axis dropped).
    image_shape = tuple(K.int_shape(left_input)[1:])

    # Setting up Parameters for Neural Network:
    # NOTE(review): kernels are initialised around 0.5, not 0 - confirm this is intended.
    initialize_weights = initializers.RandomNormal(mean = 0.5 ,stddev=0.01)
    initialize_bias = initializers.RandomNormal(mean = 0.5 ,stddev=0.01)

    # Convolutional Neural Network (shared encoder):
    model = Sequential()
    # The first convolution keeps the default bias initializer, as in the original.
    model.add(Conv2D(100, (3,3), strides= 1, padding = "same", activation='relu',
                     input_shape=image_shape, kernel_initializer=initialize_weights,
                     kernel_regularizer=l2(2e-4)))
    model.add(AveragePooling2D(2,2))

    model.add(Conv2D(200, (3,3), strides= 1, padding = "same", activation='relu',
                     kernel_initializer=initialize_weights, bias_initializer=initialize_bias,
                     kernel_regularizer=l2(2e-4)))
    model.add(AveragePooling2D(2,2))
    model.add(Flatten())

    model.add(Dense(400, activation='sigmoid', kernel_initializer=initialize_weights,
                     bias_initializer=initialize_bias,kernel_regularizer=l2(1e-3)))

    # Generate the encodings (feature vectors) for the two images
    encoded_l = model(left_input)
    encoded_r = model(right_input)

    # Add a customized layer to compute the absolute difference between the encodings
    L1_layer = Lambda(lambda tensors:K.abs(tensors[0] - tensors[1]))
    L1_distance = L1_layer([encoded_l, encoded_r])

    # Add a dense layer with a sigmoid unit to generate the similarity score
    prediction = Dense(1,activation='sigmoid',bias_initializer=initialize_bias)(L1_distance)

    # Connect the inputs with the outputs
    siamese_net = Model(inputs=[left_input,right_input],outputs=prediction)

    # return the model
    return siamese_net

In [None]:
# Build the Siamese network on the two input tensors and print its layer summary.
model = get_siamese_model(left_input, right_input)
model.summary()

In [None]:
# Applying Adam optimizer and compiling the model with loss function and accuracy:
# Binary cross-entropy matches the single sigmoid output (1.0 = similar pair, 0.0 = dissimilar pair).
# NOTE(review): newer Keras releases name this argument `learning_rate`; `lr` is the legacy
# spelling - confirm against the installed version.

optimizer = Adam(lr = 0.001)
model.compile(loss="binary_crossentropy",optimizer=optimizer, metrics = ['accuracy'] )

In [None]:
# Applying KFold Validation on Training Set:
# A fresh network is built and compiled for every fold. Re-fitting one shared
# model would carry its weights from fold to fold, so later folds would be
# validated on samples the network had already been trained on.

from sklearn.model_selection import KFold
kfold = KFold(n_splits = 3)
for fold_number, (train_index, test_index) in enumerate(
        kfold.split(Training_pairs, Training_labels), start=1):

    print('Fold:',fold_number)

    X_train = Training_pairs[train_index]
    X_test = Training_pairs[test_index]
    y_train = Training_labels[train_index]
    y_test = Training_labels[test_index]

    # Independent, untrained model for this fold (same architecture and optimizer settings).
    fold_model = get_siamese_model(left_input, right_input)
    fold_model.compile(loss="binary_crossentropy", optimizer=Adam(lr = 0.001), metrics = ['accuracy'])

    # Axis 1 of the pair arrays separates the left ([:,0]) and right ([:,1]) images.
    fold_model.fit( [X_train[:,0],X_train[:,1]] , y_train, validation_data = ([X_test[:,0],X_test[:,1]] , y_test ),
              batch_size=30, epochs=2, workers=-1 )
    print('\n-------------------------------------------------------------------------------------------\n')

In [None]:
# Training the model on the complete Training set:
# Axis 1 of the pair array separates the two images of a pair: [:,0] feeds the left
# input and [:,1] the right input.
# NOTE(review): per the Keras docs `workers` / `use_multiprocessing` apply to generator or
# Sequence inputs; they likely have no effect with in-memory arrays - confirm.

model.fit( [Training_pairs[:,0], Training_pairs[:,1]] , Training_labels, 
              batch_size=30, epochs=2, workers=-1, use_multiprocessing=True)

In [None]:
# Evaluating the model with Testing Set:
# Returns the loss followed by the metrics given at compile time (accuracy).

model.evaluate([Testing_pairs[:,0],Testing_pairs[:,1]], Testing_labels)

In [None]:
# Classification of Signatures from Testing Set:
# `predict` holds the raw sigmoid similarity scores, one per test pair; they are
# rounded to 0/1 labels in the metrics cell.

predict = model.predict([Testing_pairs[:,0], Testing_pairs[:,1]])

In [None]:
# Analyzing performance metrics of the prediction:
from sklearn.metrics import confusion_matrix, classification_report

# Round the sigmoid scores to hard 0/1 labels once, then print both reports
# (confusion matrix first, per-class precision/recall/F1 second).
predicted_labels = predict.round()
for report in (confusion_matrix, classification_report):
    print(report(Testing_labels, predicted_labels))

In [None]:
# This Siamese Model predicts with a very high Accuracy for the classification of Similar and Dissimilar Signatures.
# This could be a result of overfitting the data with 3 folds (n_splits = 3 above) and again training over the complete dataset.