**Hyperparameters**

In [None]:
import math
import os
import random
from os import path

import numpy as np
import pandas as pd
import tensorflow as tf
import yaml
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import Sequential
from keras.preprocessing import image
from keras.applications.imagenet_utils import preprocess_input
from PIL import Image
import matplotlib.pyplot as plt
import keras.backend as K
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [None]:
# Load configuration from YAML file
def load_config(config_path='config_siamese.yaml'):
    """Read the experiment configuration from a YAML file.

    Args:
        config_path: Path of the YAML file to read.

    Returns:
        The object produced by ``yaml.safe_load`` for the file contents.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Explicit encoding so decoding does not depend on the platform locale.
    with open(config_path, 'r', encoding='utf-8') as file:
        return yaml.safe_load(file)


config = load_config()

In [None]:
# Training hyperparameters read from the loaded configuration.
epochs = config['hyperparameters']['epochs']
batch_size = config['hyperparameters']['batch_size']
margin = config['hyperparameters']['margin']

# Dataset layout read from the loaded configuration.
dataset_path = config['dataset']['base_path']
# Alias so that code referring to the dataset root as `base_dataset_path`
# resolves to the same value instead of raising NameError.
base_dataset_path = dataset_path
dataset_version = config['dataset']['version']
roi_folder_name = config['dataset']['roi_folder_name']
number_of_roi_per_user = config['dataset']['num_roi']
landmarks = config['dataset']['landmarks']

# Example usage
print(f"Training for {epochs} epochs with batch size {batch_size} and margin {margin}")
print(f"Dataset path: {dataset_path}{dataset_version}, ROI folder: {roi_folder_name}")


# Every user is a sub-directory of the versioned dataset root; plain files
# found there (e.g. OS metadata files) are not users and are ignored.
dataset_root = dataset_path + dataset_version
users = [
    entry
    for entry in os.listdir(dataset_root)
    if os.path.isdir(os.path.join(dataset_root, entry))
]
# NOTE: the shuffle is unseeded, so the train/test split differs between runs.
random.shuffle(users)
length = len(users)
# The first 90% of the shuffled users are used for training/validation,
# the remainder is held out for testing.
training_len = int(length * 0.9)
users_train_val = users[:training_len]
users_test = users[training_len:]

In [None]:
# import os
def list_files_in_directory(directory):
    """Return the paths of the files located directly inside *directory*.

    Each returned path is *directory* joined with the entry name. Entries for
    which ``os.path.isfile`` is false (such as sub-directories) are left out.
    The order is the one reported by ``os.listdir``.
    """
    candidates = (os.path.join(directory, name) for name in os.listdir(directory))
    return [candidate for candidate in candidates if os.path.isfile(candidate)]

def get_file_path_list(dir):
    """Return ``number_of_roi_per_user`` file paths taken from *dir*.

    When the directory holds fewer files than required, the first file is
    repeated until the list is long enough; when it holds more, the list is
    truncated.

    Args:
        dir: Directory whose files are listed.

    Returns:
        A list of ``number_of_roi_per_user`` paths, or an empty list when the
        directory contains no files.
    """
    # list_files_in_directory already returns paths joined with `dir`; joining
    # them with `dir` a second time would duplicate a relative directory prefix.
    file_paths = list_files_in_directory(dir)
    if not file_paths:
        # Nothing to pad with. Callers treat an empty list as "no data".
        return []

    shortfall = number_of_roi_per_user - len(file_paths)
    if shortfall > 0:
        file_paths.extend([file_paths[0]] * shortfall)
    return file_paths[:number_of_roi_per_user]

In [None]:
def create_triplets(landmark, hand, users):
    """Build (anchor, hydrated, dehydrated) image-path triplets for each user.

    For every user the first hydrated image is the anchor. It is combined with
    every (dehydrated, hydrated) combination of that user's images, iterating
    over the dehydrated images in the outer loop.

    Args:
        landmark: Name of the landmark folder (e.g. ``'dorsal'``).
        hand: Name of the hand folder (e.g. ``'left_hand'``).
        users: Iterable of user folder names below the dataset root.

    Returns:
        A DataFrame with the columns ``'anchor_image'``, ``'hydrated_image'``
        and ``'dehydrated_image'``, one row per triplet. Users without any
        hydrated or dehydrated image are skipped.
    """
    columns = ['anchor_image', 'hydrated_image', 'dehydrated_image']
    dataset_root = dataset_path + dataset_version

    # Rows are collected in a list and converted once; appending to a
    # DataFrame row by row copies the frame on every insertion.
    rows = []
    for user in users:
        hydrated_list = get_file_path_list(
            f"{dataset_root}{user}/hydrated/{hand}/{landmark}/{roi_folder_name}"
        )
        dehydrated_list = get_file_path_list(
            f"{dataset_root}{user}/dehydrated/{hand}/{landmark}/{roi_folder_name}"
        )
        if not hydrated_list or not dehydrated_list:
            continue

        anchor = hydrated_list[0]
        rows.extend(
            [anchor, hydrated, dehydrated]
            for dehydrated in dehydrated_list
            for hydrated in hydrated_list
        )

    return pd.DataFrame(rows, columns=columns)

In [None]:
def _load_image(image_path):
    """Load one image file, resize it to 200x200 and apply ``preprocess_input``."""
    img = image.load_img(image_path, target_size=(200, 200))
    img = image.img_to_array(img)
    img = tf.image.convert_image_dtype(img, tf.float32)
    return preprocess_input(img)


def load_images(triplets):
    """Load the images referenced by every row of *triplets*.

    Args:
        triplets: DataFrame with the columns ``'anchor_image'``,
            ``'hydrated_image'`` and ``'dehydrated_image'`` holding file paths.

    Returns:
        A list with one ``[anchor, hydrated, dehydrated]`` entry of loaded
        images per row. A row is dropped as a whole when any of its three
        files is missing, so the three images of an entry always belong to
        the same row.
    """
    # The same file appears in many rows (the anchor is shared by all rows of
    # a user), so every path is decoded only once.
    loaded_by_path = {}
    image_triplets = []
    skipped = 0

    rows = zip(
        triplets['anchor_image'],
        triplets['hydrated_image'],
        triplets['dehydrated_image'],
    )
    for row_paths in rows:
        try:
            row_images = []
            for image_path in row_paths:
                if image_path not in loaded_by_path:
                    loaded_by_path[image_path] = _load_image(image_path)
                row_images.append(loaded_by_path[image_path])
        except FileNotFoundError:
            # Skip the complete row; keeping the images that did load would
            # shift later rows against each other.
            skipped += 1
            continue
        image_triplets.append(row_images)

    if skipped:
        print(f"Skipped {skipped} triplets with missing image files")
    print(len(image_triplets))
    return image_triplets

In [None]:
def make_pairs(triplets):
    """Turn image triplets into labelled image pairs.

    Every triplet ``[anchor, hydrated, dehydrated]`` contributes two
    consecutive pairs: ``[anchor, hydrated]`` labelled 1 (matching) followed
    by ``[anchor, dehydrated]`` labelled 0 (non-matching).

    Returns:
        A tuple ``(pairs, labels)`` of NumPy arrays; ``labels`` is float32.
    """
    pairs = []
    labels = []

    for triplet in triplets:
        anchor = triplet[0]
        # Matching pair first, non-matching pair second.
        pairs.append([anchor, triplet[1]])
        pairs.append([anchor, triplet[2]])
        labels.extend((1, 0))

    return np.array(pairs), np.array(labels).astype("float32")

In [None]:
def split_dataset_pair(x, y):
    """Split pairs and labels into training and validation parts.

    20% of the samples go to validation; the split uses the fixed seed 42.

    Args:
        x: Array of pairs, indexed as ``x[:, 0]`` and ``x[:, 1]``.
        y: Labels belonging to ``x``.

    Returns:
        ``(x_train_1, x_train_2, x_val_1, x_val_2, y_train, y_val)`` where the
        ``_1`` arrays hold the first element of each pair and the ``_2``
        arrays the second.
    """
    train_pairs, val_pairs, train_labels, val_labels = train_test_split(
        x, y, test_size=0.2, random_state=42
    )

    # Column 0 of a pair is the anchor image; column 1 is the hydrated or
    # dehydrated image it is compared with.
    return (
        train_pairs[:, 0],
        train_pairs[:, 1],
        val_pairs[:, 0],
        val_pairs[:, 1],
        train_labels,
        val_labels,
    )

In [None]:
def euclidean_distance(vects):
    """Return the Euclidean distance between the two tensors in *vects*.

    The squared differences are summed over axis 1 and that axis is kept.
    """
    first, second = vects
    squared_difference = tf.math.square(first - second)
    squared_norm = tf.math.reduce_sum(squared_difference, axis=1, keepdims=True)
    # Floor the sum at epsilon before taking the square root.
    floored = tf.math.maximum(squared_norm, tf.keras.backend.epsilon())
    return tf.math.sqrt(floored)

def manhattan_distance(vects):
    """Return the sum of absolute differences between the two tensors in *vects*.

    The sum runs over axis 1 and that axis is kept.
    """
    first, second = vects
    absolute_difference = K.abs(first - second)
    return K.sum(absolute_difference, axis=1, keepdims=True)

def cosine_distance(vects):
    """Return the absolute cosine between the two tensors in *vects*.

    Both tensors are L2-normalised along axis 1, multiplied element-wise and
    summed over axis 1 (axis kept); the absolute value of that sum is returned.

    NOTE(review): despite the name, the result grows as the vectors become
    more aligned (it is |cosine similarity|, not 1 - similarity) - confirm
    this is what callers expect.
    """
    first, second = vects
    first_unit = tf.nn.l2_normalize(first, axis=1)
    second_unit = tf.nn.l2_normalize(second, axis=1)
    cosine = tf.math.reduce_sum(first_unit * second_unit, axis=1, keepdims=True)
    return tf.math.abs(cosine)

**Multi Modal Siamese Model**

Embedding for dorsal

left hand dorsal

In [None]:
# Embedding network for left-hand dorsal images: two convolution /
# average-pooling stages followed by a 10-unit dense embedding.
# The input tensor is named `input1` (in line with `input2`..`input4` used by
# the other embedding networks) so that the builtin `input()` is not shadowed.
input1 = layers.Input((200, 200, 3))
x = layers.BatchNormalization()(input1)
x = layers.Conv2D(4, (5, 5), activation="relu")(x)
x = layers.AveragePooling2D(pool_size=(2, 2))(x)
x = layers.Conv2D(16, (5, 5), activation="relu")(x)
x = layers.AveragePooling2D(pool_size=(2, 2))(x)
x = layers.Flatten()(x)

x = layers.BatchNormalization()(x)
x = layers.Dense(10, activation="relu")(x)
embedding_network_dorsal_left = keras.Model(input1, x)

right hand dorsal

In [None]:
# Embedding network for right-hand dorsal images: two convolution /
# average-pooling stages (5 and 20 filters) and a 10-unit dense embedding.
input2 = layers.Input(shape=(200, 200, 3))
x2 = layers.BatchNormalization()(input2)
x2 = layers.Conv2D(filters=5, kernel_size=(5, 5), activation="relu")(x2)
x2 = layers.AveragePooling2D(pool_size=(2, 2))(x2)
x2 = layers.Conv2D(filters=20, kernel_size=(5, 5), activation="relu")(x2)
x2 = layers.AveragePooling2D(pool_size=(2, 2))(x2)
x2 = layers.Flatten()(x2)

x2 = layers.BatchNormalization()(x2)
x2 = layers.Dense(units=10, activation="relu")(x2)
embedding_network_dorsal_right = keras.Model(inputs=input2, outputs=x2)

Embedding for wrist

left hand wrist

In [None]:
# Embedding network for left-hand wrist images: two convolution /
# average-pooling stages (5 and 25 filters) and a 10-unit dense embedding.
input3 = layers.Input(shape=(200, 200, 3))
x3 = layers.BatchNormalization()(input3)
x3 = layers.Conv2D(filters=5, kernel_size=(5, 5), activation="relu")(x3)
x3 = layers.AveragePooling2D(pool_size=(2, 2))(x3)
x3 = layers.Conv2D(filters=25, kernel_size=(5, 5), activation="relu")(x3)
x3 = layers.AveragePooling2D(pool_size=(2, 2))(x3)
x3 = layers.Flatten()(x3)

x3 = layers.BatchNormalization()(x3)
x3 = layers.Dense(units=10, activation="relu")(x3)
embedding_network_wrist_left = keras.Model(inputs=input3, outputs=x3)

right hand wrist

In [None]:
# Embedding network for right-hand wrist images: two convolution /
# average-pooling stages (4 and 16 filters) and a 10-unit dense embedding.
input4 = layers.Input(shape=(200, 200, 3))
x4 = layers.BatchNormalization()(input4)
x4 = layers.Conv2D(filters=4, kernel_size=(5, 5), activation="relu")(x4)
x4 = layers.AveragePooling2D(pool_size=(2, 2))(x4)
x4 = layers.Conv2D(filters=16, kernel_size=(5, 5), activation="relu")(x4)
x4 = layers.AveragePooling2D(pool_size=(2, 2))(x4)
x4 = layers.Flatten()(x4)

x4 = layers.BatchNormalization()(x4)
x4 = layers.Dense(units=10, activation="relu")(x4)
embedding_network_wrist_right = keras.Model(inputs=input4, outputs=x4)

In [None]:
# Two image inputs per modality: a reference ("ref") and a compared ("state") image.
input_left_dorsal_ref = layers.Input(shape=(200, 200, 3))
input_left_dorsal_state = layers.Input(shape=(200, 200, 3))

input_right_dorsal_ref = layers.Input(shape=(200, 200, 3))
input_right_dorsal_state = layers.Input(shape=(200, 200, 3))

input_left_wrist_ref = layers.Input(shape=(200, 200, 3))
input_left_wrist_state = layers.Input(shape=(200, 200, 3))

input_right_wrist_ref = layers.Input(shape=(200, 200, 3))
input_right_wrist_state = layers.Input(shape=(200, 200, 3))

# Modality order: left dorsal, right dorsal, left wrist, right wrist.
input_refs = [
    input_left_dorsal_ref,
    input_right_dorsal_ref,
    input_left_wrist_ref,
    input_right_wrist_ref,
]
input_states = [
    input_left_dorsal_state,
    input_right_dorsal_state,
    input_left_wrist_state,
    input_right_wrist_state,
]

# The same embedding network is applied to both inputs of a modality, so the
# ref and state towers of that modality share their weights.
tower_ref_dorsal_left = embedding_network_dorsal_left(input_left_dorsal_ref)
tower_state_dorsal_left = embedding_network_dorsal_left(input_left_dorsal_state)

tower_ref_dorsal_right = embedding_network_dorsal_right(input_right_dorsal_ref)
tower_state_dorsal_right = embedding_network_dorsal_right(input_right_dorsal_state)

tower_ref_wrist_left = embedding_network_wrist_left(input_left_wrist_ref)
tower_state_wrist_left = embedding_network_wrist_left(input_left_wrist_state)

tower_ref_wrist_right = embedding_network_wrist_right(input_right_wrist_ref)
tower_state_wrist_right = embedding_network_wrist_right(input_right_wrist_state)

# Merge the four per-modality embeddings along the feature axis; the number of
# rows stays the same, only the features are joined.
tower_ref = layers.Concatenate(axis=-1)(
    [tower_ref_dorsal_left, tower_ref_dorsal_right, tower_ref_wrist_left, tower_ref_wrist_right]
)
tower_state = layers.Concatenate(axis=-1)(
    [tower_state_dorsal_left, tower_state_dorsal_right, tower_state_wrist_left, tower_state_wrist_right]
)

# Distance between the merged embeddings, normalised and mapped to a single sigmoid score.
merge_layer = layers.Lambda(euclidean_distance)([tower_ref, tower_state])
normal_layer = layers.BatchNormalization()(merge_layer)
output_layer = layers.Dense(units=1, activation="sigmoid")(normal_layer)

# The order of the inputs must be maintained when feeding data to the model.
siamese = keras.Model(inputs=[input_refs, input_states], outputs=output_layer)

In [None]:
def loss(margin=1):
    """Create a contrastive loss function that uses the given margin.

    Args:
        margin: Value that ``y_pred`` is compared against in the
            ``max(margin - y_pred, 0)`` term.

    Returns:
        A function ``contrastive_loss(y_true, y_pred)`` that returns the mean of
        ``(1 - y_true) * y_pred**2 + y_true * max(margin - y_pred, 0)**2``.
    """

    def contrastive_loss(y_true, y_pred):
        squared_prediction = tf.math.square(y_pred)
        squared_margin_gap = tf.math.square(tf.math.maximum(margin - (y_pred), 0))
        per_sample = (1 - y_true) * squared_prediction + (y_true) * squared_margin_gap
        return tf.math.reduce_mean(per_sample)

    return contrastive_loss


In [None]:
# Compile with the contrastive loss at the configured margin and report accuracy.
siamese.compile(
    optimizer="RMSprop",
    loss=loss(margin=margin),
    metrics=["accuracy"],
)
siamese.summary()

In [None]:
def load_pairs(users):
    """Build the labelled image pairs of all four modalities for *users*.

    The modalities are processed in the order left dorsal, right dorsal,
    left wrist, right wrist.

    Args:
        users: Iterable of user folder names.

    Returns:
        A tuple ``(pairs, labels)``: ``pairs`` is a list with one pair array
        per modality (in the order above) and ``labels`` is the label array of
        the left dorsal modality, which is used for all modalities.

    Raises:
        ValueError: If the modalities do not all yield the same number of
            pairs; a single label array could not describe them.
    """
    modalities = [
        ('dorsal', "left_hand"),
        ('dorsal', "right_hand"),
        ('wrist', "left_hand"),
        ('wrist', "right_hand"),
    ]

    # dropna() removes rows with a missing path. The previous boolean-frame
    # mask `df[df.notna()]` kept every row and therefore filtered nothing.
    triplet_frames = [
        create_triplets(landmark, hand, users).dropna()
        for landmark, hand in modalities
    ]
    image_triplets = [load_images(frame) for frame in triplet_frames]
    pair_results = [make_pairs(modality_triplets) for modality_triplets in image_triplets]

    pairs = [result[0] for result in pair_results]
    labels_per_modality = [result[1] for result in pair_results]

    for modality_labels in labels_per_modality:
        print(modality_labels)

    # One label array is shared by all modalities, which only works when
    # every modality produced the same number of pairs.
    pair_counts = [len(modality_pairs) for modality_pairs in pairs]
    if len(set(pair_counts)) > 1:
        raise ValueError(
            f"Modalities produced different numbers of pairs: {pair_counts}"
        )

    return pairs, labels_per_modality[0]

In [None]:
def landmark_model():
    """Train the siamese model and predict on the held-out test users.

    Pairs of the users in ``users_train_val`` are split into training and
    validation data and used to fit the module-level ``siamese`` model; the
    pairs of ``users_test`` are then passed to ``siamese.predict``.

    Returns:
        A tuple ``(history, predictions, labels_test)``: the object returned
        by ``siamese.fit``, the output of ``siamese.predict`` and the labels
        of the test pairs.
    """
    pairs_train, labels_train = load_pairs(users_train_val)

    # One split per modality (left dorsal, right dorsal, left wrist, right
    # wrist), each against the same labels. split_dataset_pair uses a fixed
    # seed, presumably so that all modalities are split identically.
    splits = [
        split_dataset_pair(modality_pairs, labels_train)
        for modality_pairs in pairs_train
    ]

    # Each split is (train_ref, train_state, val_ref, val_state, y_train, y_val).
    input_train_refs = [split[0] for split in splits]
    input_train_states = [split[1] for split in splits]
    input_val_refs = [split[2] for split in splits]
    input_val_states = [split[3] for split in splits]

    # The labels of the first (left dorsal) split are used for all modalities.
    y_train = splits[0][4]
    y_val = splits[0][5]

    history = siamese.fit(
        [input_train_refs, input_train_states], y_train,
        validation_data=([input_val_refs, input_val_states], y_val),
        batch_size=batch_size,
        epochs=epochs,
    )

    # load test data
    pairs_test, labels_test = load_pairs(users_test)

    # Element 0 of every pair is the reference image, element 1 the state image.
    input_test_refs = [modality_pairs[:, 0] for modality_pairs in pairs_test]
    input_test_states = [modality_pairs[:, 1] for modality_pairs in pairs_test]

    predictions = siamese.predict([input_test_refs, input_test_states])

    return history, predictions, labels_test

In [None]:
# Function to calculate evaluation metrics
def evaluate_model(y_true, y_pred):
    """Compute and print accuracy, precision, recall and F1 score.

    Precision, recall and F1 use ``average='weighted'``.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.

    Returns:
        The tuple ``(accuracy, precision, recall, f1)``.
    """
    metrics = (
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred, average='weighted'),
        recall_score(y_true, y_pred, average='weighted'),
        f1_score(y_true, y_pred, average='weighted'),
    )
    accuracy, precision, recall, f1 = metrics

    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")

    return metrics

In [None]:
history, predictions, y_test = landmark_model()
# The model ends in a single sigmoid unit, so `predictions` holds one
# continuous score per pair. The classification metrics need discrete labels:
# threshold the scores at 0.5 and flatten them to one dimension first.
predicted_labels = (np.asarray(predictions) > 0.5).astype("float32").ravel()
evaluate_model(y_test, predicted_labels)