### **Environment Setup: Install and import required packages**
---

In [None]:
# Install the pakage
!pip install -e .

In [None]:
# Import the project-local building blocks used throughout the notebook:
# config/training/saving helpers, the evaluation plot helper, the tSSN model,
# the triplet loss and the triplet training dataset.
from utils.helpers import load_config, save_model, train_model, train_model_kfold
from utils.model_evaluation import draw_plot_find_acc
from models.Triplet_Siamese_Similarity_Network import tSSN
from losses.triplet_loss import TripletLoss
from dataloader.tSSN_trainloader import SignatureTrainDataset

# Reaching this line means every import above resolved without raising.
print("Package installed and imported successfully!")

In [None]:
import os
import yaml
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader,Subset
import numpy as np
import matplotlib.pyplot as plt
import random

# Seed every random number generator the notebook relies on (Python, NumPy,
# PyTorch CPU and all CUDA devices) with one shared value.
seed = 42
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_fn(seed)

# Request deterministic cuDNN behaviour and switch off cuDNN benchmark mode.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

---
### **Setup: Load configuration and prepare training dataset**
---

In [None]:
# Shared preprocessing applied to every signature image.
transform = transforms.Compose([
    transforms.Resize((220, 150)),
    # Convert to grayscale and emit three identical channels (r == g == b) so the
    # tensor matches the 3-channel Normalize below; doing it here avoids a
    # `Lambda(lambda ...)` step, which cannot be pickled by DataLoader workers
    # on platforms that spawn processes.
    transforms.Grayscale(num_output_channels=3),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Triplet dataset built from the genuine and forged CEDAR signature folders.
dataset = SignatureTrainDataset(
    org_dir=r'/kaggle/input/cedardataset/signatures/full_org',
    forg_dir=r'/kaggle/input/cedardataset/signatures/full_forg',
    transform=transform
)

# 80/20 train/test split. A dedicated generator seeded with the notebook-wide
# `seed` makes the split identical every time this cell is (re-)run, instead of
# depending on how much of the global RNG stream has already been consumed.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(seed)
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [train_size, test_size], generator=split_generator
)

# Only the training loader shuffles; the test loader keeps a fixed order.
train_loader = DataLoader(train_dataset, num_workers=4, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, num_workers=4, batch_size=32, shuffle=False)

In [None]:
# Sanity-check the full dataset and both splits: print their sizes and the
# tensor shapes of the first triplet in each.
def _print_triplet_shapes(triplet):
    """Print the shape of each element of an (anchor, positive, negative) triplet."""
    for role, image in zip(("Anchor", "Positive", "Negative"), triplet):
        print(f"{role} shape: {image.shape}")


print(f"Dataset - Total triplets: {len(dataset)}")
anchor, positive, negative = dataset[0]
_print_triplet_shapes((anchor, positive, negative))

print(f"Training dataset: {len(train_dataset)}")
anchor_train, positive_train, negative_train = train_dataset[0]
_print_triplet_shapes((anchor_train, positive_train, negative_train))

print(f"Testing dataset: {len(test_dataset)}")
anchor_test, positive_test, negative_test = test_dataset[0]
_print_triplet_shapes((anchor_test, positive_test, negative_test))

print("LOAD DATASET SUCCESSFULLY")
# Report the effective split ratio as percentages of the full dataset.
print(f"Train-Test Split Ratio: {len(train_dataset)/len(dataset)*100:.1f}% - {len(test_dataset)/len(dataset)*100:.1f}%")

In [None]:
# Read the tSSN experiment settings from the YAML file shipped with the repository
# and echo them so the run is self-documenting.
config_path = r'/kaggle/working/Deep_Learning-Based_Signature_Forgery_Detection_for_Personal_Identity_Authentication/configs/config_tSSN.yaml'
config = load_config(config_path)
print(config)
print("LOAD CONFIG SUCCESSFULLY")

---
### **Experiment: Triplet Loss with different distance modes and margins (10-fold CV)**
---

In [None]:
# Grid search over triplet-loss configurations, each scored with K-fold
# cross-validation on the training split only (the test split is not touched).

# Distance metrics to evaluate in the triplet loss
modes = ['euclidean', 'cosine', 'manhattan', 'learnable']

# Fixed margins for the non-learnable modes: 0.2, 0.4, ..., 1.0.
# Rounded because np.arange accumulates float error (e.g. 0.6000000000000001).
margins = np.round(np.arange(0.2, 1.01, 0.2), 1)

# Margin handed to TripletLoss in 'learnable' mode. Set explicitly (instead of
# reusing whatever `margin` was left over from a previous loop iteration) and
# kept equal to the value the final-training cell passes for this mode.
learnable_init_margin = 0

# Number of cross-validation folds used for every configuration
k_folds = 10

# Cross-validation results keyed by "<mode>_<margin>" (or just "learnable")
results_dict: dict = {}

for mode in modes:
    # Each candidate is (result key, margin passed to the loss, label shown in the log)
    if mode == 'learnable':
        candidates = [(f"{mode}", learnable_init_margin, "learnable")]
    else:
        candidates = [(f"{mode}_{m:.1f}", float(m), f"{m:.1f}") for m in margins]

    for result_key, margin, margin_label in candidates:
        print(f"\nTraining mode: {mode} | margin: {margin_label}")

        # Build the loss for this (mode, margin) configuration
        loss_fn = TripletLoss(
            margin=margin,
            mode=mode,
            input_dim=config['model']['feature_dim']
        )

        # Perform K-fold cross-validation training
        mean_acc, mean_loss = train_model_kfold(
            config=config,
            loss_fn=loss_fn,
            dataset=train_dataset,
            k_folds=k_folds
        )

        results_dict[result_key] = {
            'mean_acc': mean_acc,
            'mean_loss': mean_loss
        }

In [None]:
# Visualize the cross-validation results and keep whatever the helper returns as the
# best configuration. The final-training cell reads best_params['mode'] and
# best_params['margin'], so the returned object is expected to expose those keys.
best_params = draw_plot_find_acc(results_dict)

---
### **Final Training with Best Parameters and Model Export**
---

In [None]:
# Train the final model with the best configuration found by cross-validation.
# NOTE: training runs on `train_loader`, i.e. the training split, not the whole dataset.
best_mode = best_params['mode']
best_margin = best_params['margin']
save_path = "/kaggle/working/final"

model = tSSN(config['model']['backbone'], config['model']['feature_dim'])
# Fall back to CPU when no CUDA device is available, whatever the config requests
device = torch.device(config['device'] if torch.cuda.is_available() else "cpu")
model.to(device)

# Replicate the model across the first two GPUs when more than one is present
if torch.cuda.device_count() > 1:
    model = torch.nn.DataParallel(model, device_ids=[0, 1])

loss_fn = TripletLoss(
    margin=0 if best_mode == 'learnable' else best_margin,
    mode=best_mode,
    input_dim=config['model']['feature_dim']
).to(device)

# Hand the loss parameters to the optimizer as well: if TripletLoss owns trainable
# parameters (presumably the case in 'learnable' mode — confirm in losses/triplet_loss.py),
# optimising only model.parameters() would leave them at their initial values.
# For a loss without parameters this adds nothing.
trainable_params = list(model.parameters()) + list(loss_fn.parameters())
optimizer = torch.optim.Adam(trainable_params, lr=config['training']['learning_rate'])

# Final training run (single pass over the training split, no cross-validation)
model, final_train_loss = train_model(
    model=model,
    loss_fn=loss_fn,
    optimizer=optimizer,
    train_loader=train_loader,
    num_epochs=config['training']['num_epochs'],
    device=device,
    early_stop=config['training']['early_stop']
)

# Build the margin part of the file name. A numeric margin keeps the one-decimal
# format; a non-numeric value (e.g. None or a label for the learnable mode)
# is used as-is instead of crashing the format call after training has finished.
try:
    margin_tag = f"{best_margin:.1f}"
except (TypeError, ValueError):
    margin_tag = str(best_margin)

# Save model to disk (e.g., for Kaggle upload or later inference)
save_model(model=model,
        dir= save_path,     
        optimizer=optimizer,
        avg_loss=final_train_loss,
        model_name=f"tSNN_{best_mode}_{margin_tag}")
           

---
### **Export Model and Upload to Kaggle**
---

In [None]:
# Upload the exported model directory to Kaggle
import kagglehub

# Authenticate with Kaggle before uploading
kagglehub.login()

kagglehub.model_upload(
    handle="...",  # placeholder — replace with the real model handle before running
    local_model_dir="/kaggle/working/final",  # same directory the final-training cell saves into
    version_notes="Upload latest model with best parameters",
)