In [2]:
import torch
import torch.nn as nn
from torchvision import transforms
from PIL import Image
import cv2
import os
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
from facenet_pytorch import MTCNN, InceptionResnetV1

# Device setup: use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# Siamese Network definition (matches your trained model)
class SiameseNetwork(nn.Module):
    """Siamese wrapper that embeds two inputs with one shared backbone.

    Both branches go through the same ``InceptionResnetV1`` instance, so the
    two embeddings are always produced by identical weights.
    """

    def __init__(self):
        super().__init__()
        # The attribute must stay named `cnn`: state_dict keys are prefixed
        # with the attribute name, and checkpoints are loaded by those keys.
        backbone = InceptionResnetV1(pretrained="vggface2")
        self.cnn = backbone.eval()

    def forward_one(self, x):
        """Return the backbone embedding for a single batch ``x``."""
        embedding = self.cnn(x)
        return embedding

    def forward(self, x1, x2):
        """Embed ``x1`` and ``x2`` with the shared backbone and return both embeddings."""
        return self.forward_one(x1), self.forward_one(x2)

# Initialize MTCNN
# Module-level face detector shared by extract_face(), which calls
# mtcnn.detect() on an RGB array. It is created on the same `device`
# that was selected during device setup.
mtcnn = MTCNN(keep_all=False, device=device)

# Face extraction function
def extract_face(image_path, target_size=(160, 160), margin=20):
    """Detect a face with MTCNN, crop it with a margin and letterbox it.

    Args:
        image_path: Path of the image file; it is read with ``cv2.imread``.
        target_size: ``(width, height)`` of the returned image.
        margin: Number of pixels added on every side of the detected box
            before cropping. The enlarged box is clamped to the image bounds.

    Returns:
        An RGB ``PIL.Image`` of size ``target_size`` with the aspect-preserved
        face centred on a gray background, or ``None`` when the file cannot be
        read, no face is detected, or the crop is degenerate. Every failure is
        reported with ``print``.
    """
    img = cv2.imread(image_path)
    if img is None:
        print(f"Failed to load {image_path}")
        return None

    # OpenCV loads BGR; the detector is given an RGB array.
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    boxes, _ = mtcnn.detect(img_rgb)

    if boxes is None or len(boxes) == 0:
        print(f"No face detected in {image_path}")
        return None

    # Only the first returned box is used.
    x1, y1, x2, y2 = map(int, boxes[0])
    # Add margin and clamp to image bounds
    x1 = max(0, x1 - margin)
    y1 = max(0, y1 - margin)
    x2 = min(img.shape[1], x2 + margin)
    y2 = min(img.shape[0], y2 + margin)

    # Check for valid crop
    if x2 <= x1 or y2 <= y1:
        print(f"Invalid crop coordinates for {image_path}: ({x1}, {y1}, {x2}, {y2})")
        return None

    # Crop straight from the RGB copy made for detection; this avoids a
    # second BGR->RGB conversion of the same pixels.
    face_pil = Image.fromarray(img_rgb[y1:y2, x1:x2])

    # Resize with aspect ratio preservation
    old_width, old_height = face_pil.size
    ratio = min(target_size[0] / old_width, target_size[1] / old_height)
    # int() truncation can produce 0 for very elongated crops, which would
    # make resize() raise; keep each side at least one pixel.
    new_size = (
        max(1, int(old_width * ratio)),
        max(1, int(old_height * ratio)),
    )
    face_pil = face_pil.resize(new_size, Image.LANCZOS)

    # Pad to target size with the resized face centred on a gray canvas.
    new_image = Image.new("RGB", target_size, (128, 128, 128))  # Gray padding
    offset = (
        (target_size[0] - new_size[0]) // 2,
        (target_size[1] - new_size[1]) // 2,
    )
    new_image.paste(face_pil, offset)

    return new_image

# Preprocessing transform for model input
# Applied to the PIL face images before they are batched and sent to the model:
#   1. ToTensor converts the PIL image to a tensor.
#   2. Normalize subtracts 0.5 and divides by 0.5 on each of the three channels.
# Per the torchvision docs ToTensor scales 8-bit PIL images to [0, 1], so the
# result should lie in [-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Load an already-cropped face image from disk (no MTCNN face extraction)
def load_cropped_face(image_path, target_size=(160, 160)):
    """Load an image file as RGB and resize it to ``target_size``.

    No face detection is performed and the aspect ratio is not preserved: the
    whole image is stretched to ``target_size``.

    Args:
        image_path: Path of the image file to open.
        target_size: ``(width, height)`` passed to ``Image.resize``.

    Returns:
        The resized RGB ``PIL.Image``, or ``None`` if loading or resizing
        fails for any reason (the error is reported with ``print``).
    """
    try:
        # The context manager closes the file handle explicitly, which matters
        # when thousands of images are loaded in one evaluation loop.
        # convert() returns an independent image, so it stays usable afterwards.
        with Image.open(image_path) as source:
            img = source.convert('RGB')
        return img.resize(target_size, Image.LANCZOS)
    except Exception as e:
        # Deliberate best-effort: callers skip pairs whose images fail to load.
        print(f"Error loading image {image_path}: {e}")
        return None

# Manual testing function
def test_manual_pair(model, id_image_path, selfie_image_path, transform):
    """Compare one ID photo with one selfie and show the result.

    Faces are extracted from both files with ``extract_face`` (MTCNN), embedded
    by ``model``, and the Euclidean distance between the two embeddings is
    turned into a heuristic similarity percentage. The two faces are displayed
    in grayscale side by side with the similarity as the figure title, and the
    distance and similarity are printed.

    Args:
        model: Siamese model returning a pair of embeddings for a pair of inputs.
        id_image_path: Path of the ID image.
        selfie_image_path: Path of the selfie image.
        transform: Callable turning a PIL image into a model-ready tensor.

    Returns:
        None. Returns early, after printing a message, when face extraction
        fails for either image.
    """
    model.eval()

    # Use MTCNN to extract a face from each file first. For inputs that are
    # already cropped faces, load_cropped_face() can be substituted here.
    faces = [extract_face(path) for path in (id_image_path, selfie_image_path)]
    if any(face is None for face in faces):
        print("Face extraction failed for one or both images. Aborting.")
        return

    # Grayscale copies are for display only; the model receives the RGB faces.
    previews = [face.convert("L") for face in faces]

    # Add a batch dimension and move each tensor to the module-level device.
    id_tensor, selfie_tensor = (
        transform(face).unsqueeze(0).to(device) for face in faces
    )

    # Debug shapes
    print(f"ID tensor shape: {id_tensor.shape}")
    print(f"Selfie tensor shape: {selfie_tensor.shape}")

    with torch.no_grad():
        emb1, emb2 = model(id_tensor, selfie_tensor)

    # Euclidean distance; the small epsilon is added under the square root.
    squared_gap = torch.sum((emb1 - emb2) ** 2)
    dist = torch.sqrt(squared_gap + 1e-10).item()
    # Heuristic mapping: distance 0 -> 100 %, distance >= 1.5 -> 0 %. Tune if needed.
    score = 100 * (1 - dist / 1.5)
    similarity = max(0, score)

    # One panel per face, titled with its role.
    fig, panels = plt.subplots(1, 2, figsize=(10, 5))
    captions = ("ID Image", "Selfie Image")
    for panel, preview, caption in zip(panels, previews, captions):
        panel.imshow(preview, cmap="gray")
        panel.set_title(caption)
        panel.axis("off")
    plt.suptitle(f"Similarity: {similarity:.2f}%", fontsize=16)
    plt.show(block=True)

    print(f"Distance: {dist:.4f}, Similarity: {similarity:.2f}%")

Using device: cuda


  from .autonotebook import tqdm as notebook_tqdm
  state_dict = torch.load(state_dict_path)
  state_dict = torch.load(state_dict_path)
  state_dict = torch.load(state_dict_path)


In [3]:
if __name__ == "__main__":
    # Load trained model.
    # map_location remaps the stored tensors onto the active device, so a
    # checkpoint saved on a GPU machine also loads on a CPU-only one.
    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all a state_dict needs, and avoids running arbitrary pickled code.
    model = SiameseNetwork().to(device)
    model.load_state_dict(
        torch.load("siamese_facenet.pth", map_location=device, weights_only=True)
    )
    model.eval()

    # Test pair
    test_id_image = r"D:/Projects/PhotosWorkl/extracted_id_faces/person_116_id_face.jpg"
    test_selfie_image = r"D:/Projects/finalGPT/originals/1191/1191-5.jpg"

    # Test with face extraction
    test_manual_pair(model, test_id_image, test_selfie_image, transform)


  state_dict = torch.load(cached_file)
  model.load_state_dict(torch.load("siamese_facenet.pth"))


ID tensor shape: torch.Size([1, 3, 160, 160])
Selfie tensor shape: torch.Size([1, 3, 160, 160])
Distance: 1.7323, Similarity: 0.00%


In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch

def _load_pairs_frame(csv_path):
    """Read the pairs CSV and return it with id_path / selfie_path / label columns."""
    # pandas consumes the first line as column names by default, so a
    # header-less file can only be recognised from that line's content: a
    # numeric third field is a label, i.e. a data row rather than a header.
    first_row = pd.read_csv(csv_path, header=None, nrows=1)
    third_field = pd.to_numeric(first_row.iloc[0, 2], errors="coerce")
    if pd.notna(third_field):
        headerless = pd.read_csv(csv_path, header=None)
        return headerless.rename(columns={0: "id_path", 1: "selfie_path", 2: "label"})

    df = pd.read_csv(csv_path)
    # Match the expected names case-insensitively; fall back to column position.
    col_map = {c.lower(): c for c in df.columns}
    return df.rename(columns={
        col_map.get("id_path", list(df.columns)[0]): "id_path",
        col_map.get("selfie_path", list(df.columns)[1]): "selfie_path",
        col_map.get("label", list(df.columns)[2]): "label"
    })


def evaluate_siamese_on_csv(model, csv_path, transform, threshold=1.0, save_path=None):
    """
    Evaluate Siamese model on a dataset of pairs (CSV format).

    Each pair is loaded with ``load_cropped_face`` (no MTCNN), embedded by the
    model, and predicted as "same" (1) when the Euclidean distance between the
    two embeddings is below ``threshold``. A 2x2 confusion matrix (rows = true
    label, columns = prediction) and the accuracy are printed, and the matrix
    is plotted. Pairs whose images cannot be loaded are skipped with a message.

    Args:
        model: Trained Siamese model (nn.Module). It is switched to eval mode.
        csv_path: Path to CSV file with [id_path, selfie_path, label]. The file
            may have a header row (names matched case-insensitively, otherwise
            by position) or no header at all.
        transform: Transform to preprocess input images.
        threshold: Distance threshold for deciding match (default 1.0).
        save_path: If not None, saves confusion matrix to this path.

    Raises:
        ValueError: If a label is not 0 or 1 after conversion to int.
    """
    df = _load_pairs_frame(csv_path)

    # Same as test_manual_pair: make sure layers such as batch norm and
    # dropout run in inference mode regardless of the caller's last mode.
    model.eval()

    all_labels, all_preds = [], []

    for _, row in df.iterrows():
        id_path = row["id_path"]
        selfie_path = row["selfie_path"]
        label = int(row["label"])
        # A label such as -1 would silently wrap around to the last row of
        # the confusion matrix, so reject anything outside {0, 1} up front.
        if label not in (0, 1):
            raise ValueError(
                f"Label must be 0 or 1, got {label} for pair {id_path}, {selfie_path}"
            )

        id_img = load_cropped_face(id_path)   # pre-cropped faces, no MTCNN
        selfie_img = load_cropped_face(selfie_path)

        if id_img is None or selfie_img is None:
            print(f"Skipping {id_path}, {selfie_path}")
            continue

        id_tensor = transform(id_img).unsqueeze(0).to(device)
        selfie_tensor = transform(selfie_img).unsqueeze(0).to(device)

        with torch.no_grad():
            emb1, emb2 = model(id_tensor, selfie_tensor)
            dist = torch.sqrt(torch.sum((emb1 - emb2) ** 2) + 1e-10).item()
            pred = 1 if dist < threshold else 0

        all_labels.append(label)
        all_preds.append(pred)

    # Confusion matrix: cm[true, predicted]
    cm = np.zeros((2, 2), dtype=int)
    for t, p in zip(all_labels, all_preds):
        cm[t, p] += 1

    # Every pair may have been skipped; avoid a division by zero in that case.
    total = int(cm.sum())
    accuracy = (cm[0, 0] + cm[1, 1]) / total if total else float("nan")
    print("Confusion Matrix:\n", cm)
    print(f"Accuracy: {accuracy*100:.2f}%")

    # Plot confusion matrix
    fig, ax = plt.subplots()
    im = ax.imshow(cm, cmap="Blues")

    ax.set_xticks([0, 1])
    ax.set_yticks([0, 1])
    ax.set_xticklabels(["Pred 0 (Different)", "Pred 1 (Same)"])
    ax.set_yticklabels(["True 0 (Different)", "True 1 (Same)"])
    ax.set_xlabel("Predicted")
    ax.set_ylabel("True")
    plt.colorbar(im, ax=ax)

    # Write each count into its cell (x = predicted column, y = true row).
    for i in range(2):
        for j in range(2):
            ax.text(j, i, cm[i, j], ha="center", va="center", color="red")

    # Title and save through the explicit axes/figure rather than pyplot's
    # implicit "current" objects.
    ax.set_title(f"Confusion Matrix - Siamese (Threshold={threshold})")

    if save_path:
        fig.savefig(save_path, dpi=300, bbox_inches="tight")
        print(f"Confusion matrix saved to {save_path}")
    else:
        plt.show()


In [5]:
# Evaluate the Siamese network on the labelled pairs in train_pairs_balanced.csv
# with a distance threshold of 1.0, and save the confusion matrix as a PNG.
# NOTE(review): the CSV name suggests these are training/development pairs
# rather than a held-out test set — confirm before reporting this accuracy.
evaluate_siamese_on_csv(
    model, 
    "D:/Projects/PhotosWorkl/train_pairs_balanced.csv", 
    transform=transform,
    threshold=1.0, 
    save_path="siamese_confusion_matrix_Pretraind_OWN_DAT_ONDEVELOPMENTDATA.png"
)

Confusion Matrix:
 [[1569  760]
 [ 831 1498]]
Accuracy: 65.84%
Confusion matrix saved to siamese_confusion_matrix_Pretraind_OWN_DAT_ONDEVELOPMENTDATA.png
