# Library imports

In [1]:
import tensorflow as tf 
import numpy as np
import os 
import cv2
import random
from tensorflow.keras.models import Model
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Input,Flatten,Dense,Dropout,Lambda
from tensorflow.keras.regularizers import l2


# Configuration

In [2]:
# Target image size in pixels (width, height) used when resizing inputs
img_width, img_height = 128, 128

# Length of the face-embedding vector produced by the embedding model
Embedding_size = 128

# How many triplets are sampled for each training step
Batch_size = 32

# Dataset root and its "Train" / "Test" sub-folders
dataset_path = 'celebrity'
train_path, test_path = (
    os.path.join(dataset_path, split) for split in ("Train", "Test")
)


# Data Loading and Preparation

In [3]:
def load_data(path):
    """Map each person (a sub-directory of ``path``) to their image files.

    Args:
        path: Directory that contains one sub-directory per person.

    Returns:
        dict: ``{person_name: [file_path, ...]}``. People and file paths are
        sorted by name so that index-based choices made by callers (for
        example "first image is the reference") are reproducible across runs
        and platforms. Only regular files are listed; nested directories
        inside a person's folder are skipped. A person whose folder holds no
        files maps to an empty list.
    """
    data = {}
    # os.listdir returns entries in arbitrary order, so sort explicitly.
    for person in sorted(os.listdir(path)):
        person_path = os.path.join(path, person)
        if not os.path.isdir(person_path):
            continue
        candidates = (
            os.path.join(person_path, name)
            for name in sorted(os.listdir(person_path))
        )
        # Keep regular files only; a sub-directory is not an image.
        data[person] = [p for p in candidates if os.path.isfile(p)]

    return data


def preprocess_image(filepath):
    """Load an image file and prepare it as model input.

    The file is read with OpenCV, resized using ``(img_width, img_height)``
    as the target size, converted to ``float32`` and divided by 255.

    Returns:
        The processed image array, or ``None`` when OpenCV cannot read
        the file.
    """
    raw = cv2.imread(filepath)
    if raw is not None:
        resized = cv2.resize(raw, (img_width, img_height))
        return resized.astype('float32') / 255.0
    return None


# Model Architecture

In [4]:
def create_embedding_model(input_shape):
    """Build the face-embedding network on top of a frozen ResNet50.

    Args:
        input_shape: Shape forwarded to ResNet50 as ``input_shape``.

    Returns:
        A Keras ``Model`` mapping the ResNet50 input to a vector of length
        ``Embedding_size`` (the final layer has no activation).
    """
    # ImageNet-pretrained backbone without its fully connected top layers
    backbone = ResNet50(
        input_shape=input_shape,
        include_top=False,
        weights='imagenet',
    )

    # Keep the pretrained weights fixed so only the new head is trained
    for layer in backbone.layers:
        layer.trainable = False

    # Head: flatten -> L2-regularised dense -> dropout -> linear embedding
    features = Flatten()(backbone.output)
    hidden = Dense(512, activation='relu', kernel_regularizer=l2(0.01))(features)
    hidden = Dropout(0.5)(hidden)
    embedding = Dense(Embedding_size, activation=None)(hidden)

    return Model(inputs=backbone.input, outputs=embedding)



# Training With Triplet loss

In [5]:
def get_triplet_loss(margin=0.2):
    """Build a triplet loss function with the given margin.

    The returned ``triplet_loss(y_true, y_pred)`` expects ``y_pred`` to hold
    the anchor, positive and negative embeddings side by side along axis 1,
    each ``Embedding_size`` columns wide. ``y_true`` is not used.
    """
    def triplet_loss(y_true, y_pred):
        # Column boundaries of the three concatenated embeddings
        first_cut = Embedding_size
        second_cut = Embedding_size * 2

        anchor = y_pred[:, :first_cut]
        positive = y_pred[:, first_cut:second_cut]
        negative = y_pred[:, second_cut:]

        def squared_distance(a, b):
            # Squared Euclidean distance per row
            return tf.reduce_sum(tf.square(a - b), axis=-1)

        pos_dist = squared_distance(anchor, positive)
        neg_dist = squared_distance(anchor, negative)

        # max(0, pos_dist - neg_dist + margin): zero once the negative is
        # farther from the anchor than the positive by at least the margin,
        # otherwise the remaining gap is the loss. Averaged over the batch.
        hinge = tf.maximum(pos_dist - neg_dist + margin, 0.0)
        return tf.reduce_mean(hinge)

    return triplet_loss

def generate_triplets(data, num_triplets, embedding_model):
    """Sample a batch of (anchor, positive, negative) image triplets.

    Anchor and positive are two different images of the same person; the
    negative is an image of a different person. All three are loaded with
    ``preprocess_image``; a triplet is kept only if all three load.

    Args:
        data: Mapping of person name to a list of image paths.
        num_triplets: Number of triplets requested.
        embedding_model: Not used by this function; kept so existing callers
            keep working.

    Returns:
        Tuple ``(anchors, positives, negatives)`` of NumPy arrays with one
        row per triplet. Sampling is retried (up to ``10 * num_triplets``
        attempts) so unreadable images do not shrink the batch. If no
        triplet can be produced at all, a single all-zero triplet is
        returned as a placeholder.
    """
    people = list(data.keys())
    # An anchor/positive pair needs a person with at least two images.
    anchor_people = [p for p in people if len(data[p]) >= 2]
    # A negative needs a person with at least one image; choosing from an
    # empty image list would raise IndexError in random.choice.
    negative_people = [p for p in people if data[p]]

    triplets = []
    # Every anchor person is also in negative_people, so two entries there
    # guarantee that some *other* person is available as the negative.
    can_sample = bool(anchor_people) and len(negative_people) >= 2
    attempts_left = num_triplets * 10 if can_sample else 0

    while len(triplets) < num_triplets and attempts_left > 0:
        attempts_left -= 1

        person_anchor = random.choice(anchor_people)
        anchor_path, positive_path = random.sample(data[person_anchor], 2)

        person_negative = random.choice(
            [p for p in negative_people if p != person_anchor]
        )
        negative_path = random.choice(data[person_negative])

        images = [
            preprocess_image(p)
            for p in (anchor_path, positive_path, negative_path)
        ]
        # Drop the whole triplet if any of its images failed to load.
        if all(img is not None for img in images):
            triplets.append(tuple(images))

    if not triplets:
        # Placeholder batch in the (height, width, channels) layout that
        # cv2.resize yields in preprocess_image.
        shape = (1, img_height, img_width, 3)
        return (
            np.zeros(shape, dtype='float32'),
            np.zeros(shape, dtype='float32'),
            np.zeros(shape, dtype='float32'),
        )

    anchors, positives, negatives = (
        np.array(group) for group in zip(*triplets)
    )
    return anchors, positives, negatives



# Face Recognition

In [6]:
def get_embedding(model, image_path):
    """Generate the embedding vector for a single image file.

    Args:
        model: Object whose ``predict`` method is called on a one-image batch.
        image_path: Path of the image to embed.

    Returns:
        The first (only) row of ``model.predict``'s output, or ``None`` when
        ``preprocess_image`` could not load the image.
    """
    image = preprocess_image(image_path)
    if image is None:
        # Unreadable image: report it to the caller instead of feeding
        # None into model.predict.
        return None
    batch = np.expand_dims(image, axis=0)
    return model.predict(batch)[0]

def verify_faces(model, data, threshold=0.5):
    """Run 1:1 face verification over a test set and print the results.

    For every person, the first image is used as the reference ("anchor")
    and each remaining image is compared against it by Euclidean distance
    between embeddings.

    Args:
        model: Embedding model passed to ``get_embedding``.
        data: Mapping of person name to a list of image paths.
        threshold: Distances strictly below this value count as a match.
    """
    people = list(data.keys())

    print("\n-------- Verifying Faces ------------")

    # Reference embedding per person, taken from their first image
    anchor_embeddings = {}
    for person in people:
        images = data[person]
        if images:
            anchor_embeddings[person] = get_embedding(model, images[0])

    for person_test in people:
        print(f"\nVerifying images for {person_test} : ")

        anchor_embedding = anchor_embeddings.get(person_test)
        if anchor_embedding is None:
            # No images, or the reference image had no embedding:
            # nothing to compare against.
            continue

        # Every image after the reference one is a test image
        for image_path in data[person_test][1:]:
            # basename works for any OS path separator and avoids nesting
            # double quotes inside an f-string (a SyntaxError before 3.12).
            image_name = os.path.basename(image_path)

            test_embedding = get_embedding(model, image_path)
            if test_embedding is None:
                print(f"  Skipped: {image_name} -> no embedding available.")
                continue

            distance = np.linalg.norm(test_embedding - anchor_embedding)

            if distance < threshold:
                print(f" Mathch : {image_name} -> Same Person ({person_test}) . Distance : {distance:.4f}")
            else:
                print(f"  No Match: {image_name} -> Different person. Distance: {distance:.4f}")


def identify_person(model,gallery_data,unknown_image_path, threshold = 0.5):
    """
        Match a new face image against a gallery of known people (1:N).

        Args:
            model (tf.keras.Model): The trained embedding model.
            gallery_data (dict): Known people mapped to their image paths;
                the first image of each person serves as the reference.
            unknown_image_path (str): File path of the image to identify.
            threshold (float): Distances strictly below this are a match.

        Returns:
            str: Name of the closest gallery person when the distance is
            below the threshold, otherwise "Unknown".
    """
    print(f"\n--- Identifying {os.path.basename(unknown_image_path)} ---")

    # Step 1: embed the query image
    query = get_embedding(model, unknown_image_path)
    if query is None:
        return "Unknown"

    # Step 2: embed one reference image per gallery person
    references = {
        name: get_embedding(model, paths[0])
        for name, paths in gallery_data.items()
        if paths
    }

    # Step 3: scan for the nearest reference embedding
    closest_name = "Unknown"
    closest_gap = float('inf')
    for name, reference in references.items():
        if reference is None:
            continue
        gap = np.linalg.norm(query - reference)
        if gap < closest_gap:
            closest_name, closest_gap = name, gap

    # Step 4: accept the nearest candidate only below the threshold
    if not closest_gap < threshold:
        print(f"  No match found. Closest is {closest_name} with distance {closest_gap:.4f} (Above threshold)")
        return "Unknown"

    print(f" Best Mathc : {closest_name} with distace {closest_gap:.4f} (Below Threshold)")
    return closest_name
                

# Main Execution

In [8]:
if __name__ == "__main__":
    # Load the data from the Train and Test folders
    train_data = load_data(train_path)
    test_data = load_data(test_path)

    # Keras image inputs are (height, width, channels), which is also the
    # layout cv2.resize produces in preprocess_image. Width and height are
    # equal today, but keep the order right in case they diverge.
    image_shape = (img_height, img_width, 3)

    # Create the embedding model
    embedding_model = create_embedding_model(input_shape=image_shape)

    # The training model takes three inputs (anchor, positive, negative) and
    # passes each through the same shared embedding model.
    input_anchor = Input(shape=image_shape, name="input_anchor")
    input_positive = Input(shape=image_shape, name="input_positive")
    input_negative = Input(shape=image_shape, name='input_negative')

    embedding_anchor = embedding_model(input_anchor)
    embedding_positive = embedding_model(input_positive)
    embedding_negative = embedding_model(input_negative)

    # Concatenate the three embeddings side by side so the triplet loss can
    # slice them back apart. The built-in Concatenate layer replaces an
    # anonymous Lambda around tf.concat.
    merged_output = tf.keras.layers.Concatenate(axis=1)(
        [embedding_anchor, embedding_positive, embedding_negative]
    )

    triplet_model = Model(
        inputs=[input_anchor, input_positive, input_negative],
        outputs=merged_output,
    )

    # Compile the model with our custom Triplet Loss
    triplet_model.compile(optimizer='adam', loss=get_triplet_loss())

    print("\n--- Starting Training ---")

    # Custom training loop: a fresh random triplet batch for every step
    EPOCHS = 10
    STEPS_PER_EPOCH = 50
    for epoch in range(EPOCHS):
        print(f"Epoch {epoch+1}/{EPOCHS}")
        for step in range(STEPS_PER_EPOCH):
            # Generate a batch of triplets
            anchors, positives, negatives = generate_triplets(train_data, Batch_size, embedding_model)

            # Train the model on the triplet batch
            loss = triplet_model.train_on_batch(
                [anchors, positives, negatives],
                y=np.zeros(len(anchors))  # y_true is ignored by triplet loss, so we use dummy data
            )
            print(f"  Step {step+1}/{STEPS_PER_EPOCH}, Loss: {loss:.4f}")

    print("\n--- Training Complete ---")
    
   
   


--- Starting Training ---
Epoch 1/10
  Step 1/50, Loss: 19.9206
  Step 2/50, Loss: 19.3309
  Step 3/50, Loss: 27.5612
  Step 4/50, Loss: 28.3178
  Step 5/50, Loss: 31.7409
  Step 6/50, Loss: 35.7341
  Step 7/50, Loss: 41.0150
  Step 8/50, Loss: 41.6133
  Step 9/50, Loss: 42.2676
  Step 10/50, Loss: 45.7333
  Step 11/50, Loss: 45.1018
  Step 12/50, Loss: 45.7017
  Step 13/50, Loss: 43.6471
  Step 14/50, Loss: 43.2570
  Step 15/50, Loss: 41.9231
  Step 16/50, Loss: 40.8208
  Step 17/50, Loss: 39.6795
  Step 18/50, Loss: 38.5118
  Step 19/50, Loss: 37.8372
  Step 20/50, Loss: 37.0636
  Step 21/50, Loss: 36.2066
  Step 22/50, Loss: 35.0594
  Step 23/50, Loss: 34.2588
  Step 24/50, Loss: 33.2150
  Step 25/50, Loss: 32.1551
  Step 26/50, Loss: 31.2609
  Step 27/50, Loss: 30.2490
  Step 28/50, Loss: 29.2580
  Step 29/50, Loss: 28.3494
  Step 30/50, Loss: 27.4502
  Step 31/50, Loss: 26.6147
  Step 32/50, Loss: 25.8153
  Step 33/50, Loss: 25.0579
  Step 34/50, Loss: 24.3413
  Step 35/50, Loss:

In [9]:
# Evaluate the model on the test data (1:1 Verification)
verify_faces(embedding_model, test_data)

# Demonstrate Face Identification (1:N Matching)
# We will use an image from a person in the test set as our "unknown" face
# and try to identify it against the entire database.

# The demo person is fixed (not random). Use .get so that a test set without
# this person falls through to the message below instead of raising KeyError.
demo_person = 'Angelina Jolie'
demo_images = test_data.get(demo_person, [])

if len(demo_images) > 1:
    # The first image is the gallery reference, so query with the second one
    unknown_image_path = demo_images[1]

    # We pass the entire test_data as our gallery of known faces
    identified_person = identify_person(embedding_model, test_data, unknown_image_path)

    print(f"Final Identification for {os.path.basename(unknown_image_path)}: {identified_person}")
else:
    print("\nCould not perform identification demonstration. Test data not structured as expected.")



-------- Verifying Faces ------------
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━