In [2]:
import os
import cv2
import tempfile
from deepface import DeepFace
from urllib.request import urlopen
import shutil
import pandas as pd
import numpy as np
from PIL import Image, ImageEnhance
import pandas as pd
from src import logger

## method-2


In [3]:
columns = ['image_path', 'hash', 'score', 'no_of_occurance' , 'recent_occurance']

# Create an empty DataFrame with the specified columns
df = pd.DataFrame(columns=columns)

In [None]:
from src import logger
def handle_unique(output_directory , face_path  , j , model_name , distance_metric , score):
    
    """
    Handles the faces that are unique by creating a new row for unique image.
    This is done by extracting the hash using deepface.find() of the face image
    
    Args:
        output_directory (str): returned by deepface.find() function.
        face_path(str) : Path of the current unique face
        j (int): Current video number which we are iterating
        
    
    Returns:
        None: Just updates the df
    """
    
    global df
    
    logger.info("Adding unique image in the dataframe...")

    # Finding hash of image
    hash = DeepFace.find(
            img_path=face_path,
            db_path=output_directory,
            model_name=model_name,
            threshold = 0.8,
            distance_metric=distance_metric
        )[0].iloc[0]['hash']
    
    
    # Creating new row
    new_row = {
        'image_path' : face_path,
        'hash' : hash,
        'score' : score,
        'no_of_occurance' : 1,
        'recent_occurance': j
    }

    # Update the DataFrame i.e add the new row
    df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)

    logger.info(f"New row for unique image added successfully !!! \n {new_row} ")


def handle_not_unique(results  , score , j):
    """
    Handles the faces that are not unique by updating the score for most similar image.
    This is done by extracting the hash of the most similar image from the results datatype.

    
    Args:
        results: res
        output_directory (str): returned by deepface.find() function.
        score (int): perfomance score of the current video
        j (int): Current video number which we are iterating
        
    
    Returns:
        None: Just updates the df
    """

    global df

    hash = results[0].iloc[0]['hash']

    logger.info(f"Handling Duplicate image with hash: {hash} and j: {j}")

    if df.loc[df['hash'] == hash, 'recent_occurance'].iloc[0] == j:
        logger.info("This face has already occured in this video")
        return

    
    # Update the 'score' and 'no_of_occurances' aswell as 'recent_occurance'  of the row where 'hash' is hash
    df.loc[df['hash'] == hash, 'score'] += score
    df.loc[df['hash'] == hash, 'no_of_occurance'] += 1
    df.loc[df['hash'] == hash, 'recent_occurance'] = j
    
    logger.info("Duplicate image row has been updated !!!")
    


def process_video_frames(frame_directory,j, metric , output_directory="unique_faces", 
                         model_name="Facenet", 
                         distance_metric="cosine", 
                         max_unique_faces = 4,
                         threshold=0.8):
    """
    Process frames from a directory to extract and store unique faces using DeepFace
    
    Args:
        frame_directory (str): Path to directory containing video frames
        output_directory (str): Path to store unique faces
        model_name (str): Face recognition model to use
        distance_metric (str): Metric for face comparison
        threshold (float): Similarity threshold for unique face detection
    """
    # Create directories
    os.makedirs(output_directory, exist_ok=True)
    os.makedirs("temp_unique_faces", exist_ok=True)

    # Track unique faces
    unique_faces_count = 0


    # Iterate through frames
    for frame_filename in sorted(os.listdir(frame_directory)):
        if frame_filename.endswith(('.jpg', '.png', '.jpeg')):
            frame_path = os.path.join(frame_directory, frame_filename)
            
            try:
                # Detect faces using DeepFace
                detections = DeepFace.extract_faces(
                    frame_path, 
                    detector_backend="mtcnn", 
                    enforce_detection=True,
                    align=True
                )
                
                print(f"found faces in this frame")
                
                # Process each detected face
                for i, detection in enumerate(detections):
                    if unique_faces_count >= max_unique_faces:
                        return
                    
                    # Extract the face
                    if detection['confidence'] > 0.95:
                        face = detection['face']
                    else:
                        print("low face confidence")
                        continue
                    
                    # Ensure face is valid
                    if face is None or face.size == 0:
                        print(f"Invalid face detected in {frame_filename}")
                        continue
                    
                    # Convert to uint8 if needed
                    if face.dtype != np.uint8:
                        face = (255 * face).astype(np.uint8)
                    
                    # Convert to PIL Image
                    pil_face = Image.fromarray(face)
                    
                    # Enhance face image
                    enhanced_face = enhance_face_image(pil_face)
                    
                    # Temporary path for current face
                    temp_face_path = os.path.join("temp_unique_faces", f"temp_face.jpg")
                    enhanced_face.save(temp_face_path)

                    
                    

                    # Check if the face is unique
                    is_unique , results = check_unique_face(
                        temp_face_path, 
                        output_directory, 
                        model_name, 
                        distance_metric, 
                        threshold
                    )
                    
                    # Save unique face
                    if is_unique:
                        print("unique face found")
                        unique_filename = f"unique_face_{unique_faces_count}_{j}_{frame_filename}"
                        unique_path = os.path.join(output_directory, unique_filename)
                        print(f"unique_path : {unique_path}")
                        enhanced_face.save(unique_path)
                        unique_faces_count += 1
                        
                        # Call the function to update the df
                        score = metric[j-1]
                        handle_unique(output_directory , unique_path , j , model_name , distance_metric , score)
                    else:
                        # Face is not unique and already exists
                        logger.info("face is not unique")
                        
                        score = metric[j-1]
                        if results is not None:
                            
                            # Call the function to update the df
                            logger.info("result variable is not none and calling the handle_not_unique function")
                            handle_not_unique(results , score , j)
                
                    
                    # Clean up temp face
                    os.remove(temp_face_path)
            
            except Exception as e:
                print(f"Error processing {frame_filename} : No face in this image or frame")

def check_unique_face(face_path, output_directory, model_name, distance_metric, threshold):
    """
    Check if the face is unique compared to existing faces in the output directory
    
    Args:
        face_path (str): Path to the current face image
        output_directory (str): Directory containing existing unique faces
        model_name (str): Face recognition model
        distance_metric (str): Metric for face comparison
        threshold (float): Similarity threshold
    
    Returns:
        bool: True if face is unique, False otherwise
    """
    # If output directory is empty, face is unique
    if not os.listdir(output_directory):
        return True , None
    
    try:
        # Use DeepFace to find similar faces
        results = DeepFace.find(
            img_path=face_path,
            db_path=output_directory,
            model_name=model_name,
            threshold = 0.8,
            distance_metric=distance_metric
        )
        
        # Check if any similar faces are found
        if results is None or (isinstance(results, pd.DataFrame) and results.empty):
            return True , None
        
        # If results exist, check the distance
        if isinstance(results, list) and len(results) > 0:
            # Get the minimum distance
            min_distance = results[0]['distance'].min() if not results[0].empty else float('inf')
            return min_distance > threshold , (None if min_distance > threshold else results)
        

        return False , results
    
    except ValueError as e:
        # If an error occurs (e.g., no faces found), consider it unique
        print(f"Face comparison error: {e}")
        return False , None

def check_unique_face2(face_path, output_directory, model_name, distance_metric, threshold=0.6):
    """
    Improved unique face checking using multiple verification methods
    """
    # If output directory is empty, face is unique
    existing_faces = os.listdir(output_directory)
    if not existing_faces:
        return True
    
    try:
        # Iterate through existing faces and verify each
        for existing_face in existing_faces:
            existing_face_path = os.path.join(output_directory, existing_face)
            
            try:
                # Use verify method for more precise comparison
                verification_result = DeepFace.verify(
                    img1_path=face_path,
                    img2_path=existing_face_path,                    
                    threshold=0.8  # Adjusted threshold
                )
                
                # If verification returns True (similar face found), it's not unique
                if verification_result['verified']:
                    return False
            
            except Exception as verify_error:
                # If verification fails for a specific image, continue checking others
                print(f"Verification error with {existing_face}: {verify_error}")
                continue
                #return False
        
        # If no similar faces found after checking all existing faces
        return True
    
    except Exception as e:
        print(f"Unique face check error: {e}")
        return False

def enhance_face_image(pil_image, resize_dim=(256, 256)):
    """
    Enhance face image quality
    
    Args:
        pil_image (PIL.Image): Input face image
        resize_dim (tuple): Target resize dimensions
    
    Returns:
        PIL.Image: Enhanced face image
    """
    # Resize image
    resized_image = pil_image.resize(resize_dim, Image.LANCZOS)
    
    # Enhance sharpness
    sharpness_enhancer = ImageEnhance.Sharpness(resized_image)
    sharpened_image = sharpness_enhancer.enhance(2.0)
    
    # Enhance contrast
    contrast_enhancer = ImageEnhance.Contrast(sharpened_image)
    enhanced_image = contrast_enhancer.enhance(1.2)
    
    return enhanced_image

# Function to create the main directory for storing unique faces
def create_main_directory(directory="unique_faces"):
    if not os.path.exists(directory):
        os.makedirs(directory)
    return directory

# Function to create/clear the extracted frames directory
def create_extracted_frames_directory(directory="extracted_frames"):
    if os.path.exists(directory):
        shutil.rmtree(directory)  # Remove existing frames
    os.makedirs(directory)
    return directory

# Function to extract frames from a video
def extract_frames(video_path, frame_count=20):
    frames = []
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    interval = max(total_frames // frame_count, 1)  # Calculate frame interval
    
    for i in range(frame_count):
        frame_index = i * interval
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
        ret, frame = cap.read()
        if ret:
            frames.append(frame)
    cap.release()
    return frames
# Function to process videos and extract unique faces
def process_videos(video_urls, metric , main_dir="unique_faces", frame_count=30, model_name="Facenet", distance_metric="cosine", threshold=0.4):
    main_dir = create_main_directory(main_dir)
    
    # Iterate over each video URL
    j = 1
    for video_url in video_urls:
        print(f"Processing video: {video_url}")

        # Create/clear the extracted frames directory for each video
        extracted_frames_dir = create_extracted_frames_directory("extracted_frames")
        
        # Download the video to a temporary file
        with tempfile.NamedTemporaryFile(delete=True, suffix=".mp4") as temp_video:
            temp_video.write(urlopen(video_url).read())
            temp_video.flush()

            # Extract frames from the video
            frames = extract_frames(temp_video.name, frame_count=frame_count)
            
            # Store extracted frames temporarily in the extracted_frames directory
            for frame_index, frame in enumerate(frames):
                frame_path = os.path.join(extracted_frames_dir, f"frame_{frame_index}.jpg")
                cv2.imwrite(frame_path, frame)
            
            # Iterate through extracted frames to detect faces and store unique faces
            process_video_frames(extracted_frames_dir, j , metric)
            
            # Clean up the extracted frames directory after processing
            shutil.rmtree(extracted_frames_dir)
            j += 1

    print("Processing completed.")

# List of video URLs
video_urls = [
    "https://fgimagestorage.blob.core.windows.net/facebook-assets/hd-987643572956494",
    "https://fgimagestorage.blob.core.windows.net/facebook-assets/hd-904174908300812",
    "https://fgimagestorage.blob.core.windows.net/facebook-assets/hd-992064161877405",
    "https://fgimagestorage.blob.core.windows.net/facebook-assets/hd-905739711170399",
    "https://fgimagestorage.blob.core.windows.net/facebook-assets/hd-901695498551491",
    "https://fgimagestorage.blob.core.windows.net/facebook-assets/hd-879631723735807"
]

metric = [0.4 , 0.3 , 0.5 , 0.76 , 1,2 , 2.0]

# Run the process
process_videos(video_urls , metric)


Processing video: https://fgimagestorage.blob.core.windows.net/facebook-assets/hd-987643572956494
found faces in this frame
low face confidence
found faces in this frame
unique face found
unique_path : unique_faces/unique_face_0_1_frame_1.jpg
[2024-12-01 17:01:21,739: INFO: 1708445196: Adding unique image in the dataframe...]
24-12-01 17:01:21 - Found 1 newly added image(s), 0 removed image(s), 0 replaced image(s).


Finding representations: 100%|██████████| 1/1 [00:00<00:00,  5.16it/s]


24-12-01 17:01:21 - 🔴 Exception while extracting faces from unique_faces/unique_face_0_1_frame_1.jpg: Face could not be detected in unique_faces/unique_face_0_1_frame_1.jpg.Please confirm that the picture is a face photo or consider to set enforce_detection param to False.
24-12-01 17:01:21 - There are now 1 representations in ds_model_facenet_detector_opencv_aligned_normalization_base_expand_0.pkl
24-12-01 17:01:21 - Searching unique_faces/unique_face_0_1_frame_1.jpg in 1 length datastore
Error processing frame_1.jpg : No face in this image or frame
found faces in this frame
24-12-01 17:01:23 - Searching temp_unique_faces/temp_face.jpg in 1 length datastore
24-12-01 17:01:24 - find function duration 1.5142741203308105 seconds
unique face found
unique_path : unique_faces/unique_face_1_1_frame_10.jpg
[2024-12-01 17:01:24,559: INFO: 1708445196: Adding unique image in the dataframe...]
24-12-01 17:01:24 - Found 1 newly added image(s), 0 removed image(s), 0 replaced image(s).


Finding representations: 100%|██████████| 1/1 [00:00<00:00,  7.30it/s]

24-12-01 17:01:24 - There are now 2 representations in ds_model_facenet_detector_opencv_aligned_normalization_base_expand_0.pkl
24-12-01 17:01:24 - Searching unique_faces/unique_face_1_1_frame_10.jpg in 2 length datastore





24-12-01 17:01:24 - find function duration 0.2735939025878906 seconds
[2024-12-01 17:01:24,833: INFO: 1708445196: New row for unique image added successfully !!! 
 {'image_path': 'unique_faces/unique_face_1_1_frame_10.jpg', 'hash': '44032fbb50de8c1ab7b2aa186cc8ef492ea951e4', 'score': 0.4, 'no_of_occurance': 1, 'recent_occurance': 1} ]
low face confidence
found faces in this frame
24-12-01 17:01:26 - Searching temp_unique_faces/temp_face.jpg in 2 length datastore
24-12-01 17:01:26 - find function duration 0.15949416160583496 seconds
[2024-12-01 17:01:26,880: INFO: 1708445196: face is not unique]
[2024-12-01 17:01:26,881: INFO: 1708445196: result variable is not none and calling the handle_not_unique function]
[2024-12-01 17:01:26,881: INFO: 1708445196: Handling Duplicate image with hash: 44032fbb50de8c1ab7b2aa186cc8ef492ea951e4 and j: 1]
[2024-12-01 17:01:26,883: INFO: 1708445196: This face has already occured in this video]
found faces in this frame
24-12-01 17:01:27 - Searching temp_u

Finding representations: 100%|██████████| 1/1 [00:00<00:00,  8.73it/s]

24-12-01 17:01:42 - There are now 3 representations in ds_model_facenet_detector_opencv_aligned_normalization_base_expand_0.pkl
24-12-01 17:01:42 - Searching unique_faces/unique_face_2_1_frame_26.jpg in 3 length datastore





24-12-01 17:01:42 - find function duration 0.22615981101989746 seconds
[2024-12-01 17:01:42,654: INFO: 1708445196: New row for unique image added successfully !!! 
 {'image_path': 'unique_faces/unique_face_2_1_frame_26.jpg', 'hash': '7d542ed6eba2baf2648ba9cd13328a86b591ed26', 'score': 0.4, 'no_of_occurance': 1, 'recent_occurance': 1} ]
found faces in this frame
24-12-01 17:01:43 - Searching temp_unique_faces/temp_face.jpg in 3 length datastore
24-12-01 17:01:43 - find function duration 0.1009218692779541 seconds
[2024-12-01 17:01:43,736: INFO: 1708445196: face is not unique]
[2024-12-01 17:01:43,736: INFO: 1708445196: result variable is not none and calling the handle_not_unique function]
[2024-12-01 17:01:43,737: INFO: 1708445196: Handling Duplicate image with hash: 44032fbb50de8c1ab7b2aa186cc8ef492ea951e4 and j: 1]
[2024-12-01 17:01:43,737: INFO: 1708445196: This face has already occured in this video]
Error processing frame_28.jpg : No face in this image or frame
Error processing fr

Finding representations: 100%|██████████| 1/1 [00:00<00:00,  9.28it/s]

24-12-01 17:03:20 - There are now 4 representations in ds_model_facenet_detector_opencv_aligned_normalization_base_expand_0.pkl
24-12-01 17:03:20 - Searching unique_faces/unique_face_0_3_frame_7.jpg in 4 length datastore





24-12-01 17:03:20 - find function duration 0.20979809761047363 seconds
[2024-12-01 17:03:20,588: INFO: 1708445196: New row for unique image added successfully !!! 
 {'image_path': 'unique_faces/unique_face_0_3_frame_7.jpg', 'hash': '61a9a4660d826b66df21da160c40ebc1480e50ba', 'score': 0.5, 'no_of_occurance': 1, 'recent_occurance': 3} ]
found faces in this frame
24-12-01 17:03:22 - Searching temp_unique_faces/temp_face.jpg in 4 length datastore
24-12-01 17:03:22 - find function duration 0.14484310150146484 seconds
[2024-12-01 17:03:22,220: INFO: 1708445196: face is not unique]
[2024-12-01 17:03:22,220: INFO: 1708445196: result variable is not none and calling the handle_not_unique function]
[2024-12-01 17:03:22,221: INFO: 1708445196: Handling Duplicate image with hash: 44032fbb50de8c1ab7b2aa186cc8ef492ea951e4 and j: 3]
[2024-12-01 17:03:22,222: INFO: 1708445196: This face has already occured in this video]
24-12-01 17:03:22 - Searching temp_unique_faces/temp_face.jpg in 4 length datastor

Finding representations: 100%|██████████| 1/1 [00:00<00:00,  7.49it/s]

24-12-01 17:03:36 - There are now 5 representations in ds_model_facenet_detector_opencv_aligned_normalization_base_expand_0.pkl
24-12-01 17:03:36 - Searching unique_faces/unique_face_0_4_frame_16.jpg in 5 length datastore





24-12-01 17:03:36 - find function duration 0.26538801193237305 seconds
[2024-12-01 17:03:36,764: INFO: 1708445196: New row for unique image added successfully !!! 
 {'image_path': 'unique_faces/unique_face_0_4_frame_16.jpg', 'hash': '2ccf5189faa9117122684ad5c73baa48e494bc8c', 'score': 0.76, 'no_of_occurance': 1, 'recent_occurance': 4} ]
low face confidence
found faces in this frame
24-12-01 17:03:37 - Searching temp_unique_faces/temp_face.jpg in 5 length datastore
Face comparison error: Face could not be detected in temp_unique_faces/temp_face.jpg.Please confirm that the picture is a face photo or consider to set enforce_detection param to False.
[2024-12-01 17:03:37,785: INFO: 1708445196: face is not unique]
low face confidence
found faces in this frame
24-12-01 17:03:38 - Searching temp_unique_faces/temp_face.jpg in 5 length datastore
24-12-01 17:03:38 - find function duration 0.1470811367034912 seconds
[2024-12-01 17:03:38,951: INFO: 1708445196: face is not unique]
[2024-12-01 17:03

In [5]:
df

Unnamed: 0,image_path,hash,score,no_of_occurance,recent_occurance
0,unique_faces/unique_face_1_1_frame_10.jpg,44032fbb50de8c1ab7b2aa186cc8ef492ea951e4,1.2,3,3
1,unique_faces/unique_face_2_1_frame_26.jpg,7d542ed6eba2baf2648ba9cd13328a86b591ed26,2.96,5,5
2,unique_faces/unique_face_0_3_frame_7.jpg,61a9a4660d826b66df21da160c40ebc1480e50ba,1.26,2,4
3,unique_faces/unique_face_0_4_frame_16.jpg,2ccf5189faa9117122684ad5c73baa48e494bc8c,0.76,1,4
