In [None]:
!pip3 install face_recognition

## Data Loading

In [None]:
# Mount Google Drive at /content/drive so the dataset folders stored there
# are reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!ls

drive  sample_data


In [None]:
import os

def count_mp4_files(directory):
    """Count the files whose name ends in ".mp4" anywhere under `directory`.

    The directory tree is walked recursively with `os.walk`; the suffix
    comparison is case-sensitive.

    Args:
        directory: Path of the root folder to search.

    Returns:
        The number of matching files as an int.
    """
    return sum(
        1
        for _root, _subdirs, filenames in os.walk(directory)
        for filename in filenames
        if filename.endswith(".mp4")
    )

# Project folders on the mounted Drive whose .mp4 contents we want to tally.
directories = [
    '/content/drive/MyDrive/Final Year Project/DFDC_Dataset',
    '/content/drive/MyDrive/Final Year Project/DFDC_FAKE_Face_only_data',
    '/content/drive/MyDrive/Final Year Project/DFDC_REAL_Face_only_data',
    '/content/drive/MyDrive/Final Year Project/DFDC_sample_face_only_data',
    '/content/drive/MyDrive/Final Year Project/test_videos',
    '/content/drive/MyDrive/Final Year Project/validation_set'
]

# `map` is lazy, so each folder is counted right before its line is printed.
for directory, count in zip(directories, map(count_mp4_files, directories)):
    print(f'Number of .mp4 files in {directory}: {count}')

Number of .mp4 files in /content/drive/MyDrive/Final Year Project/DFDC_Dataset: 400
Number of .mp4 files in /content/drive/MyDrive/Final Year Project/DFDC_FAKE_Face_only_data: 1566
Number of .mp4 files in /content/drive/MyDrive/Final Year Project/DFDC_REAL_Face_only_data: 1727
Number of .mp4 files in /content/drive/MyDrive/Final Year Project/DFDC_sample_face_only_data: 0
Number of .mp4 files in /content/drive/MyDrive/Final Year Project/test_videos: 400
Number of .mp4 files in /content/drive/MyDrive/Final Year Project/validation_set: 400


## Import Libraries

In [None]:
import glob
import os
import cv2
import numpy as np
import face_recognition
from tqdm import tqdm

## Sample Videos Dataset

In [None]:
# Glob pattern matching every sample training video on the mounted Drive
video_files_path = '/content/drive/MyDrive/Final Year Project/DFDC_Dataset/train_sample_videos/*.mp4'
# glob returns matches in arbitrary order; sort them so the processing order
# (and any printed per-video output) is the same on every run.
video_files = sorted(glob.glob(video_files_path))

## Average Frame Count

In [None]:
# Frame counts of the sample videos that are long enough to be used
frame_count = []

# Iterate over each video file
for video_file in video_files:
    cap = cv2.VideoCapture(video_file)
    try:
        num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # Release before the length check so that skipped (short) videos
        # do not leak their capture handle.
        cap.release()

    # Videos with fewer than 150 frames are excluded from the statistics
    if num_frames < 150:
        continue

    frame_count.append(num_frames)

# Report the per-video counts, how many videos qualified, and their mean length
print("frames", frame_count)
print("Total number of videos:", len(frame_count))
if frame_count:
    average_frames = np.mean(frame_count)
    print('Average frame per video:', average_frames)
else:
    # The filter above keeps videos with exactly 150 frames too
    print('No videos with at least 150 frames found.')


frames [300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 298, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 298, 300, 300, 300, 298, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 298, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 298, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 298, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 30

## Frame Extraction

In [None]:
def frame_extract(path):
    """Lazily yield the frames of a video file, one at a time.

    Args:
        path: Path of the video file, passed to `cv2.VideoCapture`.

    Yields:
        Each frame returned by `VideoCapture.read()`, in order, until a read
        reports failure (end of stream or a file that could not be opened).

    The capture is released when the generator is exhausted, closed, or
    garbage-collected, so stopping iteration early does not leak the handle.
    """
    vidObj = cv2.VideoCapture(path)  # Open the video file
    try:
        while True:
            success, image = vidObj.read()  # Read the next frame
            if not success:
                break
            yield image
    finally:
        # Runs on normal exhaustion and on early termination of the generator
        vidObj.release()

## Frames Processing

In [None]:
def _write_face_crops(out, frames, face_size):
    """Detect faces in a batch of frames and append each resized crop to `out`."""
    batch_locations = face_recognition.batch_face_locations(frames)
    for frame, face_locations in zip(frames, batch_locations):
        for top, right, bottom, left in face_locations:
            try:
                # Extract the face region, resize it, and write to the output video
                out.write(cv2.resize(frame[top:bottom, left:right, :], face_size))
            except Exception as e:
                # Best effort: a crop that cannot be resized/written is reported and skipped
                print("Error processing frame:", e)


def create_face_videos(path_list, out_dir, max_frames=150, batch_size=4):
    """Write a face-only video for each input video.

    For every path in `path_list`, the first `max_frames` frames are read,
    faces are located in batches of `batch_size` frames, and each detected
    face is cropped, resized to 112x112 and appended to an output video saved
    in `out_dir` under the input's base name. Inputs whose output file already
    exists are skipped.

    Args:
        path_list: Iterable of input video paths.
        out_dir: Directory for the output videos; created if missing.
        max_frames: Number of leading frames to process per video.
        batch_size: Number of frames handed to the face detector at once.

    Returns:
        None. Results are written to disk.

    NOTE(review): the output file is created as soon as processing of a video
    starts, so a run interrupted mid-video leaves a partial file that a later
    run will skip as already existing.
    """
    # Ensure the output directory exists
    os.makedirs(out_dir, exist_ok=True)

    # Report how many videos are already present in the output directory
    already_present = glob.glob(os.path.join(out_dir, '*.mp4'))
    print("No of videos already present:", len(already_present))

    face_size = (112, 112)

    for path in tqdm(path_list):
        out_path = os.path.join(out_dir, os.path.basename(path))

        # Skip processing if the output file already exists
        if os.path.exists(out_path):
            print("File Already exists:", out_path)
            continue

        out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc('M','J','P','G'), 30, face_size)
        try:
            frames = []  # Current batch awaiting face detection
            for idx, frame in enumerate(frame_extract(path)):
                if idx >= max_frames:
                    # Stop decoding once the frame budget is used up
                    break
                frames.append(frame)
                if len(frames) == batch_size:
                    _write_face_crops(out, frames, face_size)
                    frames = []
            # Flush the final partial batch so trailing frames are not dropped
            if frames:
                _write_face_crops(out, frames, face_size)
        finally:
            # Always finalize the writer, even if processing raised
            out.release()

In [None]:
# Process every sample video collected by the glob in the "Sample Videos Dataset" cell
path_list = video_files
# Drive folder that receives the face-only videos (created by create_face_videos if missing)
out_dir = '/content/drive/MyDrive/Final Year Project/DFDC_sample_face_only_data'

# Detect faces in each video and save the crops as new videos.
# Inputs whose output file already exists in out_dir are skipped.
create_face_videos(path_list, out_dir)

No of videos already present: 1


 34%|███▍      | 138/400 [1:57:15<3:43:04, 51.08s/it]