## Imports

In [10]:
import os
import cv2
import json
import pandas as pd

## Step 1: Load the video

In [8]:
def load_video(video_name, folder_path="../../data/train_sample_videos"):
    """Open a video file located in ``folder_path`` and return its capture object.

    Args:
        video_name: File name of the video, joined onto ``folder_path``.
        folder_path: Directory that is expected to contain the video.

    Returns:
        The opened ``cv2.VideoCapture``. The caller owns it and should call
        ``release()`` once it is done reading frames.

    Raises:
        FileNotFoundError: If the joined path does not exist.
        ValueError: If OpenCV reports that the file could not be opened.
    """
    video_path = os.path.join(folder_path, video_name)
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"Video {video_name} not found in {folder_path}")

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Release the handle before raising so a failed open does not leak it;
        # the caller never receives this object and could not release it itself.
        cap.release()
        raise ValueError(f"Could not open the video: {video_name}")

    print(f"Successfully loaded video: {video_name}")
    return cap

In [9]:
# Testing the function
video_name = "aagfhgtpmv.mp4"  # Replace with an actual video filename in your folder
cap = load_video(video_name)
cap.release()  # Smoke test only: close the handle instead of leaving it open when `cap` is rebound later

Successfully loaded video: aagfhgtpmv.mp4


## Step 2: Convert video into pictures

In [13]:
def extract_frames(cap, frame_interval=1):
    """Read a capture to the end, keeping every ``frame_interval``-th frame.

    Each kept frame is resized to 224x224 and divided by 255.0.
    NOTE(review): the [0, 1] range assumes 8-bit input frames - confirm.

    Args:
        cap: An opened capture object exposing ``read()`` (e.g. the value
            returned by ``load_video``). It is read until ``read()`` reports
            no more frames; it is not released here.
        frame_interval: Keep frames whose zero-based index is a multiple of
            this value. Must be at least 1.

    Returns:
        A list with the resized, scaled frames in reading order.

    Raises:
        ValueError: If ``frame_interval`` is smaller than 1.
    """
    # Validate up front: an interval of 0 would otherwise surface as an
    # unexplained ZeroDivisionError from the modulo below.
    if frame_interval < 1:
        raise ValueError(
            f"frame_interval must be a positive integer, got {frame_interval}"
        )

    frames = []
    frame_count = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break  # Exit if no more frames are available

        if frame_count % frame_interval == 0:  # Check if the current frame should be extracted
            # Resize frame to (224, 224) for CNN input
            resized_frame = cv2.resize(frame, (224, 224)) / 255.0  # Normalize to [0, 1]
            frames.append(resized_frame)

        frame_count += 1

    return frames

In [14]:
# Test usage
video_name = "aagfhgtpmv.mp4"  # Replace with an actual video filename in your folder
cap = load_video(video_name)
try:
    frames = extract_frames(cap, frame_interval=5)  # Extract every 5th frame
finally:
    cap.release()  # The capture has been read to the end; free its handle even if extraction fails
print(f"Extracted {len(frames)} frames.")

Successfully loaded video: aagfhgtpmv.mp4
Extracted 60 frames.


In [15]:
def save_frames(frames, output_folder="./extracted_frames"):
    """Write frames to ``output_folder`` as ``frame_0000.jpg``, ``frame_0001.jpg``, ...

    Args:
        frames: Sequence of array frames scaled by 1/255 (as produced by
            ``extract_frames``); each is multiplied by 255 before saving.
        output_folder: Destination directory; created if it does not exist.

    Returns:
        None. Prints how many frames were actually written.
    """
    # Create output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    saved_count = 0
    for frame_count, frame in enumerate(frames):
        output_path = os.path.join(output_folder, f"frame_{frame_count:04d}.jpg")
        # Round before casting: astype('uint8') alone truncates, so a value that
        # lands just below an integer after the /255 -> *255 round trip would
        # drop one intensity level. Clip so out-of-range values saturate
        # instead of wrapping around.
        pixels = (frame * 255).round().clip(0, 255).astype('uint8')
        # cv2.imwrite signals failure through its return value rather than by
        # raising, so only count the writes that actually succeeded.
        if cv2.imwrite(output_path, pixels):
            saved_count += 1
        else:
            print(f"Warning: failed to write {output_path}")

    print(f"Saved {saved_count} frames to {output_folder}")

In [16]:
# Write the frames extracted above to the default output folder (./extracted_frames)
save_frames(frames)

Saved 60 frames to ./extracted_frames


## Step 3: Create dataset (train, test, val)

In [18]:
def create_datasets(base_dir="../../data"):
    """Collect video paths and labels for the train/validation/test splits.

    Scans ``<base_dir>/<split>/<class>`` for every split in
    ``train``/``validation``/``test`` and every class in ``real``/``deepfake``.
    Missing subfolders are reported with a printed message and skipped.

    Args:
        base_dir: Root directory that contains the split folders.

    Returns:
        A tuple ``(train_df, val_df, test_df)`` of DataFrames. Each always has
        the columns ``file_path`` and ``label`` (0 for ``real``, 1 for
        ``deepfake``), even when no video was found for that split.
    """
    video_extensions = ('.mp4', '.avi', '.mov')  # Add any other video formats if needed
    columns = ['file_path', 'label']
    # One record list per split; the keys double as the folder names to scan.
    records = {'train': [], 'validation': [], 'test': []}

    for folder, rows in records.items():
        for subfolder in ['real', 'deepfake']:
            subfolder_path = os.path.join(base_dir, folder, subfolder)

            # Check if the subfolder exists
            if not os.path.exists(subfolder_path):
                print(f"Subfolder {subfolder_path} does not exist.")
                continue

            # Create a label based on the subfolder
            label = 1 if subfolder == 'deepfake' else 0

            # os.listdir returns entries in arbitrary order; sort so the row
            # order is reproducible across runs and machines.
            for filename in sorted(os.listdir(subfolder_path)):
                # Compare case-insensitively so files such as "clip.MP4" are kept.
                if filename.lower().endswith(video_extensions):
                    rows.append({
                        'file_path': os.path.join(subfolder_path, filename),
                        'label': label
                    })

    # Pass the columns explicitly so an empty split still exposes the schema.
    train_df = pd.DataFrame(records['train'], columns=columns)
    val_df = pd.DataFrame(records['validation'], columns=columns)
    test_df = pd.DataFrame(records['test'], columns=columns)

    return train_df, val_df, test_df

In [20]:
# Build the three split DataFrames from the default base directory (../../data)
train_df, val_df, test_df = create_datasets()

In [21]:
# Inspect the training split (columns: file_path, label)
train_df

Unnamed: 0,file_path,label
0,../../data\train\real\abarnvbtwb.mp4,0
1,../../data\train\real\aelfnikyqj.mp4,0
2,../../data\train\real\afoovlsmtx.mp4,0
3,../../data\train\real\asaxgevnnp.mp4,0
4,../../data\train\real\atvmxvwyns.mp4,0
...,...,...
257,../../data\train\deepfake\esckbnkkvb.mp4,1
258,../../data\train\deepfake\esgftaficx.mp4,1
259,../../data\train\deepfake\esxrvsgpvb.mp4,1
260,../../data\train\deepfake\esyrimvzsa.mp4,1
