In [1]:
import os
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm

In [2]:
def extract_features(video_path):
    """Summarise a video as a handful of intensity and motion statistics.

    Every decodable frame is converted to grayscale and contributes its
    mean / standard deviation / maximum / minimum pixel value. Every pair of
    consecutive frames contributes the mean absolute pixel difference and the
    mean Farneback optical-flow magnitude. The per-frame (or per-pair) values
    are then averaged over the whole video.

    Note that ``std_intensity``, ``max_intensity`` and ``min_intensity`` are
    therefore *averages of per-frame values*, not the std / max / min of the
    whole clip.

    Parameters
    ----------
    video_path : str
        Path handed to ``cv2.VideoCapture``.

    Returns
    -------
    dict or None
        Dictionary with the keys ``mean_intensity``, ``std_intensity``,
        ``max_intensity``, ``min_intensity``, ``mean_frame_diff`` and
        ``motion_intensity``. ``None`` is returned (after printing a message)
        when the file cannot be opened or no frame can be decoded, so callers
        can skip the video instead of recording NaN features.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error opening video file: {video_path}")
        cap.release()
        return None

    # Per-frame intensity statistics (one entry per decoded frame).
    mean_intensities = []
    std_intensities = []
    max_intensities = []
    min_intensities = []
    # Per-pair motion statistics (one entry per pair of consecutive frames).
    frame_diffs = []
    motion_intensities = []

    prev_gray = None
    try:
        while True:
            ret, frame = cap.read()
            if not ret or frame is None:
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Intensity features are collected for *every* frame, including the
            # first one, which previously served only as the motion baseline.
            mean_intensities.append(np.mean(gray))
            std_intensities.append(np.std(gray))
            max_intensities.append(np.max(gray))
            min_intensities.append(np.min(gray))

            # Motion features need a previous frame to compare against.
            if prev_gray is not None:
                frame_diffs.append(np.mean(cv2.absdiff(prev_gray, gray)))

                # Positional arguments after the `None` flow placeholder follow
                # OpenCV's documented order: pyr_scale=0.5, levels=3,
                # winsize=15, iterations=3, poly_n=5, poly_sigma=1.2, flags=0.
                flow = cv2.calcOpticalFlowFarneback(
                    prev_gray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0
                )
                magnitude = np.hypot(flow[..., 0], flow[..., 1])
                motion_intensities.append(np.mean(magnitude))

            prev_gray = gray
    finally:
        # Always free the decoder, even if an OpenCV call raised mid-video.
        cap.release()

    if not mean_intensities:
        # Opened but undecodable: averaging empty lists would yield NaN.
        print(f"No frames could be read from video file: {video_path}")
        return None

    features = {
        "mean_intensity": np.mean(mean_intensities),
        "std_intensity": np.mean(std_intensities),
        "max_intensity": np.mean(max_intensities),
        "min_intensity": np.mean(min_intensities),
        # A single-frame video has no frame pairs; report zero motion.
        "mean_frame_diff": np.mean(frame_diffs) if frame_diffs else 0,
        "motion_intensity": np.mean(motion_intensities) if motion_intensities else 0,
    }

    return features

In [3]:
def process_videos(directory, label):
    """Extract features for every video file found directly in ``directory``.

    Parameters
    ----------
    directory : str
        Folder to scan (not recursive).
    label : str
        Class label stored under the ``"label"`` key of every returned row and
        shown in the progress-bar description.

    Returns
    -------
    list of dict
        One dictionary per successfully processed video: the features returned
        by ``extract_features`` plus ``"label"`` and ``"video_name"``. Videos
        for which ``extract_features`` returns a falsy value are skipped.
    """
    video_extensions = (".mp4", ".avi", ".mov")

    # Sort for a reproducible row order (os.listdir order is arbitrary), match
    # extensions case-insensitively so e.g. "clip.MP4" is not silently dropped,
    # and ignore directories that merely look like video files.
    video_files = sorted(
        name
        for name in os.listdir(directory)
        if name.lower().endswith(video_extensions)
        and os.path.isfile(os.path.join(directory, name))
    )

    video_features = []
    for video_file in tqdm(video_files, desc=f"Processing {label} videos"):
        features = extract_features(os.path.join(directory, video_file))
        if not features:
            continue
        # Build a new row rather than mutating the extractor's return value.
        video_features.append(
            {**features, "label": label, "video_name": video_file}
        )

    return video_features

In [4]:
# Folders holding the two classes of videos; the relative names are resolved
# against the notebook's working directory when they are listed.
REAL_DIR, FAKE_DIR = "REAL", "FAKE"

In [6]:
# Long-running step: every frame of every video goes through optical flow.
# The two calls stay separate so the first result survives if the second
# one is interrupted.
real_features = process_videos(directory=REAL_DIR, label="REAL")
fake_features = process_videos(directory=FAKE_DIR, label="FAKE")

Processing REAL videos:   3%|▎         | 2/74 [06:10<3:42:19, 185.27s/it]


KeyboardInterrupt: 

In [None]:
# Single list of feature rows: all REAL videos first, then all FAKE videos.
all_features = [*real_features, *fake_features]

In [None]:
# One row per video, one column per feature / metadata key.
df = pd.DataFrame(data=all_features)

In [None]:
# Persist the feature table without the positional index column.
df.to_csv(path_or_buf="video_features.csv", index=False)
print("Features saved to video_features.csv")