In [2]:
# Loading the dataset from the specified paths.
# Preprocessing frames to extract faces in batch.

# Dataset locations for the REAL and FAKE videos.
# The dataset root is defined once and can be overridden through the
# DEEPFAKE_DATASET_ROOT environment variable, so the notebook also runs on
# machines where the data does not live under the default volume.
import os  # needed here: this configuration runs before the notebook's import lines

DATASET_ROOT = os.environ.get(
    "DEEPFAKE_DATASET_ROOT",
    "/Volumes/HDD2 - 12Tb/Developer/GitHub/PhD-Projects/dataset/dataset",
)
REAL_VIDEOS_PATH = os.path.join(DATASET_ROOT, "original_sequences/actors/c40/videos")
FAKE_VIDEOS_PATHS = [
    os.path.join(DATASET_ROOT, "manipulated_sequences/DeepFakeDetection/c40/videos"),
]

# Integer class ids attached to every extracted face.
LABELS = {"REAL": 0, "FAKE": 1}

import os
import cv2
import numpy as np

In [4]:

def _ensure_output_dir(parent_dir, name):
    """Return ``parent_dir/name``, creating the directory first if it is missing."""
    output_dir = os.path.join(parent_dir, name)
    if not os.path.exists(output_dir):
        print(f"Creazione della cartella {name}: {output_dir}")
        os.makedirs(output_dir, exist_ok=True)
    print(f"Cartella {name} confermata: {output_dir}")
    return output_dir


def extract_faces_from_video(video_path, label, cascade_classifier, frame_skip=20):
    """
    Detect faces in a video, save each crop as a JPEG and return the crops.

    Every ``frame_skip``-th frame (starting with frame 0) is resized to
    640x480, converted to grayscale and passed to ``cascade_classifier``.
    Each detected face is cropped from the resized colour frame, resized to
    224x224 and written to a ``faces`` directory that sits next to the
    directory containing the video. A sibling ``frames`` directory is also
    created, although full-frame dumps are currently disabled.

    Args:
        video_path: Path to the video file.
        label: Value stored once per saved face in the returned labels
            (the notebook passes the integer ids from ``LABELS``).
        cascade_classifier: Face detector exposing
            ``detectMultiScale(gray, scaleFactor=..., minNeighbors=...)``.
        frame_skip: Process one frame out of every ``frame_skip``; must be >= 1.

    Returns:
        Tuple ``(faces, labels)`` of numpy arrays with one entry per face that
        was successfully written to disk. Both arrays are empty when the video
        cannot be opened or no face is saved.

    Raises:
        ValueError: If ``frame_skip`` is smaller than 1.
    """
    # Reject a zero step up front; it would otherwise fail with
    # ZeroDivisionError only after the video and directories were touched.
    if frame_skip < 1:
        raise ValueError(f"frame_skip must be >= 1, got {frame_skip}")

    faces = []
    labels = []

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"Errore nell'apertura del video: {video_path}")
            return np.array([]), np.array([])

        # Output directories are siblings of the directory holding the video.
        dataset_dir = os.path.dirname(os.path.dirname(video_path))
        faces_output_dir = _ensure_output_dir(dataset_dir, "faces")
        # Full-frame dumps are disabled; the directory is still created so the
        # on-disk layout stays the same as before.
        _ensure_output_dir(dataset_dir, "frames")

        video_file_name = os.path.splitext(os.path.basename(video_path))[0]
        frame_count = 0
        saved_faces = 0
        print(f"Inizio elaborazione video: {video_file_name}")

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            if frame_count % frame_skip == 0:
                frame = cv2.resize(frame, (640, 480))
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                faces_detected = cascade_classifier.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5)
                for i, (x, y, w, h) in enumerate(faces_detected):
                    face_resized = cv2.resize(frame[y:y + h, x:x + w], (224, 224))
                    face_output_path = os.path.join(
                        faces_output_dir, f"{video_file_name}_{frame_count:03d}_face{i}.jpeg"
                    )
                    # Only keep crops that actually reached the disk, so the
                    # returned arrays mirror the saved files.
                    if cv2.imwrite(face_output_path, face_resized):
                        faces.append(face_resized)
                        labels.append(label)
                        saved_faces += 1
                    else:
                        print(f"Errore nel salvataggio della faccia: {face_output_path}")

            frame_count += 1

        # frame_count is the number of frames read; no frame is written to
        # disk, so the summary reports them as read rather than saved.
        print(f"Video {video_file_name} elaborato: {frame_count} frames letti, {saved_faces} facce salvate.")
        return np.array(faces), np.array(labels)
    finally:
        # Always free the capture handle, even if detection or I/O raised.
        cap.release()

def process_videos_in_batch(video_path, cascade_classifier, frame_skip=20, label=None):
    """
    Extract faces from every ``.mp4`` file in a directory.

    Files are processed in sorted name order and each one is handed to
    ``extract_faces_from_video``, which also writes the face crops to disk.

    Args:
        video_path: Directory containing the videos.
        cascade_classifier: Face detector forwarded to ``extract_faces_from_video``.
        frame_skip: Process one frame out of every ``frame_skip``.
        label: Label attached to every extracted face. Defaults to
            ``LABELS["REAL"]``, which is what this function always used before
            the parameter existed; pass ``LABELS["FAKE"]`` for manipulated videos.

    Returns:
        Tuple ``(faces, labels)`` of numpy arrays aggregated over all videos.
    """
    # Resolved at call time so the default keeps tracking the LABELS mapping.
    if label is None:
        label = LABELS["REAL"]

    faces = []
    labels = []

    for video_file in sorted(os.listdir(video_path)):
        if not video_file.endswith('.mp4'):
            continue
        video_full_path = os.path.join(video_path, video_file)
        # Skip directories that merely happen to end in ".mp4".
        if not os.path.isfile(video_full_path):
            continue
        face_frames, face_labels = extract_faces_from_video(video_full_path, label, cascade_classifier, frame_skip)
        faces.extend(face_frames)
        labels.extend(face_labels)

    return np.array(faces), np.array(labels)


In [None]:

# Run the batch extraction on the REAL videos.
cascade_classifier = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
faces, labels = process_videos_in_batch(REAL_VIDEOS_PATH, cascade_classifier)
# Count only the .mp4 files, which is what the batch step processes; a plain
# os.listdir() length would also count unrelated entries in the directory.
real_video_count = sum(1 for f in os.listdir(REAL_VIDEOS_PATH) if f.endswith('.mp4'))
print(f"Elaborazione completata: {len(faces)} volti estratti da {real_video_count} video.")


In [None]:
# Run the batch extraction on the FAKE videos.
cascade_classifier = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
fake_faces_all = []
fake_labels_all = []
for fake_path in FAKE_VIDEOS_PATHS:
    # Called this way, process_videos_in_batch tags every face with
    # LABELS["REAL"], so its labels are discarded and rebuilt as FAKE below.
    path_faces = process_videos_in_batch(fake_path, cascade_classifier)[0]
    # Accumulate across paths instead of overwriting the previous result.
    fake_faces_all.extend(path_faces)
    fake_labels_all.extend([LABELS["FAKE"]] * len(path_faces))
    # Count only the .mp4 files, which is what the batch step processes.
    fake_video_count = sum(1 for f in os.listdir(fake_path) if f.endswith('.mp4'))
    print(f"Elaborazione completata: {len(path_faces)} volti estratti da {fake_video_count} video FAKE.")
fake_faces = np.array(fake_faces_all)
fake_labels = np.array(fake_labels_all)
# Summarise the FAKE run itself; the previous closing line re-printed the REAL
# statistics and failed on a fresh kernel where `faces` was not defined.
print(f"Elaborazione completata: {len(fake_faces)} volti FAKE estratti in totale.")

Cartella faces confermata: /Volumes/HDD2 - 12Tb/Developer/GitHub/PhD-Projects/dataset/dataset/manipulated_sequences/DeepFakeDetection/c40/faces
Cartella frames confermata: /Volumes/HDD2 - 12Tb/Developer/GitHub/PhD-Projects/dataset/dataset/manipulated_sequences/DeepFakeDetection/c40/frames
Inizio elaborazione video: 01_02__exit_phone_room__YVGY8LOK
Video 01_02__exit_phone_room__YVGY8LOK elaborato: 210 frames salvati, 10 facce salvate.
Cartella faces confermata: /Volumes/HDD2 - 12Tb/Developer/GitHub/PhD-Projects/dataset/dataset/manipulated_sequences/DeepFakeDetection/c40/faces
Cartella frames confermata: /Volumes/HDD2 - 12Tb/Developer/GitHub/PhD-Projects/dataset/dataset/manipulated_sequences/DeepFakeDetection/c40/frames
Inizio elaborazione video: 01_02__hugging_happy__YVGY8LOK
Video 01_02__hugging_happy__YVGY8LOK elaborato: 578 frames salvati, 18 facce salvate.
Cartella faces confermata: /Volumes/HDD2 - 12Tb/Developer/GitHub/PhD-Projects/dataset/dataset/manipulated_sequences/DeepFakeDete