In [4]:
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
import numpy as np
import json
import os
import cv2
from dlib import get_frontal_face_detector 


In [13]:
import json
import os

# Dataset locations (edit these to match where the data lives on your machine)
train_frame_folder_old = r'C:\Users\Vaibhav\OneDrive\Desktop\DeepFake-20240214T144104Z-001\DeepFake\deepfake-detection-challenge\train_sample_videos'
train_frame_folder_new = r'C:\Users\Vaibhav\OneDrive\Desktop\DeepFake-20240214T144104Z-001\DeepFake\dfdc_train_part_0'

# metadata.json is expected directly inside each dataset folder
metadata_path_old, metadata_path_new = (
    os.path.join(dataset_folder, 'metadata.json')
    for dataset_folder in (train_frame_folder_old, train_frame_folder_new)
)

# Load Metadata 
def load_and_combine_metadata(path1, path2):
    """Merge two metadata JSON files into a single dict keyed by filename.

    The files are read in order (``path1`` is reported as the "old" dataset,
    ``path2`` as the "new" one). The number of entries in each file is
    printed. When a filename is already present in the merged result, the
    earlier entry is kept and ``handle_duplicate`` is called to report it.

    Args:
        path1: Path to the first metadata JSON file.
        path2: Path to the second metadata JSON file.

    Returns:
        dict mapping each filename to the info object from the first file
        in which it appeared.
    """
    merged = {}
    for dataset_label, metadata_file in zip(('old', 'new'), (path1, path2)):
        with open(metadata_file, 'r') as handle:
            entries = json.load(handle)

        print(f"Loaded {len(entries)} entries from {dataset_label} dataset metadata.")

        for name, details in entries.items():
            if name not in merged:
                merged[name] = details
                continue
            # Already seen: leave the stored entry untouched, just report it
            handle_duplicate(name, merged[name], details)

    return merged

# Duplicate policy: the first occurrence wins; later ones are only reported
def handle_duplicate(filename, existing_info, new_info):
    """Report a filename that appears in more than one metadata file.

    Only prints a notice; nothing is modified or returned, so the entry the
    caller already holds stays as it is. ``existing_info`` and ``new_info``
    are accepted but not used.
    """
    notice = f"Duplicate metadata entry for {filename}. Keeping existing information."
    print(notice)


# Build the combined filename -> info lookup from both datasets' metadata files
metadata = load_and_combine_metadata(
    path1=metadata_path_old,
    path2=metadata_path_new,
)




Loaded 400 entries from old dataset metadata.
Loaded 1334 entries from new dataset metadata.


In [18]:
import cv2
import os
import numpy as np
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# Load the frontal-face Haar cascade that ships with OpenCV
cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
detector = cv2.CascadeClassifier(cascade_path)

# A missing or corrupt cascade file yields an empty classifier rather than an
# exception here; fail fast instead of letting every later detection call
# error out and be swallowed as "no face" by the video loader.
if detector.empty():
    raise IOError(f"Failed to load Haar cascade from {cascade_path}")

def load_and_process_image(vid_path, input_shape=(128, 128, 3), frame_step=10, verbose=True):
    """Return the first detected face crop from a video, or None.

    Frames are read sequentially. Every ``frame_step``-th frame is converted
    to grayscale and passed to the module-level Haar cascade ``detector``.
    The first face found is cropped from the colour frame, resized and
    scaled to the [0, 1] range. No colour conversion is applied to the crop,
    so it keeps the channel order OpenCV delivered (the code treats frames as
    BGR when building the grayscale image).

    Args:
        vid_path: Path to the video file.
        input_shape: Target (height, width, channels). Only the first two
            entries are used, to size the crop.
        frame_step: Run detection on every N-th frame. Defaults to 10;
            values below 1 are treated as 1.
        verbose: When True (default), print the shape of each sampled frame.

    Returns:
        A float32 array of shape (input_shape[0], input_shape[1], C) with
        values in [0, 1], or None when the video cannot be opened or read,
        no face is found before the stream ends, or OpenCV raises an error.
    """
    step = max(1, int(frame_step))
    target_h, target_w = input_shape[0], input_shape[1]
    cap = None

    try:
        cap = cv2.VideoCapture(vid_path)
        if not cap.isOpened():
            # Covers non-video files and unsupported codecs
            print(f"Error reading video: {vid_path}")
            return None

        frame_count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                if frame_count == 0:
                    # Not a single frame could be decoded
                    print(f"Error reading video: {vid_path}")
                else:
                    # Normal end of stream: sampled frames had no detectable face
                    print(f"No face found in sampled frames of {vid_path}")
                return None

            if frame_count % step == 0:
                if verbose:
                    print(f"Frame shape for {vid_path}: {frame.shape}")

                # The cascade works on single-channel images
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                faces = detector.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

                if len(faces) > 0:
                    x, y, w, h = faces[0]
                    # cv2.resize takes dsize as (width, height), i.e. the
                    # reverse of the (height, width) order in input_shape
                    crop_img = cv2.resize(frame[y:y+h, x:x+w], (target_w, target_h))
                    return crop_img.astype(np.float32) / 255.0

            frame_count += 1

    except cv2.error as e:
        print(f"OpenCV Error ({e}): Skipping {vid_path}")
        return None

    finally:
        # Release the capture on every exit path, including early returns
        if cap is not None:
            cap.release()

def _label_from_metadata(vid, metadata):
    """Return 1 for a fake video and 0 otherwise, preferring the metadata label."""
    info = metadata.get(vid) if metadata else None
    # Assumes DFDC-style entries such as {"label": "FAKE", ...} -- confirm
    # against your metadata.json.
    if isinstance(info, dict) and 'label' in info:
        return 1 if str(info['label']).upper() == 'FAKE' else 0
    # No usable metadata entry: fall back to the file-name heuristic
    return 1 if 'FAKE' in vid else 0


def create_data_arrays(data_folder, input_shape=(128, 128, 3), metadata=None):
    """Build face-crop and one-hot label arrays for every video in a folder.

    Each regular file in ``data_folder`` (except ``metadata.json``) is passed
    to ``load_and_process_image``; files for which it returns None are
    skipped. Files are visited in sorted name order so the result is
    reproducible across runs.

    Args:
        data_folder: Directory containing the video files.
        input_shape: Target (height, width, channels) passed to the loader.
        metadata: Optional dict mapping file name to an info dict with a
            ``'label'`` entry. When None, ``metadata.json`` inside
            ``data_folder`` is loaded if it exists. Videos without a usable
            entry are labelled by whether their name contains ``'FAKE'``.

    Returns:
        Tuple ``(X, Y)``: ``X`` stacks the face crops along the first axis
        and ``Y`` is the matching one-hot label array with 2 classes
        (index 1 = fake). With no usable videos, ``X`` has shape
        ``(0, *input_shape)`` so it can still be concatenated.
    """
    import json

    if metadata is None:
        metadata_file = os.path.join(data_folder, 'metadata.json')
        if os.path.isfile(metadata_file):
            with open(metadata_file, 'r') as f:
                metadata = json.load(f)
        else:
            metadata = {}

    X = []
    Y = []

    for vid in sorted(os.listdir(data_folder)):
        vid_path = os.path.join(data_folder, vid)

        # Skip sub-directories and the metadata file; they are not videos
        if vid == 'metadata.json' or not os.path.isfile(vid_path):
            continue

        processed_image = load_and_process_image(vid_path, input_shape)
        if processed_image is not None:
            X.append(processed_image)
            Y.append(_label_from_metadata(vid, metadata))

    if X:
        X_array = np.array(X)
    else:
        # Keep the sample axis explicit so downstream concatenation still works
        X_array = np.empty((0,) + tuple(input_shape), dtype=np.float32)

    return X_array, to_categorical(np.array(Y, dtype=int), num_classes=2)

# Dataset locations (edit these to match where the data lives on your machine)
train_frame_folder_old = r'C:\Users\Vaibhav\OneDrive\Desktop\DeepFake-20240214T144104Z-001\DeepFake\deepfake-detection-challenge\train_sample_videos'
train_frame_folder_new = r'C:\Users\Vaibhav\OneDrive\Desktop\DeepFake-20240214T144104Z-001\DeepFake\dfdc_train_part_0'


# Extract face crops and one-hot labels from each dataset separately
X_old, Y_old = create_data_arrays(data_folder=train_frame_folder_old)
X_new, Y_new = create_data_arrays(data_folder=train_frame_folder_new)

# Stack both datasets along the sample axis
X = np.concatenate([X_old, X_new], axis=0)
Y = np.concatenate([Y_old, Y_new], axis=0)

# Hold out 20% of the samples for validation (fixed seed for a repeatable split)
X_train, X_val, Y_train, Y_val = train_test_split(X, Y, random_state=5, test_size=0.2)

# Report the resulting array shapes as a sanity check
for caption, split_array in (
    ("X_train shape:", X_train),
    ("Y_train shape:", Y_train),
    ("X_val shape:", X_val),
    ("Y_val shape:", Y_val),
):
    print(caption, split_array.shape)


Frame shape for C:\Users\Vaibhav\OneDrive\Desktop\DeepFake-20240214T144104Z-001\DeepFake\deepfake-detection-challenge\train_sample_videos\aagfhgtpmv.mp4: (1080, 1920, 3)
Frame shape for C:\Users\Vaibhav\OneDrive\Desktop\DeepFake-20240214T144104Z-001\DeepFake\deepfake-detection-challenge\train_sample_videos\aapnvogymq.mp4: (1080, 1920, 3)
Frame shape for C:\Users\Vaibhav\OneDrive\Desktop\DeepFake-20240214T144104Z-001\DeepFake\deepfake-detection-challenge\train_sample_videos\aapnvogymq.mp4: (1080, 1920, 3)
Frame shape for C:\Users\Vaibhav\OneDrive\Desktop\DeepFake-20240214T144104Z-001\DeepFake\deepfake-detection-challenge\train_sample_videos\abarnvbtwb.mp4: (1080, 1920, 3)
Frame shape for C:\Users\Vaibhav\OneDrive\Desktop\DeepFake-20240214T144104Z-001\DeepFake\deepfake-detection-challenge\train_sample_videos\abarnvbtwb.mp4: (1080, 1920, 3)
Frame shape for C:\Users\Vaibhav\OneDrive\Desktop\DeepFake-20240214T144104Z-001\DeepFake\deepfake-detection-challenge\train_sample_videos\abarnvbtwb.m

In [None]:
import os

# Root of the directory tree to scan
search_dir = '.'

# File names to look for anywhere under the root
file_names = ['preprocessed_data_X_old.npy', 'preprocessed_data_Y_old.npy', 'preprocessed_data_X_new.npy', 'preprocessed_data_Y_new.npy']

# Walk the tree and collect the full path of every match
# (directories in walk order, names in the order listed above)
found_files = [
    os.path.join(current_dir, wanted)
    for current_dir, _subdirs, present in os.walk(search_dir)
    for wanted in file_names
    if wanted in present
]

# Report each hit on its own line
for hit in found_files:
    print("Found:", hit)
