In [1]:
import tensorflow as tf
print("GPU:", tf.config.list_physical_devices('GPU'))


2025-12-19 20:27:24.790923: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1766176044.996734      55 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1766176045.050000      55 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1766176045.537789      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1766176045.537845      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1766176045.537850      55 computation_placer.cc:177] computation placer alr

GPU: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [3]:
import kagglehub
import os

path = kagglehub.dataset_download("xdxd003/ff-c23")

print("Dataset downloaded to:")
print(path)

print("\nContents:")
print(os.listdir(path))


Dataset downloaded to:
/kaggle/input/ff-c23

Contents:
['FaceForensics++_C23']


In [4]:
import os

DATASET_PATH = "/kaggle/input/ff-c23/FaceForensics++_C23"
print(os.listdir(DATASET_PATH))


['Face2Face', 'csv', 'Deepfakes', 'DeepFakeDetection', 'original', 'NeuralTextures', 'FaceShifter', 'FaceSwap']


In [6]:
import os

DATASET_PATH = "/kaggle/input/ff-c23/FaceForensics++_C23"

REAL_DIR = os.path.join(DATASET_PATH, "original")
FAKE_DIR = os.path.join(DATASET_PATH, "Deepfakes")

print("Real videos:", len(os.listdir(REAL_DIR)))
print("Fake videos:", len(os.listdir(FAKE_DIR)))


Real videos: 1000
Fake videos: 1000


In [7]:
import cv2
import numpy as np
import os

# parameters (small on purpose)
NUM_FRAMES = 20
IMG_SIZE = 224

DATASET_PATH = "/kaggle/input/ff-c23/FaceForensics++_C23"
REAL_DIR = os.path.join(DATASET_PATH, "original")

# pick ONE real video
video_path = os.path.join(REAL_DIR, os.listdir(REAL_DIR)[0])
print("Using video:", os.path.basename(video_path))

def extract_frames(video_path, num_frames=NUM_FRAMES):
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    frame_idxs = np.linspace(0, total_frames - 1, num_frames, dtype=int)
    frames = []

    for idx in frame_idxs:
        cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.resize(frame, (IMG_SIZE, IMG_SIZE))
        frame = frame / 255.0
        frames.append(frame)

    cap.release()

    frames = np.array(frames, dtype=np.float32)
    return frames

frames = extract_frames(video_path)

print("Extracted frames shape:", frames.shape)
print("Min pixel value:", frames.min())
print("Max pixel value:", frames.max())


Using video: 123.mp4
Extracted frames shape: (20, 224, 224, 3)
Min pixel value: 0.0
Max pixel value: 1.0


Video
  ↓
  
On-the-fly frame sampling  ✅
  ↓
  
CNN input-ready tensors


In [8]:
import os
import numpy as np

DATASET_PATH = "/kaggle/input/ff-c23/FaceForensics++_C23"
REAL_DIR = os.path.join(DATASET_PATH, "original")
FAKE_DIR = os.path.join(DATASET_PATH, "Deepfakes")

# reuse the SAME parameters as before
NUM_FRAMES = 20
IMG_SIZE = 224

# reuse the SAME extract_frames function
def extract_frames(video_path, num_frames=NUM_FRAMES):
    import cv2
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_idxs = np.linspace(0, total_frames - 1, num_frames, dtype=int)

    frames = []
    for idx in frame_idxs:
        cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.resize(frame, (IMG_SIZE, IMG_SIZE))
        frames.append(frame / 255.0)

    cap.release()
    while len(frames) < num_frames:
        frames.append(frames[-1])

    return np.array(frames, dtype=np.float32)

# ---- load ONE real and ONE fake ----
X = []
y = []

real_video = os.path.join(REAL_DIR, os.listdir(REAL_DIR)[0])
fake_video = os.path.join(FAKE_DIR, os.listdir(FAKE_DIR)[0])

X.append(extract_frames(real_video))
y.append(0)   # REAL

X.append(extract_frames(fake_video))
y.append(1)   # FAKE

X = np.array(X)
y = np.array(y)

print("X shape:", X.shape)
print("y labels:", y)


X shape: (2, 20, 224, 224, 3)
y labels: [0 1]


In [9]:
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import (
    Input,
    Dense,
    Dropout,
    TimeDistributed,
    GlobalAveragePooling2D,
    GlobalAveragePooling1D
)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# parameters must MATCH previous steps
NUM_FRAMES = 20
IMG_SIZE = 224

# ---- base CNN (frozen) ----
base_cnn = MobileNetV2(
    include_top=False,
    weights="imagenet",
    input_shape=(IMG_SIZE, IMG_SIZE, 3)
)
base_cnn.trainable = False  # baseline: do NOT unfreeze

# ---- video input ----
video_input = Input(shape=(NUM_FRAMES, IMG_SIZE, IMG_SIZE, 3))

# ---- spatial feature extraction ----
x = TimeDistributed(base_cnn)(video_input)
x = TimeDistributed(GlobalAveragePooling2D())(x)

# ---- temporal aggregation ----
x = GlobalAveragePooling1D()(x)

# ---- classifier ----
x = Dense(128, activation="relu")(x)
x = Dropout(0.5)(x)
output = Dense(1, activation="sigmoid")(x)

# ---- model ----
model = Model(video_input, output)

# ---- compile (no training yet) ----
model.compile(
    optimizer=Adam(1e-4),
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

model.summary()


I0000 00:00:1766176417.386915      55 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [10]:
history = model.fit(
    X,
    y,
    epochs=1,
    batch_size=1,
    verbose=1
)


I0000 00:00:1766176539.202751     146 service.cc:152] XLA service 0x7dfddc0901f0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1766176539.202819     146 service.cc:160]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1766176556.415325     146 cuda_dnn.cc:529] Loaded cuDNN version 91002
I0000 00:00:1766176569.864745     146 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 67ms/step - accuracy: 1.0000 - loss: 0.2913


In [11]:
import os
import numpy as np

# use same settings
NUM_FRAMES = 20
IMG_SIZE = 224

DATASET_PATH = "/kaggle/input/ff-c23/FaceForensics++_C23"
REAL_DIR = os.path.join(DATASET_PATH, "original")
FAKE_DIR = os.path.join(DATASET_PATH, "Deepfakes")

# reuse extract_frames from before
def extract_frames(video_path, num_frames=NUM_FRAMES):
    import cv2
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_idxs = np.linspace(0, total_frames - 1, num_frames, dtype=int)

    frames = []
    for idx in frame_idxs:
        cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.resize(frame, (IMG_SIZE, IMG_SIZE))
        frames.append(frame / 255.0)

    cap.release()
    while len(frames) < num_frames:
        frames.append(frames[-1])

    return np.array(frames, dtype=np.float32)

# ---- load 4 real + 4 fake ----
X, y = [], []

for v in os.listdir(REAL_DIR)[:4]:
    X.append(extract_frames(os.path.join(REAL_DIR, v)))
    y.append(0)

for v in os.listdir(FAKE_DIR)[:4]:
    X.append(extract_frames(os.path.join(FAKE_DIR, v)))
    y.append(1)

X = np.array(X)
y = np.array(y)

print("X shape:", X.shape)
print("y:", y)


X shape: (8, 20, 224, 224, 3)
y: [0 0 0 0 1 1 1 1]


In [12]:
# recompile just to be safe (clean state)
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-4),
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

history = model.fit(
    X,
    y,
    epochs=5,
    batch_size=2,
    verbose=1
)


Epoch 1/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 56ms/step - accuracy: 0.2667 - loss: 1.2304  
Epoch 2/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - accuracy: 0.7333 - loss: 0.6709
Epoch 3/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - accuracy: 0.7667 - loss: 0.3917
Epoch 4/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - accuracy: 0.2167 - loss: 0.8500   
Epoch 5/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - accuracy: 0.8167 - loss: 0.5201


In [None]:
from sklearn.model_selection import train_test_split

# X shape: (N, 20, 224, 224, 3)
# y shape: (N,)

X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
    stratify=y
)

print("Train shape:", X_train.shape, y_train.shape)
print("Val shape:", X_val.shape, y_val.shape)


In [None]:
from sklearn.model_selection import train_test_split

# X shape: (N, 20, 224, 224, 3)
# y shape: (N,)

X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
    stratify=y
)

print("Train shape:", X_train.shape, y_train.shape)
print("Val shape:", X_val.shape, y_val.shape)


In [13]:
import os
import subprocess
from pathlib import Path

# ---------------- CONFIG ----------------
DATASET_PATH = "/kaggle/input/ff-c23/FaceForensics++_C23"
OUTPUT_PATH = "/kaggle/working/processed_frames"

FRAMES_PER_VIDEO = 50
VIDEO_EXTENSIONS = (".mp4", ".avi", ".mov")

REAL_FOLDER = "original"
FAKE_FOLDERS = [
    "Deepfakes",
    "Face2Face",
    "FaceSwap",
    "FaceShifter",
    "NeuralTextures"
]

os.makedirs(OUTPUT_PATH, exist_ok=True)

# ------------- FFmpeg FUNCTION -------------
def extract_uniform_frames(video_path, out_dir, frames=FRAMES_PER_VIDEO):
    os.makedirs(out_dir, exist_ok=True)

    cmd = [
        "ffmpeg",
        "-i", video_path,
        "-vf", f"select='eq(pict_type\\,I)+not(eq(pict_type\\,I))',"
               f"scale=224:224",
        "-vsync", "vfr",
        "-frames:v", str(frames),
        os.path.join(out_dir, "frame_%04d.jpg"),
        "-loglevel", "error"
    ]

    subprocess.run(cmd, check=False)

# ------------- REAL VIDEOS -------------
real_src = os.path.join(DATASET_PATH, REAL_FOLDER)
real_dst = os.path.join(OUTPUT_PATH, "real")

for video in os.listdir(real_src):
    if video.lower().endswith(VIDEO_EXTENSIONS):
        extract_uniform_frames(
            os.path.join(real_src, video),
            os.path.join(real_dst, Path(video).stem)
        )

# ------------- FAKE VIDEOS -------------
for folder in FAKE_FOLDERS:
    fake_src = os.path.join(DATASET_PATH, folder)
    fake_dst = os.path.join(OUTPUT_PATH, "fake")

    for video in os.listdir(fake_src):
        if video.lower().endswith(VIDEO_EXTENSIONS):
            extract_uniform_frames(
                os.path.join(fake_src, video),
                os.path.join(fake_dst, Path(video).stem)
            )

print("✅ 50 uniformly sampled frames per video extracted & labeled")


✅ 50 uniformly sampled frames per video extracted & labeled


In [14]:
import shutil
import os

SOURCE_DIR = "/kaggle/working/processed_frames"
ZIP_PATH = "/kaggle/working/processed_frames_50frames.zip"

# Create zip
shutil.make_archive(
    ZIP_PATH.replace(".zip", ""),
    'zip',
    SOURCE_DIR
)

print("✅ ZIP created at:", ZIP_PATH)


✅ ZIP created at: /kaggle/working/processed_frames_50frames.zip


In [17]:
!zip -r processed_frames.zip /kaggle/working/processed_frames_50frames.zip


  adding: kaggle/working/processed_frames_50frames.zip (stored 0%)
