In [None]:
# Environment check: list the GPU attached to this runtime (IPython shell escape).
!nvidia-smi
import torch
# Confirm that PyTorch itself can see a CUDA device.
print("CUDA available:", torch.cuda.is_available())


Mon Aug 11 10:27:04 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   47C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
# 1) Install required packages
!pip -q install efficientnet-pytorch==0.7.1 opencv-python-headless==4.10.0.84 \
                 pandas==2.2.2 pillow==10.4.0 scikit-learn==1.5.1

# 2) Imports
import os, glob
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm

import torch
import torch.nn as nn
from torchvision import transforms
from efficientnet_pytorch import EfficientNet

from google.colab import drive

# 3) Mount Google Drive
drive.mount('/content/drive')

# 4) EDIT THESE IF NEEDED
# Both values are only checked for existence at the end of this cell.
FRAMES_DIR = "/content/drive/My Drive/frames/fake"   # your existing fake frames folder
WEIGHTS_PATH = "/content/drive/My Drive/effnb4_best.pth"  # update if stored elsewhere

# 5) Output folder in the Colab workspace
OUTPUT_DIR = "/content/outputs"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# The message below is printed unconditionally; the two lines after it report
# whether the configured frames folder and weights file actually exist.
print("✅ Setup complete")
print("Frames dir exists:", os.path.isdir(FRAMES_DIR), "->", FRAMES_DIR)
print("Weights file found:", os.path.isfile(WEIGHTS_PATH), "->", WEIGHTS_PATH)


  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.9/49.9 MB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.5/4.5 MB[0m [31m125.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.3/13.3 MB[0m [31m121.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m96.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m69.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m50.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.8 MB/s[0m eta [36m0:0

In [None]:
# --- Update paths ---
# Re-assigns the two path constants used by the rest of this cell.
FRAMES_DIR = "/content/drive/My Drive/frames/fake"  # already correct from Step 2
WEIGHTS_PATH = "/content/drive/My Drive/DeepfakeBench_weights/effnb4_best.pth"  # <- your file

import os, torch, torch.nn as nn
from efficientnet_pytorch import EfficientNet

# Report (without failing) whether the configured locations exist.
print("Frames dir exists:", os.path.isdir(FRAMES_DIR), "->", FRAMES_DIR)
print("Weights file found:", os.path.isfile(WEIGHTS_PATH), "->", WEIGHTS_PATH)

# --- Define your exact model ---
class DeepfakeBenchEfficientNet(nn.Module):
    """EfficientNet-B4 feature extractor followed by a 2-way linear head.

    The sub-modules hang off ``self.backbone`` so parameter names are
    ``backbone.efficientnet.*`` and ``backbone.last_layer.*``.
    """

    def __init__(self):
        super().__init__()
        # Architecture only; no weights are loaded here.
        feature_net = EfficientNet.from_name('efficientnet-b4')
        # Replace the stock classifier so the network emits its pooled features.
        feature_net._fc = nn.Identity()

        container = nn.Module()
        container.efficientnet = feature_net
        container.last_layer = nn.Linear(1792, 2)  # [real, fake]
        self.backbone = container

    def forward(self, x):
        """Return the two logits produced for input batch ``x``."""
        return self.backbone.last_layer(self.backbone.efficientnet(x))

# Prefer the GPU when PyTorch can see one; otherwise run on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DeepfakeBenchEfficientNet().to(device)

# --- Load weights, handling possible 'module.' prefixes from DataParallel ---
# map_location places the loaded tensors on the same device as the model.
state = torch.load(WEIGHTS_PATH, map_location=device)

def strip_module_prefix(state_dict):
    """Drop a leading ``module.`` from every key, but only if all keys carry it.

    Returns the input mapping unchanged (same object) as soon as one key lacks
    the prefix; otherwise returns a new dict with the prefix removed once.
    """
    prefix = "module."
    for key in state_dict.keys():
        if not key.startswith(prefix):
            return state_dict
    return {key[len(prefix):]: value for key, value in state_dict.items()}

# First attempt the checkpoint as-is; if the keys do not match, retry once after
# removing a uniform 'module.' prefix. A second failure propagates to the caller.
try:
    model.load_state_dict(state)
except RuntimeError as e:
    print("[Info] Trying to strip 'module.' prefixes:", e)
    state = strip_module_prefix(state)
    model.load_state_dict(state)

# Inference mode: disables dropout / uses running batch-norm statistics.
model.eval()

# quick sanity check: one dummy forward
with torch.no_grad():
    dummy = torch.randn(1, 3, 380, 380, device=device)
    out = model(dummy)
print("✅ Model loaded. Output shape:", tuple(out.shape), "| Device:", device)


Frames dir exists: True -> /content/drive/My Drive/frames/fake
Weights file found: True -> /content/drive/My Drive/DeepfakeBench_weights/effnb4_best.pth
✅ Model loaded. Output shape: (1, 2) | Device: cuda


In [None]:
# ================= LAST TRY: single per-video result, auto-optimizes legit eval choices =================
# Configuration for the grid search below. Every combination of the TRY_* / *_LIST
# values is evaluated and the one with the highest per-video AUC is reported.
REAL_FRAMES_DIR = "/content/balanced_frames/real"
FAKE_FRAMES_DIR = "/content/balanced_frames/fake"

USE_FACE_CROPS = True     # keep True unless frames are already good face crops
CROP_SIZE = 380           # side length used for both the face crops and the model input
TRY_TTA = [False, True]   # without / with horizontal-flip test-time augmentation
TRY_NORM = ["no_norm", "imagenet"]   # keys of the transform table built further down
TRY_FILTERS = [0.0, 0.1, 0.2, 0.3]   # drop frames whose score is closer than this to 0.5
TOPK_LIST = [5, 10, 15]   # k values for the top-k mean aggregation
TRIM_LIST = [0.1, 0.2]    # trimmed mean proportions
LSE_ALPHA = [0.5, 1.0, 2.0]   # log-sum-exp pooling strengths
SAVE_PER_VIDEO_CSV = True
CSV_PATH = "/content/per_video_scores_best_last_try.csv"
SHOW_CONFIG = True   # <- set to False to hide config in the printout

import os, glob, re, numpy as np, pandas as pd
from PIL import Image
import torch
from torchvision import transforms
from torchvision.transforms import functional as TF
from sklearn.metrics import roc_auc_score, average_precision_score, roc_curve

device = next(model.parameters()).device

# ---------- optional face crops ----------
if USE_FACE_CROPS:
    # Install and import the face detector only when cropping is requested.
    !pip -q install facenet-pytorch==2.5.3
    from facenet_pytorch import MTCNN
    # keep_all=False: one face per image. No post_process argument is passed, so the
    # library default applies.
    # NOTE(review): per the facenet-pytorch docs the default standardises the returned
    # tensor instead of returning 0..1 pixels - confirm that code turning crops back
    # into images accounts for this.
    mtcnn = MTCNN(image_size=CROP_SIZE, margin=20, keep_all=False, device=device)

    def crop_folder(src, dst):
        """Detect one face per image in ``src`` and save the crop into ``dst``.

        Args:
            src: folder scanned (non-recursively) for image files.
            dst: output folder, created if missing; crops keep the source basename.

        Returns:
            Number of crops written. Images without a detected face are skipped
            silently; images that fail to load or process are skipped and reported.
        """
        os.makedirs(dst, exist_ok=True)
        files = sorted(p for p in glob.glob(os.path.join(src, "*"))
                       if p.lower().endswith((".jpg",".jpeg",".png",".bmp",".webp")))

        # facenet-pytorch's MTCNN returns a float tensor of 0..255 pixel values, and
        # with post_process=True (its default) standardises it as (x - 127.5) / 128.
        # Multiplying that by 255 and casting to uint8 wraps around and corrupts the
        # image, so undo the standardisation instead of rescaling.
        standardized = getattr(mtcnn, "post_process", True)

        kept = 0
        failed = 0
        for p in files:
            try:
                with Image.open(p) as handle:
                    img = handle.convert("RGB")
                face = mtcnn(img)
                if face is None:
                    continue
                if standardized:
                    face = face * 128.0 + 127.5
                pixels = (face.clamp(0, 255).round().byte()
                              .permute(1, 2, 0).contiguous().cpu().numpy())
                Image.fromarray(pixels).save(os.path.join(dst, os.path.basename(p)))
                kept += 1
            except Exception as exc:
                # Best effort: one bad frame must not abort the run, but report it
                # (and let KeyboardInterrupt / SystemExit through).
                failed += 1
                print(f"[crop_folder] skipped {p}: {exc!r}")
        if failed:
            print(f"[crop_folder] {failed} of {len(files)} images failed in {src}")
        return kept

    # Crop both classes into a scratch area, then point the evaluation at the crops.
    CROPPED_REAL = "/content/crops_bal_final/real"; CROPPED_FAKE = "/content/crops_bal_final/fake"
    os.makedirs(CROPPED_REAL, exist_ok=True); os.makedirs(CROPPED_FAKE, exist_ok=True)
    # The returned crop counts are discarded.
    _ = crop_folder(REAL_FRAMES_DIR, CROPPED_REAL)
    _ = crop_folder(FAKE_FRAMES_DIR, CROPPED_FAKE)
    REAL_DIR, FAKE_DIR = CROPPED_REAL, CROPPED_FAKE
else:
    # No cropping: evaluate the raw frame folders directly.
    REAL_DIR, FAKE_DIR = REAL_FRAMES_DIR, FAKE_FRAMES_DIR

# ---------- helpers ----------
IMG_EXTS = (".jpg",".jpeg",".png",".bmp",".webp")
def is_img(p):
    """Return True when path ``p`` ends with one of ``IMG_EXTS`` (case-insensitive)."""
    lowered = p.lower()
    return any(lowered.endswith(ext) for ext in IMG_EXTS)

# Two candidate preprocessing pipelines compared by the search below.
# Plain resize + ToTensor (no mean/std normalisation).
tf_no_norm = transforms.Compose([
    transforms.Resize((CROP_SIZE, CROP_SIZE)),
    transforms.ToTensor()
])
# Same, followed by the usual ImageNet mean/std normalisation.
tf_imagenet = transforms.Compose([
    transforms.Resize((CROP_SIZE, CROP_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])

def predict_prob_fake(img: Image.Image, transform) -> float:
    """Score a single PIL image.

    Applies ``transform``, runs the module-level ``model`` without gradients and
    returns the softmax probability at logit index 1 as a Python float.
    """
    batch = transform(img).unsqueeze(0).to(device)
    with torch.no_grad():
        probabilities = torch.softmax(model(batch), dim=1)
        return float(probabilities[0, 1].item())

def predict_prob_fake_tta(img: Image.Image, transform) -> float:
    """Score a PIL image with horizontal-flip test-time augmentation.

    The logits of the image and of its mirrored copy are averaged before the
    softmax; the probability at index 1 is returned as a Python float.
    """
    views = [transform(view).unsqueeze(0).to(device) for view in (img, TF.hflip(img))]
    with torch.no_grad():
        logits_plain = model(views[0])
        logits_mirrored = model(views[1])
        mean_logits = (logits_plain + logits_mirrored) / 2
        return float(torch.softmax(mean_logits, dim=1)[0, 1].item())

def gather_scores(transform, tta_flag):
    """Score every image under REAL_DIR (label 0) and FAKE_DIR (label 1).

    Args:
        transform: preprocessing callable handed to the predictor.
        tta_flag: when truthy, use the flip-TTA predictor.

    Returns:
        DataFrame with columns ``path``, ``score``, ``true`` and ``video_name``.
    """
    def infer_vname(path):
        # infer video name from filename: strip "_frameNNN", else a trailing "_NNN"/"-NNN"
        stem = os.path.splitext(os.path.basename(path))[0]
        pieces = re.split(r"_frame\d+$", stem)
        if len(pieces) > 1 and pieces[0]:
            return pieces[0]
        shortened = re.sub(r"[_\-]\d+$", "", stem)
        return shortened if shortened and shortened != stem else stem

    records = []
    for folder, lbl in [(REAL_DIR, 0), (FAKE_DIR, 1)]:
        candidates = glob.glob(os.path.join(folder, "*"))
        for p in sorted(c for c in candidates if is_img(c)):
            frame = Image.open(p).convert("RGB")
            if tta_flag:
                score = predict_prob_fake_tta(frame, transform)
            else:
                score = predict_prob_fake(frame, transform)
            records.append((p, score, lbl))

    df = pd.DataFrame(records, columns=["path","score","true"])
    df["video_name"] = df["path"].apply(infer_vname)
    return df

def video_metrics(scores, labels):
    """Compute ranking metrics for a set of scores against binary labels.

    Returns:
        ``(auc, eer, ap, thr_eer)``: ROC-AUC, an equal-error-rate estimate taken
        at the ROC point where |FNR - FPR| is smallest (mean of the two rates),
        average precision, and the score threshold at that ROC point.
    """
    auc = roc_auc_score(labels, scores)
    ap = average_precision_score(labels, scores)

    fpr, tpr, thr = roc_curve(labels, scores)
    fnr = 1 - tpr
    gap = np.abs(fnr - fpr)
    idx = int(np.nanargmin(gap))

    eer = float((fpr[idx] + fnr[idx]) / 2.0)
    return auc, eer, ap, float(thr[idx])

def trimmed_mean(vals, trim=0.1):
    """Mean of ``vals`` after dropping ``int(len * trim)`` values from each end.

    Returns ``nan`` for empty input or when trimming would leave nothing.
    """
    count = len(vals)
    if count == 0:
        return np.nan
    cut = int(count * trim)
    ordered = np.sort(vals)
    if 2 * cut >= count:
        return np.nan
    return float(ordered[cut:count - cut].mean())

def logsumexp_pool(vals, alpha=1.0):
    """Pool probabilities with a log-mean-exp ("soft maximum") in logit space.

    Each probability is converted to a logit, the logits are combined as
    ``(1/alpha) * log(mean(exp(alpha * logit)))`` and the result is mapped back
    through the sigmoid. Larger ``alpha`` weights the highest scores more.

    Args:
        vals: array-like of probabilities (list, tuple, ndarray, Series values).
        alpha: pooling strength; expected to be non-zero.

    Returns:
        The pooled probability as a float, or ``nan`` when ``vals`` is empty.
    """
    # Accept any array-like: the arithmetic below needs an ndarray, not a list.
    vals = np.asarray(vals, dtype=float)
    if vals.size == 0:
        return np.nan
    eps = 1e-6  # keeps log() finite for probabilities of exactly 0 or 1
    logits = np.log(np.clip(vals, eps, 1-eps)) - np.log(np.clip(1-vals, eps, 1-eps))
    # Subtract the maximum before exponentiating for numerical stability.
    m = np.max(alpha*logits)
    lse = m + np.log(np.mean(np.exp(alpha*logits - m)))
    pooled_logit = lse / alpha
    # back to probability
    return float(1.0 / (1.0 + np.exp(-pooled_logit)))

# cache per-frame predictions for each (norm, TTA) combo
# Each entry is the DataFrame returned by gather_scores, so every image is scored
# once per (normalisation, TTA) pair and reused by all aggregation variants below.
transforms_map = {"no_norm": tf_no_norm, "imagenet": tf_imagenet}
cache = {}
for norm in TRY_NORM:
    for tta in TRY_TTA:
        cache[(norm, tta)] = gather_scores(transforms_map[norm], tta)

best = None  # (AUC, EER, AP, thr_EER, desc, per_video_df)

# NOTE(review): every candidate is scored on the same labelled videos the final
# number is reported on (including the `flip` score inversion), so the winning
# AUC/EER is selected on the evaluation data and is optimistically biased. Frame
# filtering can also remove whole videos, so candidates are not always compared
# on an identical video set.

def _consider(current_best, per_video, desc):
    """Score one per-video table and return the better of it and ``current_best``.

    Tables that do not contain both classes cannot be scored and are ignored.
    Ranking: higher AUC wins; on equal AUC the lower EER wins; on a full tie the
    earlier candidate is kept.
    """
    if per_video["true"].nunique() < 2:
        return current_best
    auc, eer, ap, thr = video_metrics(per_video["score"].values, per_video["true"].values)
    if (current_best is None
            or auc > current_best[0]
            or (auc == current_best[0] and eer < current_best[1])):
        return (auc, eer, ap, thr, desc, per_video)
    return current_best

for (norm, tta), df in cache.items():
    for flip in [False, True]:
        df_use = df if not flip else df.assign(score=1 - df["score"])
        for filt in TRY_FILTERS:
            df_f = df_use
            if filt > 0:
                # keep only frames whose score is at least `filt` away from 0.5
                df_f = df_use[np.abs(df_use["score"] - 0.5) >= filt]
            if df_f.empty:
                continue

            tag = f"norm={norm}|TTA={tta}|flip={flip}"

            # aggregations per video
            grouped = df_f.groupby(["video_name","true"])["score"]

            # 1) median
            best = _consider(best, grouped.median().reset_index(),
                             f"{tag}|agg=median|filter={filt}")

            # 2) percentile 70 & 80
            for q in [0.7, 0.8]:
                best = _consider(best, grouped.quantile(q).reset_index(),
                                 f"{tag}|agg=perc{int(q*100)}|filter={filt}")

            # 3) top-k mean (rank within the same (video, label) groups used above)
            ranked = df_f.assign(
                rank=df_f.groupby(["video_name","true"])["score"].rank(ascending=False, method="first")
            )
            for k in TOPK_LIST:
                topk = ranked[ranked["rank"] <= k].groupby(["video_name","true"])["score"].mean().reset_index()
                if len(topk)==0:
                    continue
                best = _consider(best, topk, f"{tag}|agg=top{k}|filter={filt}")

            # 4) trimmed mean (videos too short to trim yield NaN and are dropped)
            for trim in TRIM_LIST:
                tdf = grouped.apply(lambda s: trimmed_mean(s.values, trim=trim)).reset_index(name="score").dropna()
                if len(tdf)==0:
                    continue
                best = _consider(best, tdf, f"{tag}|agg=trim{int(trim*100)}|filter={filt}")

            # 5) log-sum-exp pooling
            for a in LSE_ALPHA:
                lsed = grouped.apply(lambda s: logsumexp_pool(s.values, alpha=a)).reset_index(name="score")
                best = _consider(best, lsed, f"{tag}|agg=lsep{a}|filter={filt}")

if best is None:
    raise RuntimeError("No configuration produced per-video scores for both classes; "
                       "check REAL_DIR / FAKE_DIR contents.")

# Unpack the winning candidate selected by the search loop above.
best_auc, best_eer, best_ap, best_thr, best_desc, best_df = best
if SAVE_PER_VIDEO_CSV:
    # Persist the per-video scores of the winning configuration only.
    best_df.to_csv(CSV_PATH, index=False)

# ---- Final print (single line). Toggle SHOW_CONFIG to include/hide config. ----
print(f"FINAL (Per-Video): AUC={best_auc:.4f} | EER={best_eer:.4f} | AP={best_ap:.4f} | thr_EER≈{best_thr:.4f}")
if SHOW_CONFIG:
    print("Config:", best_desc)
if SAVE_PER_VIDEO_CSV:
    print("Per-video scores saved to:", CSV_PATH)


FINAL (Per-Video): AUC=0.6676 | EER=0.3693 | AP=0.6593 | thr_EER≈0.1034
Config: norm=no_norm|TTA=True|flip=True|agg=median|filter=0.3
Per-video scores saved to: /content/per_video_scores_best_last_try.csv


In [None]:
# === Robust: find frames in Drive or (re)extract to Drive, then build your table ===
# It will:
# 1) Mount Drive
# 2) Auto-find weights (effnb4_best.pth) anywhere in Drive
# 3) Look for frames in these places (in order):
#       /content/drive/My Drive/balanced_frames, /content/drive/MyDrive/balanced_frames, /content/balanced_frames
#    If not found, it will re-extract frames from your balanced videos in Drive to:
#       /content/drive/My Drive/balanced_frames
# 4) Make the per-video frame breakdown table you requested and save CSV

# -------------------- CONFIG (edit ONLY if your paths are different) --------------------
# Source videos; only read when no extracted frames are found.
VIDEOS_REAL_DIR = "/content/drive/My Drive/test dataset balanced/real"
VIDEOS_FAKE_DIR = "/content/drive/My Drive/test dataset balanced/fake"
FRAMES_DRIVE_ROOT = "/content/drive/My Drive/balanced_frames"   # destination for frames (persistent)
OUT_CSV = "/content/video_frame_breakdown.csv"

# Evaluation options (match your best)
USE_IMAGENET_NORM = False   # no_norm
USE_TTA = True              # average original + horizontal flip
FLIP_SCORES = True          # each score s is replaced by 1 - s before thresholding
THRESHOLD_MODE = "eer"      # "eer" or "fixed"
FIXED_THRESHOLD = 0.5       # only used when THRESHOLD_MODE is not "eer"

# Metadata fields for the table
DATASET_NAME  = "balanced_ffpp"
DETECTOR_NAME = "EfficientNet-B4"

# -------------------- IMPORTS & DRIVE --------------------
import os, glob, re, cv2, numpy as np, pandas as pd
from PIL import Image
import torch, torch.nn as nn
from torchvision import transforms
from torchvision.transforms import functional as TF
from sklearn.metrics import roc_curve

# Mount Google Drive when running on Colab. Outside Colab (or if mounting fails)
# the cell continues so that frames under /content can still be used, but the
# reason is reported instead of being silently discarded.
try:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=False)
except Exception as exc:
    print("[warn] Google Drive not mounted:", repr(exc))

# -------------------- UTILITIES --------------------
def count_images(folder):
    """Count paths under ``folder`` (recursively) that end in an image extension.

    Returns 0 when ``folder`` is not an existing directory.
    """
    if not os.path.isdir(folder):
        return 0
    exts = (".jpg",".jpeg",".png",".bmp",".webp")
    pattern = os.path.join(folder,"**","*")
    total = 0
    for candidate in glob.glob(pattern, recursive=True):
        if candidate.lower().endswith(exts):
            total += 1
    return total

def find_frames_root():
    """Return the first known frames root that has images in both ``real`` and ``fake``.

    The candidate locations are tried in a fixed order; ``None`` is returned when
    none of them qualifies.
    """
    search_order = (
        "/content/drive/My Drive/balanced_frames",
        "/content/drive/MyDrive/balanced_frames",
        "/content/balanced_frames",
    )
    for root in search_order:
        populated = all(
            count_images(os.path.join(root, label)) > 0 for label in ("real", "fake")
        )
        if populated:
            return root
    return None

def extract_even_frames(video_path, out_dir, max_frames=20):
    """Save up to ``max_frames`` evenly spaced frames of a video as JPEG files.

    Frames are written to ``out_dir`` as ``<video stem>_frame<NNNN>.jpg``.

    Args:
        video_path: path of the video to sample.
        out_dir: destination folder (created if missing).
        max_frames: upper bound on the number of frames sampled.

    Returns:
        Number of frames actually written. 0 when the video cannot be opened,
        reports no frames, or ``max_frames`` is not positive.
    """
    os.makedirs(out_dir, exist_ok=True)
    if max_frames <= 0:
        return 0
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            return 0
        total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total <= 0:
            return 0
        # Evenly spaced frame indices from the first to the last frame.
        idxs = np.linspace(0, total-1, num=min(max_frames, total), dtype=int)
        saved = 0
        base = os.path.splitext(os.path.basename(video_path))[0]
        for i, idx in enumerate(idxs):
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
            ok, frame = cap.read()
            if not ok:
                continue
            # imwrite reports failure through its return value, not an exception;
            # only count frames that really reached the disk.
            if cv2.imwrite(os.path.join(out_dir, f"{base}_frame{i:04d}.jpg"), frame):
                saved += 1
        return saved
    finally:
        # Always free the decoder, including on early returns and errors.
        cap.release()

# -------------------- 1) Ensure frames exist in Drive --------------------
# Use an existing frames folder when one is found; otherwise rebuild it from the videos.
FRAMES_ROOT = find_frames_root()
if FRAMES_ROOT is None:
    # Re-extract into Drive (persistent)
    real_out = os.path.join(FRAMES_DRIVE_ROOT, "real")
    fake_out = os.path.join(FRAMES_DRIVE_ROOT, "fake")
    os.makedirs(real_out, exist_ok=True)
    os.makedirs(fake_out, exist_ok=True)

    # Every entry of the two video folders is treated as a video; entries that
    # cannot be opened simply contribute zero frames (see extract_even_frames).
    real_videos = sorted(glob.glob(os.path.join(VIDEOS_REAL_DIR, "*")))
    fake_videos = sorted(glob.glob(os.path.join(VIDEOS_FAKE_DIR, "*")))
    if len(real_videos)==0 or len(fake_videos)==0:
        raise RuntimeError("No videos found in your balanced video folders. Check VIDEOS_REAL_DIR / VIDEOS_FAKE_DIR.")

    # Extract (20 frames per video; change if you want)
    for vp in real_videos:
        extract_even_frames(vp, real_out, max_frames=20)
    for vp in fake_videos:
        extract_even_frames(vp, fake_out, max_frames=20)

    FRAMES_ROOT = FRAMES_DRIVE_ROOT

# From here on the cell works with the per-class frame folders under FRAMES_ROOT.
REAL_FRAMES_DIR = os.path.join(FRAMES_ROOT, "real")
FAKE_FRAMES_DIR = os.path.join(FRAMES_ROOT, "fake")
print("✅ Using frames from:", FRAMES_ROOT)
print("Real frames:", count_images(REAL_FRAMES_DIR), " | Fake frames:", count_images(FAKE_FRAMES_DIR))

# -------------------- 2) Load weights & model --------------------
def find_weights(filename="effnb4_best.pth"):
    """Search the mounted Drive locations recursively for ``filename``.

    Returns:
        List of distinct matching paths, most recently modified first
        (empty when nothing is found or no search root exists).
    """
    def modified_at(path):
        return os.path.getmtime(path) if os.path.exists(path) else 0

    matches = []
    for base in ("/content/drive/My Drive", "/content/drive/MyDrive", "/content/drive"):
        if not os.path.exists(base):
            continue
        matches.extend(glob.glob(os.path.join(base, "**", filename), recursive=True))
    return sorted(set(matches), key=modified_at, reverse=True)

# Pick the most recently modified match (find_weights sorts newest first).
weights_candidates = find_weights("effnb4_best.pth")
if not weights_candidates:
    raise FileNotFoundError("Couldn't find 'effnb4_best.pth' in Drive. Please put it there or change the filename.")
WEIGHTS_PATH = weights_candidates[0]
print("✅ Using weights:", WEIGHTS_PATH)

# Import the backbone library, installing the pinned version on first failure.
try:
    from efficientnet_pytorch import EfficientNet
except Exception:
    !pip -q install efficientnet-pytorch==0.7.1
    from efficientnet_pytorch import EfficientNet

# Prefer the GPU when PyTorch can see one; otherwise run on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class DeepfakeBenchEfficientNet(nn.Module):
    """EfficientNet-B4 trunk plus a 2-output linear layer, both under ``backbone``."""

    def __init__(self):
        super().__init__()
        self.backbone = nn.Module()
        # Build the architecture and replace its classifier with a pass-through.
        trunk = EfficientNet.from_name('efficientnet-b4')
        trunk._fc = nn.Identity()
        self.backbone.efficientnet = trunk
        self.backbone.last_layer = nn.Linear(1792, 2)  # [real, fake]

    def forward(self, x):
        """Return the two logits for input batch ``x``."""
        features = self.backbone.efficientnet(x)
        return self.backbone.last_layer(features)

model = DeepfakeBenchEfficientNet().to(device)
state = torch.load(WEIGHTS_PATH, map_location=device)
# If the checkpoint is a dict of string keys that all start with 'module.',
# remove that prefix (once per key) so the names match this model.
if isinstance(state, dict) and all(isinstance(k, str) for k in state.keys()):
    if all(k.startswith("module.") for k in state.keys()):
        state = {k.replace("module.", "", 1): v for k, v in state.items()}
model.load_state_dict(state)
model.eval()  # inference mode
print("✅ Model loaded on", device)

# -------------------- 3) Build your table --------------------
IMG_EXTS = (".jpg",".jpeg",".png",".bmp",".webp")
def is_img(p):
    """Case-insensitive check that ``p`` ends with an extension listed in ``IMG_EXTS``."""
    name = p.lower()
    return any(name.endswith(suffix) for suffix in IMG_EXTS)

def infer_video_name(path):
    """Derive a video identifier from a frame file path.

    A trailing ``_frame<digits>`` is removed from the file stem; failing that,
    a trailing ``_<digits>`` or ``-<digits>`` is removed. If neither rule leaves
    a non-empty name, the stem itself is returned.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    frame_tag = re.search(r"_frame\d+$", stem)
    if frame_tag is not None and frame_tag.start() > 0:
        return stem[:frame_tag.start()]
    trimmed = re.sub(r"[_\-]\d+$", "", stem)
    return trimmed or stem

# transforms (no_norm by default)
# Resize + ToTensor always; ImageNet mean/std normalisation only when requested.
transform = transforms.Compose(
    [transforms.Resize((380, 380)), transforms.ToTensor()]
    + ([transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])]
       if USE_IMAGENET_NORM else [])
)

def predict_prob_fake(img: Image.Image) -> float:
    """Return the softmax probability at logit index 1 for one PIL image.

    Uses the module-level ``transform``, ``model`` and ``device``; runs without
    gradient tracking.
    """
    with torch.no_grad():
        batch = transform(img).unsqueeze(0).to(device)
        probabilities = torch.softmax(model(batch), dim=1)
        return float(probabilities[0, 1].item())

def predict_prob_fake_tta(img: Image.Image) -> float:
    """Flip-TTA variant: average the logits of the image and its mirror image.

    Returns the softmax probability at index 1 of the averaged logits.
    """
    with torch.no_grad():
        plain = transform(img).unsqueeze(0).to(device)
        mirrored = transform(TF.hflip(img)).unsqueeze(0).to(device)
        logits_plain = model(plain)
        logits_mirrored = model(mirrored)
        averaged = (logits_plain + logits_mirrored) / 2
        return float(torch.softmax(averaged, dim=1)[0, 1].item())

def gather_paths(folder):
    """List image files for one class folder.

    When ``folder`` contains sub-directories, only the images directly inside
    those sub-directories are returned (sub-directory by sub-directory, each
    sorted). Otherwise the images directly inside ``folder`` are returned sorted.
    """
    def images_in(directory):
        return sorted(p for p in glob.glob(os.path.join(directory,"*")) if is_img(p))

    subdirs = sorted(d for d in glob.glob(os.path.join(folder,"*")) if os.path.isdir(d))
    if not subdirs:
        return images_in(folder)

    collected = []
    for sub in subdirs:
        collected.extend(images_in(sub))
    return collected

# Score every frame of both classes; one row per frame.
rows = []
for folder, lbl in [(REAL_FRAMES_DIR, 0), (FAKE_FRAMES_DIR, 1)]:
    files = gather_paths(folder)
    if len(files) == 0:
        raise RuntimeError(f"No images found under: {folder}")
    for p in files:
        # NOTE(review): an unreadable image raises here and stops the whole loop.
        img = Image.open(p).convert("RGB")
        s = predict_prob_fake_tta(img) if USE_TTA else predict_prob_fake(img)
        if FLIP_SCORES:
            s = 1.0 - s
        rows.append((infer_video_name(p), "real" if lbl==0 else "fake", s, p))

df = pd.DataFrame(rows, columns=["video_name","true_label","prob_fake","frame_path"])

# threshold (EER or fixed)
# In "eer" mode the threshold is the ROC operating point (over all frames, using
# their true labels) where |FNR - FPR| is smallest.
if THRESHOLD_MODE.lower() == "eer":
    y_true = (df["true_label"] == "fake").astype(int).values
    y_score = df["prob_fake"].values
    fpr, tpr, thr = roc_curve(y_true, y_score)
    fnr = 1 - tpr
    idx = int(np.nanargmin(np.abs(fnr - fpr)))
    thr_use = float(thr[idx])
else:
    thr_use = float(FIXED_THRESHOLD)

# Per-frame decision and whether it matches the label.
df["frame_pred"] = np.where(df["prob_fake"] >= thr_use, "fake", "real")
df["frame_correct"] = (df["frame_pred"] == df["true_label"]).astype(int)

def summarize_video(group):
    """Collapse the frame rows of one video into a single summary row.

    Reads the module-level ``thr_use``, ``DATASET_NAME`` and ``DETECTOR_NAME``.
    Two video-level decisions are produced: one from the mean score against the
    threshold, one from the majority of frame decisions (an exact 50/50 split
    falls back to the mean-based decision).
    """
    truth = group["true_label"].iloc[0]
    n = len(group)
    n_correct = int(group["frame_correct"].sum())

    frame_scores = group["prob_fake"]
    acc = n_correct / n if n > 0 else np.nan
    avg = float(frame_scores.mean()) if n > 0 else np.nan
    std = float(frame_scores.std(ddof=0)) if n > 1 else 0.0

    pred_avg = "fake" if avg >= thr_use else "real"

    fake_share = (group["frame_pred"] == "fake").mean()
    if fake_share > 0.5:
        pred_maj = "fake"
    elif fake_share == 0.5:
        pred_maj = pred_avg
    else:
        pred_maj = "real"

    return pd.Series({
        "dataset": DATASET_NAME,
        "detector": DETECTOR_NAME,
        "video_name": group["video_name"].iloc[0],
        "true_label": truth,
        "n_frames": n,
        "n_correct_frames": n_correct,
        "n_wrong_frames": int(n - n_correct),
        "frame_accuracy": round(acc, 4),
        "avg_prob_fake": round(avg, 4),
        "std_prob_fake": round(std, 4),
        "video_pred_by_avg": pred_avg,
        "video_correct_by_avg": int(pred_avg == truth),
        "video_pred_by_majority": pred_maj,
        "video_correct_by_majority": int(pred_maj == truth)
    })

# One summary row per (video_name, true_label) group, written to OUT_CSV.
per_video = df.groupby(["video_name","true_label"], as_index=False).apply(summarize_video).reset_index(drop=True)
per_video.to_csv(OUT_CSV, index=False)
print(f"✅ Saved table to: {OUT_CSV}")
# Preview of the first rows plus the threshold that produced the decisions.
print(per_video.head(10).to_string(index=False))
print("Rows:", len(per_video))
print(f"Decision threshold used: {thr_use:.4f}  (mode: {THRESHOLD_MODE})")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Using frames from: /content/drive/My Drive/balanced_frames
Real frames: 1020  | Fake frames: 1020
✅ Using weights: /content/drive/My Drive/DeepfakeBench_weights/effnb4_best.pth
✅ Model loaded on cpu


KeyboardInterrupt: 

In [None]:
# ===== SAFE MODE: small batches, no workers, memory-friendly inference =====
# 1) Paths
REAL_FRAMES_DIR = "/content/drive/My Drive/balanced_frames/real"
FAKE_FRAMES_DIR = "/content/drive/My Drive/balanced_frames/fake"
WEIGHTS_PATH    = "/content/drive/My Drive/DeepfakeBench_weights/effnb4_best.pth"

# 2) Output
OUT_CSV = "/content/video_frame_breakdown.csv"
DATASET_NAME  = "balanced_ffpp"
DETECTOR_NAME = "EfficientNet-B4"

# 3) Eval settings
USE_IMAGENET_NORM = False     # your best was "no_norm"
FLIP_SCORES = True            # scores are replaced by 1 - score after inference
THRESHOLD_MODE = "eer"        # or "fixed"
FIXED_THRESHOLD = 0.5         # only used when THRESHOLD_MODE is not "eer"

# 4) Stability / speed knobs
BATCH_SIZE = 8                # small to avoid OOM; you can try 16 later
IMG_SIZE = 380                # frames are resized to IMG_SIZE x IMG_SIZE
LOCAL_COPY = False            # set True if Drive I/O keeps crashing
PRINT_EVERY = 500             # progress print (approximate number of frames between messages)

import os, glob, re, shutil, numpy as np, pandas as pd
from PIL import Image, UnidentifiedImageError
import torch, torch.nn as nn
from torchvision import transforms
from sklearn.metrics import roc_curve

# GPU & memory settings
# NOTE(review): torch is already imported above and CUDA may have been initialised
# by an earlier cell; confirm this allocator setting still takes effect at this point.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True,max_split_size_mb:64"

from google.colab import drive
drive.mount('/content/drive', force_remount=False)

def count_imgs(folder):
    """Count paths under ``folder`` (recursively) whose name ends in an image extension."""
    exts = (".jpg",".jpeg",".png",".bmp",".webp")
    everything = glob.glob(os.path.join(folder,"**","*"), recursive=True)
    return sum(1 for candidate in everything if candidate.lower().endswith(exts))

# Fail fast when the weights file or either frame folder is missing/empty.
assert os.path.isfile(WEIGHTS_PATH), f"Weights not found: {WEIGHTS_PATH}"
assert count_imgs(REAL_FRAMES_DIR) > 0, f"No images in {REAL_FRAMES_DIR}"
assert count_imgs(FAKE_FRAMES_DIR) > 0, f"No images in {FAKE_FRAMES_DIR}"

print("CUDA available:", torch.cuda.is_available())
# Prefer the GPU when PyTorch can see one; otherwise run on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Optional: copy frames to /content for stability
if LOCAL_COPY:
    import subprocess
    def rsync(src, dst):
        """Mirror the contents of ``src`` into ``dst`` (created if missing).

        Tries the ``rsync`` command first; if that raises for any reason, falls
        back to copying each top-level entry of ``src`` with ``shutil``.
        """
        os.makedirs(dst, exist_ok=True)
        command = ["rsync","-a","--delete",src + "/", dst + "/"]
        try:
            subprocess.run(command, check=True)
        except Exception:
            # Fallback: plain copy of files and (recursively) directories.
            for entry in glob.glob(os.path.join(src, "*")):
                target = os.path.join(dst, os.path.basename(entry))
                if not os.path.isdir(entry):
                    shutil.copy2(entry, target)
                else:
                    shutil.copytree(entry, target, dirs_exist_ok=True)
    dst_real = "/content/fast_frames/real"
    dst_fake = "/content/fast_frames/fake"
    rsync(REAL_FRAMES_DIR, dst_real)
    rsync(FAKE_FRAMES_DIR, dst_fake)
    REAL_FRAMES_DIR, FAKE_FRAMES_DIR = dst_real, dst_fake
    print("Using local copies:", REAL_FRAMES_DIR, "|", FAKE_FRAMES_DIR)

# ----- Model -----
try:
    from efficientnet_pytorch import EfficientNet
except Exception:
    !pip -q install efficientnet-pytorch==0.7.1
    from efficientnet_pytorch import EfficientNet

class DeepfakeBenchEfficientNet(nn.Module):
    """EfficientNet-B4 encoder with a 2-logit linear head, grouped under ``backbone``."""

    def __init__(self):
        super().__init__()
        # Encoder: architecture only, with its classifier swapped for a pass-through.
        encoder = EfficientNet.from_name('efficientnet-b4')
        encoder._fc = nn.Identity()
        head = nn.Linear(1792, 2)  # [real, fake]

        self.backbone = nn.Module()
        self.backbone.efficientnet = encoder
        self.backbone.last_layer = head

    def forward(self, x):
        """Return the two logits for input batch ``x``."""
        pooled = self.backbone.efficientnet(x)
        logits = self.backbone.last_layer(pooled)
        return logits

# Load the checkpoint onto the CPU first; the model is moved to `device` below.
state = torch.load(WEIGHTS_PATH, map_location="cpu")
# If the checkpoint is a dict of string keys that all start with 'module.',
# remove that prefix (once per key) so the names match this model.
if isinstance(state, dict) and all(isinstance(k,str) for k in state.keys()):
    if all(k.startswith("module.") for k in state.keys()):
        state = {k.replace("module.","",1): v for k,v in state.items()}
model = DeepfakeBenchEfficientNet().to(device)
model.load_state_dict(state)
model.eval()  # inference mode
print("✅ Model loaded on:", device)

# ----- Helpers -----
IMG_EXTS = (".jpg",".jpeg",".png",".bmp",".webp")
def is_img(p):
    """Return True if ``p`` (compared in lower case) ends with an ``IMG_EXTS`` suffix."""
    lowered = p.lower()
    for suffix in IMG_EXTS:
        if lowered.endswith(suffix):
            return True
    return False

def infer_video_name(path):
    """Map a frame path to the name of the video it came from.

    Rule 1: strip a trailing ``_frame<digits>`` from the stem (if something is
    left). Rule 2: otherwise strip a trailing ``_<digits>`` / ``-<digits>``.
    If neither rule yields a non-empty name, the stem is returned unchanged.
    """
    filename = os.path.basename(path)
    stem, _ext = os.path.splitext(filename)

    tagged = re.search(r"_frame\d+$", stem)
    if tagged and stem[:tagged.start()]:
        return stem[:tagged.start()]

    without_counter = re.sub(r"[_\-]\d+$", "", stem)
    if without_counter:
        return without_counter
    return stem

# Preprocessing: resize + ToTensor; the ImageNet mean/std normalisation is appended
# only when USE_IMAGENET_NORM is true (the conditional picks one of the two lists).
transform = transforms.Compose(
    [transforms.Resize((IMG_SIZE, IMG_SIZE)), transforms.ToTensor()] if not USE_IMAGENET_NORM else
    [transforms.Resize((IMG_SIZE, IMG_SIZE)), transforms.ToTensor(),
     transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])]
)

# Build file list once
# Only images directly inside each folder are listed (no recursion), real first.
real_files = sorted([p for p in glob.glob(os.path.join(REAL_FRAMES_DIR, "*")) if is_img(p)])
fake_files = sorted([p for p in glob.glob(os.path.join(FAKE_FRAMES_DIR, "*")) if is_img(p)])
paths = real_files + fake_files
# Labels aligned with `paths`: 0 = real, 1 = fake.
labels = np.array([0]*len(real_files) + [1]*len(fake_files), dtype=np.int64)

# ----- Memory-safe batched inference (manual loop) -----
softmax = torch.nn.Softmax(dim=1)
# One score slot per path, initialised to 0.0.
# NOTE(review): a slot that is never written keeps 0.0 - check how frames skipped
# by the loader are treated downstream.
scores = np.zeros(len(paths), dtype=np.float32)

def load_batch(start, end):
    """Load and preprocess ``paths[start:end]`` into one stacked tensor.

    Images that cannot be opened or decoded are skipped.

    Returns:
        ``(batch, indices)`` where ``indices`` are the positions in ``paths`` of
        the images that made it into ``batch``; ``(None, None)`` if none did.
    """
    tensors, kept = [], []
    for position in range(start, end):
        try:
            with Image.open(paths[position]) as img:
                tensor = transform(img.convert("RGB"))
        except (UnidentifiedImageError, OSError):
            # skip corrupted image
            continue
        tensors.append(tensor)
        kept.append(position)
    if len(tensors) == 0:
        return None, None
    return torch.stack(tensors, dim=0), kept

torch.cuda.empty_cache()
# Remember which frames were actually scored. Frames skipped by load_batch would
# otherwise keep the initial score 0.0, which becomes 1.0 after FLIP_SCORES and
# would enter the table as confident "fake" predictions.
scored_mask = np.zeros(len(paths), dtype=bool)
# Mixed precision only on CUDA. torch.autocast replaces the deprecated
# torch.cuda.amp.autocast context manager.
use_amp = device.type == "cuda"
for i in range(0, len(paths), BATCH_SIZE):
    xb, idxs = load_batch(i, min(i+BATCH_SIZE, len(paths)))
    if xb is None:
        continue
    xb = xb.to(device, non_blocking=False)
    with torch.no_grad(), torch.autocast(device_type=device.type, enabled=use_amp):
        logits = model(xb)
        p_fake = softmax(logits)[:,1].float().detach().cpu().numpy()
    batch_idx = np.array(idxs, dtype=int)
    scores[batch_idx] = p_fake
    scored_mask[batch_idx] = True
    if (i // BATCH_SIZE) % max(1, (PRINT_EVERY // BATCH_SIZE)) == 0:
        print(f"Scored {min(i+BATCH_SIZE, len(paths))}/{len(paths)} frames…")
    # Release the batch before the next one to keep peak GPU memory low.
    del xb, logits
    torch.cuda.empty_cache()

n_skipped = int((~scored_mask).sum())
if n_skipped:
    print(f"[warn] {n_skipped} unreadable frame(s) were skipped and are excluded from the table.")

if FLIP_SCORES:
    scores = 1.0 - scores

df = pd.DataFrame({
    "video_name": [infer_video_name(p) for p in paths],
    "true_label": np.where(labels==1, "fake", "real"),
    "prob_fake": scores,
    "frame_path": paths
})
# Keep only frames that received a real model score.
df = df[scored_mask].reset_index(drop=True)

# ----- Threshold -----
# In "eer" mode the threshold is the ROC operating point (over all frames, using
# their true labels) where |FNR - FPR| is smallest; otherwise the fixed value is used.
if THRESHOLD_MODE.lower() == "eer":
    fpr, tpr, thr = roc_curve((df["true_label"]=="fake").astype(int).values, df["prob_fake"].values)
    fnr = 1 - tpr
    idx = int(np.nanargmin(np.abs(fnr - fpr)))
    thr_use = float(thr[idx])
else:
    thr_use = float(FIXED_THRESHOLD)

# Per-frame decision and whether it matches the label.
df["frame_pred"] = np.where(df["prob_fake"] >= thr_use, "fake", "real")
df["frame_correct"] = (df["frame_pred"] == df["true_label"]).astype(int)

# ----- Per-video table (your columns) -----
def summarize_video(group):
    """Condense the frame rows of one video into a single summary row.

    ``group`` holds the frame-level rows of one video; the columns read are
    "video_name", "true_label", "prob_fake", "frame_pred" and "frame_correct".
    Two video-level verdicts are derived, both against the notebook-level
    ``thr_use``:

    * by average  - "fake" when the mean ``prob_fake`` is >= ``thr_use``;
    * by majority - "fake" when more than half of the frames were predicted
      "fake"; an exact 50/50 split takes the by-average verdict.

    Returns a ``pd.Series`` whose keys are the output table's columns.
    """
    probs = group["prob_fake"]
    n = len(group)
    has_frames = n > 0

    # Frame-level bookkeeping.
    n_correct = int(group["frame_correct"].sum())
    n_wrong = int(n - n_correct)
    acc = n_correct / n if has_frames else np.nan
    avg = float(probs.mean()) if has_frames else np.nan
    # Population std (ddof=0); defined as 0.0 for a single frame.
    std = 0.0 if n <= 1 else float(probs.std(ddof=0))

    truth = group["true_label"].iloc[0]

    # Verdict from the average score.
    if avg >= thr_use:
        pred_avg = "fake"
    else:
        pred_avg = "real"
    correct_avg = int(pred_avg == truth)

    # Verdict from the share of frames predicted "fake".
    fake_share = (group["frame_pred"] == "fake").mean()
    if fake_share > 0.5:
        pred_maj = "fake"
    elif fake_share == 0.5:
        pred_maj = pred_avg
    else:
        pred_maj = "real"
    correct_maj = int(pred_maj == truth)

    row = {
        "dataset": DATASET_NAME,
        "detector": DETECTOR_NAME,
        "video_name": group["video_name"].iloc[0],
        "true_label": truth,
        "n_frames": n,
        "n_correct_frames": n_correct,
        "n_wrong_frames": n_wrong,
        "frame_accuracy": round(acc, 4),
        "avg_prob_fake": round(avg, 4),
        "std_prob_fake": round(std, 4),
        "video_pred_by_avg": pred_avg,
        "video_correct_by_avg": correct_avg,
        "video_pred_by_majority": pred_maj,
        "video_correct_by_majority": correct_maj,
    }
    return pd.Series(row)

# Build the per-video table by iterating over the groups directly.
# DataFrameGroupBy.apply would hand the grouping columns to summarize_video,
# which pandas 2.2 deprecates (it warns on every run); summarize_video needs
# those columns, so each group's full frame is passed to it explicitly instead.
per_video = pd.DataFrame(
    [
        summarize_video(frames).to_dict()
        for _, frames in df.groupby(["video_name", "true_label"])
    ]
)
per_video.to_csv(OUT_CSV, index=False)
print(f"✅ Saved table to: {OUT_CSV}")
print(per_video.head(8).to_string(index=False))
print("Rows:", len(per_video))
print(f"Decision threshold used: {thr_use:.4f}  (mode: {THRESHOLD_MODE})")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
CUDA available: True
✅ Model loaded on: cuda


  with torch.no_grad(), torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):


Scored 8/2040 frames…
Scored 504/2040 frames…
Scored 1000/2040 frames…
Scored 1496/2040 frames…
Scored 1992/2040 frames…
✅ Saved table to: /content/video_frame_breakdown.csv
      dataset        detector video_name true_label  n_frames  n_correct_frames  n_wrong_frames  frame_accuracy  avg_prob_fake  std_prob_fake video_pred_by_avg  video_correct_by_avg video_pred_by_majority  video_correct_by_majority
balanced_ffpp EfficientNet-B4    000_003       fake        20                 0              20            0.00         0.5973         0.0547              real                     0                   real                          0
balanced_ffpp EfficientNet-B4    010_005       fake        20                 1              19            0.05         0.4391         0.1074              real                     0                   real                          0
balanced_ffpp EfficientNet-B4    011_805       fake        20                 0              20            0.00         0.4297    

  per_video = df.groupby(["video_name","true_label"], as_index=False).apply(summarize_video).reset_index(drop=True)


In [None]:
import pandas as pd

# Lift pandas' row-count and column-width display limits for this session.
for _option in ("display.max_rows", "display.max_colwidth"):
    pd.set_option(_option, None)

# Order by label, then by video name, and print the whole table as plain text.
_per_video_sorted = per_video.sort_values(by=["true_label", "video_name"])
print(_per_video_sorted.to_string(index=False))


      dataset        detector                            video_name true_label  n_frames  n_correct_frames  n_wrong_frames  frame_accuracy  avg_prob_fake  std_prob_fake video_pred_by_avg  video_correct_by_avg video_pred_by_majority  video_correct_by_majority
balanced_ffpp EfficientNet-B4                               000_003       fake        20                 0              20            0.00         0.5973         0.0547              real                     0                   real                          0
balanced_ffpp EfficientNet-B4                               010_005       fake        20                 1              19            0.05         0.4391         0.1074              real                     0                   real                          0
balanced_ffpp EfficientNet-B4                               011_805       fake        20                 0              20            0.00         0.4297         0.0848              real                     0               

In [None]:
# Copy the per-video breakdown CSV to Google Drive.
# shutil.copyfile raises if the copy fails, so the confirmation below is only
# printed once the file is really in place; a failing `!cp` shell line would
# not stop the cell and the message would be printed regardless.
import shutil

shutil.copyfile(
    "/content/video_frame_breakdown.csv",
    "/content/drive/My Drive/video_frame_breakdown.csv",
)
print("Copied to Drive → My Drive/video_frame_breakdown.csv")


Copied to Drive → My Drive/video_frame_breakdown.csv


In [None]:
# === Pick best thresholds for accuracy and update the table ===
# NOTE: both thresholds are tuned on the very videos whose accuracy is printed
# below, so the reported accuracies are optimistic for unseen data.
import numpy as np, pandas as pd
from sklearn.metrics import roc_curve, accuracy_score, precision_recall_fscore_support

assert 'per_video' in globals(), "per_video not found. Run the breakdown cell first."
assert 'df' in globals(), "df (frame scores) not found. Run the breakdown cell first."


def _majority_vote(frame_probs, frame_thr, avg_thr, fallback_avg):
    """Video-level verdict (1 = fake, 0 = real) by majority vote over frames.

    frame_probs  : array of the video's frame-level prob_fake values.
    frame_thr    : a frame counts as "fake" when its probability is >= frame_thr.
    avg_thr      : threshold applied to an average score when the vote cannot decide.
    fallback_avg : the video's stored average score, used only if it has no frames.

    An exact 50/50 split is resolved by comparing the mean of frame_probs
    against avg_thr.
    """
    if len(frame_probs) == 0:
        return 1 if fallback_avg >= avg_thr else 0
    fake_share = (frame_probs >= frame_thr).astype(int).mean()
    if fake_share == 0.5:
        return 1 if np.mean(frame_probs) >= avg_thr else 0
    return 1 if fake_share > 0.5 else 0


# --- 1) BEST THRESHOLD FOR VIDEO-LEVEL AVG SCORE ---
y_vid = (per_video["true_label"] == "fake").astype(int).values
s_avg = per_video["avg_prob_fake"].values

# Candidate thresholds from ROC (good coverage); clipping folds the
# out-of-range sentinel threshold that roc_curve prepends into [0, 1].
thr_candidates = roc_curve(y_vid, s_avg)[2]
thr_candidates = np.unique(np.clip(thr_candidates, 0.0, 1.0))

best_acc_avg, best_thr_avg, best_stats_avg = -1, 0.5, None
for t in thr_candidates:
    pred = (s_avg >= t).astype(int)
    acc = accuracy_score(y_vid, pred)
    # Strict ">" keeps the lowest threshold among those tied for best accuracy.
    if acc > best_acc_avg:
        # store a few helpful stats
        p, r, f1, _ = precision_recall_fscore_support(y_vid, pred, average="binary", zero_division=0)
        best_acc_avg, best_thr_avg, best_stats_avg = acc, float(t), (p, r, f1)

# Update avg-based columns with the best video threshold
per_video["video_pred_by_avg"] = np.where(per_video["avg_prob_fake"] >= best_thr_avg, "fake", "real")
per_video["video_correct_by_avg"] = (per_video["video_pred_by_avg"] == per_video["true_label"]).astype(int)

print(f"✅ Best video threshold (by average score): {best_thr_avg:.4f}")
print(f"   Video accuracy: {best_acc_avg*100:.1f}%  | Precision: {best_stats_avg[0]:.3f}  | Recall: {best_stats_avg[1]:.3f}  | F1: {best_stats_avg[2]:.3f}")

# --- 2) BEST THRESHOLD FOR FRAME-LEVEL MAJORITY VOTE ---
videos = per_video["video_name"].tolist()
truth_map = {row.video_name: (1 if row.true_label == "fake" else 0) for row in per_video.itertuples()}

# Per-video frame score lists, in the order the frames appear in df.
frames_by_video = {}
for name, prob in zip(df["video_name"].tolist(), df["prob_fake"].tolist()):
    frames_by_video.setdefault(name, []).append(prob)

# Convert each list to an array once instead of once per candidate threshold.
_frame_arrays = {name: np.array(probs) for name, probs in frames_by_video.items()}
_no_frames = np.array([])

# Stored average per video (first row wins), only needed if a video has no frames.
_avg_by_video = {}
for name, avg in zip(videos, per_video["avg_prob_fake"].tolist()):
    _avg_by_video.setdefault(name, avg)

# Candidate thresholds from frame scores (quantiles to keep it fast)
all_frame_scores = df["prob_fake"].values
quantiles = np.linspace(0, 1, 201)
thr_candidates_frames = np.unique(np.quantile(all_frame_scores, quantiles))

best_acc_maj, best_thr_maj = -1, 0.5
for t in thr_candidates_frames:
    correct = 0
    for v in videos:
        pred = _majority_vote(_frame_arrays.get(v, _no_frames), t, best_thr_avg, _avg_by_video[v])
        correct += int(pred == truth_map[v])
    acc = correct / len(videos)
    if acc > best_acc_maj:
        best_acc_maj, best_thr_maj = acc, float(t)

# Update majority columns using the best frame threshold
maj_preds = [
    "fake"
    if _majority_vote(_frame_arrays.get(v, _no_frames), best_thr_maj, best_thr_avg, _avg_by_video[v]) == 1
    else "real"
    for v in videos
]

per_video["video_pred_by_majority"] = maj_preds
per_video["video_correct_by_majority"] = (per_video["video_pred_by_majority"] == per_video["true_label"]).astype(int)

print(f"✅ Best frame threshold (for majority vote): {best_thr_maj:.4f}")
print(f"   Video accuracy (majority): {best_acc_maj*100:.1f}%")

# --- 3) Save updated table ---
out_csv = "/content/video_frame_breakdown_best_thresholds.csv"
per_video.to_csv(out_csv, index=False)
print("📁 Saved:", out_csv)

# (Optional) copy to Drive
# !cp -f /content/video_frame_breakdown_best_thresholds.csv "/content/drive/My Drive/video_frame_breakdown_best_thresholds.csv"


✅ Best video threshold (by average score): 0.2283
   Video accuracy: 50.0%  | Precision: 0.500  | Recall: 1.000  | F1: 0.667
✅ Best frame threshold (for majority vote): 0.0808
   Video accuracy (majority): 50.0%
📁 Saved: /content/video_frame_breakdown_best_thresholds.csv


In [None]:
# === Show full per-video table in Colab, then save to Drive ===
import os, time
import pandas as pd

# If you haven't already in this session:
from google.colab import drive
drive.mount('/content/drive', force_remount=False)

# 1) Pick the table: the in-memory per_video wins; otherwise the first CSV
#    that exists on disk, best-threshold version first.
if 'per_video' in globals():
    table_df = per_video.copy()
else:
    _csv_fallbacks = (
        "/content/video_frame_breakdown_best_thresholds.csv",
        "/content/video_frame_breakdown.csv",
    )
    _found_csv = next((p for p in _csv_fallbacks if os.path.exists(p)), None)
    if _found_csv is None:
        raise RuntimeError("No table found. Re-run the breakdown cell first.")
    table_df = pd.read_csv(_found_csv)

# 2) Print every row, ordered by label and then by video name.
for _option in ("display.max_rows", "display.max_colwidth"):
    pd.set_option(_option, None)
table_df = table_df.sort_values(by=["true_label", "video_name"])
print(table_df.to_string(index=False))

# 3) Write to Drive under a timestamped name so earlier exports are kept.
ts = time.strftime("%Y%m%d-%H%M%S")
drive_path = f"/content/drive/My Drive/video_frame_breakdown_{ts}.csv"
table_df.to_csv(drive_path, index=False)
print("\n✅ Saved to Drive:", drive_path)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
      dataset        detector                            video_name true_label  n_frames  n_correct_frames  n_wrong_frames  frame_accuracy  avg_prob_fake  std_prob_fake video_pred_by_avg  video_correct_by_avg video_pred_by_majority  video_correct_by_majority
balanced_ffpp EfficientNet-B4                               000_003       fake        20                 0              20            0.00         0.5973         0.0547              fake                     1                   fake                          1
balanced_ffpp EfficientNet-B4                               010_005       fake        20                 1              19            0.05         0.4391         0.1074              fake                     1                   fake                          1
balanced_ffpp EfficientNet-B4                               011_805       fake        20      

In [None]:
# === Compact per-video table (fixes “all fake yes” by choosing a balanced threshold) ===
# Prints ONLY the table with: dataset, detector, video_name, true_label, correctly_predicted (yes/no)

import numpy as np, pandas as pd
from sklearn.metrics import roc_curve, roc_auc_score

# ---- settings (edit names if you want) ----
# DATASET_NAME and DETECTOR_NAME are copied verbatim into the "dataset" and
# "detector" columns of the compact table built at the end of this cell.
DATASET_NAME  = "balanced_ffpp"
DETECTOR_NAME = "EfficientNet-B4"
# FILTER is video_score's default `filt`: a frame is kept when its prob_fake is at
# least FILTER away from 0.5; if no frame of a video passes, all its frames are used.
FILTER = 0.30      # drop low-confidence frames: keep only |p-0.5| >= FILTER before aggregation
# AGG is video_score's default `agg`.
AGG = "median"     # per-video aggregator: "median" (recommended)

# ---- require frame scores DataFrame 'df' in memory ----
# df must have columns: video_name, true_label in {"real","fake"}, prob_fake (already oriented if you flipped earlier)
assert 'df' in globals(), "Run your scoring cell first to create 'df' (frame scores)."

# 1) per-video score with filtering (fallback to unfiltered if a video is emptied by the filter)
def video_score(group, filt=FILTER, agg=AGG):
    """Collapse one video's frame probabilities into a single score.

    Parameters
    ----------
    group : DataFrame-like
        Frame rows of one video; only the "prob_fake" column is read.
    filt : float
        Confidence filter. A frame is kept when ``|prob_fake - 0.5| >= filt``;
        if no frame passes, every frame of the video is used instead.
    agg : str
        Aggregator applied to the kept frames: "median" or "mean".

    Returns
    -------
    float
        The aggregated score, or 0.5 when the video has no frames at all.

    Raises
    ------
    ValueError
        If ``agg`` is neither "median" nor "mean". Previously every value was
        silently treated as "median".
    """
    if agg not in ("median", "mean"):
        raise ValueError(f"Unknown agg {agg!r}; expected 'median' or 'mean'.")

    s = group["prob_fake"].values
    keep = np.abs(s - 0.5) >= filt
    # Fall back to the unfiltered frames when the filter removes everything.
    s_use = s[keep] if keep.any() else s
    if not s_use.size:
        return 0.5

    reducer = np.median if agg == "median" else np.mean
    return float(reducer(s_use))

# One score per (video, label). include_groups=False keeps the grouping columns
# out of the frame handed to video_score (it only reads "prob_fake"), which is
# the behaviour pandas 2.2 asks for and silences its DeprecationWarning.
vid = (
    df.groupby(["video_name", "true_label"])
      .apply(video_score, include_groups=False)
      .reset_index(name="video_score")
)

# 2) pick the correct score orientation (use the one with higher AUC so fakes tend to score higher)
# NOTE: orientation (here) and threshold (step 3) are both chosen with the true
# labels of these same videos, so the "correctly_predicted" column is optimistic.
y = (vid["true_label"] == "fake").astype(int).values
s = vid["video_score"].values
auc_as = roc_auc_score(y, s)
auc_fl = roc_auc_score(y, 1 - s)
if auc_fl > auc_as:
    s = 1 - s

# 3) choose a threshold that maximizes **balanced accuracy** (Youden’s J = TPR - FPR)
fpr, tpr, thr = roc_curve(y, s)
J = tpr - fpr
j_best = int(np.argmax(J))
thr_best = float(thr[j_best])

# 4) decisions and compact table
pred = (s >= thr_best).astype(int)
correct = (pred == y).astype(int)

out = pd.DataFrame({
    "dataset": DATASET_NAME,
    "detector": DETECTOR_NAME,
    "video_name": vid["video_name"].values,
    "true_label": vid["true_label"].values,
    "correctly_predicted": np.where(correct==1, "yes", "no")
}).sort_values(["true_label","video_name"])

pd.set_option("display.max_rows", None)
print(out.to_string(index=False))


      dataset        detector                            video_name true_label correctly_predicted
balanced_ffpp EfficientNet-B4                               000_003       fake                 yes
balanced_ffpp EfficientNet-B4                               010_005       fake                 yes
balanced_ffpp EfficientNet-B4                               011_805       fake                 yes
balanced_ffpp EfficientNet-B4                               012_026       fake                 yes
balanced_ffpp EfficientNet-B4                               013_883       fake                 yes
balanced_ffpp EfficientNet-B4                               014_790       fake                  no
balanced_ffpp EfficientNet-B4                               015_919       fake                 yes
balanced_ffpp EfficientNet-B4                               016_209       fake                  no
balanced_ffpp EfficientNet-B4                               017_803       fake                 yes
balanced_f

  vid = df.groupby(["video_name","true_label"]).apply(video_score).reset_index(name="video_score")


In [None]:
# Save the compact table as CSV named exactly: "prediction table efficientnet.csv"
import os, pandas as pd
import shutil
from google.colab import drive

# The compact-table cell must already have produced `out`.
assert 'out' in globals(), "Run the compact table cell first to create the 'out' DataFrame."

local_path = "/content/prediction table efficientnet.csv"
drive_path = "/content/drive/My Drive/prediction table efficientnet.csv"

# Write the CSV into the Colab workspace first.
out.to_csv(local_path, index=False)

# Then mirror it to Drive. A Python copy is used (not a shell command) so the
# spaces in the file name need no quoting.
drive.mount('/content/drive', force_remount=False)
shutil.copyfile(local_path, drive_path)

for _label, _path in (("Saved:", local_path), ("Copied to Drive:", drive_path)):
    print(_label, _path)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Saved: /content/prediction table efficientnet.csv
Copied to Drive: /content/drive/My Drive/prediction table efficientnet.csv
