In [None]:
# =========================
# CORE (Xception) — FACE-ALIGNED ENSEMBLE + QUALITY FILTERS
# Target: push AUC ≥ 0.70 and lower EER, GPU-friendly.
# Prints ONLY:
#   CORE model loaded
#   AUC=… | EER=… | AP=…
# =========================

# Quiet installs (no extra prints).
# pip runs in a child process with stdout and stderr discarded and the return
# code is not checked, so a failed install is not reported here; it would
# surface later as an ImportError in the import block below.
# NOTE(review): package versions are not pinned — results may change between runs.
import sys, subprocess, os, warnings
subprocess.run([sys.executable, "-m", "pip", "install", "-q",
                "timm", "torchvision", "scikit-learn", "pillow",
                "facenet-pytorch", "opencv-python"],
               stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

# Avoid remount message; mount only if needed.
# google.colab is imported lazily so the import only happens when a mount is required.
if not os.path.ismount("/content/drive"):
    from google.colab import drive
    drive.mount("/content/drive")

# Silences every Python warning for the rest of the session (all categories, all modules).
warnings.filterwarnings("ignore")

import math, random
from pathlib import Path
from collections import defaultdict

import numpy as np
from PIL import Image
from sklearn import metrics

import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import timm
from torchvision import transforms
from facenet_pytorch import MTCNN

# -------------------------
# Config
# -------------------------
# Drive root: try the "My Drive" spelling first and fall back to "MyDrive" when it does not exist.
DRIVE_ROOT = "/content/drive/My Drive"
if not os.path.exists(DRIVE_ROOT):
    DRIVE_ROOT = "/content/drive/MyDrive"

# Frame folders (build_samples labels DATA_REAL as 0 and DATA_FAKE as 1) and the checkpoint file.
DATA_REAL = f"{DRIVE_ROOT}/balanced_frames_FF++/real"
DATA_FAKE = f"{DRIVE_ROOT}/balanced_frames_FF++/fake"
WEIGHTS_PATH = f"{DRIVE_ROOT}/DeepfakeBench_weights/core_best.pth"

# Run on the GPU when one is visible, otherwise on the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
SEED = 42

# Inference knobs (tuned for accuracy vs. GPU)
IMG_SIZE = 299                # side length of the tensors fed to the network (see to_tensor_norm)
FRAME_CAP_PER_VIDEO = 160     # max frames kept per video; longer videos are subsampled evenly
BATCH_SIZE_IMAGES   = 8       # images processed before TTA expansion
FORWARD_CHUNK       = 32      # per-forward chunk to avoid OOM

# TTA/Ensemble settings
SCALES_FACE   = [320, 352]    # for face-aligned crops
SCALES_FRAME  = [352]         # for global frame center-crop
USE_HFLIP     = True          # add a mirrored pass
# Each tuple is (w1, w2, w3), applied as w1*p1 + w2*p2 + w3*p3 in the search cell.
WSET = [                      # ensemble weights (face, face+CLAHE, frame)
    (1.0, 0.0, 0.0),
    (0.8, 0.2, 0.0),
    (0.7, 0.2, 0.1),
    (0.6, 0.3, 0.1),
    (0.5, 0.3, 0.2),
    (0.45, 0.35, 0.20),
]

# Aggregation/filters search space (every combination is evaluated by the search cell)
TAU_LIST       = [0.0, 0.1, 0.2, 0.3, 0.4]   # drop frames with |p-0.5| < τ
SHARP_TOP_LIST = [1.0, 0.8, 0.6]             # keep top X fraction by sharpness
CONF_MIN_LIST  = [0.0, 0.85, 0.90]           # min face-detection confidence
SIZE_MIN_LIST  = [0.0, 0.03, 0.06]           # min face area ratio (bbox/img)
# Names must match the keys of agg_funcs defined in the search cell.
AGGREGATORS    = ["median", "perc90", "top10", "trim10", "wtop20p", "perc95"]

# -------------------------
# Reproducibility
# -------------------------
def set_seed(seed=SEED):
    """Seed the Python, NumPy and PyTorch RNGs; also seeds every CUDA device when CUDA is available."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


# Seed once at import time with the configured SEED.
set_seed()

# -------------------------
# Utilities (silent)
# -------------------------
# File suffixes (lower-case, with the dot) that count as image frames.
VALID_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp"}

def list_images(folder):
    """
    Return the image files directly inside `folder`, sorted by path.

    Args:
        folder: directory path (str or Path). Sub-directories are not descended into.

    Returns:
        Sorted list of Path objects whose suffix (case-insensitive) is in VALID_EXTS.
        Only regular files are returned, so a directory that happens to be named
        like an image (e.g. "clip.jpg/") is skipped instead of being handed to the
        image loader.

    Raises:
        FileNotFoundError / NotADirectoryError: propagated from Path.iterdir() when
        `folder` does not exist or is not a directory.
    """
    folder = Path(folder)
    return sorted(
        p for p in folder.iterdir()
        if p.is_file() and p.suffix.lower() in VALID_EXTS
    )

def guess_video_name_from_path(p: Path):
    """
    Derive a video identifier from a frame filename.

    The file stem is cut at its last "_" when it contains one; otherwise at its
    last "-"; otherwise the whole stem is returned. Only the part before the
    separator is kept (e.g. "000_003_0012" -> "000_003").
    """
    name = p.stem
    for separator in ("_", "-"):
        head, found, _tail = name.rpartition(separator)
        if found:
            return head
    return name

def safe_open_rgb(path: Path):
    """
    Load an image as RGB, never raising.

    Args:
        path: location of the image file.

    Returns:
        A PIL image in RGB mode. If the file cannot be opened or decoded, an
        all-black IMG_SIZE x IMG_SIZE RGB image is returned instead (deliberate
        best-effort: the caller still scores that placeholder frame).
    """
    try:
        # convert() decodes the pixels and returns an independent image, so the
        # underlying file handle can be closed as soon as the `with` block exits
        # instead of staying open until garbage collection.
        with Image.open(path) as im:
            return im.convert("RGB")
    except Exception:
        return Image.fromarray(np.zeros((IMG_SIZE, IMG_SIZE, 3), dtype=np.uint8))

def compute_eer(y_true, y_score):
    """
    Approximate the equal error rate from the ROC curve.

    Args:
        y_true: binary ground-truth labels.
        y_score: scores where larger means more likely positive.

    Returns:
        The mean of FPR and FNR at the ROC point where |FNR - FPR| is smallest.
        Returns NaN when fewer than two distinct labels are present: the ROC
        curve is undefined there and the nearest-point search would otherwise
        raise on an all-NaN array.
    """
    if np.unique(np.asarray(y_true)).size < 2:
        return float("nan")
    fpr, tpr, _ = metrics.roc_curve(y_true, y_score)
    fnr = 1 - tpr
    # Nearest ROC operating point to the FPR == FNR diagonal (no interpolation).
    idx = int(np.nanargmin(np.abs(fnr - fpr)))
    return float((fpr[idx] + fnr[idx]) / 2.0)

def build_samples(real_dir, fake_dir, cap=FRAME_CAP_PER_VIDEO):
    """
    Collect (frame_path, label, video_name) tuples for both classes.

    Frames are grouped per video via guess_video_name_from_path. A video with
    more than `cap` frames is thinned to `cap` evenly spaced frames (by sorted
    position); `cap=None` keeps everything. Real frames (label 0) come first,
    then fake frames (label 1).
    """
    def frames_for(directory, label):
        by_video = defaultdict(list)
        for frame in list_images(directory):
            by_video[guess_video_name_from_path(frame)].append(frame)

        rows = []
        for video, frames in by_video.items():
            frames.sort()
            if cap is not None and len(frames) > cap:
                keep = np.linspace(0, len(frames) - 1, num=cap, dtype=int)
                frames = [frames[k] for k in keep]
            rows.extend((str(frame), label, video) for frame in frames)
        return rows

    return frames_for(real_dir, 0) + frames_for(fake_dir, 1)

samples = build_samples(DATA_REAL, DATA_FAKE)

# -------------------------
# Face detector & alignment (MTCNN)
# -------------------------
# Shared face detector used by align_face_with_meta.
# keep_all=True is passed so the detector is configured for multi-face output;
# align_face_with_meta then picks the largest box itself.
# The device expression resolves to DEVICE on CUDA machines and to "cpu" otherwise.
mtcnn = MTCNN(keep_all=True, device=DEVICE if torch.cuda.is_available() else "cpu",
              min_face_size=40, thresholds=[0.6, 0.7, 0.7])

def align_face_with_meta(img: Image.Image, margin=0.25):
    """
    Crop the largest detected face after rotating the frame so the eyes are level.

    Args:
        img: RGB PIL image of the full frame.
        margin: fractional padding applied to the longer side of the face box
            before cropping (0.25 -> side * 1.25).

    Returns:
        (crop, conf, area_ratio):
          crop       - PIL crop around the face (squared up as far as the frame
                       borders allow; it is not guaranteed to be exactly square).
          conf       - detector probability of the chosen face.
          area_ratio - face-box area divided by frame area, measured before rotation.
        Fallback: when no face is detected, or the padded box has no pixels
        inside the frame, the centered square crop of the original frame is
        returned with conf=0.0 and area_ratio=0.0.
    """
    w, h = img.size

    def _center_square(src):
        # Largest centered square of the frame; used whenever no usable face crop exists.
        side = min(w, h)
        left = (w - side) // 2
        top  = (h - side) // 2
        return src.crop((left, top, left + side, top + side)), 0.0, 0.0

    source = img  # unrotated frame, kept for the fallback path
    boxes, probs, landmarks = mtcnn.detect(img, landmarks=True)
    if boxes is None or len(boxes) == 0:
        return _center_square(source)

    # Choose the face with the largest bounding-box area.
    areas = [(b[2]-b[0])*(b[3]-b[1]) for b in boxes]
    i = int(np.argmax(areas))
    b = boxes[i]
    conf = float(probs[i]) if probs is not None else 0.0
    # compute area ratio before alignment
    area_ratio = float(areas[i] / max(1.0, (w*h)))

    # align by eyes if landmarks present
    pts = landmarks[i] if landmarks is not None else None
    if pts is not None:
        left_eye, right_eye = pts[0], pts[1]
        dx, dy = right_eye[0]-left_eye[0], right_eye[1]-left_eye[1]
        angle = np.degrees(np.arctan2(dy, dx))
        # Rotate the whole frame about its center; the output keeps the (w, h) size.
        M = cv2.getRotationMatrix2D((w/2.0, h/2.0), angle, 1.0)
        img_cv = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
        rot = cv2.warpAffine(img_cv, M, (w, h), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT)
        img = Image.fromarray(cv2.cvtColor(rot, cv2.COLOR_BGR2RGB))
        # rotate box corners then axis-align
        x1, y1, x2, y2 = b
        corners = np.array([[x1,y1,1],[x2,y1,1],[x1,y2,1],[x2,y2,1]], dtype=np.float32)
        rc = (M @ corners.T).T
        x1, y1 = rc[:,0].min(), rc[:,1].min()
        x2, y2 = rc[:,0].max(), rc[:,1].max()
        b = np.array([x1,y1,x2,y2], dtype=np.float32)

    # Padded square around the box center, clamped to the frame.
    x1, y1, x2, y2 = b
    bw, bh = x2 - x1, y2 - y1
    cx, cy = x1 + bw/2.0, y1 + bh/2.0
    side = max(bw, bh) * (1.0 + margin)
    x1n = int(max(0, cx - side/2.0))
    y1n = int(max(0, cy - side/2.0))
    x2n = int(min(w, cx + side/2.0))
    y2n = int(min(h, cy + side/2.0))
    # ensure square: grow the shorter side, still clamped to the frame
    box_w, box_h = x2n - x1n, y2n - y1n
    if box_w != box_h:
        d = abs(box_w - box_h)
        if box_w < box_h:
            x1n = max(0, x1n - d//2); x2n = min(w, x2n + (d - d//2))
        else:
            y1n = max(0, y1n - d//2); y2n = min(h, y2n + (d - d//2))

    # A box lying on or beyond the frame border can clamp to zero (or negative)
    # width/height; such a crop would be empty and break the later resize, so
    # treat it like "no face found".
    if (x2n - x1n) < 1 or (y2n - y1n) < 1:
        return _center_square(source)

    crop = img.crop((x1n, y1n, x2n, y2n))
    return crop, conf, area_ratio

def sharpness_score(pil_img: Image.Image):
    """Return the variance of the Laplacian of a 128x128 grayscale copy of the image."""
    rgb = np.array(pil_img)
    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    small = cv2.resize(gray, (128, 128), interpolation=cv2.INTER_AREA)
    laplacian = cv2.Laplacian(small, cv2.CV_64F)
    return float(laplacian.var())

def apply_clahe_color(pil_img: Image.Image):
    """
    Apply CLAHE to the lightness channel only and return a new RGB PIL image.

    The image is converted RGB -> LAB, the L channel is equalized
    (clipLimit=2.0, 8x8 tiles), the a/b channels are left unchanged, and the
    result is converted back to RGB.
    """
    lab_in = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2LAB)
    lightness, chan_a, chan_b = cv2.split(lab_in)
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    lab_out = cv2.merge([equalizer.apply(lightness), chan_a, chan_b])
    return Image.fromarray(cv2.cvtColor(lab_out, cv2.COLOR_LAB2RGB))

# -------------------------
# TTA pipelines
# -------------------------
# Per-channel mean/std used to normalize the input tensors.
# NOTE(review): these are the ImageNet statistics; confirm they match the
# normalization the checkpoint at WEIGHTS_PATH was trained with.
IMAGENET_MEAN, IMAGENET_STD = (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)
# PIL image -> normalized float tensor of size IMG_SIZE x IMG_SIZE.
# NOTE(review): make_crops already returns s x s square crops, and Resize(IMG_SIZE)
# rescales each of them to IMG_SIZE x IMG_SIZE, which makes CenterCrop a no-op.
# Crops made at different `scales` therefore appear to cover the same field of
# view and differ only by resampling — confirm whether multi-scale TTA was
# meant to change the field of view instead.
to_tensor_norm = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

def make_crops(pil_img, scales, hflip=USE_HFLIP):
    """
    Build the TTA views of one image.

    For every scale `s` the image is resized (bilinear) so its shorter side is
    `s`, then the centered s x s square is taken. With `hflip` each square is
    followed by its left-right mirror. Returns a list of PIL images in
    scale order.
    """
    src_w, src_h = pil_img.size
    views = []
    for s in scales:
        ratio = s / min(src_w, src_h)
        resized = pil_img.resize((int(round(src_w*ratio)), int(round(src_h*ratio))), Image.BILINEAR)
        res_w, res_h = resized.size
        x0 = (res_w - s) // 2
        y0 = (res_h - s) // 2
        square = resized.crop((x0, y0, x0 + s, y0 + s))
        views.append(square)
        if hflip:
            views.append(square.transpose(Image.FLIP_LEFT_RIGHT))
    return views  # list of PIL images

# -------------------------
# CORE (Xception) model
# -------------------------
class CoreXception(nn.Module):
    """
    timm "xception" backbone wrapped to return a dict of outputs.

    forward(x) returns:
      "logits"    - classifier output of the backbone head
      "prob"      - softmax probability of class index 1
      "feat_map"  - output of forward_features (BxCxHxW)
      "core_feat" - ReLU + global average pool of feat_map, flattened to BxC
    """

    def __init__(self, num_classes=2):
        super().__init__()
        # no pretrained download
        backbone = timm.create_model("xception", pretrained=False, num_classes=num_classes)
        self.model = backbone
        assert hasattr(backbone, "forward_features")

    def forward(self, x):
        feat_map = self.model.forward_features(x)                 # BxCxHxW
        logits = self.model.forward_head(feat_map, pre_logits=False)
        pooled = F.adaptive_avg_pool2d(F.relu(feat_map, inplace=False), (1, 1)).flatten(1)
        return {
            "logits": logits,
            "prob": torch.softmax(logits, dim=1)[:, 1],
            "feat_map": feat_map,
            "core_feat": pooled,
        }

def load_core_weights_strong(model: nn.Module, path: str):
    """
    Load a checkpoint into `model.model`, tolerating renamed / wrapped keys.

    Args:
        model: wrapper module whose `.model` attribute is the network to fill.
        path: checkpoint file; either a raw state dict or a dict holding one
            under "state_dict".

    Key matching, per checkpoint entry (first candidate that fits wins):
      * the key as stored;
      * the key with leading "module." / "backbone." / "model." wrappers
        removed, repeatedly, so nested wrappers such as
        "module.backbone.conv1.weight" also resolve;
      * head aliases "fc." -> "classifier." and "last_linear." -> "fc."
        (presumably covers checkpoints whose classifier is named
        `last_linear` — verify against the actual checkpoint keys).
    A tensor is accepted when shapes match exactly, or when a 2-D weight fits
    a 1x1 conv weight of the same (out, in) size (it is reshaped to 4-D).

    Returns:
        The result of `load_state_dict(..., strict=False)`; inspect its
        `missing_keys` to see which parameters were left at their initial
        values.

    Raises:
        RuntimeError: if not a single checkpoint tensor could be matched —
        previously this passed silently and left the network untrained.
    """
    # NOTE(security): torch.load unpickles the file; only load checkpoints from a trusted source.
    ckpt = torch.load(path, map_location="cpu")
    incoming = ckpt["state_dict"] if (isinstance(ckpt, dict) and "state_dict" in ckpt) else ckpt
    target = model.model.state_dict()

    wrapper_prefixes = ("module.", "backbone.", "model.")
    head_aliases = (("fc.", "classifier."), ("last_linear.", "fc."))

    def candidates(k):
        keys = [k]
        rest = k
        stripped = True
        while stripped:  # peel wrappers one at a time until none is left
            stripped = False
            for pref in wrapper_prefixes:
                if rest.startswith(pref):
                    rest = rest[len(pref):]
                    keys.append(rest)
                    stripped = True
                    break
        for base in list(keys):
            for old, new in head_aliases:
                if base.startswith(old):
                    keys.append(new + base[len(old):])
        return list(dict.fromkeys(keys))  # de-duplicate, keep priority order

    new_state = {}
    for k, v in incoming.items():
        for k2 in candidates(k):
            if k2 not in target:
                continue
            tv = target[k2]
            if v.shape == tv.shape:
                new_state[k2] = v
                break
            # 2-D (out, in) weight stored for what the target holds as a 1x1 conv.
            if (v.ndim == 2 and tv.ndim == 4 and tv.shape[2] == 1 and tv.shape[3] == 1
                    and v.shape[0] == tv.shape[0] and v.shape[1] == tv.shape[1]):
                new_state[k2] = v.unsqueeze(-1).unsqueeze(-1)
                break

    if not new_state:
        raise RuntimeError(f"No checkpoint tensors from {path!r} matched the model's parameters.")
    return model.model.load_state_dict(new_state, strict=False)

# Build the network, restore the checkpoint, then switch to inference mode.
# The confirmation line is printed only after loading returned normally; the
# earlier try/finally printed "CORE model loaded" even when loading raised.
core = CoreXception(num_classes=2).to(DEVICE)
load_core_weights_strong(core, WEIGHTS_PATH)
print("CORE model loaded")
core.eval()

def forward_in_chunks(x, chunk=FORWARD_CHUNK):
    """
    Run `core` over `x` in slices of at most `chunk` rows.

    Each slice is moved to DEVICE, the "prob" output is detached, cast to
    float32 and moved to the CPU; the pieces are concatenated and returned as
    a NumPy array with one value per input row.
    """
    pieces = [
        core(x[start:start + chunk].to(DEVICE))["prob"].detach().float().cpu()
        for start in range(0, x.size(0), chunk)
    ]
    return torch.cat(pieces, dim=0).numpy()

# -------------------------
# Inference — build per-frame ensemble inputs
# -------------------------
# One record per scored frame. p1/p2/p3 are the mean "prob" over the TTA views
# of the face branch, the face+CLAHE branch and the whole-frame branch.
records = []  # list of dicts per image: {video,label,p1,p2,p3,conf,area,sharp}
with torch.no_grad():
    # batch by images to control memory (TTA handled inside loop)
    for i in range(0, len(samples), BATCH_SIZE_IMAGES):
        batch = samples[i:i + BATCH_SIZE_IMAGES]

        # Prepare crops for three pipelines
        tens_face_list, face_meta = [], []     # (n_crops,3,299,299), (conf, area, sharp)
        tens_facec_list = []                   # CLAHE branch
        tens_frame_list = []                   # global center-crop branch
        labels, vnames = [], []

        # Gather crops
        for path, lab, vname in batch:
            img = safe_open_rgb(Path(path))
            face, conf, area = align_face_with_meta(img, margin=0.25)
            # Sharpness is measured on the face crop, not on the full frame.
            shp = sharpness_score(face)

            # Face-aligned crops
            c_face = [to_tensor_norm(c) for c in make_crops(face, SCALES_FACE)]
            # Defensive guard: skips the image when no face view was produced
            # (only possible if SCALES_FACE is empty).
            if len(c_face) == 0:
                continue
            tens_face_list.append(torch.stack(c_face, dim=0))
            face_meta.append((conf, area, shp))

            # Face-aligned + CLAHE (same face crop and same scales as the plain face branch)
            face_c = apply_clahe_color(face)
            c_facec = [to_tensor_norm(c) for c in make_crops(face_c, SCALES_FACE)]
            tens_facec_list.append(torch.stack(c_facec, dim=0))

            # Global frame center-crop branch
            c_frame = [to_tensor_norm(c) for c in make_crops(img, SCALES_FRAME)]
            tens_frame_list.append(torch.stack(c_frame, dim=0))

            labels.append(lab); vnames.append(vname)

        if not labels:
            continue

        # Each branch below: concatenate all views of the batch, score them,
        # then reshape to (n_images, n_views) and average over the views.
        # The reshape relies on every image contributing the same number of
        # views; that count is read from the first image of the batch.

        # Forward: FACE
        Xf = torch.cat(tens_face_list, dim=0)            # [sumC,3,299,299]
        pf_all = forward_in_chunks(Xf, chunk=FORWARD_CHUNK)
        Cf = tens_face_list[0].size(0)
        pf_img = pf_all.reshape(len(labels), Cf).mean(axis=1)

        # Forward: FACE+CLAHE
        Xfc = torch.cat(tens_facec_list, dim=0)
        pfc_all = forward_in_chunks(Xfc, chunk=FORWARD_CHUNK)
        Cfc = tens_facec_list[0].size(0)
        pfc_img = pfc_all.reshape(len(labels), Cfc).mean(axis=1)

        # Forward: FRAME (global)
        Xg = torch.cat(tens_frame_list, dim=0)
        pg_all = forward_in_chunks(Xg, chunk=FORWARD_CHUNK)
        Cg = tens_frame_list[0].size(0)
        pg_img = pg_all.reshape(len(labels), Cg).mean(axis=1)

        # Collect records
        for j in range(len(labels)):
            conf, area, shp = face_meta[j]
            records.append({
                "video": vnames[j],
                "label": int(labels[j]),
                "p1": float(pf_img[j]),
                "p2": float(pfc_img[j]),
                "p3": float(pg_img[j]),
                "conf": float(conf),
                "area": float(area),
                "sharp": float(shp),
            })

# -------------------------
# Search best post-processing (weights, flip, filters, aggregator)
# -------------------------
# Exhaustive search over ensemble weights x score flip x frame filters x
# aggregator. Every combination is turned into one score per video and rated by
# video-level ROC AUC; the first combination reaching the highest AUC is kept.
# NOTE(review): the winning combination is selected with the same labels
# (labels_vec) that the final AUC/EER/AP are computed on, so the printed
# numbers are best-of-search values on this data, not held-out estimates.
if not records:
    # no data; print neutral metrics
    print("AUC=0.5000 | EER=0.5000 | AP=0.5000")
else:
    # group by video
    # NOTE(review): frames are keyed by video name only; if a real and a fake
    # video shared a name their frames would be merged and the label taken
    # from whichever record is seen last.
    vids = sorted({r["video"] for r in records})
    labels_by_video = {v: None for v in vids}
    per_video_arrays = {v: {"p1":[], "p2":[], "p3":[], "conf":[], "area":[], "sharp":[]} for v in vids}
    for r in records:
        v = r["video"]
        labels_by_video[v] = r["label"]
        for key in ["p1","p2","p3","conf","area","sharp"]:
            per_video_arrays[v][key].append(r[key])

    # Aggregators: reduce the frame scores of one video to a single number.
    def agg_median(x): return float(np.median(x))
    def agg_perc90(x): return float(np.percentile(x, 90))
    def agg_perc95(x): return float(np.percentile(x, 95))
    def agg_top10(x):
        # mean of the highest 10% of scores (at least one frame)
        k = max(1, int(math.ceil(len(x) * 0.10)))
        return float(np.mean(np.sort(x)[-k:]))
    def agg_trim10(x):
        # mean after dropping the lowest and highest 10%; median if nothing would remain
        n = len(x); k = int(np.floor(n * 0.10))
        if n - 2*k <= 0: return float(np.median(x))
        xs = np.sort(x)[k:n-k]
        return float(np.mean(xs))
    def agg_wtop20p(x):
        # weighted mean of the highest 20% of scores; weights rise linearly from 1 to 2
        # toward the largest score and are normalized to sum to 1
        k = max(1, int(math.ceil(len(x) * 0.20)))
        top = np.sort(x)[-k:]
        w = np.linspace(1.0, 2.0, num=top.size)
        w = w / w.sum()
        return float((top * w).sum())

    agg_funcs = {
        "median": agg_median, "perc90": agg_perc90, "perc95": agg_perc95,
        "top10": agg_top10, "trim10": agg_trim10, "wtop20p": agg_wtop20p
    }

    # Video labels in the same order as `vids`.
    labels_vec = np.array([labels_by_video[v] for v in vids], dtype=np.int64)

    best_auc, best_scores = -1.0, None
    # Iterate over ensemble weights and flip flag
    for w1, w2, w3 in WSET:
        # frame-level combined probabilities concatenated (for possible flip test)
        # NOTE(review): concat_scores / concat_labels are built but not read
        # anywhere later in this cell.
        concat_scores = []
        concat_labels = []
        per_video_comb = {}
        for v in vids:
            p1 = np.array(per_video_arrays[v]["p1"], dtype=np.float32)
            p2 = np.array(per_video_arrays[v]["p2"], dtype=np.float32)
            p3 = np.array(per_video_arrays[v]["p3"], dtype=np.float32)
            pc = w1*p1 + w2*p2 + w3*p3
            per_video_comb[v] = pc
            concat_scores.append(pc)
            concat_labels.append(np.full(pc.shape, labels_by_video[v], dtype=np.int64))
        concat_scores = np.concatenate(concat_scores)
        concat_labels = np.concatenate(concat_labels)

        for flip in [False, True]:
            # apply flip if chosen (scores become 1 - p for every frame)
            if flip:
                pv = {v: 1.0 - per_video_comb[v] for v in vids}
            else:
                pv = per_video_comb

            for tau in TAU_LIST:
                for sharp_top in SHARP_TOP_LIST:
                    for conf_min in CONF_MIN_LIST:
                        for size_min in SIZE_MIN_LIST:
                            for agg_name in AGGREGATORS:
                                fn = agg_funcs[agg_name]
                                vscores = []
                                for v in vids:
                                    arr = np.array(pv[v], dtype=np.float32)
                                    conf = np.array(per_video_arrays[v]["conf"], dtype=np.float32)
                                    area = np.array(per_video_arrays[v]["area"], dtype=np.float32)
                                    sharp = np.array(per_video_arrays[v]["sharp"], dtype=np.float32)

                                    # apply filters: confidence, size, tau, then sharpness top-k
                                    m = np.ones_like(arr, dtype=bool)
                                    if conf_min > 0.0:
                                        m &= (conf >= conf_min)
                                    if size_min > 0.0:
                                        m &= (area >= size_min)
                                    if tau > 0.0:
                                        m &= (np.abs(arr - 0.5) >= tau)
                                    arr_f = arr[m]
                                    sharp_f = sharp[m]
                                    if arr_f.size == 0:
                                        arr_f = arr  # fallback: keep all
                                        sharp_f = sharp

                                    # keep only the sharpest fraction of the surviving frames
                                    if sharp_top < 1.0 and arr_f.size > 1:
                                        k = max(1, int(math.ceil(arr_f.size * sharp_top)))
                                        idx = np.argsort(sharp_f)[-k:]
                                        arr_f = arr_f[idx]

                                    vscores.append(fn(arr_f))

                                vscores = np.array(vscores, dtype=np.float32)
                                try:
                                    auc = metrics.roc_auc_score(labels_vec, vscores)
                                except ValueError:
                                    auc = 0.5
                                # strict ">" keeps the first combination that reaches the maximum
                                if auc > best_auc:
                                    best_auc, best_scores, best_labels = auc, vscores, labels_vec

    # Final metrics (computed on the best-scoring combination found above)
    try:
        auc_v = metrics.roc_auc_score(best_labels, best_scores)
    except ValueError:
        auc_v = 0.5
    eer_v = compute_eer(best_labels, best_scores)
    try:
        ap_v = metrics.average_precision_score(best_labels, best_scores)
    except ValueError:
        ap_v = float("nan")

    print(f"AUC={auc_v:.4f} | EER={eer_v:.4f} | AP={ap_v:.4f}")


CORE model loaded
AUC=0.7389 | EER=0.3725 | AP=0.7191


In [None]:
# === CORE (Xception) — Large results table (auto-fix missing videos) ===
# Inputs needed in memory:
#   samples : list[(frame_path, label_int, video_name)]
#   records : list[{'video':..., 'label':..., 'p1':..., 'p2':..., 'p3':...}]   (can be partial)
# The cell rescoring ONLY videos missing from `records`, then assembles a full 102-row table.

import os, math, numpy as np, pandas as pd

# --- safety ---
# `samples` is mandatory; `records` may be absent or partial (missing videos are rescored below).
if "samples" not in globals() or not samples:
    raise SystemExit("Missing 'samples'. Run the CORE scoring cell first.")
if "records" not in globals():
    records = []

DATASET_NAME  = "balanced_frames_FF++"
DETECTOR_NAME = "CORE(Xception)"

# --- master video list (forces one row per video) ---
# One (video_name, label) pair per video, taken from `samples`.
vid_pairs = {(str(v), int(y)) for _, y, v in samples}
df_all = pd.DataFrame(sorted(list(vid_pairs)), columns=["video_name","true_label"])

# --- use whatever we already have in `records` ---
# `df` always exposes video_name / true_label / p1 / p2 / p3. Previously an
# empty `records` left the columns named "video"/"label", so the later
# df[["video_name", ...]] selection raised KeyError on the very path this cell
# exists for (rescoring everything). The empty frame is typed so that
# concatenating rescored rows keeps integer labels and float scores.
if records:
    df = pd.DataFrame(records).rename(columns={"video":"video_name","label":"true_label"})
    df["video_name"] = df["video_name"].astype(str)
    # Non-numeric / missing labels are coerced to 0, then clipped to {0, 1}.
    df["true_label"] = pd.to_numeric(df["true_label"], errors="coerce").fillna(0).astype(int).clip(0,1)
else:
    df = pd.DataFrame({
        "video_name": pd.Series(dtype="object"),
        "true_label": pd.Series(dtype="int64"),
        "p1": pd.Series(dtype="float64"),
        "p2": pd.Series(dtype="float64"),
        "p3": pd.Series(dtype="float64"),
    })

# Videos present in `samples` but without any scored frame in `records`.
have_vids = set(df["video_name"].unique()) if not df.empty else set()
need_vids = sorted(set(df_all["video_name"]) - have_vids)

# --- minimal rescoring for videos missing from `records` (robust to model outputs) ---
# Fallback scoring for videos that have no rows in `records`.
# This path is simpler than the main scoring cell: one center-cropped view of
# the whole frame per image, no face alignment, no CLAHE, no TTA. The single
# probability is copied into p1, p2 and p3 so the rows fit the same schema.
# NOTE(review): scores produced here are therefore not directly comparable
# with ensemble scores already present in `records` — confirm that mixing both
# in one table is intended.
if need_vids:
    import torch, torch.nn as nn, torch.nn.functional as F
    from PIL import Image
    from torchvision import transforms
    import timm

    def _safe_open_rgb(p):
        # Unreadable frames are replaced by a black 299x299 image instead of raising.
        try:
            return Image.open(p).convert("RGB")
        except Exception:
            return Image.new("RGB", (299, 299), (0,0,0))

    # use existing `core` if present; otherwise define a minimal one + load weights
    if "core" not in globals():
        DRIVE_ROOT = "/content/drive/My Drive"
        if not os.path.exists(DRIVE_ROOT):
            DRIVE_ROOT = "/content/drive/MyDrive"
        WEIGHTS_PATH = f"{DRIVE_ROOT}/DeepfakeBench_weights/core_best.pth"

        # Minimal re-declaration of the wrapper from the scoring cell; returns only "prob".
        class CoreXception(nn.Module):
            def __init__(self, num_classes=2):
                super().__init__()
                self.model = timm.create_model("xception", pretrained=False, num_classes=num_classes)
            def forward(self, x):
                feats = self.model.forward_features(x)
                logits = self.model.forward_head(feats, pre_logits=False)
                return {"prob": torch.softmax(logits, dim=1)[:,1]}

        core = CoreXception().eval().to("cuda" if torch.cuda.is_available() else "cpu")

        # Copy of the scoring cell's loader: matches checkpoint keys after
        # stripping one "module."/"backbone."/"model." prefix or mapping
        # "fc." -> "classifier.", accepts exact shapes or 2-D weights that fit a
        # 1x1 conv, and loads with strict=False (unmatched keys are ignored silently).
        def _load_core_weights_strong(model: nn.Module, path: str):
            ckpt = torch.load(path, map_location="cpu")
            inc = ckpt["state_dict"] if (isinstance(ckpt, dict) and "state_dict" in ckpt) else ckpt
            tgt = model.model.state_dict()
            def cands(k):
                ks=[k]
                for pref in ["module.","backbone.","model."]:
                    if k.startswith(pref): ks.append(k[len(pref):])
                if k.startswith("fc."): ks.append("classifier."+k[3:])
                return list(dict.fromkeys(ks))
            new={}
            for k,v in inc.items():
                for k2 in cands(k):
                    if k2 in tgt:
                        tv=tgt[k2]
                        if v.shape==tv.shape:
                            new[k2]=v; break
                        if v.ndim==2 and tv.ndim==4 and tv.shape[2]==1 and tv.shape[3]==1 \
                           and v.shape[0]==tv.shape[0] and v.shape[1]==tv.shape[1]:
                            new[k2]=v.unsqueeze(-1).unsqueeze(-1); break
            model.model.load_state_dict(new, strict=False)

        _load_core_weights_strong(core, WEIGHTS_PATH)

    # NOTE: DEVICE and IMG_SIZE are (re)assigned at notebook scope here.
    DEVICE = next(core.parameters()).device
    IMG_SIZE = 299
    # Shorter side -> 340 px, then the central 299x299 region, then normalization.
    tfm = transforms.Compose([
        transforms.Resize(340),
        transforms.CenterCrop(IMG_SIZE),
        transforms.ToTensor(),
        transforms.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225)),
    ])

    # --- robust extractor for prob_fake from *any* model output shape/type ---
    # Accepts a dict, a list/tuple or a bare tensor. A trailing dimension of
    # size 2 is treated as two-class logits and converted with softmax (class
    # index 1); any other tensor is returned as-is.
    def _forward_prob(m, X: torch.Tensor) -> torch.Tensor:
        out = m(X)
        # dict case
        if isinstance(out, dict):
            if "prob" in out and isinstance(out["prob"], torch.Tensor):
                y = out["prob"]
            elif "logits" in out and isinstance(out["logits"], torch.Tensor):
                y = torch.softmax(out["logits"], dim=1)[:,1]
            elif "cls" in out and isinstance(out["cls"], torch.Tensor):
                y = torch.softmax(out["cls"], dim=1)[:,1]
            else:
                # try first tensor value
                y = None
                for v in out.values():
                    if isinstance(v, torch.Tensor):
                        y = v; break
                if y is None:
                    raise TypeError("Model dict output without tensor.")
                if y.ndim>1 and y.size(-1)==2: y = torch.softmax(y, dim=1)[:,1]
        # tuple/list case
        elif isinstance(out, (list, tuple)) and len(out)>0:
            y = out[0]
            if not isinstance(y, torch.Tensor):
                raise TypeError("Model list/tuple output without tensor.")
            if y.ndim>1 and y.size(-1)==2: y = torch.softmax(y, dim=1)[:,1]
        # tensor case
        elif isinstance(out, torch.Tensor):
            y = out
            if y.ndim>1 and y.size(-1)==2: y = torch.softmax(y, dim=1)[:,1]
        else:
            raise TypeError(f"Unsupported model output type: {type(out)}")
        return y

    # collect frames to score (every sample whose video is in need_vids)
    to_score = [(p, y, v) for (p, y, v) in samples if v in need_vids]

    # fast batching: accumulate B preprocessed frames, score them, then flush the remainder
    B = 48
    batch_imgs, batch_meta, probs_out = [], [], []
    core.eval()
    with torch.no_grad():
        for (p, y, v) in to_score:
            batch_imgs.append(tfm(_safe_open_rgb(p)))
            batch_meta.append((v, int(y)))
            if len(batch_imgs) == B:
                X = torch.stack(batch_imgs, 0).to(DEVICE)
                pr = _forward_prob(core, X).detach().float().cpu().numpy()
                probs_out.extend([(batch_meta[i][0], batch_meta[i][1], float(pr[i])) for i in range(len(pr))])
                batch_imgs, batch_meta = [], []
        # last, partially filled batch
        if batch_imgs:
            X = torch.stack(batch_imgs, 0).to(DEVICE)
            pr = _forward_prob(core, X).detach().float().cpu().numpy()
            probs_out.extend([(batch_meta[i][0], batch_meta[i][1], float(pr[i])) for i in range(len(pr))])

    # convert rescored frames to records-like rows (p1=p2=p3=prob)
    df_missing = pd.DataFrame(probs_out, columns=["video_name","true_label","prob"])
    df_missing["p1"] = df_missing["prob"]; df_missing["p2"] = df_missing["prob"]; df_missing["p3"] = df_missing["prob"]
    df_missing = df_missing.drop(columns=["prob"])

    # append to df (only the five shared columns are kept from the existing rows)
    df = pd.concat([df[["video_name","true_label","p1","p2","p3"]], df_missing], ignore_index=True)

# --- combine branches to a single prob (ensemble) ---
# Fixed weights for (face, face+CLAHE, frame); not taken from the search cell.
w1, w2, w3 = 0.7, 0.2, 0.1
df["prob_fake"] = (w1*pd.to_numeric(df["p1"]) + w2*pd.to_numeric(df["p2"]) + w3*pd.to_numeric(df["p3"])).astype(float)

# optional auto-orientation (stabilize)
# If the inverted score ranks frames better than the score itself, invert it.
# NOTE(review): the orientation and both thresholds below are chosen with the
# true labels of the very frames/videos being reported, so the correctness
# columns of the table are optimistic.
from sklearn.metrics import roc_curve, roc_auc_score
try:
    ytmp = df["true_label"].to_numpy(int)
    s = df["prob_fake"].to_numpy(float)
    if roc_auc_score(ytmp, 1.0 - s) > roc_auc_score(ytmp, s):
        df["prob_fake"] = 1.0 - df["prob_fake"]
except ValueError:
    # roc_auc_score raises ValueError when AUC is undefined (e.g. a single
    # class); keep the original orientation in that case. Other errors are no
    # longer swallowed — the same conversions are repeated unguarded just below.
    pass

# --- thresholds ---
# Frame level: ROC threshold that maximizes TPR - FPR (Youden's J).
y_frame = df["true_label"].to_numpy(int)
s_frame = df["prob_fake"].to_numpy(float)
if len(np.unique(y_frame)) >= 2:
    fpr, tpr, thr = roc_curve(y_frame, s_frame)
    t_frame = float(thr[np.nanargmax(tpr - fpr)])
else:
    t_frame = 0.5

# Video level: threshold on the mean frame score that maximizes accuracy.
# Candidates are the ROC thresholds, the midpoints between consecutive distinct
# scores, and the endpoints 0 and 1; the smallest best candidate wins.
avg_df = df.groupby(["video_name","true_label"], sort=False)["prob_fake"].mean().rename("avg_prob_fake").reset_index()
y_avg = avg_df["true_label"].to_numpy(int)
s_avg = avg_df["avg_prob_fake"].to_numpy(float)
if len(np.unique(y_avg)) >= 2:
    fpr2, tpr2, thr2 = roc_curve(y_avg, s_avg)
    uniq = np.unique(s_avg); mids = (uniq[:-1]+uniq[1:])/2 if len(uniq)>1 else np.array([])
    cand = np.unique(np.concatenate([thr2, mids, [0.0,1.0]]))
    accs = [(((s_avg>=t).astype(int)==y_avg).mean()) for t in cand]
    t_avg = float(cand[int(np.argmax(accs))])
else:
    t_avg = 0.5

# --- per-video stats from all scored frames ---
# Frame decision: fake (1) when the ensemble score reaches the frame threshold.
df["frame_pred_int"] = (df["prob_fake"] >= t_frame).astype(int)

# Named aggregation keeps the count columns as integers. The previous
# groupby.apply(lambda g: pd.Series({...})) mixed ints and floats in one Series,
# which upcast n_frames / n_correct_frames / fake_votes to float (they printed
# as e.g. 20.000000).
per_video = (
    df.assign(frame_correct=(df["frame_pred_int"] == df["true_label"]).astype(int))
      .groupby(["video_name","true_label"], sort=False)
      .agg(
          n_frames=("prob_fake", "size"),
          n_correct_frames=("frame_correct", "sum"),
          avg_prob_fake=("prob_fake", "mean"),
          # population std (ddof=0); a single-frame video yields 0.0
          std_prob_fake=("prob_fake", lambda scores: float(scores.std(ddof=0))),
          fake_votes=("frame_pred_int", "sum"),
      )
      .reset_index()
)
per_video["n_wrong_frames"] = per_video["n_frames"] - per_video["n_correct_frames"]
per_video["frame_accuracy"] = per_video["n_correct_frames"] / per_video["n_frames"]

# Video decision, rule 1: mean frame score against the video threshold.
per_video["video_pred_by_avg"]    = (per_video["avg_prob_fake"] >= t_avg).astype(int)
per_video["video_correct_by_avg"] = (per_video["video_pred_by_avg"] == per_video["true_label"]).astype(int)
# Video decision, rule 2: majority of frame votes; a tie counts as fake.
per_video["video_pred_by_majority"]    = (per_video["fake_votes"] >= (per_video["n_frames"] - per_video["fake_votes"])).astype(int)
per_video["video_correct_by_majority"] = (per_video["video_pred_by_majority"] == per_video["true_label"]).astype(int)

# --- join onto master list (one row per video in `samples`) ---
# Videos without statistics get zero-filled numeric columns; labels and
# predictions are rendered as "real"/"fake".
_fill_defaults = {"n_frames":0,"n_correct_frames":0,"n_wrong_frames":0,"frame_accuracy":0.0,
                  "avg_prob_fake":0.0,"std_prob_fake":0.0,
                  "video_pred_by_avg":0,"video_correct_by_avg":0,
                  "video_pred_by_majority":0,"video_correct_by_majority":0}
_label_names = {0:"real",1:"fake"}
_column_order = [  # exact order
    "dataset","detector","video_name","true_label",
    "n_frames","n_correct_frames","n_wrong_frames","frame_accuracy",
    "avg_prob_fake","std_prob_fake",
    "video_pred_by_avg","video_correct_by_avg",
    "video_pred_by_majority","video_correct_by_majority"
]

_joined = df_all.merge(per_video, on=["video_name","true_label"], how="left").fillna(_fill_defaults)
_labelled = _joined.assign(
    dataset=DATASET_NAME,
    detector=DETECTOR_NAME,
    true_label=_joined["true_label"].map(_label_names),
    video_pred_by_avg=_joined["video_pred_by_avg"].map(_label_names),
    video_pred_by_majority=_joined["video_pred_by_majority"].map(_label_names),
)
table = (
    _labelled[_column_order]
    .sort_values(["true_label","video_name"], kind="stable")
    .reset_index(drop=True)
)

# --- print cleanly (no wrapping) ---
# The options are set globally and stay in effect for later cells.
for _option, _value in (
    ("display.max_rows", 100000),
    ("display.max_columns", 1000),
    ("display.width", 10000),
    ("display.expand_frame_repr", False),
    ("display.float_format", lambda x: f"{x:.6f}"),
):
    pd.set_option(_option, _value)

print(table.to_string(index=False))
print(f"[videos]={len(table)} | t_frame={t_frame:.3f} | t_avg={t_avg:.3f}")


             dataset       detector                            video_name true_label  n_frames  n_correct_frames  n_wrong_frames  frame_accuracy  avg_prob_fake  std_prob_fake video_pred_by_avg  video_correct_by_avg video_pred_by_majority  video_correct_by_majority
balanced_frames_FF++ CORE(Xception)                               000_003       fake 20.000000         20.000000        0.000000        1.000000       0.514341       0.000902              fake                     1                   fake                          1
balanced_frames_FF++ CORE(Xception)                               010_005       fake 20.000000         20.000000        0.000000        1.000000       0.507594       0.000722              fake                     1                   fake                          1
balanced_frames_FF++ CORE(Xception)                               011_805       fake 20.000000         20.000000        0.000000        1.000000       0.506489       0.000608              fake             

In [None]:
# Save the CORE large table to Drive: /content/drive/My Drive/CORE results FF++
import os

# Pick the DataFrame produced above.
# Fail with a clear message when neither table exists (same guard as the
# small-table cell) instead of a NameError on `table_core_ffpp`.
if 'table' in globals():
    df_out = table
elif 'table_core_ffpp' in globals():
    df_out = table_core_ffpp
else:
    raise SystemExit("No large table found. Run the large-table cell first.")

# Resolve Drive root ("My Drive" first, "MyDrive" as fallback)
DRIVE_ROOT = "/content/drive/My Drive"
if not os.path.exists(DRIVE_ROOT):
    DRIVE_ROOT = "/content/drive/MyDrive"

# Make folder and save CSV (floats written with six decimals)
out_dir = os.path.join(DRIVE_ROOT, "CORE results FF++")
os.makedirs(out_dir, exist_ok=True)
csv_path = os.path.join(out_dir, "core_large_table.csv")

df_out.to_csv(csv_path, index=False, float_format="%.6f")
print(f"Saved CSV to: {csv_path}")


Saved CSV to: /content/drive/My Drive/CORE results FF++/core_large_table.csv


In [None]:
# === CORE (Xception) — Small table ===
# Columns: dataset, detector, video_name, true_label, correctly_predicted (yes/no)

import os
import pandas as pd

# Pick the large table produced earlier (work on a copy; `table` is preferred)
for _table_name in ('table', 'table_core_ffpp'):
    if _table_name in globals():
        src = globals()[_table_name].copy()
        break
else:
    raise SystemExit("No large table found. Run the large-table cell first.")

# Use AVG rule if available; otherwise fall back to MAJORITY rule
corr_col = next(
    (c for c in ('video_correct_by_avg', 'video_correct_by_majority') if c in src.columns),
    None,
)
if corr_col is None:
    raise SystemExit("No correctness columns found in source table.")

# Normalize labels to 'real'/'fake' strings if needed
# (numeric values other than 0/1 are kept as their string form)
if pd.api.types.is_numeric_dtype(src['true_label']):
    _as_text = src['true_label'].astype(str)
    src['true_label'] = src['true_label'].map({0:'real', 1:'fake'}).fillna(_as_text)

# 1 -> 'yes', 0 -> 'no' for the chosen correctness column
_verdict = src[corr_col].astype(int).map({1:'yes', 0:'no'})
_small_columns = ['dataset','detector','video_name','true_label','correctly_predicted']
small = (
    src.assign(correctly_predicted=_verdict)[_small_columns]
       .sort_values(['true_label','video_name'], kind='stable')
       .reset_index(drop=True)
)

# Print all rows without column breaks
for _option, _value in (
    ("display.max_rows", 100000),
    ("display.max_columns", 1000),
    ("display.width", 10000),
    ("display.expand_frame_repr", False),
):
    pd.set_option(_option, _value)

print(small.to_string(index=False))

# Optional: save to Drive
DRIVE_ROOT = "/content/drive/My Drive" if os.path.exists("/content/drive/My Drive") else "/content/drive/MyDrive"
out_dir = os.path.join(DRIVE_ROOT, "CORE results FF++")
os.makedirs(out_dir, exist_ok=True)
small.to_csv(os.path.join(out_dir, "core_small_table.csv"), index=False)


             dataset       detector                            video_name true_label correctly_predicted
balanced_frames_FF++ CORE(Xception)                               000_003       fake                 yes
balanced_frames_FF++ CORE(Xception)                               010_005       fake                 yes
balanced_frames_FF++ CORE(Xception)                               011_805       fake                 yes
balanced_frames_FF++ CORE(Xception)                               012_026       fake                 yes
balanced_frames_FF++ CORE(Xception)                               013_883       fake                 yes
balanced_frames_FF++ CORE(Xception)                               014_790       fake                 yes
balanced_frames_FF++ CORE(Xception)                               015_919       fake                 yes
balanced_frames_FF++ CORE(Xception)                               016_209       fake                 yes
balanced_frames_FF++ CORE(Xception)                    

In [None]:
# Save the small table to the same folder: /content/drive/*/CORE results FF++
import os

# Requires the 'small' DataFrame created in the previous cell
if 'small' not in globals():
    raise SystemExit("No 'small' table found. Run the small-table cell first.")

# Resolve Drive root ("My Drive" first, "MyDrive" as fallback)
DRIVE_ROOT = "/content/drive/My Drive" if os.path.exists("/content/drive/My Drive") else "/content/drive/MyDrive"

# Ensure the results folder exists, then write the CSV without the index column
out_dir = os.path.join(DRIVE_ROOT, "CORE results FF++")
os.makedirs(out_dir, exist_ok=True)
csv_path = os.path.join(out_dir, "core_small_table.csv")

small.to_csv(csv_path, index=False)
print(f"Saved CSV to: {csv_path}")


Saved CSV to: /content/drive/My Drive/CORE results FF++/core_small_table.csv
