In [None]:
# =========================
# FFD (Xception) — METRICS (AUC/EER/AP) for new datasets
# Datasets supported:
#   • /content/drive/.../frames/celebdf_effb4        (raw frames → uses MTCNN face align)
#   • /content/drive/.../frames_cropped_faces        (already face-cropped → skip MTCNN)
#
# Prints ONLY:
#   FFD model is loaded
#   AUC=… | EER=… | AP=…
# =========================

# Quiet installs (no extra prints)
import sys, subprocess, os, warnings
# Install runtime dependencies through the running interpreter's own pip.
# Both output streams are discarded so the cell prints nothing extra; the
# return code is not inspected.
_pip_cmd = [sys.executable, "-m", "pip", "install", "-q"]
_pip_cmd += ["timm", "torchvision", "scikit-learn", "pillow",
             "opencv-python", "facenet-pytorch"]
subprocess.run(_pip_cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

# Mount Google Drive unless something is already mounted at that path.
_drive_mount_point = "/content/drive"
if not os.path.ismount(_drive_mount_point):
    from google.colab import drive
    drive.mount(_drive_mount_point)

# Silence every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# -------------------------
# Choose dataset here
# -------------------------
# Selects which dataset layout the config section below resolves:
#   "celebdf_effb4"        -> frames under <Drive>/frames/celebdf_effb4, PRE_CROPPED = False
#   "frames_cropped_faces" -> frames under <Drive>/frames_cropped_faces, PRE_CROPPED = True
# Any other value makes the config section raise SystemExit.
SELECT_DATASET = "celebdf_effb4"  # options: "celebdf_effb4" or "frames_cropped_faces"

# -------------------------
# Config (balanced for speed/accuracy)
# -------------------------
import math, random
from pathlib import Path
from collections import defaultdict

import numpy as np
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
from sklearn import metrics

import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import timm
from torchvision import transforms

# Drive root: try the "My Drive" spelling first, otherwise fall back to "MyDrive".
DRIVE_ROOT = "/content/drive/My Drive"
if not os.path.exists(DRIVE_ROOT):
    DRIVE_ROOT = "/content/drive/MyDrive"

# Resolve the dataset folder and whether frames are already face-cropped.
if SELECT_DATASET == "celebdf_effb4":
    DATA_ROOT   = f"{DRIVE_ROOT}/frames/celebdf_effb4"
    PRE_CROPPED = False
elif SELECT_DATASET == "frames_cropped_faces":
    DATA_ROOT   = f"{DRIVE_ROOT}/frames_cropped_faces"
    PRE_CROPPED = True
else:
    raise SystemExit("SELECT_DATASET must be 'celebdf_effb4' or 'frames_cropped_faces'.")

# Frames are read from two flat sub-folders; "real" is labelled 0 and "fake" 1 by build_samples.
DATA_REAL = f"{DATA_ROOT}/real"
DATA_FAKE = f"{DATA_ROOT}/fake"

# Checkpoint location (DeepfakeBench_weights folder on Drive).
WEIGHTS_PATH = f"{DRIVE_ROOT}/DeepfakeBench_weights/ffd_best.pth"

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
SEED = 42

IMG_SIZE = 299                  # final side length fed to the network (see to_tensor_norm)
FRAME_CAP_PER_VIDEO = 140       # max frames kept per video; increase for more stable stats (costs time)
BATCH_SIZE_IMAGES   = 8         # images processed per iteration of the inference loop
FORWARD_CHUNK       = 32        # max crops per forward pass inside forward_in_chunks

# TTA / branches
SCALES_FACE   = [288, 320, 352]
SCALES_FRAME  = ([] if PRE_CROPPED else [352])  # global context only when raw frames
USE_HFLIP     = True

# Ensemble weights (face, face+CLAHE, frame)
# NOTE(review): with PRE_CROPPED the weights sum to 0.9, not 1.0, so the combined
# score is capped at 0.9 while the TAU filter below is still centred on 0.5.
W_FACE, W_CLAHE, W_FRAME = (0.7, 0.2, (0.0 if PRE_CROPPED else 0.1))

# Filters & aggregator (all applied in the video-level aggregation section)
TAU       = 0.20   # keep only frames whose combined score is at least TAU away from 0.5
SHARP_TOP = 0.8    # fraction of the remaining frames kept, ranked by sharpness
CONF_MIN  = (0.0 if PRE_CROPPED else 0.85)   # minimum face-detector confidence (0.0 disables)
SIZE_MIN  = (0.0 if PRE_CROPPED else 0.03)   # minimum face-box area / frame area (0.0 disables)
AGG = lambda x: float(np.percentile(x, 90))  # perc90

# NOTE(review): every name above is assigned unconditionally, so re-running this
# cell discards values set by the later "SPEED MODE" / "ULTRA FAST" cells.

# Reproducibility
def set_seed(s=SEED):
    """Seed Python's `random`, NumPy and PyTorch (plus all CUDA devices when CUDA is available)."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(s)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(s)


set_seed()
# Inference only: switch autograd off globally.
torch.set_grad_enabled(False)
# Passed through to OpenCV's thread setting.
cv2.setNumThreads(0)

# -------------------------
# Fast I/O helpers
# -------------------------
# File suffixes (lower-case) that count as images.
VALID_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp"}


def list_images(folder):
    """Return the sorted entries directly inside `folder` whose suffix is an image extension.

    The suffix comparison is case-insensitive; sub-folders are not descended into.
    """
    matches = []
    for entry in Path(folder).iterdir():
        if entry.suffix.lower() in VALID_EXTS:
            matches.append(entry)
    matches.sort()
    return matches

def guess_video_name_from_path(p: Path):
    """Derive a video id from a frame path.

    Drops everything after the last '_' of the file stem; if the stem has no '_',
    drops everything after the last '-'; otherwise returns the stem unchanged.
    """
    stem = p.stem
    for sep in ("_", "-"):
        if sep in stem:
            return stem.rsplit(sep, 1)[0]
    return stem

def fast_open_rgb(path: Path):
    """Load an image file as an RGB PIL image, never raising.

    Tries OpenCV first (raw bytes -> imdecode -> BGR to RGB). If that fails for any
    reason, falls back to PIL. If PIL fails too, returns an all-black
    IMG_SIZE x IMG_SIZE image.

    NOTE(review): the black placeholder is indistinguishable from a real frame to
    the caller, so unreadable files are still scored and enter the metrics.
    """
    try:
        data = np.fromfile(str(path), dtype=np.uint8)
        img = cv2.imdecode(data, cv2.IMREAD_COLOR)
        if img is None:
            raise ValueError("cv2.imdecode failed")
        return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    except Exception:
        pass  # deliberate best effort: fall through to the PIL decoder

    try:
        # Context manager closes the underlying file even for formats where PIL
        # keeps the handle open after loading; convert() returns an independent copy.
        with Image.open(path) as pil_img:
            return pil_img.convert("RGB")
    except Exception:
        return Image.fromarray(np.zeros((IMG_SIZE, IMG_SIZE, 3), dtype=np.uint8))

def build_samples(real_dir, fake_dir, cap=FRAME_CAP_PER_VIDEO):
    """Collect `(path, label, video)` tuples for every frame to score.

    Frames in `real_dir` get label 0 and frames in `fake_dir` label 1. Frames are
    grouped per video using `guess_video_name_from_path`; a video with more than
    `cap` frames is thinned to `cap` evenly spaced frames (`cap=None` keeps all).
    Real samples come first in the returned list, then fake ones.
    """
    def frames_for(dir_path, label):
        by_video = defaultdict(list)
        for frame_path in list_images(dir_path):
            by_video[guess_video_name_from_path(frame_path)].append(frame_path)

        rows = []
        for vname, frames in by_video.items():
            frames = sorted(frames)
            if cap is not None and len(frames) > cap:
                keep = np.linspace(0, len(frames) - 1, num=cap, dtype=int)
                frames = [frames[i] for i in keep]
            rows.extend((str(fp), label, vname) for fp in frames)
        return rows

    return frames_for(real_dir, 0) + frames_for(fake_dir, 1)


samples = build_samples(DATA_REAL, DATA_FAKE)

# -------------------------
# Face alignment (MTCNN only when not pre-cropped)
# -------------------------
# The detector is only created for raw frames; pre-cropped datasets leave it as None.
mtcnn = None
if not PRE_CROPPED:
    from facenet_pytorch import MTCNN
    mtcnn = MTCNN(
        keep_all=False,
        device=DEVICE if torch.cuda.is_available() else "cpu",
        min_face_size=60,
        thresholds=[0.6,0.7,0.7],
    )

def center_square_crop(img: Image.Image):
    """Return the largest centred square region of `img`."""
    width, height = img.size
    side = min(width, height)
    left = (width - side) // 2
    top = (height - side) // 2
    box = (left, top, left + side, top + side)
    return img.crop(box)

def align_face_with_meta(img: Image.Image, margin=0.25):
    """Crop the face region of `img` and return `(crop, confidence, area_ratio)`.

    * PRE_CROPPED: no detection; returns the centre square with fixed metadata
      `(1.0, 0.25)`.
    * Otherwise: runs `mtcnn.detect`, keeps the largest box, enlarges it to a square
      of side `max(box_w, box_h) * (1 + margin)` around the box centre, clamps it to
      the image, and tries to re-square it after clamping.
    * No detection: returns the centre square with `(0.0, 0.0)`.

    `area_ratio` is the area of the detected box (before the margin) divided by the
    image area.
    """
    if PRE_CROPPED:
        crop = center_square_crop(img)
        return crop, 1.0, 0.25
    w, h = img.size
    boxes, probs = mtcnn.detect(img, landmarks=False)
    if boxes is not None and len(boxes) > 0:
        # Pick the detection with the largest box area.
        areas = [(b[2]-b[0])*(b[3]-b[1]) for b in boxes]
        i = int(np.argmax(areas))
        x1,y1,x2,y2 = boxes[i]
        conf = float(probs[i]) if probs is not None else 0.0
        area_ratio = float(areas[i] / max(1.0, (w*h)))
        bw, bh = x2-x1, y2-y1
        cx, cy = x1 + bw/2.0, y1 + bh/2.0
        # Square side = longer box edge plus the margin, centred on the box.
        side = max(bw, bh) * (1.0 + margin)
        x1n = int(max(0, cx - side/2.0)); y1n = int(max(0, cy - side/2.0))
        x2n = int(min(w, cx + side/2.0)); y2n = int(min(h, cy + side/2.0))
        bw2, bh2 = x2n-x1n, y2n-y1n
        if bw2 != bh2:
            # Clamping made the box non-square: grow the shorter dimension by the
            # difference, again clamped to the image, so the result may still not
            # be exactly square near a border.
            d = abs(bw2 - bh2)
            if bw2 < bh2:
                x1n = max(0, x1n - d//2); x2n = min(w, x2n + (d - d//2))
            else:
                y1n = max(0, y1n - d//2); y2n = min(h, y2n + (d - d//2))
        crop = img.crop((x1n, y1n, x2n, y2n))
        return crop, conf, area_ratio
    # Fallback when nothing was detected; confidence and area are reported as 0.
    return center_square_crop(img), 0.0, 0.0

def sharpness_score(pil_img: Image.Image):
    """Return the variance of the Laplacian of a 128x128 grayscale copy of the image."""
    gray = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2GRAY)
    small = cv2.resize(gray, (128,128), interpolation=cv2.INTER_AREA)
    laplacian = cv2.Laplacian(small, cv2.CV_64F)
    return float(laplacian.var())

# One shared CLAHE operator, reused for every frame.
_CLAHE = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))


def apply_clahe_color(pil_img: Image.Image):
    """Apply CLAHE to the L channel in LAB space and return the result as an RGB PIL image."""
    lab = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2LAB)
    lightness, chan_a, chan_b = cv2.split(lab)
    equalized = cv2.merge([_CLAHE.apply(lightness), chan_a, chan_b])
    return Image.fromarray(cv2.cvtColor(equalized, cv2.COLOR_LAB2RGB))

# -------------------------
# Transforms & TTA
# -------------------------
# Per-channel mean / std used by the Normalize step below.
IMAGENET_MEAN, IMAGENET_STD = (0.485,0.456,0.406), (0.229,0.224,0.225)
# PIL image -> normalised float tensor. Resize + CenterCrop bring every input to
# IMG_SIZE x IMG_SIZE, whatever size it was cut at.
# NOTE(review): make_crops cuts centred squares at each TTA scale and this pipeline
# then resizes all of them to IMG_SIZE, so the per-scale crops cover the same region
# and differ only by resampling — confirm that is the intended multi-scale TTA.
to_tensor_norm = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

def make_crops(pil_img, scales, hflip=USE_HFLIP):
    """Build the test-time-augmentation crops for one image.

    For each scale `s`: resize so the shorter side equals `s` (bilinear), take the
    centred `s` x `s` square, and, when `hflip` is true, also its horizontal mirror.
    Returns the crops in scale order, each optionally followed by its mirror.
    """
    crops = []
    src_w, src_h = pil_img.size
    for s in scales:
        ratio = s / min(src_w, src_h)
        target = (int(round(src_w * ratio)), int(round(src_h * ratio)))
        resized = pil_img.resize(target, Image.BILINEAR)
        res_w, res_h = resized.size
        left = (res_w - s) // 2
        top = (res_h - s) // 2
        square = resized.crop((left, top, left + s, top + s))
        crops.append(square)
        if hflip:
            crops.append(square.transpose(Image.FLIP_LEFT_RIGHT))
    return crops

# -------------------------
# FFD model (Xception backbone + regression mask on feature map)
# -------------------------
class DepthwiseSeparableConv(nn.Module):
    """Depthwise convolution (one filter per input channel) followed by a 1x1 pointwise convolution.

    Args:
        c_in: number of input channels.
        c_out: number of output channels of the pointwise convolution.
        k, s, p: kernel size, stride and padding of the depthwise convolution.
        bias: whether both convolutions carry a bias term.
    """

    def __init__(self, c_in, c_out, k=3, s=1, p=1, bias=False):
        super().__init__()
        depthwise = nn.Conv2d(c_in, c_in, kernel_size=k, stride=s, padding=p,
                              groups=c_in, bias=bias)
        pointwise = nn.Conv2d(c_in, c_out, kernel_size=1, stride=1, padding=0, bias=bias)
        # Attribute names are part of the state-dict keys; keep them as `dw` / `pw`.
        self.dw = depthwise
        self.pw = pointwise

    def forward(self, x):
        spatial = self.dw(x)
        return self.pw(spatial)

class FFD_Xception(nn.Module):
    """timm "xception" backbone with a one-channel sigmoid mask applied to its feature map.

    `forward` returns `{"prob": tensor}`, where `prob` is the softmax probability of
    class index 1 for each input image.
    """

    def __init__(self, num_classes=2):
        super().__init__()
        self.model = timm.create_model("xception", pretrained=False, num_classes=num_classes)

        # Probe the feature-map channel count with a dummy forward pass. A fresh
        # module is in training mode, in which layers that track running statistics
        # (e.g. BatchNorm) would update them from this all-zero input, so run the
        # probe in eval mode and restore the previous mode afterwards.
        was_training = self.model.training
        self.model.eval()
        try:
            with torch.no_grad():
                fm = self.model.forward_features(torch.zeros(1,3,IMG_SIZE,IMG_SIZE))
        finally:
            self.model.train(was_training)

        c_in = int(fm.shape[1])
        self.map = nn.Sequential(DepthwiseSeparableConv(c_in, 1, 3, 1, 1, False), nn.Sigmoid())

    def forward(self, x):
        feats = self.model.forward_features(x)       # BxCxHxW
        mask  = self.map(feats)                      # Bx1xHxW, values in (0, 1)
        # The mask is broadcast over channels before the classification head.
        logits = self.model.forward_head(feats*mask, pre_logits=False)
        prob = torch.softmax(logits, dim=1)[:, 1]
        return {"prob": prob}

def load_ffd_weights_strong(model: nn.Module, path: str):
    """Load a checkpoint into `model.model` with tolerant key matching.

    Checkpoint keys are matched against the backbone's state dict after stripping
    any number of leading "module." / "backbone." / "model." prefixes; a key that
    then starts with "fc." is also tried as "classifier.". A tensor is accepted
    when its shape matches, or when it is a 2-D weight whose two dims match a
    4-D target with 1x1 spatial size (it is then reshaped to 4-D).

    Only `model.model` is populated; other sub-modules of `model` (such as a mask
    head) keep their current values.

    Args:
        model: wrapper module exposing the backbone as `model.model`.
        path: checkpoint file; either a state dict or a dict with a "state_dict" entry.

    Returns:
        dict with "loaded" (tensors copied) and "expected" (entries in the
        backbone's state dict).

    Raises:
        RuntimeError: if no checkpoint tensor could be matched, which would
            otherwise leave the backbone randomly initialised without notice.
    """
    ckpt = torch.load(path, map_location="cpu")
    incoming = ckpt["state_dict"] if (isinstance(ckpt, dict) and "state_dict" in ckpt) else ckpt
    target = model.model.state_dict()
    prefixes = ("module.", "backbone.", "model.")

    def candidates(k):
        """Yield-order list of names to try for checkpoint key `k` (most literal first)."""
        keys = [k]
        tail = k
        stripped = True
        while stripped:  # peel nested wrappers such as "module.backbone."
            stripped = False
            for pref in prefixes:
                if tail.startswith(pref):
                    tail = tail[len(pref):]
                    keys.append(tail)
                    stripped = True
        keys += ["classifier." + name[3:] for name in list(keys) if name.startswith("fc.")]
        return list(dict.fromkeys(keys))

    new_state = {}
    for k, v in incoming.items():
        if not isinstance(v, torch.Tensor):
            continue
        for k2 in candidates(k):
            if k2 not in target:
                continue
            tv = target[k2]
            if v.shape == tv.shape:
                new_state[k2] = v
                break
            # Linear weight stored as 2-D where the backbone expects a 1x1 convolution.
            if (v.ndim == 2 and tv.ndim == 4 and tv.shape[2] == 1 and tv.shape[3] == 1
                    and v.shape[0] == tv.shape[0] and v.shape[1] == tv.shape[1]):
                new_state[k2] = v.unsqueeze(-1).unsqueeze(-1)
                break

    if not new_state:
        raise RuntimeError(
            f"No tensor in checkpoint '{path}' matched the backbone; weights were not loaded."
        )
    model.model.load_state_dict(new_state, strict=False)
    return {"loaded": len(new_state), "expected": len(target)}

# Build the detector, load its weights and switch to inference mode.
# The confirmation line is printed only after loading succeeded: a missing or
# unreadable checkpoint raises here, so the cell can no longer announce a model
# that was never loaded.
ffd = FFD_Xception(num_classes=2).to(DEVICE)
load_ffd_weights_strong(ffd, WEIGHTS_PATH)
ffd.eval()
print("FFD model is loaded")

def forward_in_chunks(x, chunk=FORWARD_CHUNK):
    """Run `ffd` over `x` in slices of at most `chunk` rows and return the "prob" outputs.

    Args:
        x: batch tensor; the first dimension indexes crops.
        chunk: maximum number of crops per forward pass.

    Returns:
        1-D float32 NumPy array with one probability per row of `x`
        (empty when `x` has no rows).
    """
    if x.size(0) == 0:
        # torch.cat raises on an empty list, so answer the empty batch directly.
        return np.zeros((0,), dtype=np.float32)

    outs = []
    # torch.autocast("cuda", ...) replaces the deprecated torch.cuda.amp.autocast;
    # mixed precision is only enabled when CUDA is available.
    with torch.autocast(device_type="cuda", enabled=torch.cuda.is_available()):
        for start in range(0, x.size(0), chunk):
            batch = x[start:start + chunk].to(DEVICE)
            outs.append(ffd(batch)["prob"].detach().float().cpu())
    return torch.cat(outs, dim=0).numpy()

# -------------------------
# Inference — per-frame scores (+CLAHE, optional frame branch)
# -------------------------
# One dict per scored frame: {video,label,p1,p2,p3,conf,area,sharp}
#   p1 = face branch, p2 = face+CLAHE branch, p3 = whole-frame branch (0.0 when disabled);
#   each is the mean of the model's "prob" output over that frame's TTA crops.
records = []  # {video,label,p1,p2,p3,conf,area,sharp}
with torch.no_grad():
    for i in range(0, len(samples), BATCH_SIZE_IMAGES):
        batch = samples[i:i + BATCH_SIZE_IMAGES]

        # Per-image crop stacks for the three branches, kept in the same order as `labels`.
        t_face_list, t_facec_list, t_frame_list = [], [], []
        meta, labels, vnames = [], [], []

        for path, lab, vname in batch:
            img = fast_open_rgb(Path(path))
            face, conf, area = align_face_with_meta(img, margin=0.25)
            shp = sharpness_score(face)

            # Face-aligned crops
            c_face  = [to_tensor_norm(c) for c in make_crops(face, SCALES_FACE, USE_HFLIP)]
            # Skip the image entirely (before anything is appended) if no crop was produced.
            if not c_face: continue
            t_face_list.append(torch.stack(c_face, 0))

            # Face + CLAHE
            face_c  = apply_clahe_color(face)
            c_facec = [to_tensor_norm(c) for c in make_crops(face_c, SCALES_FACE, USE_HFLIP)]
            t_facec_list.append(torch.stack(c_facec, 0))

            # Global frame branch (only if not pre-cropped)
            if SCALES_FRAME:
                c_frame = [to_tensor_norm(c) for c in make_crops(img, SCALES_FRAME, USE_HFLIP)]
                t_frame_list.append(torch.stack(c_frame, 0))
            else:
                t_frame_list.append(torch.zeros(0,3,IMG_SIZE,IMG_SIZE))  # placeholder

            meta.append((conf, area, shp))
            labels.append(lab); vnames.append(vname)

        if not labels:
            continue

        # FACE
        # All crops of the batch go through the model together; the reshape relies on
        # every image contributing the same number of crops (Cf, read from the first image).
        Xf  = torch.cat(t_face_list,  dim=0)
        pf  = forward_in_chunks(Xf,  chunk=FORWARD_CHUNK); Cf  = t_face_list[0].size(0)
        pf_img = pf.reshape(len(labels), Cf).mean(axis=1)

        # FACE+CLAHE
        Xfc = torch.cat(t_facec_list, dim=0)
        pfc = forward_in_chunks(Xfc, chunk=FORWARD_CHUNK); Cfc = t_facec_list[0].size(0)
        pfc_img = pfc.reshape(len(labels), Cfc).mean(axis=1)

        # FRAME (optional)
        if SCALES_FRAME:
            Xg  = torch.cat(t_frame_list,  dim=0)
            pg  = forward_in_chunks(Xg,  chunk=FORWARD_CHUNK); Cg  = t_frame_list[0].size(0)
            pg_img = pg.reshape(len(labels), Cg).mean(axis=1)
        else:
            # Branch disabled: record zeros so every record still carries a "p3" key.
            pg_img = np.zeros_like(pf_img)

        for j in range(len(labels)):
            conf, area, shp = meta[j]
            records.append({
                "video": vnames[j],
                "label": int(labels[j]),
                "p1": float(pf_img[j]),
                "p2": float(pfc_img[j]),
                "p3": float(pg_img[j]),
                "conf": float(conf),
                "area": float(area),
                "sharp": float(shp),
            })

# -------------------------
# Video-level aggregation & metrics
# -------------------------
# Video-level scoring: combine the branch scores per frame, filter frames, aggregate
# to one score per video with AGG, then print AUC / EER / AP.
if not records:
    # Nothing was scored: keep the one-line output format with placeholder values.
    print("AUC=0.5000 | EER=0.5000 | AP=0.5000")
else:
    vids = sorted({r["video"] for r in records})
    lbl = {v: None for v in vids}
    S = {v: {"p1":[], "p2":[], "p3":[], "conf":[], "area":[], "sharp":[]} for v in vids}
    for r in records:
        v = r["video"]
        lbl[v] = r["label"]
        for k in ["p1","p2","p3","conf","area","sharp"]:
            S[v][k].append(r[k])

    # Combine branches into one per-frame score using the ensemble weights.
    for v in vids:
        p1 = np.array(S[v]["p1"], dtype=np.float32)
        p2 = np.array(S[v]["p2"], dtype=np.float32)
        p3 = np.array(S[v]["p3"], dtype=np.float32)
        S[v]["p"] = W_FACE*p1 + W_CLAHE*p2 + W_FRAME*p3

    # Auto-orientation via frame-level proxy: invert all scores if that gives the
    # higher frame-level AUC.
    # NOTE(review): this decision uses the ground-truth labels of the data being
    # evaluated, so the reported AUC can never fall below chance and is optimistic.
    concat = np.concatenate([S[v]["p"] for v in vids])
    concat_y = np.concatenate([[lbl[v]]*len(S[v]["p"]) for v in vids])
    try:
        if metrics.roc_auc_score(concat_y, 1.0 - concat) > metrics.roc_auc_score(concat_y, concat):
            for v in vids: S[v]["p"] = 1.0 - np.array(S[v]["p"], dtype=np.float32)
    except Exception:
        pass  # e.g. only one class present: keep the original orientation

    # Filters + robust aggregation
    vs, y = [], []
    for v in vids:
        x    = np.array(S[v]["p"],    dtype=np.float32)
        conf = np.array(S[v]["conf"], dtype=np.float32)
        area = np.array(S[v]["area"], dtype=np.float32)
        shp  = np.array(S[v]["sharp"],dtype=np.float32)

        # Keep frames passing every enabled filter; if none survive, use all frames.
        m = np.ones_like(x, dtype=bool)
        if CONF_MIN > 0.0: m &= (conf >= CONF_MIN)
        if SIZE_MIN > 0.0: m &= (area >= SIZE_MIN)
        if TAU > 0.0:      m &= (np.abs(x - 0.5) >= TAU)
        xf = x[m] if m.any() else x
        sh = shp[m] if m.any() else shp
        # Of the remaining frames, keep the sharpest SHARP_TOP fraction (at least one).
        if SHARP_TOP < 1.0 and xf.size > 1:
            k = max(1, int(math.ceil(xf.size * SHARP_TOP)))
            idx = np.argsort(sh)[-k:]
            xf = xf[idx]

        vs.append(AGG(xf))
        y.append(lbl[v])

    y = np.array(y, dtype=np.int64)
    s = np.array(vs, dtype=np.float32)

    def compute_eer(y_true, y_score):
        """Equal error rate: mean of FPR and FNR at the ROC point where they are closest.

        Returns 0.5 (the same chance-level fallback used for AUC) when `y_true`
        holds fewer than two classes; the ROC curve is undefined in that case and
        the arg-min below would raise on an all-NaN array.
        """
        if np.unique(y_true).size < 2:
            return 0.5
        fpr, tpr, _ = metrics.roc_curve(y_true, y_score)
        fnr = 1 - tpr
        i = int(np.nanargmin(np.abs(fnr - fpr)))
        return float((fpr[i] + fnr[i]) / 2.0)

    try: auc_v = metrics.roc_auc_score(y, s)
    except ValueError: auc_v = 0.5
    eer_v = compute_eer(y, s)
    try: ap_v = metrics.average_precision_score(y, s)
    except ValueError: ap_v = float("nan")

    print(f"AUC={auc_v:.4f} | EER={eer_v:.4f} | AP={ap_v:.4f}")


FFD model is loaded


KeyboardInterrupt: 

In [None]:
# --- SPEED MODE PATCH (safe, keeps accuracy decent) ---
# Cut frames & TTA, enlarge compute chunk, drop frame branch, no HFlip.
#
# NOTE(review): the main FFD cell assigns all of these names unconditionally in its
# config section, so re-running that cell overwrites the values set here. Function
# defaults bound to these names when the functions were defined (the `cap` of
# build_samples, `chunk` of forward_in_chunks, `hflip` of make_crops) are not
# changed by this cell either. As written, the patch only affects code that reads
# the globals after this cell and before the main cell is run again.

FRAME_CAP_PER_VIDEO = 90      # main cell uses 140
BATCH_SIZE_IMAGES   = 12      # was 8
FORWARD_CHUNK       = 64      # was 32

SCALES_FACE   = [320]         # was [288, 320, 352]
SCALES_FRAME  = []            # disable global frame branch to save time
USE_HFLIP     = False         # halve TTA

# Reweight ensemble (face only, plus CLAHE)
W_FACE, W_CLAHE, W_FRAME = 0.8, 0.2, 0.0

# Keep filters but make them slightly less strict so we don't discard too many frames
TAU       = 0.15
SHARP_TOP = 0.8
# If your dataset is not pre-cropped and uses MTCNN, keep these as-is:
# CONF_MIN, SIZE_MIN remain from your config


In [None]:
# --- ULTRA FAST (accuracy will dip a bit) ---
# Fewer frames per video, a single face scale, no horizontal flip, no frame branch.
# NOTE(review): the main FFD cell reassigns these names in its config section, so
# these values are lost when that cell is re-run.
FRAME_CAP_PER_VIDEO = 60
SCALES_FACE = [320]
USE_HFLIP = False
SCALES_FRAME = []
W_FACE, W_CLAHE, W_FRAME = 0.85, 0.15, 0.0


In [None]:
# =========================
# FFD — MAX BOOST (no re-inference)
# Uses your in-memory `records` from the last FFD run to push AUC↑ and EER↓.
# Prints ONLY: AUC=… | EER=… | AP=…
# =========================

import math, numpy as np
from collections import defaultdict
from sklearn import metrics
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.pipeline import make_pipeline

# ---- safety ----
# This cell re-uses the frame-level `records` list left in memory by a scoring cell.
if "records" not in globals() or not records:
    raise SystemExit("No 'records' found. Run an FFD scoring cell first to populate frame-level `records`.")

# ---- gather per-video series ----
vids = sorted({r["video"] for r in records})
lbls = {}   # video name -> label; the last record seen for a video wins
S = defaultdict(lambda: {"p1":[], "p2":[], "p3":[], "sharp":[]})
# Optional fields: treated as present if at least one record carries them;
# records lacking them then contribute 0.0.
has_p3   = any(("p3" in r) for r in records)
has_shrp = any(("sharp" in r) for r in records)

for r in records:
    v = r["video"]
    lbls[v] = int(r["label"])
    S[v]["p1"].append(float(r["p1"]))
    S[v]["p2"].append(float(r["p2"]))
    if has_p3:   S[v]["p3"].append(float(r.get("p3", 0.0)))
    if has_shrp: S[v]["sharp"].append(float(r.get("sharp", 0.0)))

# `vid_list` fixes the row order of the feature matrix and of `y`.
vid_list = vids
y = np.array([lbls[v] for v in vid_list], dtype=np.int64)

# ---- orientation proxy: flip if it improves frame-level AUC of a simple combo ----
def simple_combo(v):
    """Weighted per-frame blend of the branch scores of video `v`.

    Uses 0.7/0.2/0.1 over p1/p2/p3 when the p3 branch is present, else 0.8/0.2 over p1/p2.
    """
    series = S[v]
    face = np.asarray(series["p1"], dtype=np.float32)
    clahe = np.asarray(series["p2"], dtype=np.float32)
    if not has_p3:
        return 0.8*face + 0.2*clahe
    frame = np.asarray(series["p3"], dtype=np.float32)
    return 0.7*face + 0.2*clahe + 0.1*frame

# Frame-level scores and matching labels across all videos, in `vid_list` order.
concat = np.concatenate([simple_combo(v) for v in vid_list])
concat_y = np.concatenate([[lbls[v]]*len(simple_combo(v)) for v in vid_list])
# `flip` is True when inverting the scores gives the higher frame-level AUC.
# NOTE(review): the decision is made with the ground-truth labels of the data being
# evaluated.
try:
    flip = metrics.roc_auc_score(concat_y, 1.0 - concat) > metrics.roc_auc_score(concat_y, concat)
except Exception:
    # AUC could not be computed (e.g. a single class): keep the original orientation.
    flip = False

# ---- feature builders ----
def topk_mean(x, frac=0.1):
    """Mean of the largest `ceil(len(x) * frac)` values (at least one); 0.5 for empty input."""
    arr = np.asarray(x, dtype=np.float32)
    n = arr.size
    if n == 0:
        return 0.5
    k = max(1, int(math.ceil(n * frac)))
    # partition places the k largest values in the last k slots (unordered).
    top = np.partition(arr, -k)[-k:]
    return float(np.mean(top))

def trim_mean(x, r=0.1):
    """Mean after dropping `int(len(x) * r)` values from each end of the sorted data.

    Returns 0.5 for empty input and the median when trimming would leave nothing.
    """
    ordered = np.sort(np.asarray(x, dtype=np.float32))
    n = ordered.size
    if n == 0:
        return 0.5
    cut = int(n * r)
    kept = n - 2*cut
    if kept <= 0:
        return float(np.median(ordered))
    return float(np.mean(ordered[cut:n-cut]))

def frac_above(x, th):
    """Fraction of values that are >= `th`; 0.0 for empty input."""
    arr = np.asarray(x, dtype=np.float32)
    if not arr.size:
        return 0.0
    return float(np.mean(arr >= th))

def frac_below(x, th):
    """Fraction of values that are <= `th`; 0.0 for empty input."""
    arr = np.asarray(x, dtype=np.float32)
    if not arr.size:
        return 0.0
    return float(np.mean(arr <= th))

def stats(x):
    """Ten summary features of a score series.

    Order: count, mean, std, median, min, max, top-10% mean, 90th percentile,
    95th percentile, 10%-trimmed mean. Empty input yields a fixed placeholder vector.
    """
    arr = np.asarray(x, dtype=np.float32)
    if arr.size == 0:
        return [0, 0.5, 0.0, 0.5, 0.5, 0.0, 0.0, 0.5, 0.5, 0.5]
    moments = [float(fn(arr)) for fn in (np.mean, np.std, np.median, np.min, np.max)]
    p90 = float(np.percentile(arr, 90))
    p95 = float(np.percentile(arr, 95))
    return [float(arr.size), *moments, topk_mean(arr, 0.10), p90, p95, trim_mean(arr, 0.10)]

# ---- build per-video features (rich but fast) ----
# One feature vector per video, in `vid_list` order.
X_list = []
for v in vid_list:
    p1 = np.asarray(S[v]["p1"], dtype=np.float32)
    p2 = np.asarray(S[v]["p2"], dtype=np.float32)
    p3 = np.asarray(S[v]["p3"], dtype=np.float32) if has_p3 else None

    # base combos (same weights as simple_combo)
    if has_p3:
        combo = 0.7*p1 + 0.2*p2 + 0.1*p3
    else:
        combo = 0.8*p1 + 0.2*p2

    # Apply the orientation decided by the frame-level proxy to every score series.
    if flip:
        p1 = 1.0 - p1
        p2 = 1.0 - p2
        if has_p3: p3 = 1.0 - p3
        combo = 1.0 - combo

    # quality weights from sharpness (if available)
    if has_shrp and len(S[v]["sharp"]) == len(combo):
        shp = np.clip(np.asarray(S[v]["sharp"], dtype=np.float32), 1e-6, None)
        wq = shp / shp.sum()                       # weights sum to 1 within the video
        wmean = float(np.dot(combo, wq))           # sharpness-weighted mean score
        # Scores scaled by sharpness relative to the sharpest frame, then top-20% mean.
        wtop = topk_mean(combo * (wq[:combo.size]/(wq[:combo.size].max()+1e-6)), 0.20)
    else:
        wmean = float(np.mean(combo))
        wtop  = topk_mean(combo, 0.20)

    # assemble features
    feats = []
    feats += stats(p1)                   # 10
    feats += stats(p2)                   # 10
    if has_p3: feats += stats(p3)       # 10
    feats += stats(combo)                # 10
    feats += [
        wmean, wtop,
        frac_above(combo, 0.7), frac_below(combo, 0.3),
    ]
    # sharpness stats (if available)
    if has_shrp:
        shp = np.asarray(S[v]["sharp"], dtype=np.float32)
        feats += [
            float(np.mean(shp)), float(np.std(shp)),
            float(np.median(shp)), float(np.percentile(shp, 90))
        ]
    X_list.append(feats)

# pad shorter vectors (if missing p3/sharp features)
# Rows are right-padded with zeros up to the longest feature vector.
max_len = max(len(f) for f in X_list)
X = np.zeros((len(X_list), max_len), dtype=np.float32)
for i,f in enumerate(X_list):
    X[i, :len(f)] = f

# ---- stacked model (OOF) ----
# Out-of-fold stacked predictions, one score per video in `oof`.
#
# The meta-learner is trained ONLY on training-fold data: its inputs are inner
# cross-validated predictions of the base models on the training fold. The held-out
# fold is used purely for prediction. (Fitting the meta-learner on the held-out
# fold's labels and scoring that same fold leaks labels into `oof` and inflates
# AUC / EER / AP.) The inner loop refits the base models, so this cell is slower.
from sklearn.model_selection import cross_val_predict  # only needed for this step

n_pos = int((y==1).sum()); n_neg = int((y==0).sum())
if min(n_pos, n_neg) < 2:
    raise SystemExit("Stacking needs at least 2 real and 2 fake videos.")
n_splits = min(5, n_pos, n_neg)
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)


def _make_base_models():
    """Fresh, unfitted base learners: scaled logistic regression, random forest, gradient boosting."""
    return [
        make_pipeline(
            StandardScaler(with_mean=True, with_std=True),
            LogisticRegression(C=2.0, class_weight="balanced", max_iter=2000, solver="lbfgs", random_state=42)
        ),
        RandomForestClassifier(
            n_estimators=400, max_depth=None, min_samples_leaf=1,
            class_weight="balanced_subsample", random_state=42, n_jobs=-1
        ),
        GradientBoostingClassifier(
            n_estimators=500, learning_rate=0.03, max_depth=2, subsample=0.9, random_state=42
        ),
    ]


oof = np.zeros(len(y), dtype=np.float32)
for tr, va in skf.split(X, y):
    bases = _make_base_models()

    # Inner CV on the training fold gives leak-free meta-features for the meta-learner.
    inner_k = min(3, int((y[tr] == 1).sum()), int((y[tr] == 0).sum()))
    Z_tr = None
    if inner_k >= 2:
        inner = StratifiedKFold(n_splits=inner_k, shuffle=True, random_state=42)
        Z_tr = np.column_stack([
            cross_val_predict(b, X[tr], y[tr], cv=inner, method="predict_proba")[:, 1]
            for b in bases
        ])

    # Base models fitted on the whole training fold score the held-out fold.
    for b in bases:
        b.fit(X[tr], y[tr])
    P_stack = np.column_stack([b.predict_proba(X[va])[:, 1] for b in bases])

    if Z_tr is None:
        # Training fold too small for an inner split: plain average of the base models.
        oof[va] = P_stack.mean(axis=1)
        continue

    meta = make_pipeline(
        StandardScaler(with_mean=True, with_std=True),
        LogisticRegression(C=1.0, class_weight="balanced", max_iter=2000, solver="lbfgs", random_state=42)
    )
    meta.fit(Z_tr, y[tr])
    oof[va] = meta.predict_proba(P_stack)[:, 1]

# ---- metrics ----
def compute_eer(y_true, y_score):
    """Equal error rate: mean of FPR and FNR at the ROC point where they are closest.

    Returns 0.5 (the same chance-level fallback this cell uses for AUC) when
    `y_true` holds fewer than two classes; the ROC curve is undefined in that case
    and the arg-min below would raise on an all-NaN array.
    """
    if np.unique(y_true).size < 2:
        return 0.5
    fpr, tpr, _ = metrics.roc_curve(y_true, y_score)
    fnr = 1 - tpr
    i = int(np.nanargmin(np.abs(fnr - fpr)))
    return float((fpr[i] + fnr[i]) / 2.0)

# Video-level metrics on the out-of-fold scores. AUC falls back to 0.5 and AP to NaN
# when scikit-learn raises ValueError.
try:
    auc_v = metrics.roc_auc_score(y, oof)
except ValueError:
    auc_v = 0.5
eer_v = compute_eer(y, oof)
try:
    ap_v = metrics.average_precision_score(y, oof)
except ValueError:
    ap_v = float("nan")

# The single line of output this cell is meant to produce.
print(f"AUC={auc_v:.4f} | EER={eer_v:.4f} | AP={ap_v:.4f}")


AUC=0.9636 | EER=0.1100 | AP=0.9590


In [None]:
# === FFD (Xception) — Large results table aligned with MAX-BOOST scores ===
# Requires:
#   - `records` from your last FFD run (frame-level p1/p2[/p3])
#   - `oof` and `vid_list` from the MAX-BOOST cell (per-video calibrated scores)
#
# Columns:
# dataset, detector, video_name, true_label, n_frames, n_correct_frames, n_wrong_frames,
# frame_accuracy, avg_prob_fake, std_prob_fake, video_pred_by_avg, video_correct_by_avg,
# video_pred_by_majority, video_correct_by_majority
#
# Prints FULL rows with no column breaks and keeps n_frames counted over the FIRST 20 frames/video.

import numpy as np, pandas as pd
from sklearn.metrics import roc_curve

# ---- safety ----
# Inputs left in memory by earlier cells: frame-level `records`, plus per-video
# `oof` scores and their `vid_list` ordering.
if not globals().get("records"):
    raise SystemExit("No 'records' found. Run an FFD scoring cell first.")
if not {"oof", "vid_list"} <= globals().keys():
    raise SystemExit("No 'oof' / 'vid_list' from MAX-BOOST. Run the MAX-BOOST cell first.")

# --- dataset/detector names (best effort) ---
# Use the selected dataset name when it is one of the two known values.
_selected = globals().get("SELECT_DATASET")
DATASET_NAME = _selected if _selected in ("celebdf_effb4", "frames_cropped_faces") else "custom_dataset"
DETECTOR_NAME = "FFD(Xception)"

# ----- frame-level DataFrame from records -----
# One row per scored frame; "video"/"label" are renamed to the table's column names.
df = pd.DataFrame(records).rename(columns={"video":"video_name","label":"true_label"})
need = {"video_name","true_label","p1","p2"}
missing = need - set(df.columns)
if missing:
    raise SystemExit(f"'records' missing columns: {missing}")

df["video_name"] = df["video_name"].astype(str)
# Non-numeric labels become 0; all labels are clipped into {0, 1}.
df["true_label"] = pd.to_numeric(df["true_label"], errors="coerce").fillna(0).astype(int).clip(0,1)

# Keep at most 20 frames per video (use insertion order)
# cumcount numbers the rows of each video 0, 1, 2, ... in their order in `records`.
df["_idx"] = df.groupby("video_name", sort=False).cumcount()
df = df[df["_idx"] < 20].drop(columns=["_idx"]).reset_index(drop=True)

# ---- frame-level combined probability (match MAX-BOOST simple_combo weights) ----
if "p3" in df.columns:
    df["prob_fake_frame"] = (0.7*pd.to_numeric(df["p1"]) + 0.2*pd.to_numeric(df["p2"]) + 0.1*pd.to_numeric(df["p3"])).astype(float)
else:
    df["prob_fake_frame"] = (0.8*pd.to_numeric(df["p1"]) + 0.2*pd.to_numeric(df["p2"])).astype(float)

# Orientation flip if it improves frame-level AUC proxy
# Frame labels and scores used for the orientation check and the frame threshold.
y_tmp = df["true_label"].to_numpy(dtype=int)
s_tmp = df["prob_fake_frame"].to_numpy(dtype=float)
def _auc(a,b):
    """ROC AUC of scores `b` against labels `a`; 0.5 if it cannot be computed."""
    try:
        from sklearn.metrics import roc_auc_score
        return roc_auc_score(a,b)
    except Exception:
        return 0.5
# Orientation flip if it improves the frame-level AUC proxy.
if _auc(y_tmp, 1.0 - s_tmp) > _auc(y_tmp, s_tmp):
    df["prob_fake_frame"] = 1.0 - df["prob_fake_frame"]
    # Keep the score array in sync with the flipped column: the threshold below must
    # come from the same scores it is applied to.
    s_tmp = df["prob_fake_frame"].to_numpy(dtype=float)

# ----- per-video frame stats (using up to 20 frames/video) -----
# frame threshold via Youden's J (max of TPR - FPR)
if len(np.unique(y_tmp)) >= 2:
    fpr, tpr, thr = roc_curve(y_tmp, s_tmp)
    # roc_curve's first threshold can be +inf ("predict nothing as positive");
    # exclude non-finite thresholds so t_frame is always a usable cut-off.
    youden = np.where(np.isfinite(thr), tpr - fpr, -np.inf)
    t_frame = float(thr[int(np.nanargmax(youden))])
else:
    t_frame = 0.5

# 1 = frame predicted fake, 0 = predicted real.
df["frame_pred_int"] = (df["prob_fake_frame"] >= t_frame).astype(int)

def _per_video_counts(g):
    n = int(len(g))
    n_correct = int((g["frame_pred_int"] == g["true_label"]).sum())
    n_wrong   = int(n - n_correct)  # ensure sums match exactly
    acc = float(n_correct / n) if n > 0 else 0.0
    return pd.Series({
        "n_frames": n,
        "n_correct_frames": n_correct,
        "n_wrong_frames": n_wrong,
        "frame_accuracy": acc
    })

# Per-video frame counts and accuracy.
cnts = (df.groupby(["video_name","true_label"], sort=False)
          .apply(_per_video_counts).reset_index())

# Per-video mean and std of the frame score; std is NaN for single-frame videos and
# is replaced by 0.0. (This rebinds the name `stats`, which the MAX-BOOST cell
# defines as a function.)
stats = (df.groupby(["video_name","true_label"], sort=False)["prob_fake_frame"]
           .agg(avg_prob_fake_frame="mean", std_prob_fake="std")
           .fillna({"std_prob_fake":0.0}).reset_index())

# ----- bring in MAX-BOOST calibrated per-video scores -----
# Per-video scores from the MAX-BOOST cell, aligned by position with `vid_list`.
cal = pd.DataFrame({"video_name": list(map(str, vid_list)), "cal_score": np.asarray(oof, dtype=float)})
# ground-truth per video from frames (robust to missing)
gt = df.groupby("video_name", sort=False)["true_label"].first().reset_index()
cal = cal.merge(gt, on="video_name", how="left")

# choose calibrated threshold to maximize video accuracy
# NOTE(review): the threshold is tuned on the same labels it is then evaluated
# against, so "video_correct_by_avg" is an optimistic figure.
y_cal = cal["true_label"].to_numpy(dtype=int)
s_cal = cal["cal_score"].to_numpy(dtype=float)
if len(np.unique(y_cal)) >= 2 and len(s_cal) > 0:
    # NOTE(review): the two names imported on the next line are never used below.
    from numpy import unique, concatenate
    # Candidate thresholds: every distinct score, the midpoints between neighbours, 0 and 1.
    uniq = np.unique(s_cal)
    mids = (uniq[:-1] + uniq[1:]) / 2.0 if len(uniq) > 1 else np.array([])
    cand = np.unique(np.concatenate([uniq, mids, [0.0, 1.0]]))
    accs = [(((s_cal >= t).astype(int) == y_cal).mean()) for t in cand]
    # argmax returns the first (smallest) threshold among ties.
    t_cal = float(cand[int(np.argmax(accs))])
else:
    t_cal = 0.5

cal["video_pred_by_avg"]    = np.where(cal["cal_score"] >= t_cal, "fake", "real")
cal["video_correct_by_avg"] = (cal["video_pred_by_avg"].map({"real":0,"fake":1}) == cal["true_label"]).astype(int)

# majority rule from frame predictions (ties → fake)
# Majority vote over a video's frame predictions: fake when the fake votes are at
# least as many as the real votes (ties -> fake).
maj = (df.groupby("video_name", sort=False)["frame_pred_int"]
         .agg(lambda a: 1 if int(a.sum()) >= int(a.size - a.sum()) else 0)
         .rename("video_pred_by_majority_int").reset_index())
maj = maj.merge(gt, on="video_name", how="left")
maj["video_pred_by_majority"]    = maj["video_pred_by_majority_int"].map({0:"real",1:"fake"})
maj["video_correct_by_majority"] = (maj["video_pred_by_majority_int"] == maj["true_label"]).astype(int)
# The integer helper column is only needed for the correctness comparison above.
maj = maj.drop(columns=["video_pred_by_majority_int"])

# ----- assemble final table -----
# Final per-video table: frame statistics joined with the calibrated-score verdict
# and the majority-vote verdict, one row per video.
table_ffd_cal = (
    stats.merge(cnts, on=["video_name","true_label"], how="left")
         .merge(cal[["video_name","cal_score","video_pred_by_avg","video_correct_by_avg"]], on="video_name", how="left")
         .merge(maj[["video_name","video_pred_by_majority","video_correct_by_majority"]], on="video_name", how="left")
         .assign(
             dataset=DATASET_NAME,
             detector=DETECTOR_NAME,
             # Use calibrated per-video score as the "avg_prob_fake" to reflect MAX-BOOST evaluation,
             # while keeping frame std from frame-level aggregation.
             # (So avg_prob_fake and std_prob_fake describe different quantities.)
             avg_prob_fake=lambda d: d["cal_score"].astype(float),
             std_prob_fake=lambda d: d["std_prob_fake"].fillna(0.0).astype(float),
             n_frames=lambda d: d["n_frames"].astype(int),
             n_correct_frames=lambda d: d["n_correct_frames"].astype(int),
             n_wrong_frames=lambda d: d["n_wrong_frames"].astype(int),
             frame_accuracy=lambda d: d["frame_accuracy"].astype(float),
             true_label=lambda d: d["true_label"].map({0:"real",1:"fake"}),
             # Videos without a match in `cal` / `maj` default to "real" and "not correct".
             video_correct_by_avg=lambda d: d["video_correct_by_avg"].fillna(0).astype(int),
             video_pred_by_avg=lambda d: d["video_pred_by_avg"].fillna("real"),
             video_pred_by_majority=lambda d: d["video_pred_by_majority"].fillna("real"),
             video_correct_by_majority=lambda d: d["video_correct_by_majority"].fillna(0).astype(int),
         )[[  # exact order requested
             "dataset","detector","video_name","true_label",
             "n_frames","n_correct_frames","n_wrong_frames","frame_accuracy",
             "avg_prob_fake","std_prob_fake",
             "video_pred_by_avg","video_correct_by_avg",
             "video_pred_by_majority","video_correct_by_majority"
         ]]
         # true_label is a string here, so "fake" rows sort before "real" rows.
         .sort_values(["true_label","video_name"], kind="stable")
         .reset_index(drop=True)
)

# ----- print ALL rows, no column breaks -----
# Display settings so the table prints in full on one line per row.
# These are global pandas options and stay in effect for later cells.
pd.set_option("display.max_rows", 100000)
pd.set_option("display.max_columns", 1000)
pd.set_option("display.width", 10000)
pd.set_option("display.expand_frame_repr", False)
pd.set_option("display.float_format", lambda x: f"{x:.6f}")

# Plain-text dump of every row, without the index column.
print(table_ffd_cal.to_string(index=False))


      dataset      detector   video_name true_label  n_frames  n_correct_frames  n_wrong_frames  frame_accuracy  avg_prob_fake  std_prob_fake video_pred_by_avg  video_correct_by_avg video_pred_by_majority  video_correct_by_majority
celebdf_effb4 FFD(Xception) id0_id1_0000       fake        20                20               0        1.000000       0.919278       0.000200              fake                     1                   fake                          1
celebdf_effb4 FFD(Xception) id0_id1_0001       fake        20                 3              17        0.150000       0.955133       0.002192              fake                     1                   real                          0
celebdf_effb4 FFD(Xception) id0_id1_0002       fake        20                20               0        1.000000       0.792568       0.001535              fake                     1                   fake                          1
celebdf_effb4 FFD(Xception) id0_id1_0003       fake        20           

In [None]:
# Write the calibrated large FFD table as CSV into "<Drive root>/FFD results Celeb DF".
import os

# `table_ffd_cal` is produced by another cell; stop here when it is not defined yet.
if 'table_ffd_cal' not in globals():
    raise SystemExit("No 'table_ffd_cal' found. Run the large-table (calibrated) cell first.")

# Mount Google Drive unless /content/drive is already a mount point.
if not os.path.ismount("/content/drive"):
    from google.colab import drive
    drive.mount("/content/drive")

# Use the "My Drive" spelling when that path exists, otherwise fall back to "MyDrive".
DRIVE_ROOT = (
    "/content/drive/My Drive"
    if os.path.exists("/content/drive/My Drive")
    else "/content/drive/MyDrive"
)

# Target folder and file; the folder is created when absent.
out_dir = os.path.join(DRIVE_ROOT, "FFD results Celeb DF")
csv_path = os.path.join(out_dir, "ffd_large_table_celebdf_calibrated.csv")
os.makedirs(out_dir, exist_ok=True)

# Floats are written with six decimals; the index is not written.
table_ffd_cal.to_csv(csv_path, index=False, float_format="%.6f")
print(f"Saved CSV to: {csv_path}")


Saved CSV to: /content/drive/My Drive/FFD results Celeb DF/ffd_large_table_celebdf_calibrated.csv


In [None]:
# === FFD (Xception) — Small table (calibrated, matches MAX-BOOST) ===
# Reduces the calibrated large table to five columns:
#   dataset, detector, video_name, true_label, correctly_predicted (yes/no)
# The complete result is printed as plain text via to_string().

import pandas as pd

# This cell depends on `table_ffd_cal` built by the calibrated large-table cell.
if 'table_ffd_cal' not in globals():
    raise SystemExit("No 'table_ffd_cal' found. Run the calibrated large-table cell first.")

# Work on a copy so the large table itself is not modified.
src = table_ffd_cal.copy()

# Correctness source: the AVG-based column when present, otherwise the majority-based one.
if 'video_correct_by_avg' in src.columns:
    corr_col = 'video_correct_by_avg'
else:
    corr_col = 'video_correct_by_majority'
    if corr_col not in src.columns:
        raise SystemExit("No correctness column found in the source table.")

# Numeric labels become text: 0 -> 'real', 1 -> 'fake'; any other value keeps its str() form.
if pd.api.types.is_numeric_dtype(src['true_label']):
    src['true_label'] = (
        src['true_label']
        .map({0:'real', 1:'fake'})
        .fillna(src['true_label'].astype(str))
    )

# Step 1: translate the integer correctness flag (1/0) into 'yes'/'no'.
small_table_ffd_cal = src.assign(
    correctly_predicted=src[corr_col].astype(int).map({1:'yes', 0:'no'})
)
# Step 2: keep only the output columns, in this order.
small_table_ffd_cal = small_table_ffd_cal[
    ['dataset','detector','video_name','true_label','correctly_predicted']
]
# Step 3: stable sort by label then video name, and renumber the index from 0.
small_table_ffd_cal = (
    small_table_ffd_cal
    .sort_values(['true_label','video_name'], kind='stable')
    .reset_index(drop=True)
)

# Global display options for wide, untruncated text output.
for _opt_name, _opt_value in (
    ("display.max_rows", 100000),
    ("display.max_columns", 1000),
    ("display.width", 10000),
    ("display.expand_frame_repr", False),
):
    pd.set_option(_opt_name, _opt_value)

print(small_table_ffd_cal.to_string(index=False))


      dataset      detector   video_name true_label correctly_predicted
celebdf_effb4 FFD(Xception) id0_id1_0000       fake                 yes
celebdf_effb4 FFD(Xception) id0_id1_0001       fake                 yes
celebdf_effb4 FFD(Xception) id0_id1_0002       fake                 yes
celebdf_effb4 FFD(Xception) id0_id1_0003       fake                 yes
celebdf_effb4 FFD(Xception) id0_id1_0005       fake                 yes
celebdf_effb4 FFD(Xception) id0_id1_0006       fake                 yes
celebdf_effb4 FFD(Xception) id0_id1_0007       fake                 yes
celebdf_effb4 FFD(Xception) id0_id1_0009       fake                 yes
celebdf_effb4 FFD(Xception) id0_id2_0000       fake                 yes
celebdf_effb4 FFD(Xception) id0_id2_0001       fake                 yes
celebdf_effb4 FFD(Xception) id0_id2_0002       fake                 yes
celebdf_effb4 FFD(Xception) id0_id2_0003       fake                 yes
celebdf_effb4 FFD(Xception) id0_id2_0004       fake             

In [None]:
# Write the calibrated small FFD table as CSV into "<Drive root>/FFD results Celeb DF".
import os

# `small_table_ffd_cal` is produced by another cell; stop here when it is not defined yet.
if 'small_table_ffd_cal' not in globals():
    raise SystemExit("No 'small_table_ffd_cal' found. Run the small-table (calibrated) cell first.")

# Mount Google Drive unless /content/drive is already a mount point.
if not os.path.ismount("/content/drive"):
    from google.colab import drive
    drive.mount("/content/drive")

# Use the "My Drive" spelling when that path exists, otherwise fall back to "MyDrive".
DRIVE_ROOT = (
    "/content/drive/My Drive"
    if os.path.exists("/content/drive/My Drive")
    else "/content/drive/MyDrive"
)

# Target folder and file; the folder is created when absent.
out_dir = os.path.join(DRIVE_ROOT, "FFD results Celeb DF")
csv_path = os.path.join(out_dir, "ffd_small_table_celebdf_calibrated.csv")
os.makedirs(out_dir, exist_ok=True)

# The index is not written to the file.
small_table_ffd_cal.to_csv(csv_path, index=False)
print(f"Saved CSV to: {csv_path}")


Saved CSV to: /content/drive/My Drive/FFD results Celeb DF/ffd_small_table_celebdf_calibrated.csv
