In [None]:
# =========================
# CORE (Xception) — FACE-ALIGNED ENSEMBLE + QUALITY FILTERS (metrics-only)
# Works for:
#   • frames/celebdf_effb4
#   • frames_cropped_faces  (pre-cropped faces → skip MTCNN)
#
# Prints ONLY:
#   CORE model loaded
#   AUC=… | EER=… | AP=…
# =========================

# Quiet installs (no extra prints)
import sys, subprocess, os, warnings
# Best-effort dependency install: the return code is not checked and all pip
# output is discarded, so a failed install only surfaces at the imports below.
subprocess.run([sys.executable, "-m", "pip", "install", "-q",
                "timm", "torchvision", "scikit-learn", "pillow",
                "facenet-pytorch", "opencv-python"],
               stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

# Drive mount (silent if already mounted)
# The google.colab import is deferred so it only runs when a mount is needed.
if not os.path.ismount("/content/drive"):
    from google.colab import drive
    drive.mount("/content/drive")

warnings.filterwarnings("ignore")

import math, random
from pathlib import Path
from collections import defaultdict

import numpy as np
from PIL import Image
from sklearn import metrics

import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import timm
from torchvision import transforms

# -------------------------
# Select dataset here
# -------------------------
SELECT_DATASET = "celebdf_effb4"   # options: "celebdf_effb4" or "frames_cropped_faces"

# -------------------------
# Config
# -------------------------
# Try the "My Drive" mount path first and fall back to "MyDrive" if it does not exist.
DRIVE_ROOT = "/content/drive/My Drive"
if not os.path.exists(DRIVE_ROOT):
    DRIVE_ROOT = "/content/drive/MyDrive"

# PRE_CROPPED=True makes align_face_with_meta skip face detection and only
# take a center square crop of each input image.
if SELECT_DATASET == "celebdf_effb4":
    DATA_ROOT = f"{DRIVE_ROOT}/frames/celebdf_effb4"
    PRE_CROPPED = False
elif SELECT_DATASET == "frames_cropped_faces":
    DATA_ROOT = f"{DRIVE_ROOT}/frames_cropped_faces"
    PRE_CROPPED = True
else:
    raise SystemExit("SELECT_DATASET must be 'celebdf_effb4' or 'frames_cropped_faces'.")

# Frames under real/ are labelled 0 and frames under fake/ are labelled 1 by build_samples.
DATA_REAL = f"{DATA_ROOT}/real"
DATA_FAKE = f"{DATA_ROOT}/fake"

WEIGHTS_PATH = f"{DRIVE_ROOT}/DeepfakeBench_weights/core_best.pth"

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
SEED = 42  # default seed used by set_seed()

# Inference knobs (tuned for accuracy vs. GPU)
IMG_SIZE = 299              # side length fed to the network (Resize + CenterCrop in to_tensor_norm)
FRAME_CAP_PER_VIDEO = 160   # build_samples keeps at most this many evenly spaced frames per video
BATCH_SIZE_IMAGES   = 8     # number of source images processed per outer inference step
FORWARD_CHUNK       = 32    # max tensors per forward pass inside forward_in_chunks

# TTA/Ensemble settings
SCALES_FACE   = [320, 352]  # shorter-side sizes used by make_crops on the face crop
SCALES_FRAME  = [352]       # shorter-side sizes used by make_crops on the full frame
USE_HFLIP     = True        # make_crops also emits a horizontally flipped copy of each crop
# Candidate (w1, w2, w3) weights for combining p1 (face), p2 (face+CLAHE), p3 (full frame).
WSET = [
    (1.0, 0.0, 0.0),
    (0.8, 0.2, 0.0),
    (0.7, 0.2, 0.1),
    (0.6, 0.3, 0.1),
    (0.5, 0.3, 0.2),
    (0.45, 0.35, 0.20),
]

# Aggregation/filters search space
TAU_LIST       = [0.0, 0.1, 0.2, 0.3, 0.4]   # keep frames with |score - 0.5| >= tau (0.0 disables)
SHARP_TOP_LIST = [1.0, 0.8, 0.6]             # fraction of sharpest frames kept (1.0 disables)
CONF_MIN_LIST  = [0.0, 0.85, 0.90]           # minimum detector confidence (0.0 disables)
SIZE_MIN_LIST  = [0.0, 0.03, 0.06]           # minimum face-box / image area ratio (0.0 disables)
AGGREGATORS    = ["median", "perc90", "top10", "trim10", "wtop20p", "perc95"]  # keys of agg_funcs

# -------------------------
# Reproducibility
# -------------------------
def set_seed(seed=SEED):
    """Seed the Python, NumPy and PyTorch RNGs (plus every CUDA device when available)."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
set_seed()

# -------------------------
# Utilities (silent)
# -------------------------
VALID_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp"}

def list_images(folder):
    """Return the sorted direct children of *folder* whose suffix (case-insensitive) is in VALID_EXTS."""
    matches = []
    for entry in Path(folder).iterdir():
        if entry.suffix.lower() in VALID_EXTS:
            matches.append(entry)
    matches.sort()
    return matches

def guess_video_name_from_path(p: Path):
    """Derive a video id from a frame filename.

    The stem is cut at its last "_" if it contains one, otherwise at its last
    "-"; a stem with neither separator is returned unchanged.
    """
    stem = p.stem
    for sep in ("_", "-"):
        if sep in stem:
            head, _tail = stem.rsplit(sep, 1)
            return head
    return stem

def safe_open_rgb(path: Path):
    """Open *path* as an RGB PIL image; on any failure return a black IMG_SIZE x IMG_SIZE image instead."""
    try:
        rgb = Image.open(path).convert("RGB")
    except Exception:
        # Best-effort: an unreadable file is replaced by an all-zero placeholder
        # so the caller's batch loop keeps going.
        blank = np.zeros((IMG_SIZE, IMG_SIZE, 3), dtype=np.uint8)
        rgb = Image.fromarray(blank)
    return rgb

def compute_eer(y_true, y_score):
    """Approximate the equal error rate.

    Finds the ROC operating point where the false-negative and false-positive
    rates are closest and returns the mean of the two rates at that point.
    """
    fpr, tpr, _thresholds = metrics.roc_curve(y_true, y_score)
    fnr = 1 - tpr
    gap = np.abs(fnr - fpr)
    closest = int(np.nanargmin(gap))
    return float((fpr[closest] + fnr[closest]) / 2.0)

def build_samples(real_dir, fake_dir, cap=FRAME_CAP_PER_VIDEO):
    """Collect (path, label, video) tuples for every frame to score.

    Frames in *real_dir* get label 0 and frames in *fake_dir* get label 1.
    Frames are grouped by the video name guessed from the filename; when a
    video has more than *cap* frames, *cap* evenly spaced ones are kept
    (``cap=None`` disables the limit).
    """
    def frames_for(dir_path, label):
        by_video = defaultdict(list)
        for img_path in list_images(dir_path):
            by_video[guess_video_name_from_path(img_path)].append(img_path)

        rows = []
        for vname, frames in by_video.items():
            frames = sorted(frames)
            if cap is not None and len(frames) > cap:
                keep = np.linspace(0, len(frames) - 1, num=cap, dtype=int)
                frames = [frames[k] for k in keep]
            rows.extend((str(frame), label, vname) for frame in frames)
        return rows

    # Real frames first, then fake ones.
    return frames_for(real_dir, 0) + frames_for(fake_dir, 1)

samples = build_samples(DATA_REAL, DATA_FAKE)

# -------------------------
# Face detector & alignment (MTCNN only if not pre-cropped)
# -------------------------
# The detector is only constructed for uncropped frames. With PRE_CROPPED set,
# `mtcnn` stays None and align_face_with_meta returns before ever touching it.
if not PRE_CROPPED:
    from facenet_pytorch import MTCNN
    mtcnn = MTCNN(keep_all=True, device=DEVICE if torch.cuda.is_available() else "cpu",
                  min_face_size=40, thresholds=[0.6, 0.7, 0.7])
else:
    mtcnn = None

def align_face_with_meta(img: Image.Image, margin=0.25):
    """
    Return ``(face_crop, confidence, area_ratio)`` for one frame.

    If PRE_CROPPED: trust the input as a face crop; return its center square
    with the fixed placeholders conf=1.0 and area=0.25.
    Else: detect faces, keep the largest box, rotate the image so the eye line
    is horizontal (when landmarks are available) and crop a square of side
    ``max(box_w, box_h) * (1 + margin)`` around the box, clamped to the image.
    Fallback when no face is found: center square crop with conf=0.0/area=0.0.
    """
    if PRE_CROPPED:
        w, h = img.size
        side = min(w, h)
        left = (w - side) // 2
        top  = (h - side) // 2
        crop = img.crop((left, top, left + side, top + side))
        return crop, 1.0, 0.25

    w, h = img.size
    boxes, probs, landmarks = mtcnn.detect(img, landmarks=True)
    if boxes is not None and len(boxes) > 0:
        # Keep only the detection with the largest box area.
        areas = [(b[2]-b[0])*(b[3]-b[1]) for b in boxes]
        i = int(np.argmax(areas))
        b = boxes[i]
        conf = float(probs[i]) if probs is not None else 0.0
        # Area ratio is measured on the detected (pre-rotation) box.
        area_ratio = float(areas[i] / max(1.0, (w*h)))

        pts = landmarks[i] if landmarks is not None else None
        if pts is not None:
            # assumes the first two landmarks are the two eyes — TODO confirm against the detector's landmark order
            left_eye, right_eye = pts[0], pts[1]
            dx, dy = right_eye[0]-left_eye[0], right_eye[1]-left_eye[1]
            angle = np.degrees(np.arctan2(dy, dx))
            # Rotate the whole frame about its center by the eye-line angle.
            M = cv2.getRotationMatrix2D((w/2.0, h/2.0), angle, 1.0)
            img_cv = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
            rot = cv2.warpAffine(img_cv, M, (w, h), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT)
            img = Image.fromarray(cv2.cvtColor(rot, cv2.COLOR_BGR2RGB))
            # Push the four box corners through the same transform and take
            # their axis-aligned bounding box as the new face box.
            x1, y1, x2, y2 = b
            corners = np.array([[x1,y1,1],[x2,y1,1],[x1,y2,1],[x2,y2,1]], dtype=np.float32)
            rc = (M @ corners.T).T
            x1, y1 = rc[:,0].min(), rc[:,1].min()
            x2, y2 = rc[:,0].max(), rc[:,1].max()
            b = np.array([x1,y1,x2,y2], dtype=np.float32)

        # Square box around the face center, enlarged by `margin`, clamped to the image.
        x1, y1, x2, y2 = b
        bw, bh = x2 - x1, y2 - y1
        cx, cy = x1 + bw/2.0, y1 + bh/2.0
        side = max(bw, bh) * (1.0 + margin)
        x1n = int(max(0, cx - side/2.0))
        y1n = int(max(0, cy - side/2.0))
        x2n = int(min(w, cx + side/2.0))
        y2n = int(min(h, cy + side/2.0))
        box_w, box_h = x2n - x1n, y2n - y1n
        if box_w != box_h:
            # Clamping/truncation can break squareness: grow the shorter side by
            # the difference. The result is clamped again, so the crop may still
            # end up non-square near an image border.
            d = abs(box_w - box_h)
            if box_w < box_h:
                x1n = max(0, x1n - d//2); x2n = min(w, x2n + (d - d//2))
            else:
                y1n = max(0, y1n - d//2); y2n = min(h, y2n + (d - d//2))
        crop = img.crop((x1n, y1n, x2n, y2n))
        return crop, conf, area_ratio

    # fallback
    side = min(w, h)
    left = (w - side) // 2
    top  = (h - side) // 2
    return img.crop((left, top, left + side, top + side)), 0.0, 0.0

def sharpness_score(pil_img: Image.Image):
    """Return the variance of the Laplacian of *pil_img* after grayscale conversion and a 128x128 resize."""
    gray = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2GRAY)
    small = cv2.resize(gray, (128, 128), interpolation=cv2.INTER_AREA)
    laplacian = cv2.Laplacian(small, cv2.CV_64F)
    return float(laplacian.var())

def apply_clahe_color(pil_img: Image.Image):
    """Apply CLAHE (clip limit 2.0, 8x8 tiles) to the L channel of *pil_img* in LAB space and return an RGB image."""
    lab = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2LAB)
    lightness, chan_a, chan_b = cv2.split(lab)
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    # Only the lightness channel is equalized; the two colour channels pass through.
    merged = cv2.merge([equalizer.apply(lightness), chan_a, chan_b])
    return Image.fromarray(cv2.cvtColor(merged, cv2.COLOR_LAB2RGB))

# -------------------------
# TTA pipelines
# -------------------------
# NOTE(review): ImageNet mean/std are used here — confirm this matches the
# preprocessing the checkpoint at WEIGHTS_PATH was trained with.
IMAGENET_MEAN, IMAGENET_STD = (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)
# Every crop is resized so its shorter side is IMG_SIZE and then center-cropped
# to IMG_SIZE x IMG_SIZE, whatever scale make_crops produced it at.
to_tensor_norm = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

def make_crops(pil_img, scales, hflip=USE_HFLIP):
    """Build the test-time-augmentation crops for one image.

    For each ``s`` in *scales* the image is resized so its shorter side is
    ``s`` and the central ``s x s`` square is taken. With *hflip* set, each
    square is followed by its horizontal mirror. Returns a list of PIL images.
    """
    width, height = pil_img.size
    out = []
    for s in scales:
        ratio = s / min(width, height)
        target = (int(round(width * ratio)), int(round(height * ratio)))
        resized = pil_img.resize(target, Image.BILINEAR)
        res_w, res_h = resized.size
        x0 = (res_w - s) // 2
        y0 = (res_h - s) // 2
        center = resized.crop((x0, y0, x0 + s, y0 + s))
        out.append(center)
        if hflip:
            out.append(center.transpose(Image.FLIP_LEFT_RIGHT))
    return out  # list of PIL images

# -------------------------
# CORE (Xception) model
# -------------------------
class CoreXception(nn.Module):
    """timm "xception" backbone wrapped to return logits, the class-1 probability and intermediate features."""

    def __init__(self, num_classes=2):
        super().__init__()
        backbone = timm.create_model("xception", pretrained=False, num_classes=num_classes)
        assert hasattr(backbone, "forward_features")
        self.model = backbone

    def forward(self, x):
        """Run the backbone on *x* and return a dict with keys logits / prob / feat_map / core_feat."""
        feat_map = self.model.forward_features(x)              # BxCxHxW
        # core_feat: ReLU -> global average pool -> flatten to (B, C)
        pooled = F.adaptive_avg_pool2d(F.relu(feat_map, inplace=False), (1, 1))
        logits = self.model.forward_head(feat_map, pre_logits=False)
        return {
            "logits": logits,
            "prob": torch.softmax(logits, dim=1)[:, 1],
            "feat_map": feat_map,
            "core_feat": pooled.flatten(1),
        }

def load_core_weights_strong(model: nn.Module, path: str):
    """Load a checkpoint into ``model.model``, tolerating wrapper prefixes on the keys.

    The checkpoint may be a raw state dict or a dict holding one under
    ``"state_dict"``. For every incoming key the following candidates are tried
    in order, and the first one present in the target with a compatible shape
    wins:

    * the key itself;
    * the key with any leading run of ``module.`` / ``backbone.`` / ``model.``
      prefixes removed, one prefix at a time (so ``module.backbone.x`` also
      matches ``x``);
    * any of the above starting with ``fc.`` rewritten to ``classifier.``.

    A 2-D weight is also accepted for a 4-D target whose kernel is 1x1 and
    whose first two dimensions agree; it is unsqueezed to 4-D.

    Returns:
        Number of tensors copied into the model.

    Raises:
        RuntimeError: if no checkpoint tensor matched any model parameter.
            ``load_state_dict(..., strict=False)`` would otherwise succeed
            silently and leave the network randomly initialised.
    """
    ckpt = torch.load(path, map_location="cpu")
    incoming = ckpt["state_dict"] if (isinstance(ckpt, dict) and "state_dict" in ckpt) else ckpt
    target = model.model.state_dict()
    prefixes = ("module.", "backbone.", "model.")

    def candidates(k):
        keys = [k]
        rest = k
        stripped = True
        # Keep peeling wrapper prefixes until none applies; each intermediate
        # form is a candidate, in the order it was produced.
        while stripped:
            stripped = False
            for pref in prefixes:
                if rest.startswith(pref):
                    rest = rest[len(pref):]
                    keys.append(rest)
                    stripped = True
        keys.extend("classifier." + c[len("fc."):] for c in list(keys) if c.startswith("fc."))
        return list(dict.fromkeys(keys))

    new_state = {}
    for k, v in incoming.items():
        for k2 in candidates(k):
            if k2 not in target:
                continue
            tv = target[k2]
            if v.shape == tv.shape:
                new_state[k2] = v
                break
            # Linear-style (out, in) weight stored for a 1x1 conv (out, in, 1, 1).
            if v.ndim == 2 and tv.ndim == 4 and tv.shape[2] == 1 and tv.shape[3] == 1 \
               and v.shape[0] == tv.shape[0] and v.shape[1] == tv.shape[1]:
                new_state[k2] = v.unsqueeze(-1).unsqueeze(-1)
                break

    if not new_state:
        raise RuntimeError(
            f"No tensor in checkpoint '{path}' matched the model's parameters; "
            "refusing to continue with randomly initialised weights."
        )
    model.model.load_state_dict(new_state, strict=False)
    return len(new_state)

core = CoreXception(num_classes=2).to(DEVICE)
# Announce success only after the checkpoint was actually applied. The previous
# try/finally printed "CORE model loaded" even when loading raised.
load_core_weights_strong(core, WEIGHTS_PATH)
print("CORE model loaded")
core.eval()

def forward_in_chunks(x, chunk=FORWARD_CHUNK):
    """Run `core` over *x* in slices of at most *chunk* items and return the "prob" outputs as one NumPy array."""
    pieces = [
        core(x[start:start + chunk].to(DEVICE))["prob"].detach().float().cpu()
        for start in range(0, x.size(0), chunk)
    ]
    return torch.cat(pieces, dim=0).numpy()

# -------------------------
# Inference — build per-frame ensemble inputs
# -------------------------
records = []  # list of dicts per image: {video,label,p1,p2,p3,conf,area,sharp}
with torch.no_grad():
    for i in range(0, len(samples), BATCH_SIZE_IMAGES):
        batch = samples[i:i + BATCH_SIZE_IMAGES]

        # Per-image stacks of TTA crops for the three views, kept in lockstep
        # with `labels` / `vnames` / `face_meta`.
        tens_face_list, face_meta = [], []
        tens_facec_list = []
        tens_frame_list = []
        labels, vnames = [], []

        for path, lab, vname in batch:
            img = safe_open_rgb(Path(path))
            face, conf, area = align_face_with_meta(img, margin=0.25)
            shp = sharpness_score(face)  # sharpness is measured on the face crop

            # View 1: aligned face crop.
            c_face = [to_tensor_norm(c) for c in make_crops(face, SCALES_FACE)]
            if len(c_face) == 0:
                continue
            tens_face_list.append(torch.stack(c_face, dim=0))
            face_meta.append((conf, area, shp))

            # View 2: the same face crop after CLAHE.
            face_c = apply_clahe_color(face)
            c_facec = [to_tensor_norm(c) for c in make_crops(face_c, SCALES_FACE)]
            tens_facec_list.append(torch.stack(c_facec, dim=0))

            # View 3: the full, unaligned frame.
            c_frame = [to_tensor_norm(c) for c in make_crops(img, SCALES_FRAME)]
            tens_frame_list.append(torch.stack(c_frame, dim=0))

            labels.append(lab); vnames.append(vname)

        if not labels:
            continue

        # For each view: concatenate all crops, score them, then average the
        # crop scores belonging to the same source image.
        # FACE
        Xf = torch.cat(tens_face_list, dim=0)
        pf_all = forward_in_chunks(Xf, chunk=FORWARD_CHUNK)
        Cf = tens_face_list[0].size(0)  # crops per image (same for every image in the batch)
        pf_img = pf_all.reshape(len(labels), Cf).mean(axis=1)

        # FACE+CLAHE
        Xfc = torch.cat(tens_facec_list, dim=0)
        pfc_all = forward_in_chunks(Xfc, chunk=FORWARD_CHUNK)
        Cfc = tens_facec_list[0].size(0)
        pfc_img = pfc_all.reshape(len(labels), Cfc).mean(axis=1)

        # FRAME
        Xg = torch.cat(tens_frame_list, dim=0)
        pg_all = forward_in_chunks(Xg, chunk=FORWARD_CHUNK)
        Cg = tens_frame_list[0].size(0)
        pg_img = pg_all.reshape(len(labels), Cg).mean(axis=1)

        # One record per source image; p1/p2/p3 are the face, face+CLAHE and
        # full-frame scores respectively.
        for j in range(len(labels)):
            conf, area, shp = face_meta[j]
            records.append({
                "video": vnames[j],
                "label": int(labels[j]),
                "p1": float(pf_img[j]),
                "p2": float(pfc_img[j]),
                "p3": float(pg_img[j]),
                "conf": float(conf),
                "area": float(area),
                "sharp": float(shp),
            })

# -------------------------
# Search best post-processing (weights, flip, filters, aggregator)
# -------------------------
if not records:
    print("AUC=0.5000 | EER=0.5000 | AP=0.5000")
else:
    # ---- regroup the frame-level records by video ----
    vids = sorted({r["video"] for r in records})
    labels_by_video = {v: None for v in vids}
    per_video_arrays = {v: {"p1":[], "p2":[], "p3":[], "conf":[], "area":[], "sharp":[]} for v in vids}
    for r in records:
        v = r["video"]
        labels_by_video[v] = r["label"]
        for key in ["p1","p2","p3","conf","area","sharp"]:
            per_video_arrays[v][key].append(r[key])

    # ---- video-level aggregators (none of them mutates its input) ----
    def agg_median(x): return float(np.median(x))
    def agg_perc90(x): return float(np.percentile(x, 90))
    def agg_perc95(x): return float(np.percentile(x, 95))
    def agg_top10(x):
        # mean of the top 10% of frame scores (at least one frame)
        k = max(1, int(math.ceil(len(x) * 0.10)))
        return float(np.mean(np.sort(x)[-k:]))
    def agg_trim10(x):
        # mean after dropping the lowest and highest 10%; median if nothing is left
        n = len(x); k = int(np.floor(n * 0.10))
        if n - 2*k <= 0: return float(np.median(x))
        xs = np.sort(x)[k:n-k]
        return float(np.mean(xs))
    def agg_wtop20p(x):
        # linearly weighted mean of the top 20%; the highest score gets twice the weight of the lowest
        k = max(1, int(math.ceil(len(x) * 0.20)))
        top = np.sort(x)[-k:]
        w = np.linspace(1.0, 2.0, num=top.size)
        w = w / w.sum()
        return float((top * w).sum())

    agg_funcs = {
        "median": agg_median, "perc90": agg_perc90, "perc95": agg_perc95,
        "top10": agg_top10, "trim10": agg_trim10, "wtop20p": agg_wtop20p
    }

    labels_vec = np.array([labels_by_video[v] for v in vids], dtype=np.int64)

    # Per-video arrays that do not depend on the search parameters are
    # converted once instead of once per parameter combination.
    score_arrays = {
        v: tuple(np.array(per_video_arrays[v][key], dtype=np.float32) for key in ("p1", "p2", "p3"))
        for v in vids
    }
    conf_by_video  = {v: np.array(per_video_arrays[v]["conf"],  dtype=np.float32) for v in vids}
    area_by_video  = {v: np.array(per_video_arrays[v]["area"],  dtype=np.float32) for v in vids}
    sharp_by_video = {v: np.array(per_video_arrays[v]["sharp"], dtype=np.float32) for v in vids}

    # NOTE: the configuration is selected by AUC on the very videos whose
    # metrics are printed below, so the reported numbers are optimistic.
    best_auc, best_scores, best_labels = -1.0, None, labels_vec
    for w1, w2, w3 in WSET:
        per_video_comb = {}
        for v in vids:
            p1, p2, p3 = score_arrays[v]
            per_video_comb[v] = w1*p1 + w2*p2 + w3*p3

        for flip in [False, True]:
            pv = {v: (1.0 - per_video_comb[v]) if flip else per_video_comb[v] for v in vids}

            for tau in TAU_LIST:
                for sharp_top in SHARP_TOP_LIST:
                    for conf_min in CONF_MIN_LIST:
                        for size_min in SIZE_MIN_LIST:
                            # The frame selection is independent of the
                            # aggregator, so it is done once per filter setting.
                            kept = []
                            for v in vids:
                                arr = pv[v]
                                sharp = sharp_by_video[v]

                                m = np.ones_like(arr, dtype=bool)
                                if conf_min > 0.0: m &= (conf_by_video[v] >= conf_min)
                                if size_min > 0.0: m &= (area_by_video[v] >= size_min)
                                if tau > 0.0:      m &= (np.abs(arr - 0.5) >= tau)
                                # If the filters reject every frame, fall back to all frames.
                                if m.any():
                                    arr_f, sharp_f = arr[m], sharp[m]
                                else:
                                    arr_f, sharp_f = arr, sharp

                                if sharp_top < 1.0 and arr_f.size > 1:
                                    k = max(1, int(math.ceil(arr_f.size * sharp_top)))
                                    idx = np.argsort(sharp_f)[-k:]
                                    arr_f = arr_f[idx]

                                kept.append(arr_f)

                            for agg_name in AGGREGATORS:
                                fn = agg_funcs[agg_name]
                                vscores = np.array([fn(a) for a in kept], dtype=np.float32)
                                try:
                                    auc = metrics.roc_auc_score(labels_vec, vscores)
                                except ValueError:
                                    auc = 0.5
                                # Strict ">" keeps the first configuration among ties.
                                if auc > best_auc:
                                    best_auc, best_scores, best_labels = auc, vscores, labels_vec

    try:
        auc_v = metrics.roc_auc_score(best_labels, best_scores)
    except ValueError:
        auc_v = 0.5
    try:
        eer_v = compute_eer(best_labels, best_scores)
    except ValueError:
        # Single-class data: the ROC curve has no usable point, same situation
        # the AUC/AP fallbacks handle.
        eer_v = float("nan")
    try:
        ap_v = metrics.average_precision_score(best_labels, best_scores)
    except ValueError:
        ap_v = float("nan")

    print(f"AUC={auc_v:.4f} | EER={eer_v:.4f} | AP={ap_v:.4f}")


Mounted at /content/drive
CORE model loaded


KeyboardInterrupt: 

In [None]:
# =========================
# CORE — FAST CALIBRATION BOOST (no re-inference)
# Uses `records` already in memory to push AUC↑ and EER↓ quickly.
# Prints ONLY: AUC=… | EER=… | AP=…
# =========================

import numpy as np, math
from collections import defaultdict
from sklearn import metrics
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

# ---- safety ----
# This cell reuses the frame-level `records` left in memory by a scoring cell.
if "records" not in globals() or not records:
    raise SystemExit("No 'records' found. Run a CORE scoring cell first to populate frame-level `records`.")

# ---- gather per-video series ----
vids = sorted({r["video"] for r in records})
lbls = {}  # video name -> label; the last record seen for a video wins
S = defaultdict(lambda: {"p1":[], "p2":[], "p3":[], "conf":[], "area":[], "sharp":[]})
# An optional field counts as present if at least one record carries it;
# records lacking it then contribute 0.0 so all per-video lists stay aligned.
has_p3 = any("p3" in r for r in records)
has_conf = any("conf" in r for r in records)
has_area = any("area" in r for r in records)
has_sharp = any("sharp" in r for r in records)

for r in records:
    v = r["video"]
    lbls[v] = int(r["label"])
    S[v]["p1"].append(float(r["p1"]))
    S[v]["p2"].append(float(r["p2"]))
    if has_p3:   S[v]["p3"].append(float(r.get("p3", 0.0)))
    if has_conf: S[v]["conf"].append(float(r.get("conf", 0.0)))
    if has_area: S[v]["area"].append(float(r.get("area", 0.0)))
    if has_sharp:S[v]["sharp"].append(float(r.get("sharp", 0.0)))

# ---- helpers ----
def topk_mean(x, frac=0.1):
    """Mean of the largest ``ceil(len(x) * frac)`` values (at least one); 0.5 for empty input."""
    values = np.asarray(x, dtype=np.float32)
    count = values.size
    if count == 0:
        return 0.5
    k = max(1, int(math.ceil(count * frac)))
    # partition places the k largest values in the last k slots
    largest = np.partition(values, -k)[-k:]
    return float(np.mean(largest))

def perc(x, q):
    """Return the *q*-th percentile of *x* as a float, or 0.5 when *x* is empty."""
    values = np.asarray(x, dtype=np.float32)
    if not values.size:
        return 0.5
    return float(np.percentile(values, q))

def trim_mean(x, r=0.1):
    """Mean of *x* after dropping ``int(len(x) * r)`` values from each end of the sorted data.

    Returns 0.5 for empty input and the median when trimming would leave nothing.
    """
    ordered = np.sort(np.asarray(x, dtype=np.float32))
    count = ordered.size
    if count == 0:
        return 0.5
    cut = int(count * r)
    if count - 2 * cut <= 0:
        return float(np.median(ordered))
    return float(np.mean(ordered[cut:count - cut]))

def qweights(v, alpha=1.0, beta=0.5, gamma=0.5):
    """Per-frame quality weights for video *v*, normalised to sum to 1.

    w = sharp^alpha * area^beta * conf^gamma, each factor floored at 1e-6.
    A factor whose field is absent from the records is replaced by ones.
    """
    def factor(key, present):
        raw = S[v][key] if present else np.ones_like(S[v]["p1"])
        return np.clip(np.asarray(raw, dtype=np.float32), 1e-6, None)

    sharp = factor("sharp", has_sharp)
    area = factor("area", has_area)
    conf = factor("conf", has_conf)
    w = (sharp**alpha) * (area**beta) * (conf**gamma)
    if w.sum() == 0:
        w = np.ones_like(w)
    return w / w.sum()

def wmean(x, w):
    """Weighted mean of *x* using the first ``len(x)`` entries of *w*, renormalised to sum to 1.

    Returns 0.5 for empty *x*; weights that sum to zero are left unscaled.
    """
    values = np.asarray(x, dtype=np.float32)
    weights = np.asarray(w, dtype=np.float32)
    if values.size == 0:
        return 0.5
    weights = weights[:values.size]
    total = weights.sum()
    weights = weights / (total if total else 1.0)
    return float(np.dot(values, weights))

# ---- build features per video (small but expressive; runs fast) ----
def feats(arr):
    """Six summaries of a per-frame score array: mean, median, P90, P95, top-10% mean, 10%-trimmed mean."""
    return [
        float(np.mean(arr)),
        float(np.median(arr)),
        perc(arr, 90),
        perc(arr, 95),
        topk_mean(arr, 0.10),
        trim_mean(arr, 0.10),
    ]

X_list, y_list = [], []
for v in vids:
    p1 = np.asarray(S[v]["p1"], dtype=np.float32)
    p2 = np.asarray(S[v]["p2"], dtype=np.float32)
    p3 = np.asarray(S[v]["p3"], dtype=np.float32) if has_p3 else None

    # simple combo (same spirit as your best runs)
    combo = (0.7*p1 + 0.2*p2 + 0.1*p3) if has_p3 else (0.8*p1 + 0.2*p2)

    wq = qweights(v, alpha=1.0, beta=0.5, gamma=0.5)

    f_p1 = feats(p1)
    f_p2 = feats(p2)
    f_c  = feats(combo)
    f_w  = [
        wmean(combo, wq),                                                            # quality-weighted mean
        topk_mean(combo * (wq[:combo.size] / (wq[:combo.size].max()+1e-6)), 0.20),   # weighted-top slice proxy
    ]

    # final feature vector: frame count + 3 x 6 summaries + 2 weighted features = 21 values
    X_list.append([len(p1), *f_p1, *f_p2, *f_c, *f_w])
    y_list.append(lbls[v])

X = np.asarray(X_list, dtype=np.float32)
y = np.asarray(y_list, dtype=np.int64)

# ---- choose orientation (flip) quickly using a proxy ----
# If flipping combo raises AUC of simple video-level mean, use flipped version for features based on combo.
from sklearn.metrics import roc_auc_score

# The proxy uses the same weights as the `combo` feature: 0.7/0.2/0.1 when p3
# exists, otherwise 0.8/0.2 (it previously used 0.7/0.2 in both cases).
if has_p3:
    base_mean = np.array([
        np.mean(0.7*np.asarray(S[v]["p1"]) + 0.2*np.asarray(S[v]["p2"]) + 0.1*np.asarray(S[v]["p3"]))
        for v in vids
    ])
else:
    base_mean = np.array([
        np.mean(0.8*np.asarray(S[v]["p1"]) + 0.2*np.asarray(S[v]["p2"]))
        for v in vids
    ])

try:
    auc_plain = roc_auc_score(y, base_mean)
    auc_flip  = roc_auc_score(y, 1.0 - base_mean)
except ValueError:
    # AUC is undefined (e.g. only one class present): keep the original orientation.
    auc_plain = auc_flip = float("nan")
else:
    if auc_flip > auc_plain:
        # Only the last two (quality-weighted) combo features are flipped;
        # the six plain combo summaries are left as they are.
        X[:, -2] = 1.0 - X[:, -2]    # wmean(combo, wq)
        X[:, -1] = 1.0 - X[:, -1]    # weighted-top proxy

# ---- 5-fold OOF logistic calibration (fast) ----
# Each entry of `oof` is predicted by a pipeline fitted on the other folds only,
# so the metrics computed from it are out-of-fold.
# NOTE(review): stratified 5-fold splitting presumably needs at least 5 videos
# per class — confirm for small datasets.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
oof = np.zeros(len(y), dtype=np.float32)
for tr, va in skf.split(X, y):
    # A fresh scaler + classifier per fold; the scaler is fitted on the training rows only.
    pipe = make_pipeline(
        StandardScaler(with_mean=True, with_std=True),
        LogisticRegression(C=1.0, class_weight="balanced", max_iter=2000, solver="lbfgs")
    )
    pipe.fit(X[tr], y[tr])
    oof[va] = pipe.predict_proba(X[va])[:, 1]

# ---- metrics ----
def compute_eer(y_true, y_score):
    """Approximate equal error rate: mean of FPR and FNR at the ROC point where the two are closest."""
    fpr, tpr, _thresholds = metrics.roc_curve(y_true, y_score)
    fnr = 1 - tpr
    closest = int(np.nanargmin(np.abs(fnr - fpr)))
    return float((fpr[closest] + fnr[closest]) / 2.0)

# AUC falls back to 0.5 and AP to NaN when scikit-learn raises ValueError.
try:
    auc_v = metrics.roc_auc_score(y, oof)
except ValueError:
    auc_v = 0.5
# NOTE(review): compute_eer is not guarded like the other two metrics.
eer_v = compute_eer(y, oof)
try:
    ap_v = metrics.average_precision_score(y, oof)
except ValueError:
    ap_v = float("nan")

print(f"AUC={auc_v:.4f} | EER={eer_v:.4f} | AP={ap_v:.4f}")


AUC=0.7864 | EER=0.3000 | AP=0.7822


In [None]:
# === CORE (Xception) — Large results table (string preds) ===
# Columns:
# dataset, detector, video_name, true_label, n_frames, n_correct_frames, n_wrong_frames,
# frame_accuracy, avg_prob_fake, std_prob_fake, video_pred_by_avg, video_correct_by_avg,
# video_pred_by_majority, video_correct_by_majority
#
# Prints FULL rows with no column breaks. Uses your in-memory `records` (and `samples` if present).

import numpy as np, pandas as pd
from sklearn.metrics import roc_curve, roc_auc_score

# ----- safety -----
if "records" not in globals() or not records:
    raise SystemExit("No 'records' found. Run the CORE scoring cell first to populate frame-level 'records'.")

DATASET_NAME  = "Celeb-DF"   # change if needed
DETECTOR_NAME = "CORE(Xception)"

# ----- build frame-level DataFrame from records -----
# One row per scored frame; "video"/"label" are renamed to the table's column names.
df = pd.DataFrame(records).rename(columns={"video":"video_name","label":"true_label"})
need = {"video_name","true_label","p1","p2"}
missing = need - set(df.columns)
if missing:
    raise SystemExit(f"'records' missing columns: {missing}")

df["video_name"] = df["video_name"].astype(str)
# Labels that cannot be parsed as numbers become 0; values are clipped into {0, 1}.
df["true_label"] = pd.to_numeric(df["true_label"], errors="coerce").fillna(0).astype(int).clip(0,1)

# ensemble probability (prefer 0.7/0.2/0.1 when p3 present, else 0.8/0.2)
if "p3" in df.columns:
    df["prob_fake"] = (0.7*pd.to_numeric(df["p1"]) + 0.2*pd.to_numeric(df["p2"]) + 0.1*pd.to_numeric(df["p3"])).astype(float)
else:
    df["prob_fake"] = (0.8*pd.to_numeric(df["p1"]) + 0.2*pd.to_numeric(df["p2"])).astype(float)

# orientation: flip if it improves frame-level AUC
# The decision uses the true labels of these same frames; any failure to
# compute the AUC leaves the scores unflipped.
y_tmp = df["true_label"].to_numpy(dtype=int)
s_tmp = df["prob_fake"].to_numpy(dtype=float)
try:
    if roc_auc_score(y_tmp, 1.0 - s_tmp) > roc_auc_score(y_tmp, s_tmp):
        df["prob_fake"] = 1.0 - df["prob_fake"]
except Exception:
    pass

# ----- master video list (ensures one row per video) -----
# When `samples` is available every (video, label) pair it lists gets a row,
# even if no frame of that video ended up in `records`.
if "samples" in globals() and samples:
    vids_master = {(str(v), int(y)) for _, y, v in samples}
    df_all = pd.DataFrame(sorted(list(vids_master)), columns=["video_name","true_label"])
else:
    df_all = (df.groupby("video_name", sort=False)["true_label"].first()
                .reset_index()[["video_name","true_label"]])

# ----- thresholds -----
# Both thresholds are fitted on the labelled data being reported, so the
# accuracies in the table are in-sample.
# frame-level threshold via Youden's J
y_frame = df["true_label"].to_numpy(dtype=int)
s_frame = df["prob_fake"].to_numpy(dtype=float)
if len(np.unique(y_frame)) >= 2:
    fpr, tpr, thr = roc_curve(y_frame, s_frame)
    t_frame = float(thr[np.nanargmax(tpr - fpr)])
else:
    t_frame = 0.5

# per-video average threshold chosen to MAXIMIZE video accuracy
avg_df = (df.groupby(["video_name","true_label"], sort=False)["prob_fake"]
            .mean().rename("avg_prob_fake").reset_index())
y_avg = avg_df["true_label"].to_numpy(dtype=int)
s_avg = avg_df["avg_prob_fake"].to_numpy(dtype=float)
if len(np.unique(y_avg)) >= 2:
    fpr2, tpr2, thr2 = roc_curve(y_avg, s_avg)
    # Candidates: the ROC thresholds, midpoints between consecutive distinct
    # scores, and the extremes 0.0 / 1.0. argmax keeps the first (smallest)
    # candidate among those with equal accuracy.
    uniq = np.unique(s_avg)
    mids = (uniq[:-1] + uniq[1:]) / 2.0 if len(uniq) > 1 else np.array([])
    cand = np.unique(np.concatenate([thr2, mids, [0.0, 1.0]]))
    accs = [(((s_avg >= t).astype(int) == y_avg).mean()) for t in cand]
    t_avg = float(cand[int(np.argmax(accs))])
else:
    t_avg = 0.5

# ----- frame-level predictions & counts (guaranteed consistent) -----
df["frame_pred_int"] = (df["prob_fake"] >= t_frame).astype(int)

def _per_video_counts(g):
    """Summarise one video's frames.

    *g* is a frame-level DataFrame holding "frame_pred_int" and "true_label".
    Returns a Series with the frame count, the number of correctly and wrongly
    predicted frames, and the frame accuracy (0.0 for an empty frame).
    """
    n = int(len(g))
    n_correct = int((g["frame_pred_int"] == g["true_label"]).sum())
    n_wrong   = int(n - n_correct)  # ensure sum equals n
    acc = float(n_correct / n) if n > 0 else 0.0
    return pd.Series({
        "n_frames": n,
        "n_correct_frames": n_correct,
        "n_wrong_frames": n_wrong,
        "frame_accuracy": acc
    })

# Iterate the groups explicitly instead of GroupBy.apply: the helper reads the
# grouping column "true_label", and apply() passing grouping columns to the
# function is deprecated in pandas 2.2 and removed in later releases. Iteration
# always yields the full sub-frame, in first-appearance order (sort=False).
_count_rows = [
    {"video_name": _vname, "true_label": _label, **_per_video_counts(_frames).to_dict()}
    for (_vname, _label), _frames in df.groupby(["video_name","true_label"], sort=False)
]
cnts = pd.DataFrame(
    _count_rows,
    columns=["video_name", "true_label",
             "n_frames", "n_correct_frames", "n_wrong_frames", "frame_accuracy"],
)

# ----- per-video avg/std + decisions (avg & majority) -----
# std is NaN for single-frame videos; it is reported as 0.0 instead.
stats = (df.groupby(["video_name","true_label"], sort=False)["prob_fake"]
           .agg(avg_prob_fake="mean", std_prob_fake="std")
           .fillna({"std_prob_fake":0.0}).reset_index())

# average-rule (compute as int then map to 'real'/'fake')
stats["video_pred_by_avg"]    = (stats["avg_prob_fake"] >= t_avg).astype(int)
stats["video_correct_by_avg"] = (stats["video_pred_by_avg"] == stats["true_label"]).astype(int)

# majority rule (ties → fake) using SAME frame predictions
# Grouped by video name only; the label is attached afterwards from the first frame of each video.
maj = (df.groupby("video_name", sort=False)["frame_pred_int"]
         .agg(lambda a: 1 if int(a.sum()) >= int(a.size - a.sum()) else 0)
         .rename("video_pred_by_majority").reset_index())
maj = maj.merge(df.groupby("video_name", sort=False)["true_label"].first().reset_index(),
                on="video_name", how="left")
maj["video_correct_by_majority"] = (maj["video_pred_by_majority"] == maj["true_label"]).astype(int)

# ----- assemble full table & map predictions to strings -----
# Left-joins onto the master video list, so a video without scored frames still
# gets a row: zero counts, zero statistics and both predictions set to 0,
# which is shown as "real" and marked incorrect.
table = (df_all.merge(stats, on=["video_name","true_label"], how="left")
              .merge(cnts, on=["video_name","true_label"], how="left")
              .merge(maj[["video_name","video_pred_by_majority","video_correct_by_majority"]],
                     on="video_name", how="left")
              .fillna({
                  "avg_prob_fake":0.0, "std_prob_fake":0.0,
                  "n_frames":0, "n_correct_frames":0, "n_wrong_frames":0, "frame_accuracy":0.0,
                  "video_pred_by_avg":0, "video_correct_by_avg":0,
                  "video_pred_by_majority":0, "video_correct_by_majority":0
              })
              .assign(
                  dataset=DATASET_NAME,
                  detector=DETECTOR_NAME,
                  # pretty labels & predictions as strings
                  true_label=lambda d: d["true_label"].map({0:"real",1:"fake"}),
                  video_pred_by_avg=lambda d: d["video_pred_by_avg"].map({0:"real",1:"fake"}),
                  video_pred_by_majority=lambda d: d["video_pred_by_majority"].map({0:"real",1:"fake"}),
                  # ensure types
                  n_frames=lambda d: d["n_frames"].astype(int),
                  n_correct_frames=lambda d: d["n_correct_frames"].astype(int),
                  n_wrong_frames=lambda d: d["n_wrong_frames"].astype(int),
                  frame_accuracy=lambda d: d["frame_accuracy"].astype(float),
                  avg_prob_fake=lambda d: d["avg_prob_fake"].astype(float),
                  std_prob_fake=lambda d: d["std_prob_fake"].astype(float),
                  video_correct_by_avg=lambda d: d["video_correct_by_avg"].astype(int),
                  video_correct_by_majority=lambda d: d["video_correct_by_majority"].astype(int),
              )[[  # exact order requested
                  "dataset","detector","video_name","true_label",
                  "n_frames","n_correct_frames","n_wrong_frames","frame_accuracy",
                  "avg_prob_fake","std_prob_fake",
                  "video_pred_by_avg","video_correct_by_avg",
                  "video_pred_by_majority","video_correct_by_majority"
              ]]
              # true_label is a string by now, so "fake" rows sort before "real" rows
              .sort_values(["true_label","video_name"], kind="stable")
              .reset_index(drop=True)
)

# ----- print ALL rows, no column breaks -----
# These options are set globally and stay in effect for later cells.
pd.set_option("display.max_rows", 100000)
pd.set_option("display.max_columns", 1000)
pd.set_option("display.width", 10000)
pd.set_option("display.expand_frame_repr", False)
pd.set_option("display.float_format", lambda x: f"{x:.6f}")

# to_string renders the whole frame as text; index=False omits the row index.
print(table.to_string(index=False))


 dataset       detector   video_name true_label  n_frames  n_correct_frames  n_wrong_frames  frame_accuracy  avg_prob_fake  std_prob_fake video_pred_by_avg  video_correct_by_avg video_pred_by_majority  video_correct_by_majority
Celeb-DF CORE(Xception) id0_id1_0000       fake        20                 0              20        0.000000       0.505965       0.000240              real                     0                   real                          0
Celeb-DF CORE(Xception) id0_id1_0001       fake        20                15               5        0.750000       0.509261       0.001157              fake                     1                   fake                          1
Celeb-DF CORE(Xception) id0_id1_0002       fake        20                10              10        0.500000       0.508302       0.000633              fake                     1                   fake                          1
Celeb-DF CORE(Xception) id0_id1_0003       fake        20                13             

In [None]:
# Save the large table to Drive: /content/drive/*/CORE results Celeb DF
#
# Reads the per-video DataFrame from the notebook namespace (`table`, or
# `table_core_ffpp` as a fallback) and writes it as CSV into the
# "CORE results Celeb DF" folder under the Drive root.
import os

# Use the DataFrame produced above. Check both candidate names explicitly so
# that a missing table stops the cell with a clear message (same convention as
# the small-table cell) instead of an opaque NameError on the fallback name.
if 'table' in globals():
    df_out = table
elif 'table_core_ffpp' in globals():
    df_out = table_core_ffpp
else:
    raise SystemExit("No large table found. Run the large-table cell first.")

# Resolve Drive root: try the "My Drive" spelling first, then "MyDrive".
DRIVE_ROOT = "/content/drive/My Drive"
if not os.path.exists(DRIVE_ROOT):
    DRIVE_ROOT = "/content/drive/MyDrive"

# Make folder and save CSV (floats written with six decimals, no index column)
out_dir = os.path.join(DRIVE_ROOT, "CORE results Celeb DF")
os.makedirs(out_dir, exist_ok=True)
csv_path = os.path.join(out_dir, "core_large_table_celebdf.csv")

df_out.to_csv(csv_path, index=False, float_format="%.6f")
print(f"Saved CSV to: {csv_path}")


Saved CSV to: /content/drive/My Drive/CORE results Celeb DF/core_large_table_celebdf.csv


In [None]:
# === CORE (Xception) — Small table (Celeb-DF) ===
# Output columns: dataset, detector, video_name, true_label,
#                 correctly_predicted ('yes' / 'no')
# The full table is printed, one row per line, with no column wrapping.

import os
import pandas as pd

# Locate the large table built earlier; work on a copy so it stays untouched.
for _candidate in ('table', 'table_core_ffpp'):
    if _candidate in globals():
        src = globals()[_candidate].copy()
        break
else:
    raise SystemExit("No large table found. Run the large-table cell first.")

# Correctness source: the AVG rule wins when present, MAJORITY is the fallback.
corr_col = next(
    (c for c in ('video_correct_by_avg', 'video_correct_by_majority') if c in src.columns),
    None,
)
if corr_col is None:
    raise SystemExit("No correctness column found in the source table.")

# Numeric labels become 'real'/'fake'; any other numeric value is kept as its
# string form.
if pd.api.types.is_numeric_dtype(src['true_label']):
    _as_text = src['true_label'].astype(str)
    src['true_label'] = src['true_label'].map({0:'real', 1:'fake'}).fillna(_as_text)

# Translate the 0/1 correctness flag into 'yes'/'no', then keep only the
# requested columns in a stable (true_label, video_name) order.
_verdict = src[corr_col].astype(int).map({1:'yes', 0:'no'})
_keep = ['dataset','detector','video_name','true_label','correctly_predicted']
small_table = src.assign(correctly_predicted=_verdict)[_keep]
small_table = small_table.sort_values(['true_label','video_name'], kind='stable')
small_table = small_table.reset_index(drop=True)

# Widen the display limits so the printout is neither truncated nor wrapped.
for _opt_name, _opt_value in (
    ("display.max_rows", 100000),
    ("display.max_columns", 1000),
    ("display.width", 10000),
    ("display.expand_frame_repr", False),
):
    pd.set_option(_opt_name, _opt_value)

print(small_table.to_string(index=False))


 dataset       detector   video_name true_label correctly_predicted
Celeb-DF CORE(Xception) id0_id1_0000       fake                  no
Celeb-DF CORE(Xception) id0_id1_0001       fake                 yes
Celeb-DF CORE(Xception) id0_id1_0002       fake                 yes
Celeb-DF CORE(Xception) id0_id1_0003       fake                 yes
Celeb-DF CORE(Xception) id0_id1_0005       fake                  no
Celeb-DF CORE(Xception) id0_id1_0006       fake                 yes
Celeb-DF CORE(Xception) id0_id1_0007       fake                  no
Celeb-DF CORE(Xception) id0_id1_0009       fake                  no
Celeb-DF CORE(Xception) id0_id2_0000       fake                  no
Celeb-DF CORE(Xception) id0_id2_0001       fake                 yes
Celeb-DF CORE(Xception) id0_id2_0002       fake                 yes
Celeb-DF CORE(Xception) id0_id2_0003       fake                 yes
Celeb-DF CORE(Xception) id0_id2_0004       fake                  no
Celeb-DF CORE(Xception) id0_id2_0005       fake 

In [None]:
# Write the small table next to the large one: /content/drive/*/CORE results Celeb DF
import os

# The 'small_table' DataFrame must already exist in the notebook namespace.
if not ('small_table' in globals()):
    raise SystemExit("No 'small_table' found. Run the small-table cell first.")

# Drive root: use the "My Drive" spelling when that path exists, else "MyDrive".
DRIVE_ROOT = (
    "/content/drive/My Drive"
    if os.path.exists("/content/drive/My Drive")
    else "/content/drive/MyDrive"
)

# Ensure the output folder exists before writing into it.
out_dir = os.path.join(DRIVE_ROOT, "CORE results Celeb DF")
os.makedirs(out_dir, exist_ok=True)

# Write the CSV without the index column and report where it landed.
csv_path = os.path.join(out_dir, "core_small_table_celebdf.csv")
small_table.to_csv(csv_path, index=False)
print(f"Saved CSV to: {csv_path}")


Saved CSV to: /content/drive/My Drive/CORE results Celeb DF/core_small_table_celebdf.csv
