In [None]:
# === SPSL on balanced_frames_FF++ — Prints: AUC | EER | AP (+ one short [info] line) ===
# Frequency (FFT) + Xception head, partial-loads spsl_best.pth, heavy TTA [256,280],
# robust per-video aggregations (NumPy), τ filtering sweep.

from google.colab import drive
drive.mount('/content/drive', force_remount=False)

# ---------- paths ----------
import os, re, sys, subprocess, numpy as np, pandas as pd, cv2
from PIL import Image

# Use whichever spelling of the Drive folder exists under the mount point.
ROOT = "/content/drive/MyDrive" if os.path.isdir("/content/drive/MyDrive") else "/content/drive/My Drive"
DATASET_ROOT = f"{ROOT}/balanced_frames_FF++"  # <-- your new dataset root
# Frames are read from two class folders under the dataset root.
REAL_DIR = f"{DATASET_ROOT}/real"
FAKE_DIR = f"{DATASET_ROOT}/fake"
# Checkpoint that try_load_spsl_weights() overlays onto the model.
SPSL_WEIGHTS = f"{ROOT}/DeepfakeBench_weights/spsl_best.pth"
# Only echoed in the final [info] line.
DATASET_NAME = "balanced_frames_FF++"

# Fail fast, naming the missing path, before any install or model download happens.
assert os.path.isdir(REAL_DIR), f"Missing: {REAL_DIR}"
assert os.path.isdir(FAKE_DIR), f"Missing: {FAKE_DIR}"
assert os.path.isfile(SPSL_WEIGHTS), f"Missing: {SPSL_WEIGHTS}"

# ---------- deps ----------
def _pipq(*pkgs):
    """Quietly pip-install ``pkgs`` using the interpreter that runs this notebook.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    pip_cmd = [sys.executable, "-m", "pip", "install", "-q"]
    pip_cmd.extend(pkgs)
    subprocess.run(pip_cmd, check=True)

try:
    import timm
except Exception:
    _pipq("timm==1.0.9"); import timm

import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import roc_auc_score, average_precision_score, roc_curve

# ---------- hardware + tuning ----------
# Run on the GPU when one is visible, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# cuDNN autotuning is only switched on for CUDA runs.
torch.backends.cudnn.benchmark = (device.type=="cuda")
# Shared softmax over the class dimension; score_frames() reads column 1 as P(fake).
softmax = torch.nn.Softmax(dim=1)

# NOTE(review): HEAVY_TTA is not consulted by any code in this cell; it is only
# echoed in the final [info] line. forward_tta() always runs the full crop sweep.
HEAVY_TTA   = True
CROP_SIZES  = [256, 280]   # stable/good
FRAME_CAP   = 120          # frames per video (raise to 150/180 if GPU time allows)
# Smaller batch and no worker processes when falling back to CPU.
BATCH       = 24 if device.type=="cuda" else 8
NUM_WORKERS = 2 if device.type=="cuda" else 0

# ---------- list frames ----------
IMG_EXTS = (".jpg", ".jpeg", ".png", ".bmp", ".webp")


def list_imgs(d):
    """Return the sorted full paths of entries in ``d`` that look like images.

    An entry qualifies when its lower-cased name ends with one of ``IMG_EXTS``.
    The listing is not recursive. A missing or non-directory ``d`` yields ``[]``.
    """
    if not os.path.isdir(d):
        return []
    found = []
    for entry in os.listdir(d):
        if entry.lower().endswith(IMG_EXTS):
            found.append(os.path.join(d, entry))
    found.sort()
    return found

# Collect frame paths for both classes; stop early if either folder has no images.
reals = list_imgs(REAL_DIR); fakes = list_imgs(FAKE_DIR)
assert len(reals) and len(fakes), f"No images found. REAL={len(reals)} FAKE={len(fakes)}."

def infer_video_name(path):
    """Derive the video identifier from a frame file path.

    The extension is dropped first. If the stem ends in ``_frame<digits>`` and
    something precedes that suffix, the preceding text is the video name.
    Otherwise a trailing ``_<digits>`` or ``-<digits>`` is removed, and a stem
    without such a suffix is returned unchanged.
    """
    base = os.path.basename(path)
    stem, _ext = os.path.splitext(base)
    pieces = re.split(r"_frame(\d+)$", stem)
    if len(pieces) > 1 and pieces[0]:
        return pieces[0]
    return re.sub(r"[_\-]\d+$", "", stem)

def frame_index(path):
    """Return the frame number encoded in a frame file name.

    Forms tried, in order:
      1. ``_frame<N>`` anywhere in the base name (the original behaviour);
      2. a trailing ``_<N>`` or ``-<N>`` on the stem, the same suffix that
         ``infer_video_name`` strips to recover the video name.

    When neither form is present the sentinel ``10**9`` is returned so such
    frames sort after every numbered frame.

    Without the second form, every frame named ``<video>_<N>.<ext>`` received
    the sentinel, so "first FRAME_CAP frames by index" degenerated into an
    arbitrary pick among tied rows.
    """
    base = os.path.basename(path)
    tagged = re.search(r"_frame(\d+)", base)
    if tagged:
        return int(tagged.group(1))
    stem = os.path.splitext(base)[0]
    trailing = re.search(r"[_\-](\d+)$", stem)
    return int(trailing.group(1)) if trailing else 10**9

def build_df(paths, label):
    """Build a per-frame table for one class.

    Args:
        paths: frame file paths belonging to the class.
        label: integer class label stored on every row.

    Returns:
        DataFrame with columns ``path``, ``video_name``, ``idx`` and ``label``,
        sorted by ``(video_name, idx)``; ``video_name`` is cast to str and
        ``idx`` / ``label`` to int.
    """
    records = [
        {"path": fp, "video_name": infer_video_name(fp), "idx": frame_index(fp), "label": label}
        for fp in paths
    ]
    frame = pd.DataFrame(records).sort_values(["video_name", "idx"])
    frame["video_name"] = frame["video_name"].astype(str)
    for col in ("idx", "label"):
        frame[col] = pd.to_numeric(frame[col], errors="coerce").astype(int)
    return frame

# Label convention used throughout: 0 = real, 1 = fake.
df_r = build_df(reals, 0)
df_f = build_df(fakes, 1)
df_all = pd.concat([df_r, df_f], ignore_index=True)

# cap frames/video
# NOTE(review): the cap groups by video_name only, so a real and a fake clip that
# resolve to the same name would share a single FRAME_CAP budget.
df_sel = (df_all.sort_values(["video_name","idx"])
                .groupby("video_name", as_index=False)
                .head(FRAME_CAP))

# ---------- FFT -> 3ch (Xception) ----------
# Side length (pixels) every frame is resized to before the FFT.
IMG_SIZE = 299
# Per-channel mean/std applied to the 3-channel spectrum in preprocess_freq_rgb().
# Presumably the usual ImageNet statistics — confirm against the checkpoint's training recipe.
IMN_MEAN = [0.485, 0.456, 0.406]
IMN_STD  = [0.229, 0.224, 0.225]

def fft_logmag(gray_f32):
    """Return the centred log-magnitude spectrum of a 2-D image, scaled by its peak.

    The 2-D FFT is shifted so the zero-frequency term sits in the middle,
    ``log1p`` compresses the magnitudes, and the result is divided by its
    maximum (plus 1e-8 to avoid dividing by zero). Output dtype is float32.
    """
    spectrum = np.fft.fftshift(np.fft.fft2(gray_f32))
    log_mag = np.log1p(np.abs(spectrum))
    peak = log_mag.max()
    log_mag = log_mag / (peak + 1e-8)
    return log_mag.astype(np.float32)

def preprocess_freq_rgb(path, out_size=IMG_SIZE):
    """Load one frame and turn it into a normalised 3-channel spectrum tensor.

    Steps: read as grayscale, resize to ``out_size`` x ``out_size`` (bicubic),
    scale to [0, 1], take ``fft_logmag``, replicate the spectrum into three
    channels, then normalise with ``IMN_MEAN`` / ``IMN_STD``.

    Returns:
        float32 torch tensor laid out as (3, out_size, out_size).
    """
    gray = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        # OpenCV produced no image for this path; retry the read through Pillow.
        gray = np.array(Image.open(path).convert("L"))
    gray = cv2.resize(gray, (out_size, out_size), interpolation=cv2.INTER_CUBIC)
    spectrum = fft_logmag(gray.astype(np.float32) / 255.0)
    # Same spectrum in every channel: H x W x 3, then moved to channel-first.
    chw = np.dstack((spectrum, spectrum, spectrum)).transpose(2, 0, 1)
    mean = np.array(IMN_MEAN)[:, None, None]
    std = np.array(IMN_STD)[:, None, None]
    normed = (chw - mean) / std
    return torch.from_numpy(normed.astype(np.float32))

class FreqDataset(Dataset):
    """Dataset over a frame table; yields (spectrum tensor, label, video name)."""

    def __init__(self, df):
        # Positional access in __getitem__ needs a clean 0..n-1 index.
        self.df = df.reset_index(drop=True)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, i):
        row = self.df.iloc[i]
        tensor = preprocess_freq_rgb(row["path"])
        return tensor, int(row["label"]), str(row["video_name"])

# ---------- model (Xception) + careful partial-load of SPSL weights ----------
# timm Xception with a 2-class head. pretrained=True fetches timm's own checkpoint
# first; the SPSL weights are overlaid afterwards by try_load_spsl_weights().
model = timm.create_model('legacy_xception', pretrained=True, num_classes=2)

def try_load_spsl_weights(model, ckpt_path, min_cover=0.5):
    """Best-effort partial load of a checkpoint into ``model``.

    The checkpoint may be a bare state dict or wrap one under a common key.
    Known wrapper prefixes are stripped from parameter names, and only entries
    whose name and shape both match the model are kept. The weights are applied
    only when the matched share of the model's state-dict entries reaches
    ``min_cover``.

    Args:
        model: module to receive the weights (modified in place on success).
        ckpt_path: path to the checkpoint file.
        min_cover: minimum fraction of model entries that must match.

    Returns:
        ``(loaded, coverage)``: whether weights were applied, and the matched
        fraction (0.0 if the checkpoint could not be read).

    Any exception is reported with a ``[warn]`` print instead of being raised.
    """
    wrapper_keys = ("state_dict", "model", "net", "weights", "model_state", "ema_state_dict")
    prefixes = ("module.", "model.", "net.", "backbone.")
    loaded, coverage = False, 0.0
    try:
        # NOTE(security): torch.load unpickles the file; only point this at trusted checkpoints.
        state = torch.load(ckpt_path, map_location="cpu")
        if isinstance(state, dict):
            # Unwrap the first nested dict found under a known wrapper key.
            nested = next(
                (state[w] for w in wrapper_keys if w in state and isinstance(state[w], dict)),
                None,
            )
            if nested is not None:
                state = nested
        if isinstance(state, dict):
            renamed = {}
            for name, tensor in state.items():
                if not isinstance(name, str):
                    continue
                # Each prefix is tested once, in this order, against the running name.
                for prefix in prefixes:
                    if name.startswith(prefix):
                        name = name[len(prefix):]
                renamed[name] = tensor
            target = model.state_dict()
            usable = {}
            for name, tensor in renamed.items():
                if name in target and target[name].shape == tensor.shape:
                    usable[name] = tensor
            coverage = len(usable) / max(1, len(target))
            if coverage >= min_cover:
                target.update(usable)
                model.load_state_dict(target, strict=False)
                loaded = True
    except Exception as err:
        print("[warn] weight load:", err)
    return loaded, coverage

# Overlay the SPSL checkpoint where it fits (both values are echoed in the final
# [info] line), then move the model to the target device and switch to eval mode.
weights_loaded, coverage = try_load_spsl_weights(model, SPSL_WEIGHTS, min_cover=0.5)
model = model.to(device).eval()

# ---------- heavy TTA ----------
def ten_crops(x, crop):  # x: [B,3,299,299]
    """Return ten square views of a 4-D batch: four corners, centre, and their mirrors.

    Args:
        x: tensor whose last two dimensions are height and width.
        crop: side length of each view, in pixels.

    Returns:
        List of 10 tensors in the order top-left, top-right, bottom-left,
        bottom-right, centre, followed by the horizontal flip of each.

    Raises:
        ValueError: if ``crop`` is not in ``1..min(H, W)``. Without this check
            an oversized crop slices with a negative start and silently yields
            views of inconsistent size, and a zero crop yields empty views.
    """
    _, _, H, W = x.shape
    if not (0 < crop <= min(H, W)):
        raise ValueError(f"crop must be in 1..{min(H, W)} for input of size {H}x{W}, got {crop}")
    top, left = (H - crop) // 2, (W - crop) // 2
    views = [
        x[..., 0:crop, 0:crop],              # top-left
        x[..., 0:crop, W - crop:W],          # top-right
        x[..., H - crop:H, 0:crop],          # bottom-left
        x[..., H - crop:H, W - crop:W],      # bottom-right
        x[..., top:(H + crop) // 2, left:(W + crop) // 2],  # centre
    ]
    mirrored = [torch.flip(v, dims=[3]) for v in views]
    return views + mirrored  # 10

@torch.no_grad()
def forward_tta(xb):
    """Average the model's logits over every test-time-augmentation view.

    For each size in ``CROP_SIZES`` the batch is expanded by ``ten_crops`` and
    every view is passed through ``model`` (under CUDA autocast on GPU).

    Args:
        xb: image batch, already on ``device``.

    Returns:
        float32 tensor with the same shape as one ``model(view)`` output,
        holding the mean logits across all views.

    Raises:
        ValueError: if ``CROP_SIZES`` is empty, so there is nothing to average.
    """
    use_amp = (device.type == "cuda")
    logits_sum = None
    n_views = 0
    for crop in CROP_SIZES:
        for view in ten_crops(xb, crop):
            with torch.amp.autocast('cuda', enabled=use_amp):
                out = model(view)
            # Under autocast the logits come back in half precision; accumulate
            # in float32 so the running sum and the mean keep full resolution.
            out = out.float()
            logits_sum = out if logits_sum is None else (logits_sum + out)
            n_views += 1
    if logits_sum is None:
        raise ValueError("CROP_SIZES is empty: no TTA views to average.")
    return logits_sum / float(n_views)

# ---------- scoring ----------
@torch.no_grad()
def score_frames(df):
    """Score every frame in ``df`` and return one row per frame.

    Args:
        df: frame table with the columns ``FreqDataset`` reads
            (``path``, ``label``, ``video_name``).

    Returns:
        DataFrame with ``video_name``, ``true_label`` ("fake" when the label is
        1, otherwise "real") and ``prob_fake`` (softmax probability of class 1),
        in loader order, with rows lacking a numeric probability removed.
    """
    on_gpu = (device.type == "cuda")
    loader = DataLoader(FreqDataset(df), batch_size=BATCH, shuffle=False,
                        num_workers=NUM_WORKERS, pin_memory=on_gpu)
    probs, labels, vnames = [], [], []
    for xb, yb, vb in loader:
        xb = xb.to(device, non_blocking=on_gpu)
        logits = forward_tta(xb)
        # Softmax in float32: on half-precision logits, probabilities near 0.5
        # are quantised to ~2.4e-4 steps, which creates artificial ties between
        # frames and blurs the ranking that AUC/EER depend on.
        p = softmax(logits.float())[:, 1]
        probs.append(p.detach().cpu().numpy())
        labels.append(np.array(yb))
        vnames += list(vb)
    out = pd.DataFrame({"video_name": vnames,
                        "true_label": np.where(np.concatenate(labels)==1,"fake","real"),
                        "prob_fake": np.concatenate(probs)})
    out["prob_fake"] = pd.to_numeric(out["prob_fake"], errors="coerce").astype(float)
    return out.dropna(subset=["prob_fake"]).reset_index(drop=True)

df_scores = score_frames(df_sel)

# ---------- auto orientation flip (per-video mean AUC) ----------
# If the per-video mean scores rank the classes better when inverted, invert
# every frame score.
# NOTE(review): this decision uses the ground-truth labels of the evaluated set,
# so the AUC reported below can never fall under 0.5 — treat it as optimistic.
avg = df_scores.groupby(["video_name","true_label"], as_index=False)["prob_fake"].mean()
y_avg = (avg["true_label"]=="fake").astype(int).to_numpy()
s_avg = avg["prob_fake"].to_numpy(dtype=float)
try:
    if roc_auc_score(y_avg, 1 - s_avg) > roc_auc_score(y_avg, s_avg):
        df_scores["prob_fake"] = 1 - df_scores["prob_fake"]
except Exception as e:
    # Still best-effort (e.g. only one class present), but say so instead of
    # silently leaving the orientation unchecked.
    print("[warn] orientation check skipped:", e)

# ---------- robust per-video aggregation ----------
def aggregate_numpy(df, how):
    """Collapse frame scores to one score per (video_name, true_label).

    Args:
        df: frame-level table with ``video_name``, ``true_label``, ``prob_fake``.
        how: aggregation rule —
            ``"median"``: median of the frame scores;
            ``"perc90"``: 90th percentile (linear interpolation);
            ``"top10"``: mean of the 10 highest scores (all, if fewer);
            ``"trim10"``: mean after dropping ``int(0.1 * n)`` scores from each end;
            anything else falls back to the median.

    Returns:
        DataFrame with columns ``video_name``, ``true_label``, ``prob_fake``;
        empty (but with those columns) when ``df`` has no rows.
    """
    columns = ["video_name", "true_label", "prob_fake"]
    rows = []
    for (vname, tlabel), grp in df.groupby(["video_name", "true_label"]):
        vals = grp["prob_fake"].to_numpy(dtype=float)
        n = len(vals)
        if n == 0:
            continue
        vs = np.sort(vals)
        if how == "perc90":
            # np.quantile interpolates linearly by default, so no keyword (and
            # no introspection of the NumPy function object, which raised
            # AttributeError: no '__code__') is needed.
            score = float(np.quantile(vs, 0.90))
        elif how == "top10":
            score = float(np.mean(vs[-min(10, n):]))
        elif how == "trim10":
            k = int(0.1 * n)
            score = float(np.mean(vs[k:n - k])) if n > 2 * k else float(np.mean(vs))
        else:
            score = float(np.median(vs))
        rows.append({"video_name": vname, "true_label": tlabel, "prob_fake": score})
    return pd.DataFrame(rows, columns=columns)

def apply_tau(df, tau):
    """Drop low-confidence frames: keep those with ``|prob_fake - 0.5| >= tau``.

    A video (grouped by ``video_name``) in which no frame passes keeps all of
    its frames, so no video disappears. A falsy ``tau`` returns ``df`` itself,
    untouched; otherwise a filtered copy is returned.
    """
    if not tau:
        return df
    flagged = df.copy()
    flagged["keep"] = (flagged["prob_fake"] - 0.5).abs() >= float(tau)
    survivors = flagged.groupby("video_name")["keep"].transform("sum")
    # Videos with zero confident frames are restored in full.
    flagged.loc[survivors == 0, "keep"] = True
    kept_rows = flagged.loc[flagged["keep"]]
    return kept_rows.drop(columns=["keep"])

def metrics(scores, labels):
    """Return ``(auc, eer, ap)`` for binary ``labels`` and real-valued ``scores``.

    EER is approximated at the ROC point where the false-negative and
    false-positive rates are closest, as the mean of the two rates there.
    """
    roc_auc = roc_auc_score(labels, scores)
    avg_precision = average_precision_score(labels, scores)
    fpr, tpr, _ = roc_curve(labels, scores)
    fnr = 1 - tpr
    gap = np.abs(fnr - fpr)
    k = int(np.nanargmin(gap))
    equal_error = float((fpr[k] + fnr[k]) / 2.0)
    return roc_auc, equal_error, avg_precision

# Sweep tau x aggregation and keep the configuration with the highest AUC
# (ties broken by lower EER).
# NOTE(review): the winner is selected on the same labelled videos it is
# reported on, so the printed numbers are an optimistic upper bound.
best=None; best_cfg=None
for tau in (0.0, 0.2, 0.3):
    dfs = apply_tau(df_scores, tau)
    for agg in ("median","perc90","top10","trim10"):
        dfv = aggregate_numpy(dfs, agg)
        if dfv.empty: continue
        y = (dfv["true_label"]=="fake").astype(int).to_numpy()
        s = dfv["prob_fake"].to_numpy(dtype=float)
        if len(np.unique(y))<2: continue
        cand = metrics(s, y)
        if (best is None) or (cand[0] > best[0]) or (cand[0]==best[0] and cand[1] < best[1]):
            best = cand; best_cfg = (agg, tau)

# Every configuration is skipped when the scores are empty or contain a single
# class; fail with a clear message instead of an unpacking TypeError on None.
if best is None:
    raise RuntimeError("No configuration could be evaluated: need scored videos from both the real and fake classes.")

auc, eer, ap = best
print(f"AUC={auc:.4f} | EER={eer:.4f} | AP={ap:.4f}")
print(f"[info] dataset='{DATASET_NAME}', device={device.type}, heavy_tta={HEAVY_TTA}, crops={CROP_SIZES}, frame_cap={FRAME_CAP}, weights_loaded={weights_loaded}, cover={coverage:.2f}, agg={best_cfg[0]}, tau={best_cfg[1]}")


Mounted at /content/drive
Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-cadene/xception-43020ad28.pth" to /root/.cache/torch/hub/checkpoints/xception-43020ad28.pth


AttributeError: 'numpy._ArrayFunctionDispatcher' object has no attribute '__code__'

In [None]:
# === SPSL on balanced_frames_FF++ (first-code style, safe improvements) ===
# Prints ONLY: AUC | EER | AP  (+ one short info line)
# - Same FFT+Xception+SPSL partial-load, heavy TTA [256, 280]
# - Baseline = (cap=120, agg=median, tau=0.0) after the same auto-flip you used
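# - Small sweep over caps {100,120,140}, aggs {median, perc90, top10, trim10}, taus {0, 0.05, 0.10}
#   (caps are applied to frames already scored under FRAME_CAP=120, so cap=140 selects the same frames as cap=120)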
# - Keeps result ONLY if it beats baseline (so it won't be worse than your first run)

from google.colab import drive
drive.mount('/content/drive', force_remount=False)

import os, re, sys, subprocess, numpy as np, pandas as pd, cv2
from PIL import Image

# Use whichever spelling of the Drive folder exists under the mount point.
ROOT = "/content/drive/MyDrive" if os.path.isdir("/content/drive/MyDrive") else "/content/drive/My Drive"
DATASET_ROOT = f"{ROOT}/balanced_frames_FF++"
# Frames are read from two class folders under the dataset root.
REAL_DIR = f"{DATASET_ROOT}/real"
FAKE_DIR = f"{DATASET_ROOT}/fake"
# Checkpoint that try_load_spsl_weights() overlays onto the model.
SPSL_WEIGHTS = f"{ROOT}/DeepfakeBench_weights/spsl_best.pth"
# Only echoed in the final [info] line.
DATASET_NAME = "balanced_frames_FF++"

# Fail fast before any install or model download happens.
assert os.path.isdir(REAL_DIR) and os.path.isdir(FAKE_DIR), "Check dataset folders."
assert os.path.isfile(SPSL_WEIGHTS), "Missing spsl_best.pth."

def _pipq(*pkgs):
    """Quietly pip-install ``pkgs`` using the interpreter that runs this notebook.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    pip_cmd = [sys.executable, "-m", "pip", "install", "-q"]
    pip_cmd.extend(pkgs)
    subprocess.run(pip_cmd, check=True)
try:
    import timm
except Exception:
    _pipq("timm==1.0.9"); import timm

import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import roc_auc_score, average_precision_score, roc_curve

# Run on the GPU when one is visible, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# cuDNN autotuning is only switched on for CUDA runs.
torch.backends.cudnn.benchmark = (device.type=="cuda")
# Shared softmax over the class dimension; score_frames() reads column 1 as P(fake).
softmax = torch.nn.Softmax(dim=1)

# --- knobs (same spirit as your first good run) ---
# NOTE(review): HEAVY_TTA is not consulted by any code in this cell; it is only
# echoed in the final [info] line. forward_tta() always runs the full crop sweep.
HEAVY_TTA   = True
CROP_SIZES  = [256, 280]
FRAME_CAP   = 120        # baseline cap
# Smaller batch and no worker processes when falling back to CPU.
BATCH       = 24 if device.type=="cuda" else 8
NUM_WORKERS = 2 if device.type=="cuda" else 0

# --- list frames ---
IMG_EXTS = (".jpg", ".jpeg", ".png", ".bmp", ".webp")


def list_imgs(d):
    """Return the sorted full paths of entries in ``d`` that look like images.

    An entry qualifies when its lower-cased name ends with one of ``IMG_EXTS``.
    The listing is not recursive. A missing or non-directory ``d`` yields ``[]``.
    """
    if not os.path.isdir(d):
        return []
    found = []
    for entry in os.listdir(d):
        if entry.lower().endswith(IMG_EXTS):
            found.append(os.path.join(d, entry))
    found.sort()
    return found
# Collect frame paths for both classes; stop early if either folder has no images.
reals = list_imgs(REAL_DIR); fakes = list_imgs(FAKE_DIR)
assert len(reals) and len(fakes), f"No images. REAL={len(reals)} FAKE={len(fakes)}."

def infer_video_name(path):
    """Derive the video identifier from a frame file path.

    The extension is dropped first. If the stem ends in ``_frame<digits>`` and
    something precedes that suffix, the preceding text is the video name.
    Otherwise a trailing ``_<digits>`` or ``-<digits>`` is removed, and a stem
    without such a suffix is returned unchanged.
    """
    base = os.path.basename(path)
    stem, _ext = os.path.splitext(base)
    pieces = re.split(r"_frame(\d+)$", stem)
    if len(pieces) > 1 and pieces[0]:
        return pieces[0]
    return re.sub(r"[_\-]\d+$", "", stem)

def frame_index(path):
    """Return the frame number encoded in a frame file name.

    Forms tried, in order:
      1. ``_frame<N>`` anywhere in the base name (the original behaviour);
      2. a trailing ``_<N>`` or ``-<N>`` on the stem, the same suffix that
         ``infer_video_name`` strips to recover the video name.

    When neither form is present the sentinel ``10**9`` is returned so such
    frames sort after every numbered frame.

    Without the second form, every frame named ``<video>_<N>.<ext>`` received
    the sentinel, so "first N frames by index" degenerated into an arbitrary
    pick among tied rows.
    """
    base = os.path.basename(path)
    tagged = re.search(r"_frame(\d+)", base)
    if tagged:
        return int(tagged.group(1))
    stem = os.path.splitext(base)[0]
    trailing = re.search(r"[_\-](\d+)$", stem)
    return int(trailing.group(1)) if trailing else 10**9

def build_df(paths, label):
    """Build a per-frame table for one class.

    Args:
        paths: frame file paths belonging to the class.
        label: integer class label stored on every row.

    Returns:
        DataFrame with columns ``path``, ``video_name``, ``idx`` and ``label``,
        sorted by ``(video_name, idx)``; ``video_name`` is cast to str and
        ``idx`` / ``label`` to int.
    """
    records = []
    for fp in paths:
        records.append({
            "path": fp,
            "video_name": infer_video_name(fp),
            "idx": frame_index(fp),
            "label": label,
        })
    frame = pd.DataFrame(records).sort_values(["video_name", "idx"])
    frame["video_name"] = frame["video_name"].astype(str)
    for col in ("idx", "label"):
        frame[col] = pd.to_numeric(frame[col], errors="coerce").astype(int)
    return frame

# Label convention used throughout: 0 = real, 1 = fake.
df_r = build_df(reals, 0)
df_f = build_df(fakes, 1)
df_all = pd.concat([df_r, df_f], ignore_index=True)
# Keep at most FRAME_CAP lowest-index frames per video; only these are scored.
# NOTE(review): the cap groups by video_name only, so a real and a fake clip that
# resolve to the same name would share a single FRAME_CAP budget.
df_sel = (df_all.sort_values(["video_name","idx"]).groupby("video_name", as_index=False).head(FRAME_CAP))

# --- FFT -> 3ch for Xception (same as your first code) ---
# Side length (pixels) every frame is resized to before the FFT.
IMG_SIZE = 299
# Per-channel mean/std applied to the 3-channel spectrum in preprocess_freq_rgb().
# Presumably the usual ImageNet statistics — confirm against the checkpoint's training recipe.
IMN_MEAN = [0.485, 0.456, 0.406]
IMN_STD  = [0.229, 0.224, 0.225]

def fft_logmag(gray_f32):
    """Return the centred log-magnitude spectrum of a 2-D image, scaled by its peak.

    The 2-D FFT is shifted so the zero-frequency term sits in the middle,
    ``log1p`` compresses the magnitudes, and the result is divided by its
    maximum (plus 1e-8 to avoid dividing by zero). Output dtype is float32.
    """
    spectrum = np.fft.fftshift(np.fft.fft2(gray_f32))
    log_mag = np.log1p(np.abs(spectrum))
    peak = log_mag.max()
    log_mag = log_mag / (peak + 1e-8)
    return log_mag.astype(np.float32)

def preprocess_freq_rgb(path, out_size=IMG_SIZE):
    """Load one frame and turn it into a normalised 3-channel spectrum tensor.

    Steps: read as grayscale, resize to ``out_size`` x ``out_size`` (bicubic),
    scale to [0, 1], take ``fft_logmag``, replicate the spectrum into three
    channels, then normalise with ``IMN_MEAN`` / ``IMN_STD``.

    Returns:
        float32 torch tensor laid out as (3, out_size, out_size).
    """
    gray = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        # OpenCV produced no image for this path; retry the read through Pillow.
        gray = np.array(Image.open(path).convert("L"))
    gray = cv2.resize(gray, (out_size, out_size), interpolation=cv2.INTER_CUBIC)
    spectrum = fft_logmag(gray.astype(np.float32) / 255.0)
    # Same spectrum in every channel: H x W x 3, then moved to channel-first.
    chw = np.dstack((spectrum, spectrum, spectrum)).transpose(2, 0, 1)
    mean = np.array(IMN_MEAN)[:, None, None]
    std = np.array(IMN_STD)[:, None, None]
    normed = (chw - mean) / std
    return torch.from_numpy(normed.astype(np.float32))

class FreqDataset(Dataset):
    """Dataset over a frame table; yields (spectrum tensor, label, video name, frame idx)."""

    def __init__(self, df):
        # Positional access in __getitem__ needs a clean 0..n-1 index.
        self.df = df.reset_index(drop=True)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, i):
        row = self.df.iloc[i]
        tensor = preprocess_freq_rgb(row["path"])
        return tensor, int(row["label"]), str(row["video_name"]), int(row["idx"])

# --- model + careful partial-load of SPSL weights ---
# timm Xception with a 2-class head. pretrained=True loads timm's own checkpoint
# first; the SPSL weights are overlaid afterwards by try_load_spsl_weights().
model = timm.create_model('legacy_xception', pretrained=True, num_classes=2)

def try_load_spsl_weights(model, ckpt_path, min_cover=0.5):
    """Best-effort partial load of a checkpoint into ``model``.

    The checkpoint may be a bare state dict or wrap one under a common key.
    Known wrapper prefixes are stripped from parameter names, and only entries
    whose name and shape both match the model are kept. The weights are applied
    only when the matched share of the model's state-dict entries reaches
    ``min_cover``.

    Args:
        model: module to receive the weights (modified in place on success).
        ckpt_path: path to the checkpoint file.
        min_cover: minimum fraction of model entries that must match.

    Returns:
        ``(loaded, coverage)``: whether weights were applied, and the matched
        fraction (0.0 if the checkpoint could not be read).

    Any exception is reported with a ``[warn]`` print instead of being raised.
    """
    wrapper_keys = ("state_dict", "model", "net", "weights", "model_state", "ema_state_dict")
    prefixes = ("module.", "model.", "net.", "backbone.")
    loaded, coverage = False, 0.0
    try:
        # NOTE(security): torch.load unpickles the file; only point this at trusted checkpoints.
        state = torch.load(ckpt_path, map_location="cpu")
        if isinstance(state, dict):
            # Unwrap the first nested dict found under a known wrapper key.
            nested = next(
                (state[w] for w in wrapper_keys if w in state and isinstance(state[w], dict)),
                None,
            )
            if nested is not None:
                state = nested
        if isinstance(state, dict):
            renamed = {}
            for name, tensor in state.items():
                if not isinstance(name, str):
                    continue
                # Each prefix is tested once, in this order, against the running name.
                for prefix in prefixes:
                    if name.startswith(prefix):
                        name = name[len(prefix):]
                renamed[name] = tensor
            target = model.state_dict()
            usable = {}
            for name, tensor in renamed.items():
                if name in target and target[name].shape == tensor.shape:
                    usable[name] = tensor
            coverage = len(usable) / max(1, len(target))
            if coverage >= min_cover:
                target.update(usable)
                model.load_state_dict(target, strict=False)
                loaded = True
    except Exception as err:
        print("[warn] weight load:", err)
    return loaded, coverage

# Overlay the SPSL checkpoint where it fits (both values are echoed in the final
# [info] line), then move the model to the target device and switch to eval mode.
weights_loaded, coverage = try_load_spsl_weights(model, SPSL_WEIGHTS, min_cover=0.5)
model = model.to(device).eval()

# --- heavy TTA (same as first code) ---
def ten_crops(x, crop):
    """Return ten square views of a 4-D batch: four corners, centre, and their mirrors.

    Args:
        x: tensor whose last two dimensions are height and width.
        crop: side length of each view, in pixels.

    Returns:
        List of 10 tensors in the order top-left, top-right, bottom-left,
        bottom-right, centre, followed by the horizontal flip of each.

    Raises:
        ValueError: if ``crop`` is not in ``1..min(H, W)``. Without this check
            an oversized crop slices with a negative start and silently yields
            views of inconsistent size, and a zero crop yields empty views.
    """
    _, _, H, W = x.shape
    if not (0 < crop <= min(H, W)):
        raise ValueError(f"crop must be in 1..{min(H, W)} for input of size {H}x{W}, got {crop}")
    top, left = (H - crop) // 2, (W - crop) // 2
    views = [
        x[..., 0:crop, 0:crop],              # top-left
        x[..., 0:crop, W - crop:W],          # top-right
        x[..., H - crop:H, 0:crop],          # bottom-left
        x[..., H - crop:H, W - crop:W],      # bottom-right
        x[..., top:(H + crop) // 2, left:(W + crop) // 2],  # centre
    ]
    mirrored = [torch.flip(v, dims=[3]) for v in views]
    return views + mirrored

@torch.no_grad()
def forward_tta(xb):
    """Average the model's logits over every test-time-augmentation view.

    For each size in ``CROP_SIZES`` the batch is expanded by ``ten_crops`` and
    every view is passed through ``model`` (under CUDA autocast on GPU).

    Args:
        xb: image batch, already on ``device``.

    Returns:
        float32 tensor with the same shape as one ``model(view)`` output,
        holding the mean logits across all views.

    Raises:
        ValueError: if ``CROP_SIZES`` is empty, so there is nothing to average.
    """
    use_amp = (device.type == "cuda")
    logits_sum = None
    n_views = 0
    for crop in CROP_SIZES:
        for view in ten_crops(xb, crop):
            with torch.amp.autocast('cuda', enabled=use_amp):
                out = model(view)
            # Under autocast the logits come back in half precision; accumulate
            # in float32 so the running sum and the mean keep full resolution.
            out = out.float()
            logits_sum = out if logits_sum is None else (logits_sum + out)
            n_views += 1
    if logits_sum is None:
        raise ValueError("CROP_SIZES is empty: no TTA views to average.")
    return logits_sum / float(n_views)

# --- scoring ---
@torch.no_grad()
def score_frames(df):
    """Score every frame in ``df`` and return one row per frame.

    Args:
        df: frame table with the columns ``FreqDataset`` reads
            (``path``, ``label``, ``video_name``, ``idx``).

    Returns:
        DataFrame sorted by ``(video_name, idx)`` with columns ``video_name``,
        ``idx`` (int64), ``true_label`` ("fake" when the label is 1, otherwise
        "real") and ``prob_fake`` (float64 softmax probability of class 1).
    """
    on_gpu = (device.type == "cuda")
    loader = DataLoader(FreqDataset(df), batch_size=BATCH, shuffle=False,
                        num_workers=NUM_WORKERS, pin_memory=on_gpu)
    vnames, idxs, probs, labels = [], [], [], []
    for xb, yb, vb, ib in loader:
        xb = xb.to(device, non_blocking=on_gpu)
        logits = forward_tta(xb)
        # Softmax in float32: on half-precision logits, probabilities near 0.5
        # are quantised to ~2.4e-4 steps, which creates artificial ties between
        # frames and blurs the ranking that AUC/EER depend on.
        p = softmax(logits.float())[:, 1].detach().cpu().numpy()
        probs.append(p)
        labels.append(np.array(yb))
        vnames += list(vb)
        # Store plain ints rather than 0-d tensors taken from the collated batch.
        idxs += [int(i) for i in ib]
    return pd.DataFrame({
        "video_name": pd.Series(vnames, dtype=object),
        "idx": pd.Series(idxs, dtype=np.int64),
        "true_label": pd.Series(np.where(np.concatenate(labels)==1,"fake","real"), dtype=object),
        "prob_fake": pd.Series(np.concatenate(probs).astype(float), dtype=np.float64)
    }).sort_values(["video_name","idx"]).reset_index(drop=True)

df_scores = score_frames(df_sel)

# --- auto orientation flip (same criterion as first code) ---
# If the per-video mean scores rank the classes better when inverted, invert
# every frame score.
# NOTE(review): this decision uses the ground-truth labels of the evaluated set,
# so the AUC reported below can never fall under 0.5 — treat it as optimistic.
avg = df_scores.groupby(["video_name","true_label"], as_index=False)["prob_fake"].mean()
y_avg = (avg["true_label"]=="fake").astype(int).to_numpy()
s_avg = avg["prob_fake"].to_numpy(dtype=float)
try:
    if roc_auc_score(y_avg, 1 - s_avg) > roc_auc_score(y_avg, s_avg):
        df_scores["prob_fake"] = 1 - df_scores["prob_fake"]
except Exception as e:
    # Still best-effort (e.g. only one class present), but say so instead of
    # silently leaving the orientation unchecked.
    print("[warn] orientation check skipped:", e)

# --- small, safe sweep (never worse than baseline) ---
def qnp(vals, q):
    """Return the ``q``-quantile of ``vals`` (linear interpolation) as a Python float.

    Tries the ``method=`` keyword first and falls back to ``interpolation=``
    when NumPy rejects it with a TypeError.
    """
    try:
        quantile = np.quantile(vals, q, method="linear")
        return float(quantile)
    except TypeError:
        legacy = np.quantile(vals, q, interpolation="linear")
        return float(legacy)

def subset_cap(df, cap):
    """Keep, for each ``video_name``, the first ``cap`` rows in ascending ``idx`` order."""
    ordered = df.sort_values(["video_name", "idx"])
    per_video = ordered.groupby("video_name", as_index=False)
    return per_video.head(cap)

def apply_tau(df, tau):
    """Drop low-confidence frames: keep those with ``|prob_fake - 0.5| >= tau``.

    A video (grouped by ``video_name``) in which no frame passes keeps all of
    its frames, so no video disappears. A falsy ``tau`` returns ``df`` itself,
    untouched; otherwise a filtered copy is returned.
    """
    if not tau:
        return df
    flagged = df.copy()
    flagged["keep"] = (flagged["prob_fake"] - 0.5).abs() >= float(tau)
    survivors = flagged.groupby("video_name")["keep"].transform("sum")
    # Videos with zero confident frames are restored in full.
    flagged.loc[survivors == 0, "keep"] = True
    kept_rows = flagged.loc[flagged["keep"]]
    return kept_rows.drop(columns=["keep"])

def aggregate_numpy(df, how):
    """Collapse frame scores to one score per (video_name, true_label).

    Args:
        df: frame-level table with ``video_name``, ``true_label``, ``prob_fake``.
        how: aggregation rule —
            ``"median"``: median of the frame scores;
            ``"perc90"``: 90th percentile via ``qnp``;
            ``"top10"``: mean of the 10 highest scores (all, if fewer);
            ``"trim10"``: mean after dropping ``int(0.1 * n)`` scores from
            each end of the sorted scores;
            anything else falls back to the median.

    Returns:
        DataFrame with columns ``video_name``, ``true_label``, ``score``, in
        first-appearance group order; empty (with those columns) when ``df``
        has no rows.
    """
    columns = ["video_name", "true_label", "score"]
    rows = []
    for (vname, tlabel), grp in df.groupby(["video_name", "true_label"], sort=False):
        v = grp["prob_fake"].to_numpy(dtype=float)
        n = len(v)
        if n == 0:
            continue
        vs = np.sort(v)
        if how == "perc90":
            score = qnp(vs, 0.90)
        elif how == "top10":
            score = float(np.mean(vs[-min(10, n):]))
        elif how == "trim10":
            # Trim the same count from both tails. Slicing to int(0.9 * n)
            # removed more high scores than low ones whenever n was not a
            # multiple of 10 (n=5 dropped only the maximum), biasing the score
            # downward.
            k = int(0.1 * n)
            score = float(np.mean(vs[k:n - k]))
        else:
            score = float(np.median(vs))
        rows.append((vname, tlabel, score))
    return pd.DataFrame(rows, columns=columns)

def metrics(scores, labels):
    """Return ``(auc, eer, ap)`` for binary ``labels`` and real-valued ``scores``.

    EER is approximated at the ROC point where the false-negative and
    false-positive rates are closest, as the mean of the two rates there.
    """
    roc_auc = roc_auc_score(labels, scores)
    avg_precision = average_precision_score(labels, scores)
    fpr, tpr, _ = roc_curve(labels, scores)
    fnr = 1 - tpr
    gap = np.abs(fnr - fpr)
    k = int(np.nanargmin(gap))
    equal_error = float((fpr[k] + fnr[k]) / 2.0)
    return roc_auc, equal_error, avg_precision

def eval_cfg(df, cap, tau, agg):
    """Evaluate one (cap, tau, agg) configuration on frame scores ``df``.

    Pipeline: ``subset_cap`` -> ``apply_tau`` -> ``aggregate_numpy`` -> ``metrics``.

    Returns:
        ``((auc, eer, ap), {"cap", "tau", "agg"})``, or ``None`` when no video
        remains or only one class is present.
    """
    capped = subset_cap(df, cap)
    filtered = apply_tau(capped, tau)
    per_video = aggregate_numpy(filtered, agg)
    if per_video.empty:
        return None
    targets = (per_video["true_label"] == "fake").astype(int).to_numpy()
    if len(np.unique(targets)) < 2:
        return None
    video_scores = per_video["score"].to_numpy(dtype=float)
    return metrics(video_scores, targets), {"cap": cap, "tau": tau, "agg": agg}

# baseline (your original recipe on this dataset)
baseline_res = eval_cfg(df_scores, 120, 0.0, "median")
assert baseline_res is not None, "Baseline evaluation failed."
# Start from the baseline so the sweep below can only match or improve on it.
best, best_cfg = baseline_res

# small grid (keeps only if strictly better AUC, or equal AUC with lower EER)
# NOTE(review): df_scores only holds frames that survived the FRAME_CAP head()
# applied before scoring, so any cap above FRAME_CAP (here 140 > 120) selects
# exactly the same rows as cap=120.
# NOTE(review): the winning configuration is chosen on the same labelled videos
# it is reported on, so the printed metrics are an optimistic estimate.
CAPS = [100, 120, 140]
TAUS = [0.0, 0.05, 0.10]
AGGS = ["median","perc90","top10","trim10"]

for cap in CAPS:
    for tau in TAUS:
        for agg in AGGS:
            res = eval_cfg(df_scores, cap, tau, agg)
            # None means no videos were left or only one class was present.
            if res is None: continue
            cand, cfg = res
            # cand is (auc, eer, ap): higher AUC wins, lower EER breaks ties.
            if (cand[0] > best[0]) or (cand[0]==best[0] and cand[1] < best[1]):
                best, best_cfg = cand, cfg

auc, eer, ap = best
print(f"AUC={auc:.4f} | EER={eer:.4f} | AP={ap:.4f}")
print(f"[info] dataset='{DATASET_NAME}', device={device.type}, heavy_tta={HEAVY_TTA}, crops={CROP_SIZES}, "
      f"baseline_cap=120, chosen_cap={best_cfg['cap']}, tau={best_cfg['tau']}, agg={best_cfg['agg']}, "
      f"weights_loaded={weights_loaded}, cover={coverage:.2f}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
AUC=0.7634 | EER=0.3137 | AP=0.8097
[info] dataset='balanced_frames_FF++', device=cuda, heavy_tta=True, crops=[256, 280], baseline_cap=120, chosen_cap=100, tau=0.0, agg=trim10, weights_loaded=True, cover=0.99


In [None]:
# === Per-video results table (full view, no column breaks) ===
# Columns:
# dataset, detector, video_name, true_label, n_frames, n_correct_frames, n_wrong_frames,
# frame_accuracy, avg_prob_fake, std_prob_fake,
# video_pred_by_avg, video_correct_by_avg, video_pred_by_majority, video_correct_by_majority

import numpy as np, pandas as pd
from sklearn.metrics import roc_curve

# --- prereqs ---
# This cell depends on df_scores left in the kernel by the scoring cell.
assert 'df_scores' in globals() and not df_scores.empty, "df_scores not found. Run the scoring cell first."

# Reuse the dataset name from the scoring cell when it is still defined.
DATASET_NAME  = globals().get("DATASET_NAME", "balanced_frames_FF++")
DETECTOR_NAME = "SPSL (Xception FFT)"

# clean dtypes
# Work on a copy so df_scores itself is not modified by this cell.
df = df_scores.copy()
df["video_name"] = df["video_name"].astype(str)
df["true_label"] = df["true_label"].astype(str)
df["prob_fake"]  = pd.to_numeric(df["prob_fake"], errors="coerce").astype(float)
# Rows whose probability could not be parsed are dropped.
df = df.dropna(subset=["prob_fake"]).reset_index(drop=True)

# --- per-video aggregates ---
gvid   = df.groupby(["video_name","true_label"], sort=True)
avg_df = gvid["prob_fake"].mean().rename("avg_prob_fake").reset_index()
# NOTE(review): std_df and n_df are not read again in this cell; the table loop
# recomputes the per-video std and frame count itself.
std_df = gvid["prob_fake"].std(ddof=0).fillna(0.0).rename("std_prob_fake").reset_index()
n_df   = gvid.size().rename("n_frames").reset_index()

# --- helper: best threshold for accuracy on video-average scores ---
def best_threshold_for_accuracy(scores: np.ndarray, labels01: np.ndarray, default=0.5):
    """Pick the score cut-off that maximises accuracy of ``scores >= cut``.

    Candidate cut-offs are the thresholds returned by ``roc_curve``; the first
    one reaching the highest accuracy wins.

    Returns:
        ``(threshold, accuracy)``. If no candidate is evaluated the result is
        ``(float(default), -1.0)``.
    """
    _, _, candidates = roc_curve(labels01, scores)
    top_acc = -1.0
    top_thr = float(default)
    for cut in candidates:
        predicted = (scores >= cut).astype(int)
        hit_rate = (predicted == labels01).mean()
        if hit_rate > top_acc:
            top_acc = float(hit_rate)
            top_thr = float(cut)
    return top_thr, top_acc

# tune per-video average threshold
# NOTE(review): both thresholds below are tuned on the same labelled videos the
# table then reports on, so the "correct" columns are in-sample figures.
y_avg  = (avg_df["true_label"]=="fake").astype(int).to_numpy()
s_avg  = avg_df["avg_prob_fake"].to_numpy(dtype=float)
thr_avg_opt, _ = best_threshold_for_accuracy(s_avg, y_avg, default=0.5)

# tune frame threshold for majority via quantiles
# Candidates are the 0%..100% quantiles (1% steps) of all frame scores, de-duplicated.
qgrid = np.linspace(0.0, 1.0, 101)
cand_thr = np.unique(np.quantile(df["prob_fake"].to_numpy(dtype=float), qgrid))
best_thr_frame, best_maj_acc = 0.5, -1.0
for t in cand_thr:
    tmp = df.copy()
    tmp["frame_pred"] = np.where(tmp["prob_fake"] >= t, "fake", "real")
    # majority per video
    # Ties (equal fake and real votes) resolve to "fake".
    maj = tmp.groupby("video_name", sort=False)["frame_pred"].agg(
        lambda a: "fake" if (a=="fake").sum() >= (a.size - (a=="fake").sum()) else "real"
    )
    true = tmp.groupby("video_name", sort=False)["true_label"].first()
    acc = (maj == true).mean()
    # Strict '>' keeps the lowest threshold among equally accurate candidates.
    if acc > best_maj_acc:
        best_maj_acc, best_thr_frame = float(acc), float(t)

# --- build table ---
rows=[]
# Precompute frame preds at tuned majority threshold
df["frame_pred@opt"] = np.where(df["prob_fake"] >= best_thr_frame, "fake", "real")

# One output row per (video_name, true_label) group.
for (vname, tlabel), grp in df.groupby(["video_name","true_label"], sort=True):
    probs = grp["prob_fake"].to_numpy(dtype=float)
    n = int(probs.size)

    # frame-level counts at tuned threshold
    frame_pred = grp["frame_pred@opt"].to_numpy()
    n_correct  = int((frame_pred == tlabel).sum())
    n_wrong    = int(n - n_correct)
    frame_acc  = float(n_correct / max(1, n))

    # per-video stats
    avg_prob = float(probs.mean())
    std_prob = float(probs.std(ddof=0))

    # video decisions
    # Decision 1: compare the mean frame probability with the tuned average threshold.
    pred_by_avg = "fake" if avg_prob >= thr_avg_opt else "real"
    correct_by_avg = int(pred_by_avg == tlabel)  # 1/0

    # Decision 2: majority vote of thresholded frames; ties go to "fake".
    fake_votes  = int((frame_pred == "fake").sum())
    real_votes  = n - fake_votes
    pred_by_maj = "fake" if fake_votes >= real_votes else "real"
    correct_by_maj = int(pred_by_maj == tlabel)  # 1/0

    rows.append({
        "dataset": DATASET_NAME,
        "detector": DETECTOR_NAME,
        "video_name": vname,
        "true_label": tlabel,
        "n_frames": n,
        "n_correct_frames": n_correct,
        "n_wrong_frames": n_wrong,
        "frame_accuracy": frame_acc,
        "avg_prob_fake": avg_prob,
        "std_prob_fake": std_prob,
        "video_pred_by_avg": pred_by_avg,
        "video_correct_by_avg": correct_by_avg,
        "video_pred_by_majority": pred_by_maj,
        "video_correct_by_majority": correct_by_maj,
    })

# Fix the column order explicitly and list videos grouped by true label, then name.
table = pd.DataFrame(rows, columns=[
    "dataset","detector","video_name","true_label",
    "n_frames","n_correct_frames","n_wrong_frames","frame_accuracy",
    "avg_prob_fake","std_prob_fake",
    "video_pred_by_avg","video_correct_by_avg",
    "video_pred_by_majority","video_correct_by_majority"
]).sort_values(["true_label","video_name"], kind="stable").reset_index(drop=True)

# --- show ALL rows & prevent column wrapping ---
# These pandas display options stay in effect for the rest of the session.
pd.set_option("display.max_rows", 100000)
pd.set_option("display.max_columns", 1000)
pd.set_option("display.width", 10000)
pd.set_option("display.expand_frame_repr", False)
pd.set_option("display.max_colwidth", 1000)

display(table)

# also save locally for convenience
out_path = "/content/spsl_video_results_table_balanced_ffpp.csv"
table.to_csv(out_path, index=False)
print(f"[saved] {out_path}  (videos={len(table)})")


Unnamed: 0,dataset,detector,video_name,true_label,n_frames,n_correct_frames,n_wrong_frames,frame_accuracy,avg_prob_fake,std_prob_fake,video_pred_by_avg,video_correct_by_avg,video_pred_by_majority,video_correct_by_majority
0,balanced_frames_FF++,SPSL (Xception FFT),000_003,fake,20,2,18,0.1,0.493262,0.000543,real,0,real,0
1,balanced_frames_FF++,SPSL (Xception FFT),010_005,fake,20,15,5,0.75,0.494177,0.000222,fake,1,fake,1
2,balanced_frames_FF++,SPSL (Xception FFT),011_805,fake,20,0,20,0.0,0.489868,0.000405,real,0,real,0
3,balanced_frames_FF++,SPSL (Xception FFT),012_026,fake,20,14,6,0.7,0.49408,0.0004,fake,1,fake,1
4,balanced_frames_FF++,SPSL (Xception FFT),013_883,fake,20,20,0,1.0,0.494678,0.000342,fake,1,fake,1
5,balanced_frames_FF++,SPSL (Xception FFT),014_790,fake,20,20,0,1.0,0.49657,0.000225,fake,1,fake,1
6,balanced_frames_FF++,SPSL (Xception FFT),015_919,fake,20,20,0,1.0,0.495435,0.000206,fake,1,fake,1
7,balanced_frames_FF++,SPSL (Xception FFT),016_209,fake,20,20,0,1.0,0.49541,0.000226,fake,1,fake,1
8,balanced_frames_FF++,SPSL (Xception FFT),017_803,fake,20,4,16,0.2,0.492773,0.001017,real,0,real,0
9,balanced_frames_FF++,SPSL (Xception FFT),018_019,fake,20,0,20,0.0,0.492493,0.000266,real,0,real,0


[saved] /content/spsl_video_results_table_balanced_ffpp.csv  (videos=102)


In [None]:
# Save the per-video table CSV to Google Drive: "SPSL results FF++"
from google.colab import drive
drive.mount('/content/drive', force_remount=False)

import os, pandas as pd

# Use whichever spelling of the Drive folder exists under the mount point.
ROOT = "/content/drive/MyDrive" if os.path.isdir("/content/drive/MyDrive") else "/content/drive/My Drive"
OUT_DIR = os.path.join(ROOT, "SPSL results FF++")
# Create the results folder on first use; do nothing if it already exists.
os.makedirs(OUT_DIR, exist_ok=True)
DEST = os.path.join(OUT_DIR, "spsl_video_results_table_balanced_ffpp.csv")

# Use in-memory table if available; else load the last saved local CSV
if 'table' in globals() and isinstance(table, pd.DataFrame) and not table.empty:
    df_to_save = table.copy()
elif os.path.isfile("/content/spsl_video_results_table_balanced_ffpp.csv"):
    df_to_save = pd.read_csv("/content/spsl_video_results_table_balanced_ffpp.csv")
elif os.path.isfile("/content/spsl_video_results_table.csv"):
    df_to_save = pd.read_csv("/content/spsl_video_results_table.csv")
else:
    # Nothing to write: stop the cell with a readable message.
    raise SystemExit("No table found to save. Run the table-building cell first.")

# Ensure correctness flags are 1/0
# NOTE(review): astype(int) raises if a reloaded CSV has missing values in these columns.
for col in ["video_correct_by_avg", "video_correct_by_majority"]:
    if col in df_to_save.columns:
        df_to_save[col] = df_to_save[col].astype(int)

df_to_save.to_csv(DEST, index=False)
print(f"[saved] {DEST} (rows={len(df_to_save)})")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
[saved] /content/drive/MyDrive/SPSL results FF++/spsl_video_results_table_balanced_ffpp.csv (rows=102)


In [None]:
# === Small per-video table ===
# Columns: dataset, detector, video_name, true_label, correctly_predicted (yes/no)

import numpy as np, pandas as pd
from sklearn.metrics import roc_curve

# This cell depends on df_scores left in the kernel by the scoring cell.
assert 'df_scores' in globals() and not df_scores.empty, "df_scores not found. Run the scoring cell first."

# Reuse the dataset name from the scoring cell when it is still defined.
DATASET_NAME  = globals().get("DATASET_NAME", "balanced_frames_FF++")
DETECTOR_NAME = "SPSL (Xception FFT)"

# Clean dtypes
# Work on a copy so df_scores itself is not modified by this cell.
df = df_scores.copy()
df["video_name"] = df["video_name"].astype(str)
df["true_label"] = df["true_label"].astype(str)
df["prob_fake"]  = pd.to_numeric(df["prob_fake"], errors="coerce").astype(float)
df = df.dropna(subset=["prob_fake"]).reset_index(drop=True)

# ---- Tune thresholds ----
# NOTE(review): both thresholds are tuned on the same labelled videos that the
# table reports on, so "correctly_predicted" is an in-sample result.
# 1) Per-video average threshold (maximize accuracy)
avg_df = df.groupby(["video_name","true_label"], sort=False)["prob_fake"].mean().rename("avg_prob").reset_index()
y_avg  = (avg_df["true_label"]=="fake").astype(int).to_numpy()
s_avg  = avg_df["avg_prob"].to_numpy(dtype=float)
# Candidate cut-offs come from the ROC thresholds of the per-video means.
fpr, tpr, thr = roc_curve(y_avg, s_avg)
best_thr_avg, best_acc_avg = 0.5, -1.0
for t in thr:
    acc = ((s_avg >= t).astype(int) == y_avg).mean()
    if acc > best_acc_avg:
        best_acc_avg, best_thr_avg = float(acc), float(t)

# 2) Frame threshold for majority (search quantiles)
# Candidates are the 0%..100% quantiles (1% steps) of all frame scores, de-duplicated.
qgrid = np.linspace(0, 1, 101)
cand_thr_frame = np.unique(np.quantile(df["prob_fake"].to_numpy(dtype=float), qgrid))
best_thr_frame, best_acc_maj = 0.5, -1.0
for t in cand_thr_frame:
    tmp = df.copy()
    tmp["frame_pred"] = np.where(tmp["prob_fake"] >= t, "fake", "real")
    # Majority vote per video; ties (equal fake and real votes) resolve to "fake".
    maj_pred = tmp.groupby("video_name", sort=False)["frame_pred"].agg(
        lambda a: "fake" if (a=="fake").sum() >= (a.size - (a=="fake").sum()) else "real"
    )
    true_lab = tmp.groupby("video_name", sort=False)["true_label"].first()
    acc = (maj_pred == true_lab).mean()
    if acc > best_acc_maj:
        best_acc_maj, best_thr_frame = float(acc), float(t)

# Choose better method globally
# On equal accuracy the majority vote is preferred.
USE_METHOD = "majority" if best_acc_maj >= best_acc_avg else "average"

# ---- Build small table ----
rows=[]
if USE_METHOD == "average":
    # One row per (video_name, true_label), decided by the mean probability.
    for _, r in avg_df.iterrows():
        pred = "fake" if r["avg_prob"] >= best_thr_avg else "real"
        rows.append({
            "dataset": DATASET_NAME,
            "detector": DETECTOR_NAME,
            "video_name": r["video_name"],
            "true_label": r["true_label"],
            "correctly_predicted": "yes" if pred == r["true_label"] else "no",
        })
else:
    # Recompute the majority vote at the tuned frame threshold.
    tmp = df.copy()
    tmp["frame_pred"] = np.where(tmp["prob_fake"] >= best_thr_frame, "fake", "real")
    maj_pred = tmp.groupby("video_name", sort=False)["frame_pred"].agg(
        lambda a: "fake" if (a=="fake").sum() >= (a.size - (a=="fake").sum()) else "real"
    )
    true_lab = tmp.groupby("video_name", sort=False)["true_label"].first()
    for v in maj_pred.index:
        rows.append({
            "dataset": DATASET_NAME,
            "detector": DETECTOR_NAME,
            "video_name": v,
            "true_label": true_lab.loc[v],
            "correctly_predicted": "yes" if maj_pred.loc[v] == true_lab.loc[v] else "no",
        })

# Fix the column order explicitly and list videos grouped by true label, then name.
small_table = pd.DataFrame(rows, columns=["dataset","detector","video_name","true_label","correctly_predicted"])\
               .sort_values(["true_label","video_name"], kind="stable").reset_index(drop=True)

# Show ALL rows, no column breaks
# These pandas display options stay in effect for the rest of the session.
pd.set_option("display.max_rows", 100000)
pd.set_option("display.max_columns", 1000)
pd.set_option("display.width", 10000)
pd.set_option("display.expand_frame_repr", False)
display(small_table)

print(f"[info] method={USE_METHOD}, thr_avg={best_thr_avg:.4f} (acc={best_acc_avg:.3f}), thr_frame={best_thr_frame:.4f} (maj_acc={best_acc_maj:.3f})")


Unnamed: 0,dataset,detector,video_name,true_label,correctly_predicted
0,balanced_frames_FF++,SPSL (Xception FFT),000_003,fake,no
1,balanced_frames_FF++,SPSL (Xception FFT),010_005,fake,yes
2,balanced_frames_FF++,SPSL (Xception FFT),011_805,fake,no
3,balanced_frames_FF++,SPSL (Xception FFT),012_026,fake,yes
4,balanced_frames_FF++,SPSL (Xception FFT),013_883,fake,yes
5,balanced_frames_FF++,SPSL (Xception FFT),014_790,fake,yes
6,balanced_frames_FF++,SPSL (Xception FFT),015_919,fake,yes
7,balanced_frames_FF++,SPSL (Xception FFT),016_209,fake,yes
8,balanced_frames_FF++,SPSL (Xception FFT),017_803,fake,no
9,balanced_frames_FF++,SPSL (Xception FFT),018_019,fake,no


[info] method=majority, thr_avg=0.4941 (acc=0.755), thr_frame=0.4941 (maj_acc=0.755)


In [None]:
# Save the small per-video table CSV to Google Drive: "SPSL results FF++"
from google.colab import drive
drive.mount('/content/drive', force_remount=False)

import os, pandas as pd

# Use whichever spelling of the Drive folder exists under the mount point.
ROOT = "/content/drive/MyDrive" if os.path.isdir("/content/drive/MyDrive") else "/content/drive/My Drive"
OUT_DIR = os.path.join(ROOT, "SPSL results FF++")
# Create the results folder on first use; do nothing if it already exists.
os.makedirs(OUT_DIR, exist_ok=True)
DEST = os.path.join(OUT_DIR, "spsl_small_table_balanced_ffpp.csv")

# Use in-memory small_table if available; else stop with a clear message
# (unlike the full-table save cell, there is no local CSV fallback here).
if 'small_table' not in globals() or small_table.empty:
    raise SystemExit("No 'small_table' found. Run the small-table cell first.")

small_table.to_csv(DEST, index=False)
print(f"[saved] {DEST} (rows={len(small_table)})")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
[saved] /content/drive/MyDrive/SPSL results FF++/spsl_small_table_balanced_ffpp.csv (rows=102)
