In [None]:
# === SRM on Celeb-DF (EffB4 frames or Cropped Faces) — prints: "SRM model loaded", AUC | EER | AP, one [info] line with the selected configuration (and a [warn] line if the checkpoint fails to load) ===
# SRM residual filters (KB/KV/H2) + Xception head, quiet ImageNet preload, partial-load SRM weights,
# optional late-fusion with RGB, heavy TTA, and a small, safe sweep to pick the best per-video aggregation.

# Colab-only: mount Google Drive so the dataset and checkpoint paths below resolve.
from google.colab import drive
# force_remount=False: do not force a remount on re-runs.
drive.mount('/content/drive', force_remount=False)

# ---------- paths & dataset choice ----------
import os, re, io, contextlib, subprocess, numpy as np, pandas as pd, cv2
from PIL import Image

# The Drive root may appear as either "MyDrive" or "My Drive"; use whichever exists.
ROOT = "/content/drive/MyDrive" if os.path.isdir("/content/drive/MyDrive") else "/content/drive/My Drive"
USE_DATASET = "cropped_faces"   # options: "cropped_faces", "celebdf_effb4"

# Supported datasets: key -> (folder under ROOT that holds real/ and fake/, display name).
_DATASETS = {
    "cropped_faces": ("frames_cropped_faces", "Celeb-DF (Cropped faces)"),
    "celebdf_effb4": ("frames/celebdf_effb4", "Celeb-DF (EffB4 frames)"),
}
# Fail loudly on a typo instead of silently evaluating the EffB4 frames.
if USE_DATASET not in _DATASETS:
    raise ValueError(f"Unknown USE_DATASET={USE_DATASET!r}; expected one of {sorted(_DATASETS)}")
_frames_subdir, DATASET_NAME = _DATASETS[USE_DATASET]
REAL_DIR = f"{ROOT}/{_frames_subdir}/real"
FAKE_DIR = f"{ROOT}/{_frames_subdir}/fake"

# Checkpoint and dataset folders must exist before any heavy work starts.
SRM_WEIGHTS = f"{ROOT}/DeepfakeBench_weights/srm_best.pth"
assert os.path.isdir(REAL_DIR) and os.path.isdir(FAKE_DIR), f"Check dataset: {REAL_DIR} / {FAKE_DIR}"
assert os.path.isfile(SRM_WEIGHTS), "Missing srm_best.pth in DeepfakeBench_weights."

# ---------- deps ----------
def _pipq(*pkgs):
    """Quietly pip-install ``pkgs`` into the interpreter that runs this notebook.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    import sys  # public module; `os.sys` only works because `os` happens to import `sys`
    subprocess.run([sys.executable, "-m", "pip", "install", "-q", *pkgs], check=True)
# Install timm on demand: pip is only invoked when the import itself fails.
try:
    import timm
except Exception:
    _pipq("timm==1.0.9")   # pinned version
    import timm

import torch, torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.hub import load_state_dict_from_url
from sklearn.metrics import roc_auc_score, average_precision_score, roc_curve

# ---------- hardware / knobs ----------
# Run on the GPU when one is visible, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
_on_gpu = (device.type == "cuda")
torch.backends.cudnn.benchmark = _on_gpu
softmax = torch.nn.Softmax(dim=1)   # applied to [batch, 2] logits in score()

IMG_SIZE = 299              # default side length used by the prep_* functions
CROP_SIZES = [256, 280]     # square crop sizes used for test-time augmentation
FRAME_CAP = 120             # per video; adjust to 100–150 depending on time
# Smaller batches and no worker processes when running on the CPU.
BATCH, NUM_WORKERS = (24, 2) if _on_gpu else (8, 0)

# ---------- utils ----------
IMG_EXTS = (".jpg", ".jpeg", ".png", ".bmp", ".webp")

def list_imgs(d):
    """Return the sorted full paths of the image files directly inside ``d``.

    A file counts as an image when its lower-cased name ends with one of
    ``IMG_EXTS``. Returns an empty list when ``d`` is not a directory.
    """
    if not os.path.isdir(d):
        return []
    found = []
    for entry in os.listdir(d):
        if entry.lower().endswith(IMG_EXTS):
            found.append(os.path.join(d, entry))
    found.sort()
    return found
# Collect the frame files of each class; both folders must contain at least one image.
reals = list_imgs(REAL_DIR)
fakes = list_imgs(FAKE_DIR)
assert len(reals) and len(fakes), f"No images found. REAL={len(reals)} FAKE={len(fakes)}."

def infer_video_name(p):
    """Derive the video identifier from a frame file path.

    The file stem is reduced as follows:
      * ``<name>_frame<digits>`` -> ``<name>`` (only when ``<name>`` is non-empty);
      * otherwise a trailing ``_<digits>`` or ``-<digits>`` is stripped;
      * otherwise the stem is returned unchanged.
    """
    stem, _ext = os.path.splitext(os.path.basename(p))
    frame_suffix = re.search(r"_frame(\d+)$", stem)
    if frame_suffix is not None:
        prefix = stem[:frame_suffix.start()]
        if prefix:
            return prefix
    return re.sub(r"[_\-]\d+$", "", stem)

def frame_index(p):
    """Return the frame number encoded in a frame file name.

    Looks for ``_frame<digits>`` first. If that is absent, falls back to a
    trailing ``_<digits>`` / ``-<digits>`` on the stem, which is the same
    suffix ``infer_video_name`` strips to obtain the video name. Without this
    fallback every such frame would share the sentinel index, leaving frame
    order arbitrary and making (video_name, idx) non-unique.

    Returns:
        int: the parsed frame number, or ``10**9`` when no number is found.
    """
    name = os.path.basename(p)
    m = re.search(r"_frame(\d+)", name)
    if m is None:
        stem = os.path.splitext(name)[0]
        m = re.search(r"[_\-](\d+)$", stem)
    return int(m.group(1)) if m else 10**9

def build_df(paths, label):
    """Build a frame table for one class.

    Each path becomes a row with columns ``path``, ``video_name``, ``idx`` and
    ``label``; the result is sorted by video name, then frame index.
    """
    records = []
    for p in paths:
        records.append({
            "path": p,
            "video_name": infer_video_name(p),
            "idx": frame_index(p),
            "label": label,
        })
    frame_table = pd.DataFrame(records)
    return frame_table.sort_values(["video_name", "idx"])

# Real frames get label 0, fake frames label 1; keep the first FRAME_CAP frames of each video.
_all_frames = pd.concat([build_df(reals, 0), build_df(fakes, 1)], ignore_index=True)
df_sel = (
    _all_frames
    .sort_values(["video_name", "idx"])
    .groupby("video_name", as_index=False)
    .head(FRAME_CAP)
    .reset_index(drop=True)
)

# ---------- SRM filters + RGB preprocess ----------
def srm_kernels():
    """Build the fixed SRM filter bank as a conv2d weight of shape (3, 3, 5, 5).

    Output channel 0 uses KB, channel 1 uses KV and channel 2 uses H2; each
    kernel is repeated over the three input channels.
    """
    kb = np.array([[0, 0, 0, 0, 0],
                   [0, -1, 2, -1, 0],
                   [0, 2, -4, 2, 0],
                   [0, -1, 2, -1, 0],
                   [0, 0, 0, 0, 0]], np.float32) / 4.0
    kv = np.array([[-1, 2, -2, 2, -1],
                   [2, -6, 8, -6, 2],
                   [-2, 8, -12, 8, -2],
                   [2, -6, 8, -6, 2],
                   [-1, 2, -2, 2, -1]], np.float32) / 12.0
    h2 = np.array([[0, 0, 0, 0, 0],
                   [0, 0, 0, 0, 0],
                   [0, 1, -2, 1, 0],
                   [0, 0, 0, 0, 0],
                   [0, 0, 0, 0, 0]], np.float32) / 2.0
    bank = np.stack([kb, kv, h2])                        # (3, 5, 5): one kernel per output channel
    weight = np.repeat(bank[:, None, :, :], 3, axis=1)   # copy each kernel across the 3 input channels
    return torch.from_numpy(weight)
SRM_WEIGHT = srm_kernels()

# Per-channel mean / std used to normalise network inputs (RGB order).
IMN_MEAN=np.array([0.485,0.456,0.406],np.float32)
IMN_STD =np.array([0.229,0.224,0.225],np.float32)

def _load_rgb01(path, out):
    """Read ``path`` and return an (out, out, 3) float32 RGB array scaled to [0, 1].

    OpenCV is tried first; when it cannot decode the file, Pillow is used.
    Pillow raises if it cannot open the file either.
    """
    im = cv2.imread(path, cv2.IMREAD_COLOR)
    if im is None:
        # The context manager closes the file handle that a bare Image.open() leaves open.
        with Image.open(path) as pil_im:
            rgb = np.array(pil_im.convert("RGB"))
    else:
        rgb = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    return cv2.resize(rgb, (out, out), interpolation=cv2.INTER_CUBIC).astype(np.float32) / 255.0

def _imagenet_normalize(chw):
    """Apply (x - IMN_MEAN) / IMN_STD per channel to a channel-first array."""
    return (chw - IMN_MEAN[:, None, None]) / IMN_STD[:, None, None]

def prep_srm_rgb(path, out=IMG_SIZE):
    """Load a frame and return its SRM residual as a normalised [3, out, out] float32 tensor.

    The resized RGB image is convolved with ``SRM_WEIGHT``, clamped to
    [-3, 3], rescaled to [0, 1] and then normalised with IMN_MEAN / IMN_STD.
    """
    im = _load_rgb01(path, out)
    x = torch.from_numpy(im.transpose(2, 0, 1)).unsqueeze(0)                            # [1,3,H,W]
    # NOTE(review): the input is scaled to [0, 1] here, so the ±3 clamp will rarely bind;
    # confirm this matches the preprocessing the checkpoint was trained with.
    y = F.conv2d(x, SRM_WEIGHT, stride=1, padding=2).clamp_(-3, 3).squeeze(0).numpy()   # [3,H,W]
    y = (y + 3.0) / 6.0
    y = _imagenet_normalize(y)
    return torch.from_numpy(y.astype(np.float32))

def prep_rgb(path, out=IMG_SIZE):
    """Load a frame and return it as a normalised [3, out, out] float32 RGB tensor."""
    im = _load_rgb01(path, out)
    x = _imagenet_normalize(im.transpose(2, 0, 1))
    return torch.from_numpy(x.astype(np.float32))

class DSSRM(Dataset):
    """Frame dataset whose items carry the SRM-residual tensor of each frame."""

    def __init__(self, df):
        # Positional access below relies on a clean 0..n-1 index.
        self.df = df.reset_index(drop=True)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, i):
        """Return (tensor, label, video_name, frame idx) for row ``i``."""
        row = self.df.iloc[i]
        tensor = prep_srm_rgb(row["path"])
        return tensor, int(row["label"]), str(row["video_name"]), int(row["idx"])

class DSRGB(Dataset):
    """Frame dataset whose items carry the normalised RGB tensor of each frame."""

    def __init__(self, df):
        # Positional access below relies on a clean 0..n-1 index.
        self.df = df.reset_index(drop=True)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, i):
        """Return (tensor, label, video_name, frame idx) for row ``i``."""
        row = self.df.iloc[i]
        tensor = prep_rgb(row["path"])
        return tensor, int(row["label"]), str(row["video_name"]), int(row["idx"])

# ---------- model (quiet ImageNet + partial SRM load) ----------
# Build a 2-class Xception, then warm-start it from the ImageNet checkpoint:
# only tensors whose name AND shape match the model are copied.
model = timm.create_model('legacy_xception', pretrained=False, num_classes=2)
IMAGENET_URL="https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-cadene/xception-43020ad28.pth"
with contextlib.redirect_stdout(io.StringIO()):   # swallow anything the download prints to stdout
    iw = load_state_dict_from_url(IMAGENET_URL, progress=False, map_location="cpu")
ms = model.state_dict()
matched = {
    k: v
    for k, v in iw.items()
    if k in ms and ms[k].shape == v.shape
}
ms.update(matched)
model.load_state_dict(ms, strict=False)

def try_load_srm(model, ckpt, min_cover=0.5):
    """Best-effort partial load of a checkpoint into ``model``.

    The checkpoint may wrap its weights under a common key (``state_dict``,
    ``model``, ...). Known key prefixes are stripped in one pass, and only
    tensors whose name and shape match the model are kept. They are loaded
    only when they cover at least ``min_cover`` of the model's state-dict
    entries.

    Returns:
        (ok, cover): ``ok`` is True when weights were loaded; ``cover`` is the
        matched fraction of model entries (0.0 if it was never computed).
        Any exception is reported as a ``[warn]`` line instead of propagating.
    """
    wrapper_keys = ("state_dict","model","net","weights","model_state","ema_state_dict")
    prefixes = ("module.","model.","net.","backbone.","backbone_rgb.","backbone_srm.")
    ok, cover = False, 0.0
    try:
        # NOTE(review): depending on the torch version's `weights_only` default this may
        # unpickle arbitrary objects; only point it at trusted checkpoints.
        payload = torch.load(ckpt, map_location="cpu")
        if isinstance(payload, dict):
            # Unwrap the first wrapper key that holds a dict.
            inner = next(
                (payload[w] for w in wrapper_keys if w in payload and isinstance(payload[w], dict)),
                None,
            )
            if inner is not None:
                payload = inner
        if isinstance(payload, dict):
            renamed = {}
            for key, tensor in payload.items():
                if not isinstance(key, str):
                    continue
                # Single pass over the prefixes, in this fixed order.
                for pref in prefixes:
                    if key.startswith(pref):
                        key = key[len(pref):]
                renamed[key] = tensor
            target = model.state_dict()
            usable = {k: v for k, v in renamed.items() if k in target and target[k].shape == v.shape}
            cover = len(usable) / max(1, len(target))
            if cover >= min_cover:
                target.update(usable)
                model.load_state_dict(target, strict=False)
                ok = True
    except Exception as e:
        print("[warn] SRM load:", e)
    return ok, cover

# Apply the SRM checkpoint on top of the ImageNet initialisation.
weights_loaded, coverage = try_load_srm(model, SRM_WEIGHTS, 0.5)
if not weights_loaded:
    # Without this, the line below would claim success even though the checkpoint was rejected.
    print(f"[warn] SRM checkpoint was NOT applied (coverage={coverage:.2f}); "
          f"the metrics below do not reflect {SRM_WEIGHTS}")
model = model.to(device).eval()
print("SRM model loaded")  # requested line

# ---------- TTA ----------
def ten_crops(x, crop):
    """Return ten square views of a [B, C, H, W] batch.

    Order: top-left, top-right, bottom-left, bottom-right, centre, followed by
    the same five views flipped along the width axis.
    """
    _, _, H, W = x.shape
    windows = [
        (slice(0, crop), slice(0, crop)),                      # top-left
        (slice(0, crop), slice(W - crop, W)),                  # top-right
        (slice(H - crop, H), slice(0, crop)),                  # bottom-left
        (slice(H - crop, H), slice(W - crop, W)),              # bottom-right
        (slice((H - crop) // 2, (H + crop) // 2),
         slice((W - crop) // 2, (W + crop) // 2)),             # centre
    ]
    views = [x[..., rows, cols] for rows, cols in windows]
    mirrored = [torch.flip(v, dims=[3]) for v in views]
    return views + mirrored

@torch.no_grad()
def forward_tta(xb):
    """Average the model's logits over every TTA view of a batch.

    For each size in ``CROP_SIZES`` the batch is expanded with ``ten_crops``
    and each view is run through ``model`` (under CUDA autocast on GPU).

    Returns:
        float32 logits averaged over all views.
    """
    use_amp = (device.type == "cuda")
    total, n_views = None, 0
    for crop in CROP_SIZES:
        for view in ten_crops(xb, crop):
            with torch.amp.autocast('cuda', enabled=use_amp):
                out = model(view)
            # Accumulate in float32: summing half-precision logits quantises the
            # averaged scores, which creates ties in the ROC-based thresholds.
            out = out.float()
            total = out if total is None else (total + out)
            n_views += 1
    # Divide by the number of views actually evaluated rather than a hard-coded 10 per crop.
    return total / float(n_views)

# ---------- scoring ----------
@torch.no_grad()
def score(df, mode="srm"):
    """Score every frame in ``df`` and return one row per frame.

    ``mode="srm"`` feeds SRM residuals (DSSRM); any other value feeds plain
    RGB (DSRGB). Output columns: ``video_name``, ``idx``, ``true_label``
    ("fake"/"real"), ``prob_fake`` (softmax of class 1) and ``logit``
    (class-1 minus class-0 logit), sorted by video name and frame index.
    """
    dataset_cls = DSSRM if mode == "srm" else DSRGB
    on_gpu = (device.type == "cuda")
    loader = DataLoader(dataset_cls(df), batch_size=BATCH, shuffle=False,
                        num_workers=NUM_WORKERS, pin_memory=on_gpu)
    vnames, idxs, probs, logits, labels = [], [], [], [], []
    for xb, yb, vb, ib in loader:
        lo = forward_tta(xb.to(device, non_blocking=on_gpu))
        probs.append(softmax(lo)[:, 1].detach().cpu().numpy())
        logits.append((lo[:, 1] - lo[:, 0]).detach().cpu().numpy())
        labels.append(np.array(yb))
        vnames.extend(vb)
        idxs.extend(ib)
    is_fake = np.concatenate(labels) == 1
    out = pd.DataFrame({
        "video_name": pd.Series(vnames, dtype=object),
        "idx": pd.Series(idxs, dtype=np.int64),
        "true_label": pd.Series(np.where(is_fake, "fake", "real"), dtype=object),
        "prob_fake": pd.Series(np.concatenate(probs).astype(float), dtype=np.float64),
        "logit": pd.Series(np.concatenate(logits).astype(float), dtype=np.float64),
    })
    out = out.sort_values(["video_name", "idx"])
    return out.reset_index(drop=True)

# Score the selected frames twice with the same network: once on SRM residuals, once on plain RGB.
df_srm = score(df_sel, mode="srm")
df_rgb = score(df_sel, mode="rgb")  # late fusion option

def make_ens(alpha=0.6):
    """Late-fuse the SRM and RGB frame scores.

    Frames are matched on (video_name, idx, true_label). The fused logit is
    ``alpha * srm + (1 - alpha) * rgb`` and ``prob_fake`` is its sigmoid.
    Returns the (empty) merge result unchanged when no frames match.
    """
    keys = ["video_name", "idx", "true_label"]
    fused = df_srm.merge(df_rgb, on=keys, how="inner", suffixes=("_srm", "_rgb"))
    if fused.empty:
        return fused
    fused["logit"] = alpha * fused["logit_srm"] + (1 - alpha) * fused["logit_rgb"]
    fused["prob_fake"] = 1.0 / (1.0 + np.exp(-fused["logit"]))
    return fused[keys + ["prob_fake", "logit"]]

# Candidate score tables for the sweep; the ensemble is added only when the merge produced rows.
ens = make_ens(alpha=0.6)
modes = {"srm": df_srm, "rgb": df_rgb}
if not ens.empty:
    modes["ens0.6"] = ens

# ---------- helpers ----------
def qnp(vals, q):
    """Linear-interpolated quantile ``q`` of ``vals`` as a Python float.

    Tries the ``method=`` keyword first and falls back to ``interpolation=``
    when that raises ``TypeError``.
    """
    try:
        result = float(np.quantile(vals, q, method="linear"))
    except TypeError:
        result = float(np.quantile(vals, q, interpolation="linear"))
    return result

def subset_cap(df, cap):
    """Keep the first ``cap`` frames (by ``idx``) of every video."""
    ordered = df.sort_values(["video_name", "idx"])
    per_video = ordered.groupby("video_name", as_index=False)
    return per_video.head(cap)

def apply_tau(df, tau, field):
    """Drop low-confidence frames, never emptying a video.

    Confidence is ``|prob_fake - 0.5|`` when ``field == "prob"`` and
    ``|logit|`` otherwise. Frames with confidence below ``tau`` are removed,
    except that a video left with no frames keeps all of them. A falsy
    ``tau`` returns ``df`` itself.
    """
    if not tau:
        return df
    frames = df.copy()
    if field == "prob":
        confidence = np.abs(frames["prob_fake"] - 0.5)
    else:
        confidence = np.abs(frames["logit"])
    frames["keep"] = confidence >= float(tau)
    survivors = frames.groupby("video_name")["keep"].transform("sum")
    frames.loc[survivors == 0, "keep"] = True   # rescue videos that lost every frame
    kept_rows = frames[frames["keep"]]
    return kept_rows.drop(columns=["keep"])

def aggregate_numpy(df, how, vals, conf=None):
    """Collapse per-frame values into one score per (video_name, true_label).

    ``vals`` (and ``conf`` when given) are attached to ``df`` row by row.
    ``how`` selects the reducer: ``median``, ``perc90``, ``perc95``, ``top10``
    (mean of the 10 largest), ``trim10`` (mean after dropping 10% from each
    end) or ``wmean`` (confidence-weighted mean, needs ``conf``). Anything
    else, including ``wmean`` without ``conf``, falls back to the median.

    Returns a DataFrame with columns ``video_name``, ``true_label``, ``score``
    in order of first appearance of each group.
    """
    work = df.copy()
    work["_val"] = vals
    weighted = conf is not None
    if weighted:
        work["_conf"] = conf
    records = []
    for (video, label), grp in work.groupby(["video_name", "true_label"], sort=False):
        raw = grp["_val"].to_numpy(dtype=float)
        n = raw.size
        if n == 0:
            continue
        ordered = np.sort(raw)
        if how == "perc90":
            score = qnp(ordered, 0.90)
        elif how == "perc95":
            score = qnp(ordered, 0.95)
        elif how == "top10":
            score = float(np.mean(ordered[-min(10, n):]))
        elif how == "trim10":
            k = int(0.1 * n)
            score = float(np.mean(ordered[k:max(n - k, 1)]))
        elif how == "wmean" and weighted:
            w = grp["_conf"].to_numpy(dtype=float)
            score = float(np.sum(raw * w) / max(float(np.sum(w)), 1e-8))
        else:
            score = float(np.median(ordered))
        records.append((video, label, score))
    return pd.DataFrame(records, columns=["video_name", "true_label", "score"])

def metrics(scores, labels):
    """Return ``(auc, eer, ap)`` for binary ``labels`` and real-valued ``scores``.

    The EER is approximated at the ROC point where FPR and FNR are closest,
    as the mean of the two rates at that point.
    """
    auc = roc_auc_score(labels, scores)
    ap = average_precision_score(labels, scores)
    fpr, tpr, _thresholds = roc_curve(labels, scores)
    fnr = 1 - tpr
    crossing = int(np.nanargmin(np.abs(fnr - fpr)))
    eer = float((fpr[crossing] + fnr[crossing]) / 2.0)
    return auc, eer, ap

def _frame_values(lg, field, flip, temp):
    """Turn raw frame logits into ``(vals, conf)`` for the requested field.

    ``field == "prob"``: vals = sigmoid(temp * lg), conf = |vals - 0.5|.
    Otherwise: vals = temp * lg, conf = |vals|. ``flip`` inverts the score
    direction (1 - p, or -logit) before conf is computed.
    """
    if field == "prob":
        vals = 1.0 / (1.0 + np.exp(-(temp * lg)))
        if flip:
            vals = 1.0 - vals
        conf = np.abs(vals - 0.5)
    else:
        vals = temp * lg
        if flip:
            vals = -vals
        conf = np.abs(vals)
    return vals, conf

def eval_cfg(df_base, cap, tau, agg, flip, field, temp=1.0):
    """Evaluate one aggregation configuration at video level.

    Steps: cap frames per video, drop low-confidence frames with
    ``apply_tau`` (which works on the stored ``prob_fake`` / ``logit``
    columns, not the temperature-scaled values), build per-frame values,
    aggregate per video with ``agg`` and compute metrics.

    Returns:
        ``((auc, eer, ap), cfg_dict)``, or ``None`` when no frames or videos
        remain or only one class is present.
    """
    ds = subset_cap(df_base, cap).copy()
    ds = apply_tau(ds, tau, field=("prob" if field == "prob" else "logit"))
    if ds.empty: return None
    # Values are computed once, after filtering; the pre-filter computation was discarded anyway.
    vals, conf = _frame_values(ds["logit"].to_numpy(dtype=float), field, flip, temp)
    dv = aggregate_numpy(ds, agg, vals, conf=(conf if agg == "wmean" else None))
    if dv.empty: return None
    y = (dv["true_label"] == "fake").astype(int).to_numpy()
    if len(np.unique(y)) < 2: return None
    s = dv["score"].to_numpy(dtype=float)
    return metrics(s, y), dict(cap=cap, tau=tau, agg=agg, flip=flip, field=field, temp=temp)

# ---------- small, safe sweep (kept light to save GPU) ----------
CAPS   = [100, 120]
TAU_P  = [0.00, 0.02, 0.05]     # confidence cut-offs tried when field == "prob"
TAU_L  = [0.00, 0.50, 1.00]     # confidence cut-offs tried when field == "logit"
AGGS   = ["median","perc90","top10","trim10","wmean"]
FLIPS  = [False, True]
FIELDS = ["prob","logit"]
TEMPS  = [0.75, 1.0]

# Enumerate the whole grid up front, in the order: mode, cap, field, tau, agg, flip, temp.
_grid = [
    (mode_name, cap, field, tau, agg, flip, temp)
    for mode_name in modes
    for cap in CAPS
    for field in FIELDS
    for tau in (TAU_P if field == "prob" else TAU_L)
    for agg in AGGS
    for flip in FLIPS
    for temp in TEMPS
]

# NOTE(review): the winner is chosen by AUC computed on the same labelled videos that are
# reported below, so the printed metrics are an optimistic (selected-on-test) estimate.
best = best_cfg = best_mode = None
for mode_name, cap, field, tau, agg, flip, temp in _grid:
    df_mode = modes[mode_name]
    res = eval_cfg(df_mode, cap, tau, agg, flip, field, temp)
    if res is None:
        continue
    cand, cfg = res
    # Higher AUC wins; on an exact AUC tie the lower EER wins.
    higher_auc = (best is None) or (cand[0] > best[0])
    tie_lower_eer = (best is not None) and (cand[0] == best[0]) and (cand[1] < best[1])
    if higher_auc or tie_lower_eer:
        best, best_cfg, best_mode = cand, cfg, mode_name

# ---------- print metrics ----------
if best is None:
    raise SystemExit("No valid configuration produced metrics. Check data/paths.")
auc, eer, ap = best
headline = f"AUC={auc:.4f} | EER={eer:.4f} | AP={ap:.4f}"
print(headline)
# One diagnostic line describing the winning configuration and the checkpoint coverage.
info_line = (
    f"[info] dataset='{DATASET_NAME}', device={device.type}, mode={best_mode}, cap={best_cfg['cap']}, "
    f"tau={best_cfg['tau']}, agg={best_cfg['agg']}, flip={best_cfg['flip']}, field={best_cfg['field']}, "
    f"temp={best_cfg['temp']}, weights_loaded={weights_loaded}, cover={coverage:.2f}"
)
print(info_line)


Mounted at /content/drive
SRM model loaded
AUC=0.8278 | EER=0.3000 | AP=0.7929
[info] dataset='Celeb-DF (Cropped faces)', device=cuda, mode=rgb, cap=100, tau=0.5, agg=median, flip=False, field=logit, temp=0.75, weights_loaded=True, cover=0.99


In [None]:
# === SRM large table (n_frames = 20 per video) — Celeb-DF ===
# Columns:
# dataset, detector, video_name, true_label, n_frames, n_correct_frames, n_wrong_frames,
# frame_accuracy, avg_prob_fake, std_prob_fake, video_pred_by_avg, video_correct_by_avg,
# video_pred_by_majority, video_correct_by_majority

import numpy as np, pandas as pd
from sklearn.metrics import roc_curve

# ---- source: use the frame-level results from your Celeb-DF run ----
# Prefer df_rgb (your best mode). Fallback to df_scores if needed.
if 'df_rgb' in globals() and not df_rgb.empty:
    df_src = df_rgb.copy()
elif 'df_scores' in globals() and not df_scores.empty:
    df_src = df_scores.copy()
else:
    raise SystemExit("No frame-level results found (df_rgb/df_scores). Run the Celeb-DF SRM scoring cell first.")

# Reuse the dataset label set by the scoring cell so the table cannot be mislabelled when
# USE_DATASET changes; fall back to the cropped-faces label only if it is not defined.
DATASET_NAME  = globals().get("DATASET_NAME", "Celeb-DF (Cropped faces)")
DETECTOR_NAME = "SRM"

# ---- sanitize ----
df = df_src.copy()
df["video_name"] = df["video_name"].astype(str)
df["true_label"] = df["true_label"].astype(str)
if "prob_fake" not in df.columns:
    raise SystemExit("Frame scores must contain 'prob_fake'.")
# Non-numeric scores become NaN and are dropped.
df["prob_fake"] = pd.to_numeric(df["prob_fake"], errors="coerce").astype(float)
df = df.dropna(subset=["prob_fake"]).reset_index(drop=True)

# ---- keep AT MOST 20 frames per video (the first 20 by idx) ----
CAP = 20
df20 = df.sort_values(["video_name","idx"]).groupby("video_name", as_index=False).head(CAP).reset_index(drop=True)

# If any video has <20 frames available, we'll still use what's there—but n_frames will then be <20.
# (Most Celeb-DF frame dumps have plenty, so you should see 20 per video.)

# ---- GLOBAL thresholds (computed on these 20-frame subsets) ----
# Frame-level threshold: the ROC point that maximises Youden's J (tpr - fpr).
y_frame = df20["true_label"].eq("fake").astype(int).to_numpy()
s_frame = df20["prob_fake"].to_numpy(dtype=float)
fpr, tpr, thr = roc_curve(y_frame, s_frame)
if len(thr):
    t_frame = float(thr[np.nanargmax(tpr - fpr)])
else:
    t_frame = 0.5

# Video-level threshold: same rule, applied to each video's mean prob_fake.
avg_df = (
    df20.groupby(["video_name","true_label"], sort=False)["prob_fake"]
        .mean()
        .rename("avg_prob")
        .reset_index()
)
y_avg = avg_df["true_label"].eq("fake").astype(int).to_numpy()
s_avg = avg_df["avg_prob"].to_numpy(dtype=float)
fpr2, tpr2, thr2 = roc_curve(y_avg, s_avg)
if len(thr2):
    t_avg = float(thr2[np.nanargmax(tpr2 - fpr2)])
else:
    t_avg = 0.5

# ---- frame predictions (for majority rule) ----
df20["frame_pred"] = np.where(df20["prob_fake"] >= t_frame, "fake", "real")

# ---- build the table ----
def _summarize_video(vname, tlabel, grp):
    """Build one table row from the capped frames of a single video."""
    probs = grp["prob_fake"].to_numpy(dtype=float)
    frame_pred = grp["frame_pred"].to_numpy()
    n = int(probs.size)  # should be 20 per video (if available)
    has_frames = n > 0

    n_correct = int((frame_pred == tlabel).sum())
    fake_votes = int((frame_pred == "fake").sum())
    real_votes = n - fake_votes

    avg_prob = float(np.mean(probs)) if has_frames else float('nan')
    std_prob = float(np.std(probs, ddof=0)) if has_frames else float('nan')

    pred_by_avg = "fake" if avg_prob >= t_avg else "real"
    pred_by_maj = "fake" if fake_votes >= real_votes else "real"   # a tied vote counts as "fake"

    return {
        "dataset": DATASET_NAME,
        "detector": DETECTOR_NAME,
        "video_name": vname,
        "true_label": tlabel,
        "n_frames": n,
        "n_correct_frames": n_correct,
        "n_wrong_frames": int(n - n_correct),
        "frame_accuracy": float(n_correct / max(1, n)),
        "avg_prob_fake": avg_prob,
        "std_prob_fake": std_prob,
        "video_pred_by_avg": pred_by_avg,
        "video_correct_by_avg": int(pred_by_avg == tlabel),           # 1/0
        "video_pred_by_majority": pred_by_maj,
        "video_correct_by_majority": int(pred_by_maj == tlabel),      # 1/0
    }

rows = [
    _summarize_video(vname, tlabel, grp)
    for (vname, tlabel), grp in df20.groupby(["video_name","true_label"], sort=True)
]

_table_columns = [
    "dataset","detector","video_name","true_label",
    "n_frames","n_correct_frames","n_wrong_frames","frame_accuracy",
    "avg_prob_fake","std_prob_fake",
    "video_pred_by_avg","video_correct_by_avg",
    "video_pred_by_majority","video_correct_by_majority"
]
table_srm_celebdf_20 = (
    pd.DataFrame(rows, columns=_table_columns)
      .sort_values(["true_label","video_name"], kind="stable")
      .reset_index(drop=True)
)

# ---- display (all rows, no wrapping) ----
_display_options = {
    "display.max_rows": 100000,
    "display.max_columns": 1000,
    "display.width": 10000,
    "display.expand_frame_repr": False,
}
for _opt_name, _opt_value in _display_options.items():
    pd.set_option(_opt_name, _opt_value)
display(table_srm_celebdf_20)


Unnamed: 0,dataset,detector,video_name,true_label,n_frames,n_correct_frames,n_wrong_frames,frame_accuracy,avg_prob_fake,std_prob_fake,video_pred_by_avg,video_correct_by_avg,video_pred_by_majority,video_correct_by_majority
0,Celeb-DF (Cropped faces),SRM,id0_id1_0000,fake,20,20,0,1.0,0.680664,0.011831,fake,1,fake,1
1,Celeb-DF (Cropped faces),SRM,id0_id1_0001,fake,20,8,12,0.4,0.616797,0.020469,fake,1,real,0
2,Celeb-DF (Cropped faces),SRM,id0_id1_0002,fake,20,0,20,0.0,0.607178,0.002213,fake,1,real,0
3,Celeb-DF (Cropped faces),SRM,id0_id1_0003,fake,20,20,0,1.0,0.717725,0.009478,fake,1,fake,1
4,Celeb-DF (Cropped faces),SRM,id0_id1_0005,fake,20,20,0,1.0,0.652905,0.012655,fake,1,fake,1
5,Celeb-DF (Cropped faces),SRM,id0_id1_0006,fake,20,20,0,1.0,0.711572,0.007,fake,1,fake,1
6,Celeb-DF (Cropped faces),SRM,id0_id1_0007,fake,20,19,1,0.95,0.626855,0.008448,fake,1,fake,1
7,Celeb-DF (Cropped faces),SRM,id0_id1_0009,fake,20,15,5,0.75,0.621069,0.013299,fake,1,fake,1
8,Celeb-DF (Cropped faces),SRM,id0_id2_0000,fake,20,20,0,1.0,0.679639,0.011834,fake,1,fake,1
9,Celeb-DF (Cropped faces),SRM,id0_id2_0001,fake,20,6,14,0.3,0.616748,0.024685,fake,1,real,0


In [None]:
# Save the Celeb-DF SRM table (20-frame version) to Google Drive
from google.colab import drive
drive.mount('/content/drive', force_remount=False)

import os

# The Drive root may appear as either "MyDrive" or "My Drive"; use whichever exists.
_mydrive = "/content/drive/MyDrive"
ROOT = _mydrive if os.path.isdir(_mydrive) else "/content/drive/My Drive"
OUT_DIR = os.path.join(ROOT, "SRM results Celeb DF")
os.makedirs(OUT_DIR, exist_ok=True)
DEST = os.path.join(OUT_DIR, "srm_celebdf_video_results_20frames.csv")

# The table comes from the previous cell; stop early if it is missing or empty.
if 'table_srm_celebdf_20' not in globals() or table_srm_celebdf_20.empty:
    raise SystemExit("No 'table_srm_celebdf_20' found. Run the table cell first.")

# Store the correctness flags as plain 1/0 integers.
_flag_columns = ("video_correct_by_avg", "video_correct_by_majority")
for col in _flag_columns:
    if col not in table_srm_celebdf_20.columns:
        continue
    table_srm_celebdf_20[col] = table_srm_celebdf_20[col].astype(int)

table_srm_celebdf_20.to_csv(DEST, index=False)
print(f"[saved] {DEST} (rows={len(table_srm_celebdf_20)})")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
[saved] /content/drive/MyDrive/SRM results Celeb DF/srm_celebdf_video_results_20frames.csv (rows=100)


In [None]:
# === SRM small table (yes/no) — Celeb-DF, 20 frames per video ===
# Columns: dataset, detector, video_name, true_label, correctly_predicted

import numpy as np, pandas as pd
from sklearn.metrics import roc_curve

# Use the frame-level results from your Celeb-DF run
# Prefer df_rgb (your best mode). Fallback to df_scores if available.
if 'df_rgb' in globals() and not df_rgb.empty:
    df_src = df_rgb.copy()
elif 'df_scores' in globals() and not df_scores.empty:
    df_src = df_scores.copy()
else:
    raise SystemExit("No frame-level results found (df_rgb/df_scores). Run the Celeb-DF SRM scoring cell first.")

# Reuse the dataset label set by the scoring cell so the table cannot be mislabelled when
# USE_DATASET changes; fall back to the cropped-faces label only if it is not defined.
DATASET_NAME  = globals().get("DATASET_NAME", "Celeb-DF (Cropped faces)")
DETECTOR_NAME = "SRM"

# Clean, then keep at most 20 frames per video (the first 20 by idx)
df = df_src.copy()
df["video_name"] = df["video_name"].astype(str)
df["true_label"] = df["true_label"].astype(str)
if "prob_fake" not in df.columns:
    raise SystemExit("Missing 'prob_fake' in frame scores.")
# Non-numeric scores become NaN and are dropped.
df["prob_fake"] = pd.to_numeric(df["prob_fake"], errors="coerce").astype(float)
df = df.dropna(subset=["prob_fake"]).reset_index(drop=True)

CAP = 20
df20 = df.sort_values(["video_name","idx"]).groupby("video_name", as_index=False).head(CAP).reset_index(drop=True)

# Global thresholds on these 20-frame subsets
# 1) Frame-level threshold: the ROC point that maximises Youden's J (tpr - fpr)
y_frame = df20["true_label"].eq("fake").astype(int).to_numpy()
s_frame = df20["prob_fake"].to_numpy(dtype=float)
fpr, tpr, thr = roc_curve(y_frame, s_frame)
if len(thr):
    t_frame = float(thr[np.nanargmax(tpr - fpr)])
else:
    t_frame = 0.5

# 2) Per-video threshold: same rule, applied to each video's mean prob_fake
avg_df = (
    df20.groupby(["video_name","true_label"], sort=False)["prob_fake"]
        .mean()
        .rename("avg_prob")
        .reset_index()
)
y_avg = avg_df["true_label"].eq("fake").astype(int).to_numpy()
s_avg = avg_df["avg_prob"].to_numpy(dtype=float)
fpr2, tpr2, thr2 = roc_curve(y_avg, s_avg)
if len(thr2):
    t_avg = float(thr2[np.nanargmax(tpr2 - fpr2)])
else:
    t_avg = 0.5

# Majority rule: a video is "fake" when at least half of its frames are predicted fake
# (a tied vote counts as "fake").
df20["frame_pred"] = np.where(df20["prob_fake"] >= t_frame, "fake", "real")
# Group by (video_name, true_label), the same keys the average rule uses, so a name that
# occurs under both labels is never merged into a single vote.
maj_df = (
    df20.groupby(["video_name","true_label"], sort=False)["frame_pred"]
        .agg(lambda a: "fake" if 2 * (a == "fake").sum() >= a.size else "real")
        .rename("pred_maj")
        .reset_index()
)
maj_df["correct_maj"] = (maj_df["pred_maj"] == maj_df["true_label"]).astype(int)

# Average rule: threshold each video's mean prob_fake at t_avg
avg_df["pred_avg"] = np.where(avg_df["avg_prob"] >= t_avg, "fake", "real")
avg_df["correct_avg"] = (avg_df["pred_avg"] == avg_df["true_label"]).astype(int)

# Pick globally better rule (majority wins ties)
acc_avg = float(avg_df["correct_avg"].mean())
acc_maj = float(maj_df["correct_maj"].mean())
USE_METHOD = "majority" if acc_maj >= acc_avg else "average"

# Build final small table from whichever rule was selected
if USE_METHOD == "average":
    _chosen, _flag = avg_df, "correct_avg"
else:
    _chosen, _flag = maj_df, "correct_maj"

small_rows = [
    {
        "dataset": DATASET_NAME,
        "detector": DETECTOR_NAME,
        "video_name": video,
        "true_label": label,
        "correctly_predicted": "yes" if int(correct) == 1 else "no",
    }
    for video, label, correct in zip(_chosen["video_name"], _chosen["true_label"], _chosen[_flag])
]

_small_columns = ["dataset","detector","video_name","true_label","correctly_predicted"]
small_table_srm_celebdf = (
    pd.DataFrame(small_rows, columns=_small_columns)
      .sort_values(["true_label","video_name"], kind="stable")
      .reset_index(drop=True)
)

# Show all rows without wrapping
_display_options = {
    "display.max_rows": 100000,
    "display.max_columns": 1000,
    "display.width": 10000,
    "display.expand_frame_repr": False,
}
for _opt_name, _opt_value in _display_options.items():
    pd.set_option(_opt_name, _opt_value)
display(small_table_srm_celebdf)

print(f"[info] method={USE_METHOD}, acc_avg={acc_avg:.3f}, acc_maj={acc_maj:.3f}, t_frame={t_frame:.3f}, t_avg={t_avg:.3f}")


Unnamed: 0,dataset,detector,video_name,true_label,correctly_predicted
0,Celeb-DF (Cropped faces),SRM,id0_id1_0000,fake,yes
1,Celeb-DF (Cropped faces),SRM,id0_id1_0001,fake,yes
2,Celeb-DF (Cropped faces),SRM,id0_id1_0002,fake,yes
3,Celeb-DF (Cropped faces),SRM,id0_id1_0003,fake,yes
4,Celeb-DF (Cropped faces),SRM,id0_id1_0005,fake,yes
5,Celeb-DF (Cropped faces),SRM,id0_id1_0006,fake,yes
6,Celeb-DF (Cropped faces),SRM,id0_id1_0007,fake,yes
7,Celeb-DF (Cropped faces),SRM,id0_id1_0009,fake,yes
8,Celeb-DF (Cropped faces),SRM,id0_id2_0000,fake,yes
9,Celeb-DF (Cropped faces),SRM,id0_id2_0001,fake,yes


[info] method=average, acc_avg=0.740, acc_maj=0.690, t_frame=0.619, t_avg=0.606


In [None]:
# Save the Celeb-DF SRM small table CSV to the same folder
from google.colab import drive
drive.mount('/content/drive', force_remount=False)

import os

# The Drive root may appear as either "MyDrive" or "My Drive"; use whichever exists.
_mydrive = "/content/drive/MyDrive"
ROOT = _mydrive if os.path.isdir(_mydrive) else "/content/drive/My Drive"
OUT_DIR = os.path.join(ROOT, "SRM results Celeb DF")
os.makedirs(OUT_DIR, exist_ok=True)
DEST = os.path.join(OUT_DIR, "srm_celebdf_small_table_20frames.csv")

# The table comes from the previous cell; stop early if it is missing or empty.
_have_table = 'small_table_srm_celebdf' in globals() and not small_table_srm_celebdf.empty
if not _have_table:
    raise SystemExit("No 'small_table_srm_celebdf' found. Run the small-table cell first.")

small_table_srm_celebdf.to_csv(DEST, index=False)
print(f"[saved] {DEST} (rows={len(small_table_srm_celebdf)})")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
[saved] /content/drive/MyDrive/SRM results Celeb DF/srm_celebdf_small_table_20frames.csv (rows=100)
