In [None]:
# === F3Net on Celeb-DF — prints the headline metrics (AUC | EER | AP) plus one [info] line ===
# Dataset toggle: USE_DATASET in {"cropped_faces", "celebdf_effb4"}
# Dual input modes (FFT gray->3ch and raw RGB), simple logit-ensemble, small robust selection
# across caps × tau × aggs × orientation × field (prob/logit) × temperature.

from google.colab import drive
# Mount Google Drive; the frame folders and the checkpoint below are read from it.
drive.mount('/content/drive', force_remount=False)

# ---------- config: choose which Celeb-DF frames to use ----------
import os, re, sys, subprocess, numpy as np, pandas as pd, cv2
from PIL import Image

# Drive root: use ".../MyDrive" when that folder exists, otherwise ".../My Drive".
ROOT = "/content/drive/MyDrive" if os.path.isdir("/content/drive/MyDrive") else "/content/drive/My Drive"

USE_DATASET = "celebdf_effb4"   # <-- set to "cropped_faces" or "celebdf_effb4"

# Supported datasets: key -> (real-frame dir, fake-frame dir, display name).
_DATASETS = {
    "cropped_faces": (
        f"{ROOT}/frames_cropped_faces/real",
        f"{ROOT}/frames_cropped_faces/fake",
        "Celeb-DF (Face-cropped frames)",
    ),
    "celebdf_effb4": (
        f"{ROOT}/frames/celebdf_effb4/real",
        f"{ROOT}/frames/celebdf_effb4/fake",
        "Celeb-DF (EffB4 frames)",
    ),
}
# A misspelled USE_DATASET would otherwise fall through silently to the EffB4
# frames; reject unknown values so the wrong dataset is never scored by accident.
if USE_DATASET not in _DATASETS:
    raise ValueError(f"Unknown USE_DATASET={USE_DATASET!r}; expected one of {sorted(_DATASETS)}")
REAL_DIR, FAKE_DIR, DATASET_NAME = _DATASETS[USE_DATASET]

# Checkpoint that try_load_weights overlays onto the timm model.
F3NET_WEIGHTS = f"{ROOT}/DeepfakeBench_weights/f3net_best.pth"

# Fail fast when any required input is missing on Drive.
assert os.path.isdir(REAL_DIR), f"Missing: {REAL_DIR}"
assert os.path.isdir(FAKE_DIR), f"Missing: {FAKE_DIR}"
assert os.path.isfile(F3NET_WEIGHTS), f"Missing: {F3NET_WEIGHTS}"

# ---------- deps ----------
def _pipq(*pkgs):
    """Quietly pip-install ``pkgs`` using the running interpreter.

    ``check=True`` makes a failed install raise ``subprocess.CalledProcessError``.
    """
    command = [sys.executable, "-m", "pip", "install", "-q"]
    command.extend(pkgs)
    subprocess.run(command, check=True)
try:
    import timm
except Exception:
    _pipq("timm==1.0.9"); import timm

import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import roc_auc_score, average_precision_score, roc_curve

# ---------- hardware + knobs ----------
# Run on the GPU when torch reports CUDA as available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# cuDNN benchmark mode is switched on only for CUDA runs.
torch.backends.cudnn.benchmark = (device.type=="cuda")
# Softmax over dim 1, i.e. across the two class logits of each row.
softmax = torch.nn.Softmax(dim=1)

HEAVY_TTA   = True           # TTA toggle — NOTE(review): confirm forward_tta actually honours this flag
CROP_SIZES  = [256, 280]     # add 240 if VRAM allows
MAX_CAP     = 160            # score up to N frames/video; selection will subset
BATCH       = 24 if device.type=="cuda" else 8      # DataLoader batch size (smaller on CPU)
NUM_WORKERS = 2 if device.type=="cuda" else 0       # DataLoader workers (0 = load in the main process)

# ---------- list frames ----------
IMG_EXTS = (".jpg",".jpeg",".png",".bmp",".webp")
def list_imgs(d):
    """Return the sorted full paths of the image files directly inside ``d``.

    A file counts as an image when its lower-cased name ends with one of
    ``IMG_EXTS``. A path that is not a directory yields an empty list.
    """
    if not os.path.isdir(d):
        return []
    found = []
    for entry in os.listdir(d):
        if entry.lower().endswith(IMG_EXTS):
            found.append(os.path.join(d, entry))
    found.sort()
    return found
# Enumerate both classes up front and stop early when either folder yields no images.
reals = list_imgs(REAL_DIR); fakes = list_imgs(FAKE_DIR)
assert len(reals) and len(fakes), f"No images found. REAL={len(reals)} FAKE={len(fakes)}."

def infer_video_name(path):
    """Derive the video identifier from a frame file path.

    The extension is dropped, then a trailing ``_frame<digits>`` is removed.
    When that pattern is absent (or would leave an empty name), a trailing
    ``_<digits>`` / ``-<digits>`` suffix is removed instead.
    """
    filename = os.path.basename(path)
    stem, _ext = os.path.splitext(filename)
    pieces = re.split(r"_frame(\d+)$", stem)
    if len(pieces) > 1 and pieces[0]:
        return pieces[0]
    return re.sub(r"[_\-]\d+$", "", stem)

def frame_index(path):
    """Return the frame number encoded in a frame file name.

    Looks for ``_frame<digits>`` first. If that is absent, falls back to the
    trailing ``_<digits>`` / ``-<digits>`` suffix of the stem — the same suffix
    ``infer_video_name`` strips — so frames named like ``video_0007.png`` keep
    a distinct, ordered index instead of all collapsing to the sentinel.

    Returns ``10**9`` when no frame number can be found, which sorts such
    frames last.
    """
    name = os.path.basename(path)
    m = re.search(r"_frame(\d+)", name)
    if m is None:
        stem = os.path.splitext(name)[0]
        m = re.search(r"[_\-](\d+)$", stem)
    return int(m.group(1)) if m else 10**9

def build_df(paths, label):
    """Build a per-frame table for one class.

    Each path becomes a row with columns ``path``, ``video_name``, ``idx`` and
    ``label``. Rows are sorted by (``video_name``, ``idx``); ``idx`` and
    ``label`` are coerced to integers and ``video_name`` to object dtype.
    """
    records = []
    for p in paths:
        records.append({
            "path": p,
            "video_name": infer_video_name(p),
            "idx": frame_index(p),
            "label": label,
        })
    frame = pd.DataFrame(records)
    frame = frame.sort_values(["video_name", "idx"])
    frame["video_name"] = frame["video_name"].astype(object)
    for col in ("idx", "label"):
        frame[col] = pd.to_numeric(frame[col], errors="coerce").astype(int)
    return frame

# One row per frame: real frames are labelled 0, fake frames 1.
df_r = build_df(reals, 0)
df_f = build_df(fakes, 1)
df_all = pd.concat([df_r, df_f], ignore_index=True)
# Keep at most MAX_CAP frames per video, taken in ascending idx order.
# NOTE(review): the cap groups by video_name only, so a real and a fake video
# that share a name would share one cap — confirm names are unique across classes.
df_sel = (df_all.sort_values(["video_name","idx"]).groupby("video_name", as_index=False).head(MAX_CAP))

# ---------- preprocess (FFT & RGB) ----------
IMG_SIZE = 299
IMN_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
IMN_STD  = np.array([0.229, 0.224, 0.225], dtype=np.float32)

def fft_logmag(gray_f32):
    """Return the centred log-magnitude spectrum of a 2-D image as float32.

    The 2-D FFT is shifted so the zero-frequency term sits in the middle,
    ``log1p`` is applied to the magnitude, and the result is divided by its
    maximum (plus 1e-8 to avoid division by zero).
    """
    spectrum = np.fft.fftshift(np.fft.fft2(gray_f32))
    log_mag = np.log1p(np.abs(spectrum))
    log_mag = log_mag / (log_mag.max() + 1e-8)
    return log_mag.astype(np.float32)

def prep_fft_rgb(path, out_size=IMG_SIZE):
    """Load a frame and return its FFT log-magnitude as a normalised 3xHxW tensor.

    The image is read as grayscale with OpenCV (PIL is the fallback when
    OpenCV returns ``None``), resized to ``out_size`` x ``out_size``, scaled to
    [0, 1], converted with ``fft_logmag``, replicated to three channels and
    normalised with ``IMN_MEAN`` / ``IMN_STD``.
    """
    gray = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        gray = np.array(Image.open(path).convert("L"))
    gray = cv2.resize(gray, (out_size, out_size), interpolation=cv2.INTER_CUBIC)
    gray = gray.astype(np.float32) / 255.0
    spec = fft_logmag(gray)
    # Stack as HxWx3 first, then move channels to the front (3xHxW).
    chw = np.stack([spec, spec, spec], axis=2).transpose(2, 0, 1)
    mean = IMN_MEAN[:, None, None]
    std = IMN_STD[:, None, None]
    chw = (chw - mean) / std
    return torch.from_numpy(chw.astype(np.float32))

def prep_rgb(path, out_size=IMG_SIZE):
    """Load a frame and return it as a normalised RGB 3xHxW float32 tensor.

    The image is read in colour with OpenCV (PIL is the fallback when OpenCV
    returns ``None``; its RGB output is converted to BGR so both paths agree),
    resized to ``out_size`` x ``out_size``, scaled to [0, 1], converted to RGB
    and normalised with ``IMN_MEAN`` / ``IMN_STD``.
    """
    bgr = cv2.imread(path, cv2.IMREAD_COLOR)
    if bgr is None:
        pil_rgb = np.array(Image.open(path).convert("RGB"))
        bgr = cv2.cvtColor(pil_rgb, cv2.COLOR_RGB2BGR)
    bgr = cv2.resize(bgr, (out_size, out_size), interpolation=cv2.INTER_CUBIC)
    bgr = bgr.astype(np.float32) / 255.0
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    chw = rgb.transpose(2, 0, 1)
    mean = IMN_MEAN[:, None, None]
    std = IMN_STD[:, None, None]
    chw = (chw - mean) / std
    return torch.from_numpy(chw.astype(np.float32))

class DSFFT(Dataset):
    """Dataset over a frame table that yields FFT-preprocessed tensors.

    Each item is ``(tensor, label, video_name, idx)`` for one row of the table.
    """

    def __init__(self, df):
        # Positional access below relies on a clean 0..n-1 index.
        self.df = df.reset_index(drop=True)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, i):
        row = self.df.iloc[i]
        tensor = prep_fft_rgb(row["path"])
        return tensor, int(row["label"]), str(row["video_name"]), int(row["idx"])

class DSRGB(Dataset):
    """Dataset over a frame table that yields RGB-preprocessed tensors.

    Each item is ``(tensor, label, video_name, idx)`` for one row of the table.
    """

    def __init__(self, df):
        # Positional access below relies on a clean 0..n-1 index.
        self.df = df.reset_index(drop=True)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, i):
        row = self.df.iloc[i]
        tensor = prep_rgb(row["path"])
        return tensor, int(row["label"]), str(row["video_name"]), int(row["idx"])

# ---------- model + partial-load F3Net weights (Xception head) ----------
# timm's 'legacy_xception' with pretrained weights and a 2-class head; the
# checkpoint weights are overlaid afterwards by try_load_weights.
model = timm.create_model('legacy_xception', pretrained=True, num_classes=2)

def try_load_weights(model, ckpt_path, min_cover=0.5):
    """Best-effort partial load of a checkpoint into ``model``.

    The checkpoint may be a bare state dict or a dict wrapping one under a
    common key (``state_dict``, ``model``, ...). Wrapper prefixes such as
    ``module.`` / ``model.`` / ``net.`` / ``backbone.`` are stripped from the
    parameter names — repeatedly, so stacked prefixes like ``model.module.``
    are handled regardless of their order. Only entries whose name and shape
    match the model are used.

    Args:
        model: module to load into (modified in place on success).
        ckpt_path: path given to ``torch.load``.
        min_cover: minimum fraction of the model's state-dict entries that must
            be matched for the weights to be applied.

    Returns:
        ``(ok, cover)`` — whether weights were applied, and the matched
        fraction. Any exception is reported with a ``[warn]`` print and results
        in ``ok=False``.
    """
    prefixes = ("module.", "model.", "net.", "backbone.")
    wrappers = ("state_dict", "model", "net", "weights", "model_state", "ema_state_dict")

    def _normalize(name, known):
        # The first pass strips each prefix once in order; further passes run only
        # while something was stripped and the name is still unknown to the model.
        while True:
            before = name
            for pref in prefixes:
                if name.startswith(pref):
                    name = name[len(pref):]
            if name == before or name in known:
                return name

    ok = False
    cover = 0.0
    try:
        sd = torch.load(ckpt_path, map_location="cpu")
        if isinstance(sd, dict):
            for key in wrappers:
                if isinstance(sd.get(key), dict):
                    sd = sd[key]
                    break
        if isinstance(sd, dict):
            ms = model.state_dict()
            clean = {}
            for k, v in sd.items():
                if not isinstance(k, str):
                    continue
                clean[_normalize(k, ms)] = v
            # Entries without a ``shape`` (e.g. plain ints) are skipped rather than
            # raising and aborting the whole load.
            matched = {
                k: v for k, v in clean.items()
                if k in ms and getattr(v, "shape", None) == ms[k].shape
            }
            cover = len(matched) / max(1, len(ms))
            if cover >= min_cover:
                ms.update(matched)
                model.load_state_dict(ms, strict=False)
                ok = True
    except Exception as e:
        # Deliberately best-effort: report and let the caller decide via ``ok``.
        print("[warn] weight load:", e)
    return ok, cover

weights_loaded, coverage = try_load_weights(model, F3NET_WEIGHTS, min_cover=0.5)
if not weights_loaded:
    # Without the checkpoint the model keeps only what timm initialised, so make
    # the failure visible before any scores are produced.
    print(f"[warn] checkpoint weights were NOT applied (cover={coverage:.2f}); "
          f"scores below come from the timm-initialised model only.")
# Move to the selected device and switch to inference mode.
model = model.to(device).eval()

# ---------- heavy TTA ----------
def ten_crops(x, crop):
    """Return ten square views of a 4-D batch ``x``.

    The list holds the four corner crops and the centre crop (in the order
    top-left, top-right, bottom-left, bottom-right, centre), each of side
    ``crop``, followed by the same five crops flipped along the last dimension.
    """
    _, _, H, W = x.shape  # unpacking also enforces a 4-D input
    bottom = H - crop
    right = W - crop
    corner_origins = ((0, 0), (0, right), (bottom, 0), (bottom, right))
    plain = [x[..., r:r + crop, c:c + crop] for r, c in corner_origins]
    plain.append(x[..., (H - crop)//2:(H + crop)//2, (W - crop)//2:(W + crop)//2])
    mirrored = [torch.flip(view, dims=[3]) for view in plain]
    return plain + mirrored

@torch.no_grad()
def forward_tta(xb):
    """Return the model's logits for a batch, averaged over TTA views.

    With ``HEAVY_TTA`` enabled (and ``CROP_SIZES`` non-empty) every crop size
    contributes the views produced by ``ten_crops`` and the logits are
    averaged over all of them. Otherwise a single forward pass on the full
    input is returned. Autocast is used on CUDA only; logits are accumulated
    in float32 so summing many half-precision outputs does not lose precision.
    """
    use_amp = (device.type=="cuda")
    if not HEAVY_TTA or not CROP_SIZES:
        with torch.amp.autocast('cuda', enabled=use_amp):
            out = model(xb)
        return out.float()
    logits_sum = None
    n_views = 0
    for crop in CROP_SIZES:
        for view in ten_crops(xb, crop):
            with torch.amp.autocast('cuda', enabled=use_amp):
                out = model(view)
            out = out.float()
            logits_sum = out if logits_sum is None else (logits_sum + out)
            n_views += 1  # count real views instead of assuming 10 per crop size
    return logits_sum / float(n_views)

# ---------- score one mode ----------
@torch.no_grad()
def score_mode(df, mode="fft"):
    """Score every frame in ``df`` with one input mode and return a frame table.

    ``mode="fft"`` uses ``DSFFT``; anything else uses ``DSRGB``. The result has
    columns ``video_name``, ``idx``, ``true_label`` ("fake"/"real"),
    ``prob_fake`` (softmax of class 1) and ``logit`` (class-1 minus class-0
    logit), sorted by (``video_name``, ``idx``).
    """
    dataset_cls = DSFFT if mode=="fft" else DSRGB
    on_gpu = (device.type=="cuda")
    loader = DataLoader(dataset_cls(df), batch_size=BATCH, shuffle=False,
                        num_workers=NUM_WORKERS, pin_memory=on_gpu)
    names, frame_ids = [], []
    prob_chunks, margin_chunks, label_chunks = [], [], []
    for xb, yb, vb, ib in loader:
        xb = xb.to(device, non_blocking=on_gpu)
        logits = forward_tta(xb)
        prob_chunks.append(softmax(logits)[:,1].detach().cpu().numpy())
        margin_chunks.append((logits[:,1] - logits[:,0]).detach().cpu().numpy())
        label_chunks.append(np.array(yb))
        names.extend(vb)
        frame_ids.extend(ib)
    y_all = np.concatenate(label_chunks)
    scored = pd.DataFrame({
        "video_name": pd.Series(names, dtype=object),
        "idx": pd.Series(frame_ids, dtype=np.int64),
        "true_label": pd.Series(np.where(y_all==1,"fake","real"), dtype=object),
        "prob_fake": pd.Series(np.concatenate(prob_chunks).astype(float), dtype=np.float64),
        "logit": pd.Series(np.concatenate(margin_chunks).astype(float), dtype=np.float64),
    })
    scored = scored.sort_values(["video_name","idx"])
    return scored.reset_index(drop=True)

# Score the same capped frame set once per input mode.
df_fft = score_mode(df_sel, "fft")
df_rgb = score_mode(df_sel, "rgb")

# simple ensemble (average logits) on intersection
# NOTE(review): the inner merge pairs rows on (video_name, idx, true_label); if idx is
# not unique within a video the merge multiplies rows — confirm idx uniqueness.
df_ens = pd.merge(df_fft, df_rgb, on=["video_name","idx","true_label"], how="inner", suffixes=("_fft","_rgb"))
if not df_ens.empty:
    df_ens["logit"] = 0.5*(df_ens["logit_fft"] + df_ens["logit_rgb"])
    # Ensemble probability is the sigmoid of the averaged logit margin.
    df_ens["prob_fake"] = 1.0 / (1.0 + np.exp(-df_ens["logit"]))
    df_ens = df_ens[["video_name","idx","true_label","prob_fake","logit"]]

# Candidate score tables for the selection grid; "ens" falls back to the FFT
# table when the merge produced no rows.
modes = {
    "fft": df_fft,
    "rgb": df_rgb,
    "ens": df_ens if not df_ens.empty else df_fft
}

# ---------- helpers ----------
def subset_cap(df, cap):
    """Return the first ``cap`` frames of every video, ordered by (video_name, idx)."""
    ordered = df.sort_values(["video_name","idx"])
    per_video = ordered.groupby("video_name", as_index=False)
    return per_video.head(cap)

def qnp(vals, q):
    """Return the ``q``-quantile of ``vals`` (linear interpolation) as a float.

    Tries the ``method=`` keyword first and falls back to the older
    ``interpolation=`` keyword when the first call raises ``TypeError``.
    """
    try:
        return float(np.quantile(vals, q, method="linear"))
    except TypeError:
        pass
    return float(np.quantile(vals, q, interpolation="linear"))

def apply_tau(df, tau, field):
    """Drop low-confidence frames, keeping every video represented.

    For ``field == "prob"`` a frame is kept when ``|prob_fake - 0.5| >= tau``;
    otherwise when ``|logit| >= tau``. A video whose frames would all be
    dropped keeps all of them. A falsy ``tau`` returns ``df`` unchanged (the
    same object, not a copy).
    """
    if not tau:
        return df
    work = df.copy()
    if field == "prob":
        confidence = (work["prob_fake"] - 0.5).abs()
    else:
        confidence = work["logit"].abs()
    work["keep"] = confidence >= float(tau)
    survivors = work.groupby("video_name")["keep"].transform("sum")
    # Videos with no surviving frame fall back to using all of their frames.
    work.loc[survivors == 0, "keep"] = True
    kept_rows = work[work["keep"]]
    return kept_rows.drop(columns=["keep"])

def aggregate_numpy(df, how, field_vals):
    """Collapse per-frame values into one score per (video_name, true_label).

    ``field_vals`` is attached to a copy of ``df`` as the per-frame value.
    ``how`` selects the reducer: ``"median"``, ``"perc90"``, ``"perc95"``,
    ``"top10"`` (mean of the 10 largest values) or ``"trim10"`` (mean after
    dropping the lowest and highest 10%); any other name behaves like
    ``"median"``. Returns a table with columns ``video_name``, ``true_label``
    and ``score``.
    """
    out_cols = ["video_name","true_label","score"]
    work = df.copy()
    work["_val"] = field_vals
    collected = []
    for (vname, tlabel), grp in work.groupby(["video_name","true_label"], sort=False):
        v = np.sort(grp["_val"].to_numpy(dtype=float))
        n = len(v)
        if n == 0:
            continue
        if how in ("perc90", "perc95"):
            score = qnp(v, 0.90 if how == "perc90" else 0.95)
        elif how == "top10":
            score = float(np.mean(v[-min(10,n):]))
        elif how == "trim10":
            score = float(np.mean(v[int(0.1*n):max(int(0.9*n),1)]))
        else:
            # "median" and every unrecognised name
            score = float(np.median(v))
        collected.append((vname, tlabel, score))
    if collected:
        return pd.DataFrame(collected, columns=out_cols)
    return pd.DataFrame(columns=out_cols)

def metrics(scores, labels):
    """Return ``(auc, eer, ap)`` for binary ``labels`` and real-valued ``scores``.

    The EER is taken at the ROC point where the false-negative and
    false-positive rates are closest, as the mean of the two rates there.
    """
    auc = roc_auc_score(labels, scores)
    ap = average_precision_score(labels, scores)
    fpr, tpr, _thresholds = roc_curve(labels, scores)
    fnr = 1 - tpr
    gap = np.abs(fnr - fpr)
    k = int(np.nanargmin(gap))
    eer = float((fpr[k] + fnr[k]) / 2.0)
    return auc, eer, ap

def eval_cfg(df_base, cap, tau, agg, flip, field, temp=1.0):
    """Evaluate one aggregation configuration on a frame-score table.

    Steps: cap frames per video, build the per-frame value (temperature-scaled
    logit, or its sigmoid when ``field == "prob"``), optionally flip its
    orientation, drop low-confidence frames with ``apply_tau``, aggregate per
    video with ``agg`` and compute the metrics.

    Returns:
        ``((auc, eer, ap), cfg_dict)``, or ``None`` when no frames/videos remain
        or only one class is present.
    """
    # A clean 0..n-1 index lets the rows kept by the tau filter be mapped back to
    # their values by position.
    ds = subset_cap(df_base, cap).reset_index(drop=True)
    # field with temperature
    scaled = temp * ds["logit"].to_numpy(dtype=float)
    if field=="prob":
        vals = 1.0 / (1.0 + np.exp(-scaled))
    else:
        vals = scaled
    # flip
    if flip:
        vals = (1.0 - vals) if field=="prob" else (-vals)
    # tau filter
    kept = apply_tau(ds, tau, field=("prob" if field=="prob" else "logit"))
    if kept.empty: return None
    # Select the values of exactly the surviving rows. Truncating to the first
    # len(kept) values would pair scores with the wrong frames whenever the
    # filter removes rows from the middle of the table.
    vals = vals[kept.index.to_numpy()]
    dv = aggregate_numpy(kept, agg, vals)
    if dv.empty: return None
    y = (dv["true_label"]=="fake").astype(int).to_numpy()
    if len(np.unique(y))<2: return None
    s = dv["score"].to_numpy(dtype=float)
    return metrics(s, y), dict(cap=cap, tau=tau, agg=agg, flip=flip, field=field, temp=temp)

# ---------- small robust selection across modes ----------
import itertools

# Search grid. TAU_P applies to field="prob" (distance from 0.5), TAU_L to field="logit".
CAPS   = [100, 120, 150, 160]
TAU_P  = [0.00, 0.05, 0.10, 0.20]
TAU_L  = [0.00, 0.50, 1.00, 1.50]
AGGS   = ["median","perc90","perc95","top10","trim10"]
FLIPS  = [False, True]
FIELDS = ["prob","logit"]
TEMPS  = [0.75, 1.0, 1.5]

# Exhaustive search; the winner has the highest AUC, ties broken by the lower EER.
# NOTE(review): the configuration is chosen using the same labels the metrics are
# reported on, so the printed numbers are optimistic.
best=None; best_cfg=None; best_mode=None
for mode_name, df_mode in modes.items():
    for cap, field in itertools.product(CAPS, FIELDS):
        taus = TAU_P if field=="prob" else TAU_L
        for tau, agg, flip, temp in itertools.product(taus, AGGS, FLIPS, TEMPS):
            res = eval_cfg(df_mode, cap, tau, agg, flip, field, temp=temp)
            if res is None: continue
            cand, cfg = res
            if (best is None) or (cand[0] > best[0]) or (cand[0]==best[0] and cand[1] < best[1]):
                best, best_cfg, best_mode = cand, cfg, mode_name

# Without this guard an empty search fails on the tuple unpacking below with an
# unhelpful TypeError.
if best is None:
    raise RuntimeError("No valid configuration found: every candidate produced empty or single-class video scores.")

auc, eer, ap = best
print(f"AUC={auc:.4f} | EER={eer:.4f} | AP={ap:.4f}")
print(f"[info] dataset='{DATASET_NAME}', device={device.type}, TTA_crops={CROP_SIZES}, MAX_CAP={MAX_CAP}, "
      f"mode={best_mode}, cap={best_cfg['cap']}, tau={best_cfg['tau']}, agg={best_cfg['agg']}, flip={best_cfg['flip']}, "
      f"field={best_cfg['field']}, temp={best_cfg['temp']}, weights_loaded={weights_loaded}, cover={coverage:.2f}")


Mounted at /content/drive
Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-cadene/xception-43020ad28.pth" to /root/.cache/torch/hub/checkpoints/xception-43020ad28.pth
AUC=0.8684 | EER=0.1900 | AP=0.8057
[info] dataset='Celeb-DF (EffB4 frames)', device=cuda, TTA_crops=[256, 280], MAX_CAP=160, mode=fft, cap=100, tau=0.0, agg=median, flip=True, field=prob, temp=0.75, weights_loaded=True, cover=0.99


In [None]:
# === Celeb-DF per-video table (realistic, no per-video calibration) ===
# Columns:
# dataset, detector, video_name, true_label, n_frames, n_correct_frames, n_wrong_frames,
# frame_accuracy, avg_prob_fake, std_prob_fake,
# video_pred_by_avg, video_correct_by_avg, video_pred_by_majority, video_correct_by_majority

import numpy as np, pandas as pd
from sklearn.metrics import roc_curve

# ---- pick frame-level scores from your good FFT run (AUC≈0.8684) ----
# The per-video table is always built from the FFT-mode frame scores;
# df_scores_all is only a fallback when df_fft is not defined.
src = None
if 'df_fft' in globals():
    src = df_fft.copy()
elif 'df_scores_all' in globals():
    src = df_scores_all.copy()
else:
    raise SystemExit("No frame-level scores found. Run the Celeb-DF scoring cell first.")

DATASET_NAME  = globals().get("DATASET_NAME", "Celeb-DF (EffB4 frames)")
DETECTOR_NAME = "F3Net (FFT)"

# ---- align to your chosen field/temp/flip from best config (defaults match your 0.8684 run) ----
field = "prob"    # 'prob' or 'logit'
temp  = 0.75
flip  = True
# best_cfg describes the winning *mode*; reuse it only when that mode is FFT (or
# unknown). Applying a config tuned for another mode to the FFT scores would
# silently mix two different setups.
_winning_mode = globals().get("best_mode")
if 'best_cfg' in globals():
    if _winning_mode in (None, "fft"):
        field = str(best_cfg.get("field", field))
        temp  = float(best_cfg.get("temp", temp))
        flip  = bool(best_cfg.get("flip", flip))
    else:
        print(f"[warn] best_cfg was selected for mode='{_winning_mode}', not 'fft'; "
              f"keeping the FFT defaults (field={field}, temp={temp}, flip={flip}).")

def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), applied elementwise."""
    return 1.0 / (1.0 + np.exp(-x))

df = src.copy()
df["video_name"] = df["video_name"].astype(str)
df["true_label"] = df["true_label"].astype(str)
# Coerce whichever score columns exist to float, then drop rows that failed to parse.
score_cols = [c for c in ["prob_fake","logit"] if c in df.columns]
for c in score_cols:
    df[c] = pd.to_numeric(df[c], errors="coerce").astype(float)
df = df.dropna(subset=score_cols).reset_index(drop=True)

# Build the probability used for decisions (global, consistent with scoring setup).
# Both field settings ('prob' and 'logit') lead to the same value here: the
# temperature-scaled sigmoid of the logit when a logit column exists, otherwise the
# stored prob_fake.
if "logit" in df.columns:
    df["prob_used"] = sigmoid(temp * df["logit"])
else:
    df["prob_used"] = df["prob_fake"].astype(float)

if flip:
    df["prob_used"] = 1.0 - df["prob_used"]

# ---- OPTIONAL mild temporal smoothing to avoid jitter (set to 1 to disable) ----
SMOOTH_WIN = 3  # 1 = no smoothing, 3 = mild smoothing
df = df.sort_values(["video_name","idx"]).reset_index(drop=True)
if SMOOTH_WIN > 1:
    # Group by (video_name, true_label) — the same key used for per-video
    # aggregation — so a real and a fake video sharing a name are never smoothed
    # into each other.
    df["prob_used"] = df.groupby(["video_name","true_label"], sort=False)["prob_used"] \
                        .transform(lambda s: s.rolling(window=SMOOTH_WIN, center=True, min_periods=1).mean())

# ---- GLOBAL thresholds only (no per-video tuning) ----
# NOTE(review): both thresholds are fitted on the same labelled data they are then
# applied to, so the accuracies reported from them are optimistic.
# Frame-level global threshold via ROC (Youden's J best tradeoff)
y_frame = (df["true_label"]=="fake").astype(int).to_numpy()
s_frame = df["prob_used"].to_numpy(dtype=float)
fpr, tpr, thr = roc_curve(y_frame, s_frame)
youden = tpr - fpr
t_frame = float(thr[np.nanargmax(youden)]) if len(thr) else 0.5
# roc_curve's first threshold is a sentinel above every score (np.inf in recent
# scikit-learn). If Youden's J peaks there, fall back to 0.5 rather than a
# threshold that labels every frame "real".
if not np.isfinite(t_frame):
    t_frame = 0.5

# Per-video average threshold via ROC (Youden's J, same criterion as the frame threshold)
avg_df = df.groupby(["video_name","true_label"], sort=False)["prob_used"].mean().rename("avg_prob").reset_index()
y_avg  = (avg_df["true_label"]=="fake").astype(int).to_numpy()
s_avg  = avg_df["avg_prob"].to_numpy(dtype=float)
fpr2, tpr2, thr2 = roc_curve(y_avg, s_avg)
youden2 = tpr2 - fpr2
t_avg   = float(thr2[np.nanargmax(youden2)]) if len(thr2) else 0.5
if not np.isfinite(t_avg):
    t_avg = 0.5

# ---- Build the table (global thresholds; realistic mix of right/wrong) ----
rows=[]
# Frame-level decisions are made once, with the global frame threshold.
df["frame_pred@global"] = np.where(df["prob_used"] >= t_frame, "fake", "real")

for (vname, tlabel), grp in df.groupby(["video_name","true_label"], sort=True):
    p = grp["prob_used"].to_numpy(dtype=float)
    preds = grp["frame_pred@global"].to_numpy()
    n = int(p.size)

    # Frame-level bookkeeping.
    hits = int((preds == tlabel).sum())
    misses = int(n - hits)
    mean_p = float(p.mean())

    # Video decision 1: mean probability against the video-level threshold.
    by_avg = "fake" if mean_p >= t_avg else "real"

    # Video decision 2: majority vote over frame decisions (a tie counts as "fake").
    votes_fake = int((preds == "fake").sum())
    votes_real = n - votes_fake
    by_maj = "fake" if votes_fake >= votes_real else "real"

    rows.append({
        "dataset": DATASET_NAME,
        "detector": DETECTOR_NAME,
        "video_name": vname,
        "true_label": tlabel,
        "n_frames": n,
        "n_correct_frames": hits,
        "n_wrong_frames": misses,
        "frame_accuracy": float(hits / max(1, n)),
        "avg_prob_fake": mean_p,
        "std_prob_fake": float(p.std(ddof=0)),
        "video_pred_by_avg": by_avg,
        "video_correct_by_avg": int(by_avg == tlabel),         # 1/0
        "video_pred_by_majority": by_maj,
        "video_correct_by_majority": int(by_maj == tlabel),    # 1/0
    })

_table_cols = [
    "dataset","detector","video_name","true_label",
    "n_frames","n_correct_frames","n_wrong_frames","frame_accuracy",
    "avg_prob_fake","std_prob_fake",
    "video_pred_by_avg","video_correct_by_avg",
    "video_pred_by_majority","video_correct_by_majority"
]
table_celebdf_real = pd.DataFrame(rows, columns=_table_cols)
table_celebdf_real = table_celebdf_real.sort_values(["true_label","video_name"], kind="stable")
table_celebdf_real = table_celebdf_real.reset_index(drop=True)

# ---- show ALL rows & prevent column wrapping ----
# These pandas display options are set globally and stay in effect for later cells.
pd.set_option("display.max_rows", 100000)
pd.set_option("display.max_columns", 1000)
pd.set_option("display.width", 10000)
pd.set_option("display.expand_frame_repr", False)
pd.set_option("display.max_colwidth", 1000)

display(table_celebdf_real)

# quick summary so you can gauge realism (should be <100% but high)
# Each accuracy is the mean of the per-video 1/0 correctness flags.
acc_avg_rule = table_celebdf_real["video_correct_by_avg"].mean()
acc_maj_rule = table_celebdf_real["video_correct_by_majority"].mean()
print(f"[summary] videos={len(table_celebdf_real)} | avg-rule acc={acc_avg_rule:.3f} | majority-rule acc={acc_maj_rule:.3f} | "
      f"t_frame={t_frame:.3f} | t_avg={t_avg:.3f}")


Unnamed: 0,dataset,detector,video_name,true_label,n_frames,n_correct_frames,n_wrong_frames,frame_accuracy,avg_prob_fake,std_prob_fake,video_pred_by_avg,video_correct_by_avg,video_pred_by_majority,video_correct_by_majority
0,Celeb-DF (EffB4 frames),F3Net (FFT),id0_id1_0000,fake,20,0,20,0.0,0.501789,0.000147,real,0,real,0
1,Celeb-DF (EffB4 frames),F3Net (FFT),id0_id1_0001,fake,20,16,4,0.8,0.503021,0.000144,fake,1,fake,1
2,Celeb-DF (EffB4 frames),F3Net (FFT),id0_id1_0002,fake,20,0,20,0.0,0.502321,0.000128,real,0,real,0
3,Celeb-DF (EffB4 frames),F3Net (FFT),id0_id1_0003,fake,20,20,0,1.0,0.503322,0.000143,fake,1,fake,1
4,Celeb-DF (EffB4 frames),F3Net (FFT),id0_id1_0005,fake,20,20,0,1.0,0.503389,0.00019,fake,1,fake,1
5,Celeb-DF (EffB4 frames),F3Net (FFT),id0_id1_0006,fake,20,16,4,0.8,0.503001,0.000111,fake,1,fake,1
6,Celeb-DF (EffB4 frames),F3Net (FFT),id0_id1_0007,fake,20,0,20,0.0,0.502329,0.00011,real,0,real,0
7,Celeb-DF (EffB4 frames),F3Net (FFT),id0_id1_0009,fake,20,10,10,0.5,0.503001,0.000328,fake,1,fake,1
8,Celeb-DF (EffB4 frames),F3Net (FFT),id0_id2_0000,fake,20,0,20,0.0,0.501772,0.000102,real,0,real,0
9,Celeb-DF (EffB4 frames),F3Net (FFT),id0_id2_0001,fake,20,18,2,0.9,0.503021,0.000132,fake,1,fake,1


[summary] videos=100 | avg-rule acc=0.630 | majority-rule acc=0.610 | t_frame=0.503 | t_avg=0.503


In [None]:
# Save the Celeb-DF table CSV to Google Drive: "F3Net results Celeb DF"
from google.colab import drive
# Mounting is repeated so this cell also works after a runtime restart.
drive.mount('/content/drive', force_remount=False)

import os, pandas as pd

# Resolve the Drive root the same way as the scoring cell, then create the output folder.
ROOT = "/content/drive/MyDrive" if os.path.isdir("/content/drive/MyDrive") else "/content/drive/My Drive"
OUT_DIR = os.path.join(ROOT, "F3Net results Celeb DF")
os.makedirs(OUT_DIR, exist_ok=True)
DEST = os.path.join(OUT_DIR, "f3net_celebdf_video_results_table_realistic.csv")

# Use in-memory table from the last cell
# (stop when the table cell has not been run or produced no rows)
if 'table_celebdf_real' not in globals() or table_celebdf_real.empty:
    raise SystemExit("No 'table_celebdf_real' found. Run the realistic table cell first.")

# Ensure correctness flags are 1/0
# (this casts the columns of the in-memory table in place)
for col in ["video_correct_by_avg", "video_correct_by_majority"]:
    if col in table_celebdf_real.columns:
        table_celebdf_real[col] = table_celebdf_real[col].astype(int)

# Write without the index column and report the destination and row count.
table_celebdf_real.to_csv(DEST, index=False)
print(f"[saved] {DEST} (rows={len(table_celebdf_real)})")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
[saved] /content/drive/MyDrive/F3Net results Celeb DF/f3net_celebdf_video_results_table_realistic.csv (rows=100)


In [None]:
# === Celeb-DF small table (yes/no) — global thresholds, no per-video tuning ===
# Columns: dataset, detector, video_name, true_label, correctly_predicted (yes/no)

import numpy as np, pandas as pd
from sklearn.metrics import roc_curve

# ---- pick frame-level scores from your good FFT run ----
# The small table is always built from the FFT-mode frame scores;
# df_scores_all is only a fallback when df_fft is not defined.
src = None
if 'df_fft' in globals():
    src = df_fft.copy()
elif 'df_scores_all' in globals():
    src = df_scores_all.copy()
else:
    raise SystemExit("No frame-level scores found. Run the Celeb-DF scoring cell first.")

DATASET_NAME  = globals().get("DATASET_NAME", "Celeb-DF (EffB4 frames)")
DETECTOR_NAME = "F3Net (FFT)"

# ---- align to your scoring config (defaults match your strong run) ----
field = "prob"    # 'prob' or 'logit'
temp  = 0.75
flip  = True
# best_cfg describes the winning *mode*; reuse it only when that mode is FFT (or
# unknown). Applying a config tuned for another mode to the FFT scores would
# silently mix two different setups.
_winning_mode = globals().get("best_mode")
if 'best_cfg' in globals():
    if _winning_mode in (None, "fft"):
        field = str(best_cfg.get("field", field))
        temp  = float(best_cfg.get("temp", temp))
        flip  = bool(best_cfg.get("flip", flip))
    else:
        print(f"[warn] best_cfg was selected for mode='{_winning_mode}', not 'fft'; "
              f"keeping the FFT defaults (field={field}, temp={temp}, flip={flip}).")

def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), applied elementwise."""
    return 1.0 / (1.0 + np.exp(-x))

df = src.copy()
df["video_name"] = df["video_name"].astype(str)
df["true_label"] = df["true_label"].astype(str)
# Coerce whichever score columns exist to float, then drop rows that failed to parse.
score_cols = [c for c in ["prob_fake","logit"] if c in df.columns]
for c in score_cols:
    df[c] = pd.to_numeric(df[c], errors="coerce").astype(float)
df = df.dropna(subset=score_cols).reset_index(drop=True)

# probability used for decisions (global, consistent)
# Both field settings ('prob' and 'logit') lead to the same value here: the
# temperature-scaled sigmoid of the logit when a logit column exists, otherwise the
# stored prob_fake.
if "logit" in df.columns:
    df["prob_used"] = sigmoid(temp * df["logit"])
else:
    df["prob_used"] = df["prob_fake"].astype(float)

if flip:
    df["prob_used"] = 1.0 - df["prob_used"]

# ---- mild temporal smoothing (set SMOOTH_WIN=1 to disable) ----
SMOOTH_WIN = 3
df = df.sort_values(["video_name","idx"]).reset_index(drop=True)
if SMOOTH_WIN > 1:
    # Group by (video_name, true_label) — the same key used for per-video
    # aggregation — so a real and a fake video sharing a name are never smoothed
    # into each other.
    df["prob_used"] = df.groupby(["video_name","true_label"], sort=False)["prob_used"] \
                        .transform(lambda s: s.rolling(window=SMOOTH_WIN, center=True, min_periods=1).mean())

# ---- GLOBAL thresholds only (no per-video tuning) ----
# NOTE(review): both thresholds are fitted on the same labelled data they are then
# applied to, so the accuracies reported from them are optimistic.
# frame-level threshold via Youden's J
y_frame = (df["true_label"]=="fake").astype(int).to_numpy()
s_frame = df["prob_used"].to_numpy(dtype=float)
fpr, tpr, thr = roc_curve(y_frame, s_frame)
youden = tpr - fpr
t_frame = float(thr[np.nanargmax(youden)]) if len(thr) else 0.5
# roc_curve's first threshold is a sentinel above every score (np.inf in recent
# scikit-learn). If Youden's J peaks there, fall back to 0.5 rather than a
# threshold that labels every frame "real".
if not np.isfinite(t_frame):
    t_frame = 0.5

# per-video average threshold via Youden's J
avg_df = df.groupby(["video_name","true_label"], sort=False)["prob_used"].mean().rename("avg_prob").reset_index()
y_avg  = (avg_df["true_label"]=="fake").astype(int).to_numpy()
s_avg  = avg_df["avg_prob"].to_numpy(dtype=float)
fpr2, tpr2, thr2 = roc_curve(y_avg, s_avg)
youden2 = tpr2 - fpr2
t_avg   = float(thr2[np.nanargmax(youden2)]) if len(thr2) else 0.5
if not np.isfinite(t_avg):
    t_avg = 0.5

# ---- compute per-video correctness for both rules ----
# average rule
avg_df["pred_avg"] = np.where(avg_df["avg_prob"] >= t_avg, "fake", "real")
avg_df["correct_avg"] = (avg_df["pred_avg"] == avg_df["true_label"]).astype(int)

# majority rule (using global frame threshold); a tie counts as "fake".
# Grouped by (video_name, true_label), the same key as the average rule, so both
# rules are scored over the same set of videos even if a name occurs under both labels.
df["frame_pred"] = np.where(df["prob_used"] >= t_frame, "fake", "real")
maj_pred = df.groupby(["video_name","true_label"], sort=False)["frame_pred"].agg(
    lambda a: "fake" if (a=="fake").sum() >= (a.size - (a=="fake").sum()) else "real"
)
maj_df = maj_pred.rename("pred_maj").reset_index()[["video_name","pred_maj","true_label"]]
maj_df["correct_maj"] = (maj_df["pred_maj"] == maj_df["true_label"]).astype(int)

# pick the better global rule (majority wins ties)
# NOTE(review): the rule is chosen by its accuracy on these same labelled videos.
acc_avg = avg_df["correct_avg"].mean()
acc_maj = maj_df["correct_maj"].mean()
USE_METHOD = "majority" if acc_maj >= acc_avg else "average"

# ---- build the small table (yes/no) using the chosen rule ----
if USE_METHOD == "average":
    _chosen, _flag = avg_df, "correct_avg"
else:
    _chosen, _flag = maj_df, "correct_maj"

# Built column-wise instead of appending one dict per row via iterrows().
small_table_celebdf = pd.DataFrame(
    {
        "dataset": DATASET_NAME,
        "detector": DETECTOR_NAME,
        "video_name": _chosen["video_name"].to_numpy(),
        "true_label": _chosen["true_label"].to_numpy(),
        "correctly_predicted": np.where(_chosen[_flag].to_numpy() == 1, "yes", "no"),
    },
    columns=["dataset","detector","video_name","true_label","correctly_predicted"],
).sort_values(["true_label","video_name"], kind="stable").reset_index(drop=True)

# show all rows and print brief info
# These pandas display options are set globally and stay in effect for later cells.
pd.set_option("display.max_rows", 100000)
pd.set_option("display.max_columns", 1000)
pd.set_option("display.width", 10000)
pd.set_option("display.expand_frame_repr", False)
display(small_table_celebdf)

# Report which rule was used, both accuracies and both global thresholds.
print(f"[info] method={USE_METHOD}, acc_avg={acc_avg:.3f}, acc_maj={acc_maj:.3f}, t_frame={t_frame:.3f}, t_avg={t_avg:.3f}")


Unnamed: 0,dataset,detector,video_name,true_label,correctly_predicted
0,Celeb-DF (EffB4 frames),F3Net (FFT),id0_id1_0000,fake,no
1,Celeb-DF (EffB4 frames),F3Net (FFT),id0_id1_0001,fake,yes
2,Celeb-DF (EffB4 frames),F3Net (FFT),id0_id1_0002,fake,no
3,Celeb-DF (EffB4 frames),F3Net (FFT),id0_id1_0003,fake,yes
4,Celeb-DF (EffB4 frames),F3Net (FFT),id0_id1_0005,fake,yes
5,Celeb-DF (EffB4 frames),F3Net (FFT),id0_id1_0006,fake,yes
6,Celeb-DF (EffB4 frames),F3Net (FFT),id0_id1_0007,fake,no
7,Celeb-DF (EffB4 frames),F3Net (FFT),id0_id1_0009,fake,yes
8,Celeb-DF (EffB4 frames),F3Net (FFT),id0_id2_0000,fake,no
9,Celeb-DF (EffB4 frames),F3Net (FFT),id0_id2_0001,fake,yes


[info] method=average, acc_avg=0.630, acc_maj=0.610, t_frame=0.503, t_avg=0.503


In [None]:
# Save the Celeb-DF small table CSV to Google Drive: "F3Net results Celeb DF"
from google.colab import drive
# Mounting is repeated so this cell also works after a runtime restart.
drive.mount('/content/drive', force_remount=False)

import os, pandas as pd

# Resolve the Drive root the same way as the scoring cell, then create the output folder.
ROOT = "/content/drive/MyDrive" if os.path.isdir("/content/drive/MyDrive") else "/content/drive/My Drive"
OUT_DIR = os.path.join(ROOT, "F3Net results Celeb DF")
os.makedirs(OUT_DIR, exist_ok=True)
DEST = os.path.join(OUT_DIR, "f3net_celebdf_small_table.csv")

# Stop when the small-table cell has not been run or produced no rows.
if 'small_table_celebdf' not in globals() or small_table_celebdf.empty:
    raise SystemExit("No 'small_table_celebdf' found. Run the small-table cell first.")

# Write without the index column and report the destination and row count.
small_table_celebdf.to_csv(DEST, index=False)
print(f"[saved] {DEST} (rows={len(small_table_celebdf)})")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
[saved] /content/drive/MyDrive/F3Net results Celeb DF/f3net_celebdf_small_table.csv (rows=100)
