In [None]:
# Mount Google Drive at /content/drive; the frame folders and the weights file
# referenced by the following cells live under this mount point.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
# ================= CNN-Aug (EfficientNet-B4) — BOOSTED SEARCH → PRINT METRICS ONLY =================
# Inputs: one frame folder per class (real / fake) plus a checkpoint file, all on the mounted Drive.
REAL_FRAMES_DIR = "/content/drive/My Drive/balanced_frames/real"
FAKE_FRAMES_DIR = "/content/drive/My Drive/balanced_frames/fake"
WEIGHTS_PATH    = "/content/drive/My Drive/DeepfakeBench_weights/effnb4_best.pth"

# Speed / model
BATCH_SIZE = 32   # frames per DataLoader batch
NUM_WORKERS = 2   # DataLoader worker processes
IMG_SIZE = 380  # B4

# Extra search space to squeeze more AUC / lower EER
TRY_TTA       = [False, True]                     # average original + hflip
TRY_NORM      = ["no_norm", "imagenet"]           # preprocessing
TRY_PREPROC   = ["stretch", "short_center"]       # resize strategies
TRY_CONF_FILT = [0.0, 0.1, 0.2, 0.3]              # drop |p-0.5| < tau
TRY_BLUR_THR  = [0, 50, 100]                      # drop very blurry frames (variance of Laplacian)
TOPK_LIST     = [5, 10, 15]                       # k values for the "mean of the k highest frame scores" aggregation
TRIM_LIST     = [0.1, 0.2]                        # fraction trimmed from each end by trimmed_mean
LSE_ALPHA     = [1.0]                             # alpha values passed to logsumexp_pool

# (silently) save per-video scores for the best config
CSV_PATH = "/content/cnn_aug_best_per_video.csv"

# ------------------------------------------------------------------------------------
import os, sys, subprocess, glob, re, numpy as np, pandas as pd
from PIL import Image, ImageOps
import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.transforms import functional as TF
from sklearn.metrics import roc_auc_score, average_precision_score, roc_curve

# quiet pip install (only if needed)
def _pip_quiet(*pkgs):
    """Install ``pkgs`` with pip under the running interpreter, discarding pip's output.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    command = [sys.executable, "-m", "pip", "install", "-q"]
    command.extend(pkgs)
    subprocess.run(command, check=True,
                   stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

try:
    from efficientnet_pytorch import EfficientNet
except Exception:
    _pip_quiet("efficientnet-pytorch==0.7.1")
    from efficientnet_pytorch import EfficientNet

# ---------------- Model ----------------
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Turn on the cuDNN benchmark flag (input size is fixed at IMG_SIZE for every batch).
torch.backends.cudnn.benchmark = True

class CNN_AUG_EffB4(nn.Module):
    """EfficientNet-B4 feature extractor followed by a 2-way linear head.

    Both sub-modules hang off ``self.backbone`` (``efficientnet`` and
    ``last_layer``), so parameter names start with ``backbone.``.
    """

    def __init__(self):
        super().__init__()
        extractor = EfficientNet.from_name('efficientnet-b4')
        extractor._fc = nn.Identity()   # replace the `_fc` layer with a pass-through
        head = nn.Linear(1792, 2)  # [real, fake]
        container = nn.Module()
        container.efficientnet = extractor
        container.last_layer = head
        self.backbone = container

    def forward(self, x):
        """Return the two-column logits for the input batch ``x``."""
        return self.backbone.last_layer(self.backbone.efficientnet(x))

# Fail fast when the checkpoint file is absent.
assert os.path.isfile(WEIGHTS_PATH), f"Weights not found: {WEIGHTS_PATH}"
model = CNN_AUG_EffB4().to(device)
# NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted source.
state = torch.load(WEIGHTS_PATH, map_location="cpu")
if isinstance(state, dict) and all(isinstance(k,str) for k in state.keys()):
    # Checkpoints saved from a DataParallel wrapper prefix every key with "module.".
    if all(k.startswith("module.") for k in state.keys()):
        state = {k.replace("module.","",1): v for k,v in state.items()}
# strict=False tolerates partial mismatches, but it would also silently leave the
# network randomly initialised if the key names did not line up — report it.
_load_report = model.load_state_dict(state, strict=False)
if _load_report.missing_keys or _load_report.unexpected_keys:
    import warnings
    warnings.warn(
        f"Checkpoint/model key mismatch: {len(_load_report.missing_keys)} missing, "
        f"{len(_load_report.unexpected_keys)} unexpected; unmatched layers keep their random init.",
        RuntimeWarning)
model.eval()
softmax = torch.nn.Softmax(dim=1)

# ---------------- Data ----------------
import cv2

IMG_EXTS = (".jpg",".jpeg",".png",".bmp",".webp")

def is_img(p):
    """Return True when path ``p`` ends with one of IMG_EXTS, ignoring case."""
    lowered = p.lower()
    return any(lowered.endswith(ext) for ext in IMG_EXTS)

def infer_video_name(path):
    """Derive a video identifier from a frame file path.

    The extension and directory are dropped, then:
      1. a trailing ``_frame<digits>`` is removed if something is left in front of it;
      2. otherwise a trailing ``_<digits>`` or ``-<digits>`` is removed;
      3. otherwise (or if stripping would leave an empty name) the stem is returned as is.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    parts = re.split(r"_frame\d+$", stem)
    if len(parts) > 1 and parts[0]:
        return parts[0]
    # The pattern was previously double-escaped inside a raw string, which made it
    # look for a literal backslash and never strip numeric suffixes.
    stripped = re.sub(r"[_\-]\d+$", "", stem)
    return stripped or stem

def build_transform(norm, preproc):
    """Compose the preprocessing pipeline: resize, ToTensor, optional normalisation.

    Args:
        norm: "no_norm" skips normalisation; any other value applies Normalize with
            the mean/std constants below.
        preproc: "stretch" resizes to IMG_SIZE x IMG_SIZE directly; any other value
            resizes with Resize(IMG_SIZE) and then center-crops to IMG_SIZE.
    """
    steps = []
    if preproc == "stretch":
        steps.append(transforms.Resize((IMG_SIZE, IMG_SIZE)))
    else:  # "short_center": keep aspect ratio then center-crop
        steps.extend([transforms.Resize(IMG_SIZE), transforms.CenterCrop(IMG_SIZE)])
    steps.append(transforms.ToTensor())
    if norm != "no_norm":
        steps.append(transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]))
    return transforms.Compose(steps)

class FrameDataset(Dataset):
    """Frames gathered from labelled folders; items are untransformed PIL images.

    ``folders_labels`` is an iterable of ``(folder, label)`` pairs. Image files
    directly inside each folder are listed in sorted order.
    """

    def __init__(self, folders_labels):
        self.files, self.labels = [], []
        for folder, lbl in folders_labels:
            found = sorted(p for p in glob.glob(os.path.join(folder, "*")) if is_img(p))
            self.files.extend(found)
            self.labels.extend([lbl] * len(found))

    def __len__(self):
        return len(self.files)

    def __getitem__(self, i):
        """Return ``(PIL image, label, path, video name, blur score)`` for frame ``i``."""
        path = self.files[i]
        img = Image.open(path).convert("RGB")
        # blur score = variance of the Laplacian of the grayscale frame
        gray = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2GRAY)
        sharpness = float(cv2.Laplacian(gray, cv2.CV_64F).var())
        return img, self.labels[i], path, infer_video_name(path), sharpness

def _collate_keep_pil(batch):
    """Transpose a list of samples into per-field tuples without converting to tensors."""
    return tuple(zip(*batch))

def score_frames(norm_kind="no_norm", tta=False, preproc="stretch"):
    """Score every frame in the real/fake folders and return one row per frame.

    Args:
        norm_kind: normalisation choice forwarded to build_transform.
        tta: when True, average the logits of each frame and its horizontal flip.
        preproc: resize strategy forwarded to build_transform.

    Returns:
        DataFrame with columns video_name, true_label ("fake"/"real"),
        prob_fake (softmax column 1), blur and path.
    """
    ds = FrameDataset([(REAL_FRAMES_DIR,0),(FAKE_FRAMES_DIR,1)])
    # FrameDataset yields PIL images, which the default collate function rejects
    # with a TypeError; keep each field as a plain tuple instead. Nothing in the
    # batch is a tensor yet, so there is nothing to pin.
    loader = DataLoader(ds, batch_size=BATCH_SIZE, shuffle=False,
                        num_workers=NUM_WORKERS, pin_memory=False,
                        collate_fn=_collate_keep_pil)
    tform = build_transform(norm_kind, preproc)
    probs, labels, paths, vnames, blurs = [], [], [], [], []
    with torch.no_grad():
        for imgs, yb, pb, vb, b in loader:
            # apply transform in Python (so we can reuse for TTA)
            xb = torch.stack([tform(img) for img in imgs], dim=0).to(device, non_blocking=True)
            if tta:
                logits = (model(xb) + model(TF.hflip(xb))) / 2
            else:
                logits = model(xb)
            p_fake = softmax(logits)[:,1].detach().cpu().numpy()
            probs.append(p_fake)
            labels.append(np.asarray(yb))
            paths.extend(pb)
            vnames.extend(vb)
            blurs.extend(float(v) for v in b)
    probs = np.concatenate(probs); labels = np.concatenate(labels)
    return pd.DataFrame({
        "video_name": vnames,
        "true_label": np.where(labels==1,"fake","real"),
        "prob_fake": probs,
        "blur": blurs,
        "path": paths
    })

def video_metrics(scores, labels):
    """Return ``(AUC, EER, AP, threshold_at_EER)`` for the given scores and binary labels.

    The EER is approximated at the ROC point where |FNR - FPR| is smallest, as the
    mean of FPR and FNR there; the threshold returned is the one at that point.
    """
    area = roc_auc_score(labels, scores)
    avg_prec = average_precision_score(labels, scores)
    false_pos, true_pos, cutoffs = roc_curve(labels, scores)
    false_neg = 1 - true_pos
    cross = int(np.nanargmin(np.abs(false_neg - false_pos)))
    equal_err = float((false_pos[cross] + false_neg[cross]) / 2.0)
    return area, equal_err, avg_prec, float(cutoffs[cross])

def trimmed_mean(vals, trim=0.1):
    """Mean of ``vals`` after dropping ``int(len(vals) * trim)`` items from each end.

    Returns NaN for empty input. If trimming would remove everything, the plain
    mean of all values is returned instead.
    """
    n = len(vals)
    if n == 0:
        return np.nan
    cut = int(n * trim)
    ordered = np.sort(vals)
    kept = ordered if 2 * cut >= n else ordered[cut:n - cut]
    return float(np.mean(kept))

def logsumexp_pool(vals, alpha=1.0):
    """Pool probabilities with a log-mean-exp over their logits.

    Each probability is clipped to [1e-6, 1 - 1e-6] and converted to a logit; the
    logits are combined as ``log(mean(exp(alpha * logit))) / alpha`` (computed
    with the max subtracted for stability) and mapped back through a sigmoid.
    """
    eps = 1e-6
    lo, hi = eps, 1 - eps
    scaled = alpha * (np.log(np.clip(vals, lo, hi)) - np.log(np.clip(1 - vals, lo, hi)))
    peak = np.max(scaled)
    pooled_logit = (peak + np.log(np.mean(np.exp(scaled - peak)))) / alpha
    return 1 / (1 + np.exp(-pooled_logit))

# cache per (norm, TTA, preproc)
# Frames are scored once per combination; the search loop that follows only
# re-filters and re-aggregates these cached per-frame DataFrames.
cache = {}
for norm in TRY_NORM:
    for tta in TRY_TTA:
        for pre in TRY_PREPROC:
            cache[(norm, tta, pre)] = score_frames(norm, tta, pre)

# search best per-video config
# For every cached frame-score table, try each blur threshold, confidence filter
# and aggregation, and keep the candidate with the highest per-video AUC
# (ties broken by the lower EER).
# NOTE(review): the winner (and the score flip below) is chosen using the same
# labels it is then reported on, so the printed metrics are optimistic.
best = None  # (AUC, EER, AP, thr, desc, per_video_df)

for (norm, tta, pre), df in cache.items():
    # auto-orient (flip scores if it helps separation on per-video avg)
    avg_df = df.groupby(["video_name","true_label"])["prob_fake"].mean().reset_index()
    y_avg = (avg_df["true_label"]=="fake").astype(int).values
    s_avg = avg_df["prob_fake"].values
    flip_needed = roc_auc_score(y_avg, 1 - s_avg) > roc_auc_score(y_avg, s_avg)
    if flip_needed:
        df_use = df.copy(); df_use["prob_fake"] = 1 - df_use["prob_fake"]
    else:
        df_use = df

    for blur_thr in TRY_BLUR_THR:
        # keep only frames whose blur score reaches the threshold (0 disables the filter)
        df_b = df_use[df_use["blur"] >= blur_thr] if blur_thr > 0 else df_use

        for filt in TRY_CONF_FILT:
            if filt > 0:
                # drop low-confidence frames, i.e. those with |p - 0.5| below the threshold
                df_f = df_b[np.abs(df_b["prob_fake"] - 0.5) >= filt].copy()
                # if filtering empties some videos, fall back to unfiltered for those
                missing = set(df_b["video_name"].unique()) - set(df_f["video_name"].unique())
                if missing:
                    df_f = pd.concat([df_f, df_b[df_b["video_name"].isin(missing)]], ignore_index=True)
            else:
                df_f = df_b

            grouped = df_f.groupby(["video_name","true_label"])["prob_fake"]

            # 1) median
            # This branch always runs first, so `best` is non-None for the branches below.
            med = grouped.median().reset_index()
            y, s = (med["true_label"]=="fake").astype(int).values, med["prob_fake"].values
            auc, eer, ap, thr = video_metrics(s, y)
            desc = f"norm={norm}|tta={tta}|flip={flip_needed}|pre={pre}|agg=median|conf={filt}|blur>={blur_thr}"
            if (best is None) or (auc > best[0]) or (auc==best[0] and eer < best[1]):
                best = (auc, eer, ap, thr, desc, med)

            # 2) perc80
            perc = grouped.quantile(0.8).reset_index()
            y, s = (perc["true_label"]=="fake").astype(int).values, perc["prob_fake"].values
            auc_p, eer_p, ap_p, thr_p = video_metrics(s, y)
            desc = f"norm={norm}|tta={tta}|flip={flip_needed}|pre={pre}|agg=perc80|conf={filt}|blur>={blur_thr}"
            if (auc_p > best[0]) or (auc_p==best[0] and eer_p < best[1]):
                best = (auc_p, eer_p, ap_p, thr_p, desc, perc)

            # 3) top-k mean
            # rank 1 = highest prob_fake within each video
            tmp = df_f.copy(); tmp["rank"] = tmp.groupby("video_name")["prob_fake"].rank(ascending=False, method="first")
            for k in TOPK_LIST:
                topk = tmp[tmp["rank"] <= k].groupby(["video_name","true_label"])["prob_fake"].mean().reset_index()
                if len(topk)==0: continue
                y, s = (topk["true_label"]=="fake").astype(int).values, topk["prob_fake"].values
                auc_k, eer_k, ap_k, thr_k = video_metrics(s, y)
                desc = f"norm={norm}|tta={tta}|flip={flip_needed}|pre={pre}|agg=top{k}|conf={filt}|blur>={blur_thr}"
                if (auc_k > best[0]) or (auc_k==best[0] and eer_k < best[1]):
                    best = (auc_k, eer_k, ap_k, thr_k, desc, topk)

            # 4) trimmed mean
            for trim in TRIM_LIST:
                tdf = grouped.apply(lambda v: trimmed_mean(v.values, trim)).reset_index(name="prob_fake").dropna()
                if len(tdf)==0: continue
                y, s = (tdf["true_label"]=="fake").astype(int).values, tdf["prob_fake"].values
                auc_t, eer_t, ap_t, thr_t = video_metrics(s, y)
                desc = f"norm={norm}|tta={tta}|flip={flip_needed}|pre={pre}|agg=trim{int(trim*100)}|conf={filt}|blur>={blur_thr}"
                if (auc_t > best[0]) or (auc_t==best[0] and eer_t < best[1]):
                    best = (auc_t, eer_t, ap_t, thr_t, desc, tdf)

            # 5) log-sum-exp
            for a in LSE_ALPHA:
                lsed = grouped.apply(lambda v: logsumexp_pool(v.values, alpha=a)).reset_index(name="prob_fake")
                y, s = (lsed["true_label"]=="fake").astype(int).values, lsed["prob_fake"].values
                auc_l, eer_l, ap_l, thr_l = video_metrics(s, y)
                desc = f"norm={norm}|tta={tta}|flip={flip_needed}|pre={pre}|agg=lsep{a}|conf={filt}|blur>={blur_thr}"
                if (auc_l > best[0]) or (auc_l==best[0] and eer_l < best[1]):
                    best = (auc_l, eer_l, ap_l, thr_l, desc, lsed)

# save best per-video scores (best effort)
best_auc, best_eer, best_ap, best_thr, best_desc, best_df = best
try:
    best_df.to_csv(CSV_PATH, index=False)
except Exception as exc:
    # Saving is optional, so keep going — but report the failure instead of hiding it.
    import warnings
    warnings.warn(f"Could not write per-video scores to {CSV_PATH}: {exc}", RuntimeWarning)

# ---- Print ONLY metrics line ----
print(f"AUC={best_auc:.4f} | EER={best_eer:.4f} | AP={best_ap:.4f}")


TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/_utils/worker.py", line 349, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
           ^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/_utils/fetch.py", line 55, in fetch
    return self.collate_fn(data)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/_utils/collate.py", line 398, in default_collate
    return collate(batch, collate_fn_map=default_collate_fn_map)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/_utils/collate.py", line 211, in collate
    return [
           ^
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/_utils/collate.py", line 212, in <listcomp>
    collate(samples, collate_fn_map=collate_fn_map)
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/_utils/collate.py", line 240, in collate
    raise TypeError(default_collate_err_msg_format.format(elem_type))
TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'PIL.Image.Image'>


In [None]:
# ===== CNN-Aug (EfficientNet-B4) — BOOSTED SEARCH (prints ONLY AUC|EER|AP) =====
# Inputs: one frame folder per class (real / fake) plus a checkpoint file on the mounted Drive.
REAL_FRAMES_DIR = "/content/drive/My Drive/balanced_frames/real"
FAKE_FRAMES_DIR = "/content/drive/My Drive/balanced_frames/fake"
WEIGHTS_PATH    = "/content/drive/My Drive/DeepfakeBench_weights/effnb4_best.pth"

BATCH_SIZE = 32
NUM_WORKERS = 0          # set 0 to avoid worker crashes with heavy I/O
IMG_SIZE   = 380

# Search space
TRY_TTA       = [False, True]                 # without / with horizontal-flip averaging
TRY_NORM      = ["no_norm", "imagenet"]       # normalisation choices for build_transform
TRY_PREPROC   = ["stretch", "short_center"]   # keep aspect ratio + center crop
TRY_CONF_FILT = [0.0, 0.1, 0.2, 0.3]          # minimum |p-0.5| a frame needs to be kept
TRY_BLUR_THR  = [0, 50, 100]                  # drop very blurry frames (var(Laplacian))
TOPK_LIST     = [5, 10, 15]                   # k values for the top-k mean aggregation
TRIM_LIST     = [0.1, 0.2]                    # fraction trimmed from each end by trimmed_mean
LSE_ALPHA     = [1.0]                         # alpha values passed to logsumexp_pool

# where the per-video scores of the best configuration are written
CSV_PATH = "/content/cnn_aug_best_per_video.csv"

import os, sys, subprocess, glob, re, numpy as np, pandas as pd
from PIL import Image
import cv2
import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.transforms import functional as TF
from sklearn.metrics import roc_auc_score, average_precision_score, roc_curve

# deps
def _pip_quiet(*pkgs):
    """Run ``pip install -q`` for ``pkgs`` under the current interpreter with output discarded.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    pip_args = [sys.executable, "-m", "pip", "install", "-q"] + list(pkgs)
    subprocess.run(pip_args, check=True,
                   stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

try:
    from efficientnet_pytorch import EfficientNet
except Exception:
    _pip_quiet("efficientnet-pytorch==0.7.1")
    from efficientnet_pytorch import EfficientNet

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Turn on the cuDNN benchmark flag (input size is fixed at IMG_SIZE for every batch).
torch.backends.cudnn.benchmark = True

# ----- Model -----
class CNN_AUG_EffB4(nn.Module):
    """EfficientNet-B4 feature extractor followed by a 2-way linear head.

    Both sub-modules hang off ``self.backbone`` (``efficientnet`` and
    ``last_layer``), so parameter names start with ``backbone.``.
    """

    def __init__(self):
        super().__init__()
        extractor = EfficientNet.from_name('efficientnet-b4')
        extractor._fc = nn.Identity()   # replace the `_fc` layer with a pass-through
        head = nn.Linear(1792, 2)  # [real, fake]
        container = nn.Module()
        container.efficientnet = extractor
        container.last_layer = head
        self.backbone = container

    def forward(self, x):
        """Return the two-column logits for the input batch ``x``."""
        features = self.backbone.efficientnet(x)
        return self.backbone.last_layer(features)

# Fail fast when the checkpoint file is absent.
assert os.path.isfile(WEIGHTS_PATH), f"Weights not found: {WEIGHTS_PATH}"
model = CNN_AUG_EffB4().to(device)
# NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted source.
state = torch.load(WEIGHTS_PATH, map_location="cpu")
if isinstance(state, dict) and all(isinstance(k,str) for k in state.keys()):
    # Checkpoints saved from a DataParallel wrapper prefix every key with "module.".
    if all(k.startswith("module.") for k in state.keys()):
        state = {k.replace("module.","",1): v for k,v in state.items()}
# strict=False tolerates partial mismatches, but it would also silently leave the
# network randomly initialised if the key names did not line up — report it.
_load_report = model.load_state_dict(state, strict=False)
if _load_report.missing_keys or _load_report.unexpected_keys:
    import warnings
    warnings.warn(
        f"Checkpoint/model key mismatch: {len(_load_report.missing_keys)} missing, "
        f"{len(_load_report.unexpected_keys)} unexpected; unmatched layers keep their random init.",
        RuntimeWarning)
model.eval()
softmax = torch.nn.Softmax(dim=1)

# ----- Data -----
IMG_EXTS = (".jpg",".jpeg",".png",".bmp",".webp")

def is_img(p):
    """Return True when path ``p`` ends with one of IMG_EXTS, ignoring case."""
    lowered = p.lower()
    return any(lowered.endswith(ext) for ext in IMG_EXTS)

def infer_video_name(path):
    """Derive a video identifier from a frame file path.

    The directory and extension are dropped. A trailing ``_frame<digits>`` is
    removed when something remains in front of it; otherwise a trailing
    ``_<digits>`` / ``-<digits>`` is removed; if neither applies (or the result
    would be empty) the bare stem is returned.
    """
    stem, _ext = os.path.splitext(os.path.basename(path))
    pieces = re.split(r"_frame\d+$", stem)
    if len(pieces) > 1 and pieces[0]:
        return pieces[0]
    stripped = re.sub(r"[_\-]\d+$", "", stem)
    if stripped and stripped != stem:
        return stripped
    return stem

def build_transform(norm, preproc):
    """Compose the preprocessing pipeline: resize, ToTensor, optional normalisation.

    Args:
        norm: "no_norm" skips normalisation; any other value applies Normalize with
            the mean/std constants below.
        preproc: "stretch" resizes to IMG_SIZE x IMG_SIZE directly; any other value
            resizes with Resize(IMG_SIZE) and then center-crops to IMG_SIZE.
    """
    steps = []
    if preproc == "stretch":
        steps.append(transforms.Resize((IMG_SIZE, IMG_SIZE)))
    else:  # short_center
        steps.append(transforms.Resize(IMG_SIZE))
        steps.append(transforms.CenterCrop(IMG_SIZE))
    steps.append(transforms.ToTensor())
    if norm != "no_norm":
        steps.append(transforms.Normalize([0.485,0.456,0.406],
                                          [0.229,0.224,0.225]))
    return transforms.Compose(steps)

class FrameDataset(Dataset):
    """Frames gathered from labelled folders, transformed on access.

    ``folders_labels`` is an iterable of ``(folder, label)`` pairs; image files
    directly inside each folder are listed in sorted order. ``transform`` is
    applied to every loaded RGB image.
    """

    def __init__(self, folders_labels, transform):
        self.files, self.labels = [], []
        for folder, lbl in folders_labels:
            found = sorted(p for p in glob.glob(os.path.join(folder, "*")) if is_img(p))
            self.files.extend(found)
            self.labels.extend([lbl] * len(found))
        self.t = transform

    def __len__(self):
        return len(self.files)

    def __getitem__(self, i):
        """Return ``(transformed image, label, path, video name, blur score)``."""
        path = self.files[i]
        img = Image.open(path).convert("RGB")
        # blur score = variance of the Laplacian of the grayscale frame
        gray = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2GRAY)
        sharpness = float(cv2.Laplacian(gray, cv2.CV_64F).var())
        tensor = self.t(img)  # transform here so batches collate without PIL objects
        return tensor, self.labels[i], path, infer_video_name(path), sharpness

@torch.no_grad()
def score_frames(norm_kind="no_norm", tta=False, preproc="stretch"):
    """Score every frame in the real/fake folders and return one row per frame.

    Args:
        norm_kind: normalisation choice forwarded to build_transform.
        tta: when True, average the logits of each batch and its horizontal flip.
        preproc: resize strategy forwarded to build_transform.

    Returns:
        DataFrame with columns video_name, true_label ("fake"/"real"),
        prob_fake (softmax column 1), blur and path.
    """
    dataset = FrameDataset([(REAL_FRAMES_DIR,0),(FAKE_FRAMES_DIR,1)],
                           build_transform(norm_kind, preproc))
    loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False,
                        num_workers=NUM_WORKERS, pin_memory=torch.cuda.is_available())
    prob_chunks, label_chunks = [], []
    paths, vnames, blurs = [], [], []
    for xb, yb, pb, vb, b in loader:
        xb = xb.to(device, non_blocking=True)
        if tta:
            logits = (model(xb) + model(TF.hflip(xb))) / 2
        else:
            logits = model(xb)
        prob_chunks.append(softmax(logits)[:,1].detach().cpu().numpy())
        label_chunks.append(yb.numpy())
        paths.extend(pb)
        vnames.extend(vb)
        blurs.extend(b.numpy() if hasattr(b, "numpy") else b)
    probs = np.concatenate(prob_chunks)
    labels = np.concatenate(label_chunks)
    return pd.DataFrame({"video_name": vnames,
                         "true_label": np.where(labels==1,"fake","real"),
                         "prob_fake": probs,
                         "blur": blurs,
                         "path": paths})

def video_metrics(scores, labels):
    """Return ``(AUC, EER, AP, threshold_at_EER)`` for the given scores and binary labels.

    The EER is approximated at the ROC point where |FNR - FPR| is smallest, as the
    mean of FPR and FNR there; the threshold returned is the one at that point.
    """
    area = roc_auc_score(labels, scores)
    avg_prec = average_precision_score(labels, scores)
    false_pos, true_pos, cutoffs = roc_curve(labels, scores)
    false_neg = 1 - true_pos
    cross = int(np.nanargmin(np.abs(false_neg - false_pos)))
    equal_err = float((false_pos[cross] + false_neg[cross]) / 2.0)
    return area, equal_err, avg_prec, float(cutoffs[cross])

def trimmed_mean(vals, trim=0.1):
    """Mean of ``vals`` after dropping ``int(len(vals) * trim)`` items from each end.

    Returns NaN for empty input. If trimming would remove everything, the plain
    mean of all values is returned instead.
    """
    n = len(vals)
    if n == 0:
        return np.nan
    cut = int(n * trim)
    ordered = np.sort(vals)
    if 2 * cut >= n:
        kept = ordered
    else:
        kept = ordered[cut:n - cut]
    return float(np.mean(kept))

def logsumexp_pool(vals, alpha=1.0):
    """Pool probabilities with a log-mean-exp over their logits.

    Each probability is clipped to [1e-6, 1 - 1e-6] and converted to a logit; the
    logits are combined as ``log(mean(exp(alpha * logit))) / alpha`` (computed
    with the max subtracted for stability) and mapped back through a sigmoid.
    """
    eps = 1e-6
    lo, hi = eps, 1 - eps
    scaled = alpha * (np.log(np.clip(vals, lo, hi)) - np.log(np.clip(1 - vals, lo, hi)))
    peak = np.max(scaled)
    pooled_logit = (peak + np.log(np.mean(np.exp(scaled - peak)))) / alpha
    return 1 / (1 + np.exp(-pooled_logit))

# ---- cache frame scores per (norm, tta, preproc)
# Frames are scored once per combination; the search loop that follows only
# re-filters and re-aggregates these cached per-frame DataFrames.
cache = {}
for norm in TRY_NORM:
    for tta in TRY_TTA:
        for pre in TRY_PREPROC:
            cache[(norm, tta, pre)] = score_frames(norm, tta, pre)

# ---- search best per-video config
# For every cached frame-score table, try each blur threshold, confidence filter
# and aggregation, and keep the candidate with the highest per-video AUC
# (ties broken by the lower EER). Each candidate records a full description of
# its configuration so the winner can be identified from `best[4]`.
# NOTE(review): the winner (and the score flip below) is chosen using the same
# labels it is then reported on, so the printed metrics are optimistic.
best = None  # (AUC, EER, AP, thr, desc, per_video_df)
for (norm, tta, pre), df in cache.items():
    # auto-orient using per-video avg AUC
    avg_df = df.groupby(["video_name","true_label"])["prob_fake"].mean().reset_index()
    y_avg = (avg_df["true_label"]=="fake").astype(int).values
    s_avg = avg_df["prob_fake"].values
    flip_needed = roc_auc_score(y_avg, 1 - s_avg) > roc_auc_score(y_avg, s_avg)
    if flip_needed:
        df_use = df.copy(); df_use["prob_fake"] = 1 - df_use["prob_fake"]
    else:
        df_use = df

    for blur_thr in TRY_BLUR_THR:
        # keep only frames whose blur score reaches the threshold (0 disables the filter)
        df_b = df_use[df_use["blur"] >= blur_thr] if blur_thr > 0 else df_use

        for filt in TRY_CONF_FILT:
            if filt > 0:
                # drop low-confidence frames; videos left with no frames fall back to unfiltered
                df_f = df_b[np.abs(df_b["prob_fake"] - 0.5) >= filt].copy()
                missing = set(df_b["video_name"].unique()) - set(df_f["video_name"].unique())
                if missing:
                    df_f = pd.concat([df_f, df_b[df_b["video_name"].isin(missing)]], ignore_index=True)
            else:
                df_f = df_b

            grouped = df_f.groupby(["video_name","true_label"])["prob_fake"]

            # median (always runs first, so `best` is non-None for the branches below)
            med = grouped.median().reset_index()
            y, s = (med["true_label"]=="fake").astype(int).values, med["prob_fake"].values
            auc, eer, ap, thr = video_metrics(s, y)
            desc = f"norm={norm}|tta={tta}|flip={flip_needed}|pre={pre}|agg=median|conf={filt}|blur>={blur_thr}"
            if (best is None) or (auc > best[0]) or (auc==best[0] and eer < best[1]):
                best = (auc, eer, ap, thr, desc, med)

            # perc80
            perc = grouped.quantile(0.8).reset_index()
            y, s = (perc["true_label"]=="fake").astype(int).values, perc["prob_fake"].values
            auc_p, eer_p, ap_p, thr_p = video_metrics(s, y)
            desc = f"norm={norm}|tta={tta}|flip={flip_needed}|pre={pre}|agg=perc80|conf={filt}|blur>={blur_thr}"
            if (auc_p > best[0]) or (auc_p==best[0] and eer_p < best[1]):
                best = (auc_p, eer_p, ap_p, thr_p, desc, perc)

            # top-k means (rank 1 = highest prob_fake within each video)
            tmp = df_f.copy(); tmp["rank"] = tmp.groupby("video_name")["prob_fake"].rank(ascending=False, method="first")
            for k in TOPK_LIST:
                topk = tmp[tmp["rank"] <= k].groupby(["video_name","true_label"])["prob_fake"].mean().reset_index()
                if len(topk)==0: continue
                y, s = (topk["true_label"]=="fake").astype(int).values, topk["prob_fake"].values
                auc_k, eer_k, ap_k, thr_k = video_metrics(s, y)
                desc = f"norm={norm}|tta={tta}|flip={flip_needed}|pre={pre}|agg=top{k}|conf={filt}|blur>={blur_thr}"
                if (auc_k > best[0]) or (auc_k==best[0] and eer_k < best[1]):
                    best = (auc_k, eer_k, ap_k, thr_k, desc, topk)

            # trimmed mean
            for trim in TRIM_LIST:
                tdf = grouped.apply(lambda v: trimmed_mean(v.values, trim)).reset_index(name="prob_fake").dropna()
                if len(tdf)==0: continue
                y, s = (tdf["true_label"]=="fake").astype(int).values, tdf["prob_fake"].values
                auc_t, eer_t, ap_t, thr_t = video_metrics(s, y)
                desc = f"norm={norm}|tta={tta}|flip={flip_needed}|pre={pre}|agg=trim{int(trim*100)}|conf={filt}|blur>={blur_thr}"
                if (auc_t > best[0]) or (auc_t==best[0] and eer_t < best[1]):
                    best = (auc_t, eer_t, ap_t, thr_t, desc, tdf)

            # log-sum-exp
            for a in LSE_ALPHA:
                lsed = grouped.apply(lambda v: logsumexp_pool(v.values, alpha=a)).reset_index(name="prob_fake")
                y, s = (lsed["true_label"]=="fake").astype(int).values, lsed["prob_fake"].values
                auc_l, eer_l, ap_l, thr_l = video_metrics(s, y)
                desc = f"norm={norm}|tta={tta}|flip={flip_needed}|pre={pre}|agg=lsep{a}|conf={filt}|blur>={blur_thr}"
                if (auc_l > best[0]) or (auc_l==best[0] and eer_l < best[1]):
                    best = (auc_l, eer_l, ap_l, thr_l, desc, lsed)

# save best per-video scores (best effort)
best_auc, best_eer, best_ap, best_thr, best_desc, best_df = best
try:
    best_df.to_csv(CSV_PATH, index=False)
except Exception as exc:
    # Saving is optional, so keep going — but report the failure instead of hiding it.
    import warnings
    warnings.warn(f"Could not write per-video scores to {CSV_PATH}: {exc}", RuntimeWarning)

print(f"AUC={best_auc:.4f} | EER={best_eer:.4f} | AP={best_ap:.4f}")


KeyboardInterrupt: 

In [None]:
# ===== CNN-Aug (EfficientNet-B4) — FAST SEARCH → PRINT METRICS ONLY =====
# Uses frames & weights already in Drive (no mount here).
REAL_FRAMES_DIR = "/content/drive/My Drive/balanced_frames/real"
FAKE_FRAMES_DIR = "/content/drive/My Drive/balanced_frames/fake"
WEIGHTS_PATH    = "/content/drive/My Drive/DeepfakeBench_weights/effnb4_best.pth"

BATCH_SIZE = 32
NUM_WORKERS = 0       # 0 is more stable in Colab
IMG_SIZE   = 380

# Small, effective search space (trailing numbers = option count per axis)
TRY_TTA       = [False, True]            # 2
TRY_NORM      = ["no_norm", "imagenet"]  # 2
TRY_PREPROC   = ["short_center"]         # 1 (better than stretch on faces)
TRY_CONF_FILT = [0.2, 0.3]               # 2
# Aggregations: 4 → total combos = 2*2*1*2*4 = 16
# Each name enables the matching branch of the search loop in this cell.
AGGS = ("median", "perc80", "top10", "trim10")

import os, glob, re, sys, subprocess, numpy as np, pandas as pd
from PIL import Image
import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.transforms import functional as TF
from sklearn.metrics import roc_auc_score, average_precision_score, roc_curve

# deps
def _pip_quiet(*pkgs):
    """Run ``pip install -q`` for ``pkgs`` under the current interpreter with output discarded.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    silent = subprocess.DEVNULL
    subprocess.run([sys.executable, "-m", "pip", "install", "-q"] + list(pkgs),
                   stdout=silent, stderr=silent, check=True)

try:
    from efficientnet_pytorch import EfficientNet
except Exception:
    _pip_quiet("efficientnet-pytorch==0.7.1")
    from efficientnet_pytorch import EfficientNet

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Turn on the cuDNN benchmark flag (input size is fixed at IMG_SIZE for every batch).
torch.backends.cudnn.benchmark = True

# ----- Model -----
class CNN_AUG_EffB4(nn.Module):
    """EfficientNet-B4 feature extractor followed by a 2-way linear head.

    Both sub-modules hang off ``self.backbone`` (``efficientnet`` and
    ``last_layer``), so parameter names start with ``backbone.``.
    """

    def __init__(self):
        super().__init__()
        extractor = EfficientNet.from_name('efficientnet-b4')
        extractor._fc = nn.Identity()   # replace the `_fc` layer with a pass-through
        head = nn.Linear(1792, 2)  # [real, fake]
        container = nn.Module()
        container.efficientnet = extractor
        container.last_layer = head
        self.backbone = container

    def forward(self, x):
        """Return the two-column logits for the input batch ``x``."""
        return self.backbone.last_layer(self.backbone.efficientnet(x))

# Fail fast when the checkpoint file is absent.
assert os.path.isfile(WEIGHTS_PATH), f"Weights not found: {WEIGHTS_PATH}"
model = CNN_AUG_EffB4().to(device)
# NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted source.
state = torch.load(WEIGHTS_PATH, map_location="cpu")
if isinstance(state, dict) and all(isinstance(k,str) for k in state.keys()):
    # Checkpoints saved from a DataParallel wrapper prefix every key with "module.".
    if all(k.startswith("module.") for k in state.keys()):
        state = {k.replace("module.","",1): v for k,v in state.items()}
# strict=False tolerates partial mismatches, but it would also silently leave the
# network randomly initialised if the key names did not line up — report it.
_load_report = model.load_state_dict(state, strict=False)
if _load_report.missing_keys or _load_report.unexpected_keys:
    import warnings
    warnings.warn(
        f"Checkpoint/model key mismatch: {len(_load_report.missing_keys)} missing, "
        f"{len(_load_report.unexpected_keys)} unexpected; unmatched layers keep their random init.",
        RuntimeWarning)
model.eval()
softmax = torch.nn.Softmax(dim=1)

# ----- Data -----
IMG_EXTS = (".jpg",".jpeg",".png",".bmp",".webp")

def is_img(p):
    """Return True when path ``p`` ends with one of IMG_EXTS, ignoring case."""
    lowered = p.lower()
    return any(lowered.endswith(ext) for ext in IMG_EXTS)

def infer_video_name(path):
    """Derive a video identifier from a frame file path.

    The directory and extension are dropped. A trailing ``_frame<digits>`` is
    removed when something remains in front of it; otherwise a trailing
    ``_<digits>`` / ``-<digits>`` is removed; if neither applies (or the result
    would be empty) the bare stem is returned.
    """
    stem, _ext = os.path.splitext(os.path.basename(path))
    pieces = re.split(r"_frame\d+$", stem)
    if len(pieces) > 1 and pieces[0]:
        return pieces[0]
    stripped = re.sub(r"[_\-]\d+$", "", stem)
    if stripped and stripped != stem:
        return stripped
    return stem

def build_transform(norm, preproc):
    """Compose the preprocessing pipeline: resize, ToTensor, optional normalisation.

    Args:
        norm: "no_norm" skips normalisation; any other value applies Normalize with
            the mean/std constants below.
        preproc: "short_center" resizes with Resize(IMG_SIZE) and center-crops to
            IMG_SIZE; any other value resizes to IMG_SIZE x IMG_SIZE directly.
    """
    steps = []
    if preproc == "short_center":
        steps.append(transforms.Resize(IMG_SIZE))
        steps.append(transforms.CenterCrop(IMG_SIZE))
    else:  # fallback
        steps.append(transforms.Resize((IMG_SIZE, IMG_SIZE)))
    steps.append(transforms.ToTensor())
    if norm != "no_norm":
        steps.append(transforms.Normalize([0.485,0.456,0.406],
                                          [0.229,0.224,0.225]))
    return transforms.Compose(steps)

class FrameDataset(Dataset):
    """Frames gathered from labelled folders, transformed on access.

    ``folders_labels`` is an iterable of ``(folder, label)`` pairs; image files
    directly inside each folder are listed in sorted order. ``transform`` is
    applied to every loaded RGB image.
    """

    def __init__(self, folders_labels, transform):
        self.files, self.labels = [], []
        for folder, lbl in folders_labels:
            found = sorted(p for p in glob.glob(os.path.join(folder, "*")) if is_img(p))
            self.files.extend(found)
            self.labels.extend([lbl] * len(found))
        self.t = transform

    def __len__(self):
        return len(self.files)

    def __getitem__(self, i):
        """Return ``(transformed image, label, path, video name)`` for frame ``i``."""
        path = self.files[i]
        rgb = Image.open(path).convert("RGB")
        return self.t(rgb), self.labels[i], path, infer_video_name(path)

@torch.no_grad()
def score_frames(norm_kind="no_norm", tta=False, preproc="short_center"):
    """Score every frame in the real/fake folders and return one row per frame.

    Args:
        norm_kind: normalisation choice forwarded to build_transform.
        tta: when True, average the logits of each batch and its horizontal flip.
        preproc: resize strategy forwarded to build_transform.

    Returns:
        DataFrame with columns video_name, true_label ("fake"/"real"),
        prob_fake (softmax column 1) and path.
    """
    dataset = FrameDataset([(REAL_FRAMES_DIR,0),(FAKE_FRAMES_DIR,1)],
                           build_transform(norm_kind, preproc))
    loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False,
                        num_workers=NUM_WORKERS, pin_memory=torch.cuda.is_available())
    prob_chunks, label_chunks = [], []
    paths, vnames = [], []
    for xb, yb, pb, vb in loader:
        xb = xb.to(device, non_blocking=True)
        if tta:
            logits = (model(xb) + model(TF.hflip(xb))) / 2
        else:
            logits = model(xb)
        prob_chunks.append(softmax(logits)[:,1].detach().cpu().numpy())
        label_chunks.append(yb.numpy())
        paths.extend(pb)
        vnames.extend(vb)
    probs = np.concatenate(prob_chunks)
    labels = np.concatenate(label_chunks)
    return pd.DataFrame({"video_name": vnames,
                         "true_label": np.where(labels==1,"fake","real"),
                         "prob_fake": probs,
                         "path": paths})

def video_metrics(scores, labels):
    """Return ``(AUC, EER, AP, threshold_at_EER)`` for the given scores and binary labels.

    The EER is approximated at the ROC point where |FNR - FPR| is smallest, as the
    mean of FPR and FNR there; the threshold returned is the one at that point.
    """
    area = roc_auc_score(labels, scores)
    avg_prec = average_precision_score(labels, scores)
    false_pos, true_pos, cutoffs = roc_curve(labels, scores)
    false_neg = 1 - true_pos
    cross = int(np.nanargmin(np.abs(false_neg - false_pos)))
    equal_err = float((false_pos[cross] + false_neg[cross]) / 2.0)
    return area, equal_err, avg_prec, float(cutoffs[cross])

# cache per (norm, tta, preproc) — only 4 combos here
# Frames are scored once per combination; the search loop that follows only
# re-filters and re-aggregates these cached per-frame DataFrames.
cache = {}
for norm in TRY_NORM:
    for tta in TRY_TTA:
        for pre in TRY_PREPROC:
            cache[(norm, tta, pre)] = score_frames(norm, tta, pre)

# small, strong search over filters + aggs
# Keeps the candidate with the highest per-video AUC (ties broken by lower EER).
# Every branch checks `best is None` itself, so any subset of AGGS can be enabled
# without relying on the median branch having run first.
# NOTE(review): the winner (and the score flip below) is chosen using the same
# labels it is then reported on, so the printed metrics are optimistic.
best = None  # (AUC, EER, AP, thr, desc, per_video_df)
for (norm, tta, pre), df in cache.items():
    # auto-orient: flip scores if it improves per-video average AUC
    avg_df = df.groupby(["video_name","true_label"])["prob_fake"].mean().reset_index()
    y_avg = (avg_df["true_label"]=="fake").astype(int).values
    s_avg = avg_df["prob_fake"].values
    flip_needed = roc_auc_score(y_avg, 1 - s_avg) > roc_auc_score(y_avg, s_avg)
    if flip_needed:
        df_use = df.copy(); df_use["prob_fake"] = 1 - df_use["prob_fake"]
    else:
        df_use = df

    for filt in TRY_CONF_FILT:
        if filt > 0:
            # drop low-confidence frames; videos left with no frames fall back to unfiltered
            df_f = df_use[np.abs(df_use["prob_fake"] - 0.5) >= filt].copy()
            missing = set(df_use["video_name"].unique()) - set(df_f["video_name"].unique())
            if missing:
                df_f = pd.concat([df_f, df_use[df_use["video_name"].isin(missing)]], ignore_index=True)
        else:
            df_f = df_use

        grouped = df_f.groupby(["video_name","true_label"])["prob_fake"]

        # median
        if "median" in AGGS:
            med = grouped.median().reset_index()
            y, s = (med["true_label"]=="fake").astype(int).values, med["prob_fake"].values
            auc, eer, ap, thr = video_metrics(s, y)
            cand = (auc, eer, ap, thr, f"{norm}|{tta}|flip={flip_needed}|median|f={filt}", med)
            best = cand if (best is None or auc > best[0] or (auc==best[0] and eer < best[1])) else best

        # perc80
        if "perc80" in AGGS:
            perc = grouped.quantile(0.8).reset_index()
            y, s = (perc["true_label"]=="fake").astype(int).values, perc["prob_fake"].values
            auc_p, eer_p, ap_p, thr_p = video_metrics(s, y)
            cand = (auc_p, eer_p, ap_p, thr_p, f"{norm}|{tta}|flip={flip_needed}|perc80|f={filt}", perc)
            best = cand if (best is None or auc_p > best[0] or (auc_p==best[0] and eer_p < best[1])) else best

        # top10 mean (rank 1 = highest prob_fake within each video)
        if "top10" in AGGS:
            tmp = df_f.copy(); tmp["rank"] = tmp.groupby("video_name")["prob_fake"].rank(ascending=False, method="first")
            topk = tmp[tmp["rank"] <= 10].groupby(["video_name","true_label"])["prob_fake"].mean().reset_index()
            if len(topk):
                y, s = (topk["true_label"]=="fake").astype(int).values, topk["prob_fake"].values
                auc_k, eer_k, ap_k, thr_k = video_metrics(s, y)
                cand = (auc_k, eer_k, ap_k, thr_k, f"{norm}|{tta}|flip={flip_needed}|top10|f={filt}", topk)
                best = cand if (best is None or auc_k > best[0] or (auc_k==best[0] and eer_k < best[1])) else best

        # trimmed mean 10%
        if "trim10" in AGGS:
            tdf = grouped.apply(lambda v: float(np.mean(np.sort(v.values)[int(0.1*len(v)): max(int(len(v)-0.1*len(v)),1)])) if len(v)>0 else np.nan).reset_index(name="prob_fake").dropna()
            if len(tdf):
                y, s = (tdf["true_label"]=="fake").astype(int).values, tdf["prob_fake"].values
                auc_t, eer_t, ap_t, thr_t = video_metrics(s, y)
                cand = (auc_t, eer_t, ap_t, thr_t, f"{norm}|{tta}|flip={flip_needed}|trim10|f={filt}", tdf)
                best = cand if (best is None or auc_t > best[0] or (auc_t==best[0] and eer_t < best[1])) else best

# Print ONLY metrics
# `best` is the 6-tuple (AUC, EER, AP, thr, desc, per_video_df) chosen by the search loop;
# only the first three values are printed.
best_auc, best_eer, best_ap, best_thr, best_desc, best_df = best
print(f"AUC={best_auc:.4f} | EER={best_eer:.4f} | AP={best_ap:.4f}")


AUC=0.7536 | EER=0.3431 | AP=0.7700


In [None]:
# ================= CNN-Aug (EfficientNet-B4) — FULL PER-VIDEO TABLE =================
# Mount Drive, load model/weights, score frames, build table, print ALL rows, save CSV.

# --- Config (edit paths if needed) ---
REAL_FRAMES_DIR = "/content/drive/My Drive/balanced_frames/real"
FAKE_FRAMES_DIR = "/content/drive/My Drive/balanced_frames/fake"
WEIGHTS_PATH    = "/content/drive/My Drive/DeepfakeBench_weights/effnb4_best.pth"

# Labels and output location for the table
DATASET_NAME  = "balanced_ffpp"
DETECTOR_NAME = "CNN-Aug (EffB4)"
SAVE_CSV_PATH = "/content/drive/My Drive/cnn_aug_per_video_table.csv"

# Inference knobs
IMG_SIZE    = 380
BATCH_SIZE  = 32
NUM_WORKERS = 0            # 0 is most stable in Colab
USE_TTA     = True         # average original + hflip
RESIZE_MODE = "short_center"   # "short_center" (preserve aspect) or "stretch"
USE_IMAGENET_NORM = False      # "no_norm" often worked better for your EffNet

# -----------------------------------------------------------------------------------
import os, glob, re, numpy as np, pandas as pd
from PIL import Image
import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.transforms import functional as TF
from sklearn.metrics import roc_curve, roc_auc_score, average_precision_score

# Mount Drive (ok to mount again)
from google.colab import drive
drive.mount('/content/drive', force_remount=False)

# Ensure package
try:
    from efficientnet_pytorch import EfficientNet
except Exception:
    import sys, subprocess
    subprocess.run([sys.executable,"-m","pip","install","-q","efficientnet-pytorch==0.7.1"], check=True)
    from efficientnet_pytorch import EfficientNet

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Enable cuDNN's benchmark mode (presumably to speed up the fixed-size
# IMG_SIZE x IMG_SIZE batches used below; confirm it helps on your runtime).
torch.backends.cudnn.benchmark = True

# --- Model (EfficientNet-B4 with 2-class head) ---
class CNN_AUG_EffB4(nn.Module):
    """EfficientNet-B4 trunk followed by a 2-way linear classifier.

    The submodules are stored under ``backbone.efficientnet`` and
    ``backbone.last_layer``. Those attribute paths become the parameter names
    in the state dict, so they must not be renamed if checkpoints are to load.
    """

    def __init__(self):
        super().__init__()
        container = nn.Module()
        container.efficientnet = EfficientNet.from_name('efficientnet-b4')
        # Replace the stock classifier with a pass-through so the trunk hands
        # its features to the custom head below.
        container.efficientnet._fc = nn.Identity()
        container.last_layer = nn.Linear(1792, 2)  # [real, fake]
        self.backbone = container

    def forward(self, x):
        """Return the raw 2-class scores (no softmax applied) for batch ``x``."""
        features = self.backbone.efficientnet(x)
        return self.backbone.last_layer(features)

# --- Load weights (fail loudly rather than silently scoring with a random-init network) ---
import warnings

assert os.path.isfile(WEIGHTS_PATH), f"Weights not found: {WEIGHTS_PATH}"
assert os.path.isdir(REAL_FRAMES_DIR) and os.path.isdir(FAKE_FRAMES_DIR), "Check frame folders."

model = CNN_AUG_EffB4().to(device)
# NOTE: torch.load unpickles the file; only point WEIGHTS_PATH at checkpoints you trust.
state = torch.load(WEIGHTS_PATH, map_location="cpu")

# Some checkpoints wrap the tensors in a container such as {"state_dict": {...}}.
# Unwrap it, otherwise no key can match and strict=False would hide that fact.
if isinstance(state, dict):
    for wrapper_key in ("state_dict", "model_state_dict", "model"):
        if isinstance(state.get(wrapper_key), dict):
            state = state[wrapper_key]
            break

# Checkpoints saved from nn.DataParallel prefix every key with "module."; strip it.
if isinstance(state, dict) and all(isinstance(k,str) for k in state.keys()):
    if all(k.startswith("module.") for k in state.keys()):
        state = {k.replace("module.","",1): v for k,v in state.items()}

# strict=False tolerates partial mismatches, so inspect the report it returns
# instead of discarding it.
load_report = model.load_state_dict(state, strict=False)
missing_keys = list(load_report.missing_keys)
unexpected_keys = list(load_report.unexpected_keys)
if len(missing_keys) == len(model.state_dict()):
    raise RuntimeError(
        f"No parameter in {WEIGHTS_PATH} matched the model; "
        "refusing to score frames with randomly initialised weights."
    )
if missing_keys or unexpected_keys:
    warnings.warn(
        f"Checkpoint only partially matched the model: "
        f"{len(missing_keys)} missing key(s) (e.g. {missing_keys[:3]}), "
        f"{len(unexpected_keys)} unexpected key(s) (e.g. {unexpected_keys[:3]})."
    )
model.eval()
softmax = torch.nn.Softmax(dim=1)

# --- Data / transforms ---
IMG_EXTS = (".jpg",".jpeg",".png",".bmp",".webp")

def is_img(p):
    """Return True when path ``p`` ends with one of IMG_EXTS (case-insensitive)."""
    lowered = p.lower()
    return any(lowered.endswith(ext) for ext in IMG_EXTS)

def infer_video_name(path):
    """Derive the source-video name from a frame file path.

    The file stem is taken without directory or extension. A trailing
    ``_frame<digits>`` tag is removed when something precedes it; otherwise a
    trailing ``_<digits>`` or ``-<digits>`` suffix is removed. If stripping
    would leave nothing, the untouched stem is returned.
    """
    stem, _ext = os.path.splitext(os.path.basename(path))
    frame_tag = re.search(r"_frame\d+$", stem)
    if frame_tag is not None and frame_tag.start() > 0:
        return stem[:frame_tag.start()]
    stripped = re.sub(r"[_\-]\d+$", "", stem)
    return stripped or stem

def build_transform():
    """Compose the frame preprocessing pipeline from the module-level knobs.

    RESIZE_MODE "short_center" applies Resize(IMG_SIZE) followed by
    CenterCrop(IMG_SIZE); any other value resizes straight to
    (IMG_SIZE, IMG_SIZE). ToTensor always follows, and ImageNet mean/std
    normalisation is appended only when USE_IMAGENET_NORM is truthy.
    """
    if RESIZE_MODE == "short_center":
        steps = [transforms.Resize(IMG_SIZE), transforms.CenterCrop(IMG_SIZE)]
    else:  # "stretch"
        steps = [transforms.Resize((IMG_SIZE, IMG_SIZE))]
    steps.append(transforms.ToTensor())
    if USE_IMAGENET_NORM:
        steps.append(transforms.Normalize([0.485,0.456,0.406],
                                          [0.229,0.224,0.225]))
    return transforms.Compose(steps)

class FrameDataset(Dataset):
    """Dataset over the image files found directly inside labelled folders.

    Parameters
    ----------
    folders_labels : iterable of (folder, label) pairs. Every image directly
        inside ``folder`` (non-recursive, filtered by ``is_img``) gets ``label``.
    transform : callable applied to each RGB PIL image in ``__getitem__``.

    Raises
    ------
    AssertionError if no image is found in any folder.
    """
    def __init__(self, folders_labels, transform):
        files, labels = [], []
        for folder, lbl in folders_labels:
            # Escape the folder so characters such as "[" or "*" in its name are
            # matched literally instead of being read as glob wildcards.
            pattern = os.path.join(glob.escape(folder), "*")
            found = sorted(p for p in glob.glob(pattern) if is_img(p))
            files.extend(found)
            labels.extend([lbl]*len(found))
        self.files = files; self.labels = labels; self.t = transform
        assert len(self.files) > 0, "No images found. Check your frame folders."

    def __len__(self):
        """Number of frames across all folders."""
        return len(self.files)

    def __getitem__(self, i):
        """Return (transformed image, label, file path, inferred video name) for frame ``i``."""
        p = self.files[i]
        # Close the underlying file as soon as the pixels have been copied into
        # the RGB image, rather than leaving the handle to the garbage collector.
        with Image.open(p) as raw:
            img = raw.convert("RGB")
        return self.t(img), self.labels[i], p, infer_video_name(p)

# Real frames are labelled 0 and fake frames 1; order is kept fixed (no shuffling)
# so scores, labels, paths and video names stay aligned.
tform = build_transform()
ds = FrameDataset([(REAL_FRAMES_DIR,0),(FAKE_FRAMES_DIR,1)], tform)
loader = DataLoader(
    ds,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=torch.cuda.is_available(),
)

# --- Batched inference ---
probs, labels, paths, vnames = [], [], [], []
with torch.no_grad():
    for xb, yb, pb, vb in loader:
        xb = xb.to(device, non_blocking=True)
        if USE_TTA:
            # Test-time augmentation: mean of the scores for the frame and its mirror image.
            logits = (model(xb) + model(TF.hflip(xb))) / 2
        else:
            logits = model(xb)
        # Column 1 of the softmax output is the "fake" class probability.
        p_fake = softmax(logits)[:,1].detach().cpu().numpy()
        probs.append(p_fake)
        labels.append(yb.numpy())
        paths.extend(pb)
        vnames.extend(vb)

# Collapse the per-batch arrays into one flat array per quantity.
probs = np.concatenate(probs)
labels = np.concatenate(labels)

# --- Frame-level table: one row per scored frame ---
frame_columns = {
    "video_name": vnames,
    "true_label": np.where(labels==1, "fake", "real"),
    "prob_fake": probs,
    "frame_path": paths
}
df = pd.DataFrame(frame_columns)

# --- Auto-orient scores (flip if it improves per-video avg AUC) ---
# NOTE: the orientation is chosen with the ground-truth labels of the same
# videos that are evaluated afterwards, so downstream metrics are tuned on
# the evaluation data.
vid_avg_tmp = df.groupby(["video_name","true_label"])["prob_fake"].mean().reset_index()
y_tmp = (vid_avg_tmp["true_label"]=="fake").astype(int).values
s_tmp = vid_avg_tmp["prob_fake"].values
auc_as_is = roc_auc_score(y_tmp, s_tmp)
auc_flipped = roc_auc_score(y_tmp, 1 - s_tmp)
if auc_flipped > auc_as_is:
    df["prob_fake"] = 1 - df["prob_fake"]

# --- Thresholds ---
# Both thresholds are taken at the ROC point where |FNR - FPR| is smallest
# (the equal-error-rate operating point), computed on the scored data itself.

# 1) Per-video AVG threshold: EER on per-video averages
vid_avg = df.groupby(["video_name","true_label"])["prob_fake"].mean().reset_index()
y_v = vid_avg["true_label"].eq("fake").astype(int).values
s_v = vid_avg["prob_fake"].values
fpr_v, tpr_v, thr_v = roc_curve(y_v, s_v)
fnr_v = 1 - tpr_v
i_v = int(np.nanargmin(np.abs(fnr_v - fpr_v)))
thr_avg = float(thr_v[i_v])

# 2) Frame threshold for majority vote: EER on all frame scores
y_f = df["true_label"].eq("fake").astype(int).values
s_f = df["prob_fake"].values
fpr_f, tpr_f, thr_f = roc_curve(y_f, s_f)
fnr_f = 1 - tpr_f
i_f = int(np.nanargmin(np.abs(fnr_f - fpr_f)))
thr_frame = float(thr_f[i_f])

# --- Frame-level predictions for counts (majority threshold) ---
frame_is_fake = df["prob_fake"] >= thr_frame
df["frame_pred"]    = np.where(frame_is_fake, "fake", "real")
df["frame_correct"] = (df["frame_pred"] == df["true_label"]).astype(int)

# --- Summarize per video (your requested columns) ---
def summarize_video(group):
    """Collapse the frame rows of one video into a single summary record.

    ``group`` must carry the columns video_name, true_label, prob_fake,
    frame_pred and frame_correct. Two video-level decisions are produced:
    one compares the mean fake probability with ``thr_avg``, the other takes
    the majority of the per-frame predictions and falls back to the
    average-based decision on an exact 50/50 split.

    Returns a pd.Series holding the per-video table columns.
    """
    n = len(group)
    n_correct = int(group["frame_correct"].sum())
    n_wrong   = int(n - n_correct)

    if n > 0:
        acc = n_correct / n
        avg = float(group["prob_fake"].mean())
    else:
        acc = np.nan
        avg = np.nan
    # Population std (ddof=0); a single frame has no spread.
    std = float(group["prob_fake"].std(ddof=0)) if n>1 else 0.0

    truth = group["true_label"].iloc[0]

    # AVG-based decision at thr_avg
    pred_avg = "fake" if avg >= thr_avg else "real"
    correct_avg = int(pred_avg == truth)

    # Majority decision at thr_frame (frame_pred was thresholded upstream)
    fake_share = (group["frame_pred"] == "fake").mean()
    if fake_share == 0.5:
        pred_maj = pred_avg          # tie: defer to the average-based call
    elif fake_share > 0.5:
        pred_maj = "fake"
    else:
        pred_maj = "real"
    correct_maj = int(pred_maj == truth)

    record = {
        "dataset": DATASET_NAME,
        "detector": DETECTOR_NAME,
        "video_name": group["video_name"].iloc[0],
        "true_label": truth,
        "n_frames": n,
        "n_correct_frames": n_correct,
        "n_wrong_frames": n_wrong,
        "frame_accuracy": round(acc, 4),
        "avg_prob_fake": round(avg, 4),
        "std_prob_fake": round(std, 4),
        "video_pred_by_avg": pred_avg,
        "video_correct_by_avg": correct_avg,
        "video_pred_by_majority": pred_maj,
        "video_correct_by_majority": correct_maj
    }
    return pd.Series(record)

# Build one summary row per (video_name, true_label) group.
# The groups are iterated explicitly instead of going through groupby.apply:
# summarize_video reads the grouping columns from each group frame, which
# groupby.apply only passes along through deprecated behaviour (it emits a
# DeprecationWarning and breaks once pandas stops including those columns).
# Iteration follows the same sorted key order, so the rows come out identically.
video_groups = df.groupby(["video_name","true_label"])
per_video = pd.DataFrame([summarize_video(g).to_dict() for _, g in video_groups])

# --- Print ALL rows only (no extra chatter) ---
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)
print(per_video.sort_values(["true_label","video_name"]).to_string(index=False))

# --- Save to Drive ---
per_video.to_csv(SAVE_CSV_PATH, index=False)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
      dataset        detector                            video_name true_label  n_frames  n_correct_frames  n_wrong_frames  frame_accuracy  avg_prob_fake  std_prob_fake video_pred_by_avg  video_correct_by_avg video_pred_by_majority  video_correct_by_majority
balanced_ffpp CNN-Aug (EffB4)                               000_003       fake        20                20               0            1.00         0.4766         0.0710              fake                     1                   fake                          1
balanced_ffpp CNN-Aug (EffB4)                               010_005       fake        20                19               1            0.95         0.5441         0.1119              fake                     1                   fake                          1
balanced_ffpp CNN-Aug (EffB4)                               011_805       fake        20      

  per_video = df.groupby(["video_name","true_label"], as_index=False).apply(summarize_video).reset_index(drop=True)


In [None]:
# Save your per-video table to Google Drive
import os, time
from google.colab import drive

# Make sure Drive is mounted (safe to call again)
drive.mount('/content/drive', force_remount=False)

# Ensure the table exists
# (per_video is created by the per-video table cell; this cell only re-saves it.)
assert 'per_video' in globals(), "Run the previous cell first to create the 'per_video' DataFrame."

# Choose where/what to save
DRIVE_DIR = "/content/drive/My Drive/deepfake_results"
# Create the results folder on first use; do nothing if it is already there.
os.makedirs(DRIVE_DIR, exist_ok=True)

FILENAME = "cnn_aug_per_video_table.csv"
# Or timestamped:
# FILENAME = f"cnn_aug_per_video_table_{time.strftime('%Y%m%d-%H%M%S')}.csv"

# The fixed FILENAME above means every run writes to the same path.
SAVE_PATH = os.path.join(DRIVE_DIR, FILENAME)
per_video.to_csv(SAVE_PATH, index=False)
print("Saved to:", SAVE_PATH)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Saved to: /content/drive/My Drive/deepfake_results/cnn_aug_per_video_table.csv


In [None]:
# === Compact per-video table: dataset, detector, video_name, true_label, correctly_predicted (yes/no) ===
import numpy as np, pandas as pd
from sklearn.metrics import roc_curve

# Work on a copy of the per_video table produced by the earlier cell
assert 'per_video' in globals(), "Run the CNN-Aug per-video table cell first to create 'per_video'."

pv = per_video.copy()

# Fallback: when the average-based verdict columns are absent, rebuild them by
# thresholding avg_prob_fake at the ROC point where |FNR - FPR| is smallest.
required_cols = {"video_correct_by_avg", "video_pred_by_avg"}
if not required_cols.issubset(pv.columns):
    y = (pv["true_label"] == "fake").astype(int).values
    s = pv["avg_prob_fake"].values
    fpr, tpr, thr = roc_curve(y, s)
    fnr = 1 - tpr
    idx = int(np.nanargmin(np.abs(fnr - fpr)))
    thr_use = float(thr[idx])
    pv["video_pred_by_avg"] = np.where(pv["avg_prob_fake"] >= thr_use, "fake", "real")
    pv["video_correct_by_avg"] = (pv["video_pred_by_avg"] == pv["true_label"]).astype(int)

# Build compact table (constant defaults stand in for missing dataset/detector columns)
if "dataset" in pv.columns:
    dataset_col = pv["dataset"]
else:
    dataset_col = "balanced_ffpp"
if "detector" in pv.columns:
    detector_col = pv["detector"]
else:
    detector_col = "CNN-Aug (EffB4)"
verdict_col = pv["video_correct_by_avg"].map({1: "yes", 0: "no"})

cols = {
    "dataset": dataset_col,
    "detector": detector_col,
    "video_name": pv["video_name"],
    "true_label": pv["true_label"],
    "correctly_predicted": verdict_col
}
out = pd.DataFrame(cols)

# Print ONLY the table (all rows)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)
out_sorted = out.sort_values(["true_label","video_name"])
print(out_sorted.to_string(index=False))


      dataset        detector                            video_name true_label correctly_predicted
balanced_ffpp CNN-Aug (EffB4)                               000_003       fake                 yes
balanced_ffpp CNN-Aug (EffB4)                               010_005       fake                 yes
balanced_ffpp CNN-Aug (EffB4)                               011_805       fake                 yes
balanced_ffpp CNN-Aug (EffB4)                               012_026       fake                 yes
balanced_ffpp CNN-Aug (EffB4)                               013_883       fake                 yes
balanced_ffpp CNN-Aug (EffB4)                               014_790       fake                  no
balanced_ffpp CNN-Aug (EffB4)                               015_919       fake                 yes
balanced_ffpp CNN-Aug (EffB4)                               016_209       fake                  no
balanced_ffpp CNN-Aug (EffB4)                               017_803       fake                 yes
balanced_f

In [None]:
# Save the compact table (DataFrame `out`) to Google Drive
import os
from google.colab import drive

# Ensure Drive is mounted and the table exists
drive.mount('/content/drive', force_remount=False)
# `out` is the compact yes/no table built by the preceding cell.
assert 'out' in globals(), "Run the previous cell to create the compact table 'out'."

SAVE_DIR = "/content/drive/My Drive/CNN Aug results"
# Create the destination folder if needed; an existing folder is left untouched.
os.makedirs(SAVE_DIR, exist_ok=True)
FILENAME = "cnn_aug_prediction_compact.csv"  # change name if you like

# Write the table without the DataFrame index and report where it went.
path = os.path.join(SAVE_DIR, FILENAME)
out.to_csv(path, index=False)
print("Saved to:", path)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Saved to: /content/drive/My Drive/CNN Aug results/cnn_aug_prediction_compact.csv
