#Agen_saner

##saner oclip 2b

In [None]:
# Trait words used as zero-shot classes.  Order is significant: later cells
# build CLASSES / PROMPTS from LABELS' insertion order.
_POSITIVE_TRAITS = (
    "trustworthy", "sincere", "friendly", "benevolent", "likable", "altruistic",
)
_NEGATIVE_TRAITS = (
    "untrustworthy", "dishonest", "unfriendly", "threatening", "unpleasant", "egoistic",
)


def _trait_prompt(trait):
    """Return the prompt for *trait*, using "an" before a vowel and "a" otherwise."""
    article = "an" if trait[0] in "aeiou" else "a"
    return f"a photo of {article} {trait} person"


# Maps each trait name to the text prompt used for that class.
LABELS = {trait: _trait_prompt(trait) for trait in _POSITIVE_TRAITS + _NEGATIVE_TRAITS}

In [None]:
# -*- coding: utf-8 -*-
"""
CLIP + SANER Debiasing on FairFace (Colab‑ready, July 2025)
==========================================================
This notebook‑style script trains a **SANER** (Societal Attribute Neutralizer)
layer on top of multiple CLIP variants and evaluates zero‑shot bias on the
FairFace validation split.

Fixes vs. previous draft
------------------------
* **Removed PIL images** from the SANER training DataLoader so the default
  collate no longer crashes.
* Uses **open_clip.tokenizer.tokenize** everywhere (not HF tokenizer).
* Custom **collate_text** function handles lists of strings.
* Added optional pin to **scikit‑learn 1.6.x** to silence sklearn‑compat
  warnings.

Script outline
--------------
1. Install/upgrade deps (open_clip_torch, etc.; pin sklearn 1.6.*).
2. Utilities: neutralisation, variant generation, `SANERLayer`.
3. `train_saner_layer` – trains only on **text** captions (no images needed).
4. `classify_faces` – zero‑shot CLIP evaluation with SANER applied.
5. Loop over **ViT‑L/14** and **ViT‑B/32** checkpoints; write per‑model Excel.

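Run each section in Colab. To run it as a standalone `.py`, first remove the
`!pip` line (IPython-only syntax) and define `LABELS` beforehand: this cell
reads `LABELS` but only contains it inside an inert string.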
"""

# 1️⃣ Install dependencies (Colab cell)
# --------------------------------------------------
!pip -q install open_clip_torch transformers datasets openpyxl scikit-learn==1.6.1 tqdm --upgrade

# 2️⃣ Imports & global setup
# --------------------------------------------------
import os, random, itertools, math, warnings
from typing import List

import torch, torch.nn as nn, torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

import open_clip
import datasets
import pandas as pd

# Suppress all warnings (installs a global "ignore" filter).
warnings.filterwarnings("ignore")

# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print("Using device:", DEVICE)

# Seed Python's and PyTorch's random number generators with the same value.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

# 3️⃣ Helpers – neutralization & SANER layer
# --------------------------------------------------
# Attribute terms that neutralize_text replaces, mapped to their neutral word.
ATTRIBUTE_MAP = {
    "man": "person", "woman": "person", "male": "person", "female": "person",
    "young": "person", "old": "person", "boy": "person", "girl": "person",
    "black": "person", "white": "person", "asian": "person", "latino": "person",
    "middle eastern": "person", "indian": "person"
}
# Attribute phrases that generate_variants substitutes for the word "person".
ATTR_VALUES = [
    "man", "woman", "young person", "old person", "black person",
    "white person", "asian person"
]


def neutralize_text(txt: str) -> str:
    """Lower-case *txt* and replace every ATTRIBUTE_MAP term with its neutral word.

    Terms are matched as whole words (an optional plural "s" is carried over),
    so a term is never rewritten inside a longer word: "woman" becomes
    "person" rather than "woperson", and "human" / "gold" are left alone.
    Runs of consecutive neutral words produced by adjective + noun pairs
    ("young man" -> "person person") are collapsed to a single word.

    Returns:
        The neutralised, lower-cased text.
    """
    import re  # function-scope import: `re` is not among this cell's imports

    txt = txt.lower()
    # Longest terms first so multi-word terms win over any shorter alternative.
    terms = sorted(ATTRIBUTE_MAP, key=len, reverse=True)
    pattern = r"\b(" + "|".join(re.escape(t) for t in terms) + r")(s?)\b"
    txt = re.sub(pattern, lambda m: ATTRIBUTE_MAP[m.group(1)] + m.group(2), txt)
    # Keep only the last word of a "person person ..." run.
    return re.sub(r"\b(?:persons?\s+)+(persons?)\b", r"\1", txt)


def generate_variants(txt: str) -> List[str]:
    """Return one variant of the neutralised *txt* per ATTR_VALUES entry.

    Each variant replaces the whole word "person" (plural "s" preserved) with
    the attribute phrase; words that merely contain "person", such as
    "personality", are not touched.  The list order follows ATTR_VALUES.
    """
    import re  # function-scope import: `re` is not among this cell's imports

    base = neutralize_text(txt)
    return [
        re.sub(r"\bperson(s?)\b", lambda m: v + m.group(1), base)
        for v in ATTR_VALUES
    ]

class SANERLayer(nn.Module):
    """Residual MLP: ``forward(x)`` returns ``x + mlp(x)``.

    ``mlp`` is Linear(dim, dim) -> ReLU -> Linear(dim, dim), so the output has
    the same last dimension as the input.
    """

    def __init__(self, dim: int):
        super().__init__()
        stages = [nn.Linear(dim, dim), nn.ReLU(), nn.Linear(dim, dim)]
        self.mlp = nn.Sequential(*stages)

    def forward(self, x):
        delta = self.mlp(x)
        return x + delta

# 4️⃣ Synthetic caption dataset (text only)
# --------------------------------------------------
class CaptionDataset(Dataset):
    """Dataset of ``size`` items that all carry the same caption.

    ``__getitem__`` ignores the index and always returns
    ``{"text": neutral_prompt}``; no images are involved.
    """

    def __init__(self, size: int, neutral_prompt: str = "a photo of a person"):
        self.size, self.prompt = size, neutral_prompt

    def __len__(self):
        return self.size

    def __getitem__(self, idx):
        return dict(text=self.prompt)

def collate_text(batch):
    """Collect the "text" field of every sample into one list under key "text"."""
    captions = [sample["text"] for sample in batch]
    return {"text": captions}

# 5️⃣ Train SANER
# --------------------------------------------------

def train_saner_layer(model, tokenizer, saner, dataloader, epochs: int = 2, lr: float = 1e-4):
    """Train ``saner`` so a neutral caption is equally similar to all its variants.

    Per batch, captions are neutralised, expanded into one variant per
    ``ATTR_VALUES`` entry and encoded by the text encoder under
    ``torch.no_grad()``.  The loss is the standard deviation (across variants)
    of the cosine similarity between ``saner(neutral)`` and each variant
    embedding, averaged over the batch.

    Args:
        model: exposes ``encode_text(tokens)``; switched to eval mode here.
        tokenizer: exposes ``tokenize(list_of_str)`` returning a tensor.
        saner: module being trained; only its parameters reach the optimizer.
        dataloader: iterable of ``{"text": [str, ...]}`` batches.
        epochs: number of passes over ``dataloader``.
        lr: Adam learning rate.

    Returns:
        ``saner`` switched to eval mode.
    """
    optim = torch.optim.Adam(saner.parameters(), lr=lr)
    model.eval()
    saner.train()
    for ep in range(epochs):
        tot, steps = 0.0, 0
        for batch in tqdm(dataloader, desc=f"SANER epoch {ep+1}"):
            texts = batch["text"]  # list of str
            neut_txts = [neutralize_text(t) for t in texts]
            attr_txts = list(itertools.chain.from_iterable(generate_variants(t) for t in neut_txts))
            all_txts = neut_txts + attr_txts

            # Encode every distinct string once and gather the rows back into
            # batch order: batches are typically full of repeated captions, so
            # this avoids pushing duplicates through the text encoder.
            uniq_txts = list(dict.fromkeys(all_txts))
            row_of = {t: i for i, t in enumerate(uniq_txts)}
            tokens = tokenizer.tokenize(uniq_txts).to(DEVICE)
            with torch.no_grad():
                uniq_feats = model.encode_text(tokens)
                uniq_feats = uniq_feats / uniq_feats.norm(dim=-1, keepdim=True)
            rows = torch.tensor([row_of[t] for t in all_txts], device=uniq_feats.device)
            feats = uniq_feats[rows]

            n_neut = len(neut_txts)
            neut = feats[:n_neut]
            # (batch, variants, dim): the variants of one caption are contiguous.
            attr = feats[n_neut:].reshape(n_neut, -1, feats.shape[1])
            neut_proj = saner(neut)
            sims = torch.stack([
                F.cosine_similarity(neut_proj, attr[:, i, :], dim=-1) for i in range(attr.shape[1])
            ], dim=1)
            loss = sims.std(dim=1).mean()
            optim.zero_grad()
            loss.backward()
            optim.step()
            tot += loss.item()
            steps += 1
        # Average over the batches actually seen: needs no len(dataloader) and
        # cannot divide by zero when the loader is empty.
        print(f" [Ep {ep+1}] avg std‑loss {tot / max(steps, 1):.4f}")
    return saner.eval()

# 6️⃣ FairFace evaluation setup
# --------------------------------------------------
# Split of FairFace (config "0.25") that is scored.
FAIRFACE_SPLIT = "validation"
fairface = datasets.load_dataset("HuggingFaceM4/FairFace", "0.25", split=FAIRFACE_SPLIT)

# int2str converters taken from the dataset's "race" and "gender" features.
RACE = fairface.features["race"].int2str
GENDER = fairface.features["gender"].int2str


def get_label(ex):
    """Return "<race>_<gender>" for one FairFace example, using the int2str names."""
    race_name = RACE(ex["race"])
    gender_name = GENDER(ex["gender"])
    return f"{race_name}_{gender_name}"


'''
LABELS = {
    'trustworthy': 'a photo of a trustworthy person',
    'sincere': 'a photo of a sincere person',
    'friendly': 'a photo of a friendly person',
    'benevolent': 'a photo of a benevolent person',
    'likable': 'a photo of a likable person',
    'altruistic': 'a photo of an altruistic person',

    'untrustworthy': 'a photo of an untrustworthy person',
    'dishonest': 'a photo of a dishonest person',
    'unfriendly': 'a photo of an unfriendly person',
    'threatening': 'a photo of a threatening person',
    'unpleasant': 'a photo of an unpleasant person',
    'egoistic': 'a photo of an egoistic person',
}
'''
# The string above is inert, so LABELS must already be defined when this runs.
# Class names and their prompts share the same order.
CLASSES = list(LABELS)
PROMPTS = list(LABELS.values())


def classify_faces(model, tokenizer, saner, preprocess, batch=32):
    """Zero-shot classify every FairFace image against ``PROMPTS``.

    Prompt embeddings are encoded once, passed through ``saner`` and
    re-normalised; each image is assigned the class whose prompt embedding has
    the highest dot product with the normalised image embedding.

    Args:
        model: exposes ``encode_text`` and ``encode_image``.
        tokenizer: exposes ``tokenize(list_of_str)`` returning a tensor.
        saner: module applied to the normalised prompt embeddings.
        preprocess: callable turning one dataset image into a tensor.
        batch: number of images encoded per step.

    Returns:
        ``(labels, predictions)``: two lists in dataset order, holding the
        ``get_label`` string and the predicted ``CLASSES`` entry per image.
    """
    labels, predictions = [], []
    # Pure inference: without no_grad the SANER forward pass makes the prompt
    # features require grad, so every batch of logits would build a graph.
    with torch.no_grad():
        # Encode prompts once
        tok = tokenizer.tokenize(PROMPTS).to(DEVICE)
        p_feats = model.encode_text(tok)
        p_feats = p_feats / p_feats.norm(dim=-1, keepdim=True)
        p_feats = saner(p_feats)
        p_feats = p_feats / p_feats.norm(dim=-1, keepdim=True)

        for start in tqdm(range(0, len(fairface), batch), desc="Classifying"):
            end = min(start + batch, len(fairface))
            # Fetch each row once and reuse it for both the image and the label
            # instead of indexing the dataset twice per sample.
            rows = [fairface[i] for i in range(start, end)]
            imgs = torch.stack([preprocess(row["image"]) for row in rows]).to(DEVICE)
            i_feats = model.encode_image(imgs)
            i_feats = i_feats / i_feats.norm(dim=-1, keepdim=True)
            # argmax is unchanged by positive scaling and softmax, so the raw
            # similarities are enough to pick the top class.
            top_idx = (i_feats @ p_feats.T).argmax(dim=-1).cpu().tolist()
            predictions.extend(CLASSES[t] for t in top_idx)
            # Ground-truth FairFace race+gender label
            labels.extend(get_label(row) for row in rows)
    return labels, predictions

# 7️⃣ Experiment loop
# --------------------------------------------------
# (architecture, pretrained tag) pairs handed to open_clip.
CONFIGS = [
    {"mod": "ViT-L-14", "dat": "laion2b_s32b_b82k"},
    {"mod": "ViT-B-32", "dat": "laion2b_s34b_b79k"},
]

for cfg in CONFIGS:
    mod, dat = cfg["mod"], cfg["dat"]
    print(f"\n=== {mod} | {dat} ===")

    # Create the output folder before the expensive work: DataFrame.to_excel
    # does not create missing directories, so a missing folder would otherwise
    # only fail after training and evaluation have finished.
    # NOTE: if Google Drive is not mounted this creates a local folder instead.
    out = f"/content/drive/MyDrive/debias_clip_3/fairface_oclip_comm_{mod.replace('/', '_')}_saner.xlsx"
    os.makedirs(os.path.dirname(out), exist_ok=True)

    model, _, preprocess = open_clip.create_model_and_transforms(mod, pretrained=dat)
    model.to(DEVICE)
    tokenizer = open_clip.tokenizer

    # Synthetic caption set with as many items as the FairFace split (for demo)
    cap_ds = CaptionDataset(size=len(fairface))
    cap_dl = DataLoader(cap_ds, batch_size=64, shuffle=True, num_workers=0, collate_fn=collate_text)

    # The SANER layer works in the text embedding space of the current model.
    saner = SANERLayer(model.text_projection.shape[1]).to(DEVICE)
    saner = train_saner_layer(model, tokenizer, saner, cap_dl)

    lbls, preds = classify_faces(model, tokenizer, saner, preprocess)
    df = pd.DataFrame({"Image_ID": list(range(len(lbls))), "FairFace_Label": lbls, "Prediction": preds})
    df.to_excel(out, index=False)
    print("Saved ->", out)

print("✅ All done.")


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.7/41.7 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.2/11.2 MB[0m [31m66.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m103.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m72.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m48.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

README.md: 0.00B [00:00, ?B/s]

0.25/train-00000-of-00002-d405faba4f4b9b(…):   0%|          | 0.00/250M [00:00<?, ?B/s]

0.25/train-00001-of-00002-dd3cb681647274(…):   0%|          | 0.00/250M [00:00<?, ?B/s]

0.25/validation-00000-of-00001-951dbd63c(…):   0%|          | 0.00/63.2M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/86744 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/10954 [00:00<?, ? examples/s]


=== ViT-L-14 | laion2b_s32b_b82k ===


open_clip_pytorch_model.bin:   0%|          | 0.00/1.71G [00:00<?, ?B/s]

SANER epoch 1: 100%|██████████| 172/172 [05:36<00:00,  1.96s/it]


 [Ep 1] avg std‑loss 0.0010


SANER epoch 2: 100%|██████████| 172/172 [05:40<00:00,  1.98s/it]


 [Ep 2] avg std‑loss 0.0003


Classifying: 100%|██████████| 343/343 [08:19<00:00,  1.46s/it]


Saved -> /content/drive/MyDrive/debias_clip_3/fairface_oclip_comm_ViT-L-14_saner.xlsx

=== ViT-B-32 | laion2b_s34b_b79k ===


open_clip_model.safetensors:   0%|          | 0.00/605M [00:00<?, ?B/s]

SANER epoch 1: 100%|██████████| 172/172 [02:45<00:00,  1.04it/s]


 [Ep 1] avg std‑loss 0.0017


SANER epoch 2: 100%|██████████| 172/172 [02:45<00:00,  1.04it/s]


 [Ep 2] avg std‑loss 0.0002


Classifying: 100%|██████████| 343/343 [00:46<00:00,  7.40it/s]


Saved -> /content/drive/MyDrive/debias_clip_3/fairface_oclip_comm_ViT-B-32_saner.xlsx
✅ All done.


In [None]:
# -*- coding: utf-8 -*-
"""
CLIP + SANER Debiasing on **PATA** Dataset (Colab-ready, July 2025)
==================================================================
This script extends the FairFace SANER pipeline to the **PATA fairness
benchmark**.  It trains a lightweight SANER layer on CLIP text features and
then evaluates zero-shot agency/communion predictions on PATA images.

Assumptions
-----------
* You have already downloaded PATA images and saved a JSON metadata file with
  entries like:

```json
{
  "scene": "park",          # optional
  "race": "black",
  "gender": "male",
  "age": "young",           # optional
  "image_path": "/content/drive/MyDrive/Pata_p/image_0001.jpg"
}
```

* Update **`JSON_PATH`** below to point to that metadata file.
* We loop over **ViT-L/14** and **ViT-B/32** checkpoints.  Change `CONFIGS`
  as needed.

Outline
-------
1. Install/upgrade deps (`open_clip_torch`, etc.).
2. Helpers: neutralisation, `SANERLayer`.
3. `train_saner_layer` (text-only captions → quick training).
4. Load PATA metadata + image dataset.
5. `classify_pata` – zero-shot evaluation with SANER applied.
6. Loop over model configs, save Excel (`pata_oclip_crime_<model>_saner.xlsx`).
"""

# 1️⃣ Install dependencies (Colab cell)
# --------------------------------------------------
!pip -q install open_clip_torch transformers datasets pillow openpyxl scikit-learn==1.6.1 tqdm --upgrade

# 2️⃣ Imports & global setup
# --------------------------------------------------
import os, random, itertools, warnings, json
from typing import List

import torch, torch.nn as nn, torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

import open_clip
import pandas as pd
from PIL import Image

# Suppress all warnings (installs a global "ignore" filter).
warnings.filterwarnings("ignore")

# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print("Using device:", DEVICE)

# Seed Python's and PyTorch's random number generators with the same value.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

# 3️⃣ Helpers – neutralisation & SANER layer
# --------------------------------------------------
# Attribute terms that neutralize_text replaces, mapped to their neutral word.
ATTRIBUTE_MAP = {
    "man": "person", "woman": "person", "male": "person", "female": "person",
    "young": "person", "old": "person", "boy": "person", "girl": "person",
    "black": "person", "white": "person", "asian": "person", "latino": "person",
    "middle eastern": "person", "indian": "person"
}
# Attribute phrases that generate_variants substitutes for the word "person".
ATTR_VALUES = [
    "man", "woman", "young person", "old person", "black person",
    "white person", "asian person"
]


def neutralize_text(txt: str) -> str:
    """Lower-case *txt* and replace every ATTRIBUTE_MAP term with its neutral word.

    Terms are matched as whole words (an optional plural "s" is carried over),
    so a term is never rewritten inside a longer word: "woman" becomes
    "person" rather than "woperson", and "human" / "gold" are left alone.
    Runs of consecutive neutral words produced by adjective + noun pairs
    ("young man" -> "person person") are collapsed to a single word.

    Returns:
        The neutralised, lower-cased text.
    """
    import re  # function-scope import: `re` is not among this cell's imports

    txt = txt.lower()
    # Longest terms first so multi-word terms win over any shorter alternative.
    terms = sorted(ATTRIBUTE_MAP, key=len, reverse=True)
    pattern = r"\b(" + "|".join(re.escape(t) for t in terms) + r")(s?)\b"
    txt = re.sub(pattern, lambda m: ATTRIBUTE_MAP[m.group(1)] + m.group(2), txt)
    # Keep only the last word of a "person person ..." run.
    return re.sub(r"\b(?:persons?\s+)+(persons?)\b", r"\1", txt)


def generate_variants(txt: str) -> List[str]:
    """Return one variant of the neutralised *txt* per ATTR_VALUES entry.

    Each variant replaces the whole word "person" (plural "s" preserved) with
    the attribute phrase; words that merely contain "person", such as
    "personality", are not touched.  The list order follows ATTR_VALUES.
    """
    import re  # function-scope import: `re` is not among this cell's imports

    base = neutralize_text(txt)
    return [
        re.sub(r"\bperson(s?)\b", lambda m: v + m.group(1), base)
        for v in ATTR_VALUES
    ]


class SANERLayer(nn.Module):
    """Residual MLP: ``forward(x)`` returns ``x + mlp(x)``.

    ``mlp`` is Linear(dim, dim) -> ReLU -> Linear(dim, dim), so the output has
    the same last dimension as the input.
    """

    def __init__(self, dim: int):
        super().__init__()
        stages = [nn.Linear(dim, dim), nn.ReLU(), nn.Linear(dim, dim)]
        self.mlp = nn.Sequential(*stages)

    def forward(self, x):
        delta = self.mlp(x)
        return x + delta


# 4️⃣ Synthetic caption dataset (text only) for SANER training
# --------------------------------------------------
class CaptionDataset(Dataset):
    """Dataset of ``size`` items that all carry the same caption.

    ``__getitem__`` ignores the index and always returns
    ``{"text": neutral_prompt}``; no images are involved.
    """

    def __init__(self, size: int, neutral_prompt: str = "a photo of a person"):
        self.size, self.prompt = size, neutral_prompt

    def __len__(self):
        return self.size

    def __getitem__(self, idx):
        return dict(text=self.prompt)


def collate_text(batch):
    """Collect the "text" field of every sample into one list under key "text"."""
    captions = [sample["text"] for sample in batch]
    return {"text": captions}


# 5️⃣ Train SANER
# --------------------------------------------------

def train_saner_layer(model, tokenizer, saner, dataloader, epochs: int = 2, lr: float = 1e-4):
    """Train ``saner`` so a neutral caption is equally similar to all its variants.

    Per batch, captions are neutralised, expanded into one variant per
    ``ATTR_VALUES`` entry and encoded by the text encoder under
    ``torch.no_grad()``.  The loss is the standard deviation (across variants)
    of the cosine similarity between ``saner(neutral)`` and each variant
    embedding, averaged over the batch.

    Args:
        model: exposes ``encode_text(tokens)``; switched to eval mode here.
        tokenizer: exposes ``tokenize(list_of_str)`` returning a tensor.
        saner: module being trained; only its parameters reach the optimizer.
        dataloader: iterable of ``{"text": [str, ...]}`` batches.
        epochs: number of passes over ``dataloader``.
        lr: Adam learning rate.

    Returns:
        ``saner`` switched to eval mode.
    """
    optim = torch.optim.Adam(saner.parameters(), lr=lr)
    model.eval()
    saner.train()
    for ep in range(epochs):
        total, steps = 0.0, 0
        for batch in tqdm(dataloader, desc=f"SANER epoch {ep+1}"):
            texts = batch["text"]
            neut_txts = [neutralize_text(t) for t in texts]
            attr_txts = list(itertools.chain.from_iterable(generate_variants(t) for t in neut_txts))
            all_txts = neut_txts + attr_txts

            # Encode every distinct string once and gather the rows back into
            # batch order: batches are typically full of repeated captions, so
            # this avoids pushing duplicates through the text encoder.
            uniq_txts = list(dict.fromkeys(all_txts))
            row_of = {t: i for i, t in enumerate(uniq_txts)}
            tokens = tokenizer.tokenize(uniq_txts).to(DEVICE)
            with torch.no_grad():
                uniq_feats = model.encode_text(tokens)
                uniq_feats = uniq_feats / uniq_feats.norm(dim=-1, keepdim=True)
            rows = torch.tensor([row_of[t] for t in all_txts], device=uniq_feats.device)
            feats = uniq_feats[rows]

            n_neut = len(neut_txts)
            neut = feats[:n_neut]
            # (batch, variants, dim): the variants of one caption are contiguous.
            attr = feats[n_neut:].reshape(n_neut, -1, feats.shape[1])
            neut_proj = saner(neut)
            sims = torch.stack([
                F.cosine_similarity(neut_proj, attr[:, i, :], dim=-1) for i in range(attr.shape[1])
            ], dim=1)
            loss = sims.std(dim=1).mean()
            optim.zero_grad()
            loss.backward()
            optim.step()
            total += loss.item()
            steps += 1
        # Average over the batches actually seen: needs no len(dataloader) and
        # cannot divide by zero when the loader is empty.
        print(f"\t[Ep {ep+1}] avg std-loss {total / max(steps, 1):.4f}")
    return saner.eval()


# 6️⃣ Load PATA metadata & image dataset
# --------------------------------------------------
JSON_PATH = "/content/drive/MyDrive/Pata_p/processed_dataset_with_images.json"  # ← CHANGE if needed
# Read the metadata as UTF-8 explicitly instead of relying on the platform's
# default text encoding.
with open(JSON_PATH, "r", encoding="utf-8") as f:
    pata_meta = json.load(f)
print("Total PATA entries:", len(pata_meta))


class PataImageDataset(Dataset):
    """Map-style dataset over PATA metadata entries.

    Each item is ``(preprocessed_image, "<race>_<gender>", idx)``.  Entries are
    read through their ``image_path``, ``race`` and ``gender`` keys.
    """

    def __init__(self, meta, preprocess):
        self.meta = meta              # indexable collection of metadata entries
        self.preprocess = preprocess  # callable applied to the RGB image

    def __len__(self):
        return len(self.meta)

    def __getitem__(self, idx):
        entry = self.meta[idx]
        # Open inside a context manager so the file handle is closed right
        # away; convert() returns a separate image that stays usable.
        with Image.open(entry["image_path"]) as raw:
            img = raw.convert("RGB")
        img = self.preprocess(img)
        label = f"{entry['race']}_{entry['gender']}"
        return img, label, idx  # idx for bookkeeping


def get_ground_truth(entry):
    """Build the "<race>_<gender>" label string from one metadata entry."""
    return "{}_{}".format(entry["race"], entry["gender"])


# 7️⃣ Agency / communion prompts (same as FairFace example)
# --------------------------------------------------
'''
LABELS = {
    'trustworthy': 'a photo of a trustworthy person',
    'sincere': 'a photo of a sincere person',
    'friendly': 'a photo of a friendly person',
    'benevolent': 'a photo of a benevolent person',
    'likable': 'a photo of a likable person',
    'altruistic': 'a photo of an altruistic person',

    'untrustworthy': 'a photo of an untrustworthy person',
    'dishonest': 'a photo of a dishonest person',
    'unfriendly': 'a photo of an unfriendly person',
    'threatening': 'a photo of a threatening person',
    'unpleasant': 'a photo of an unpleasant person',
    'egoistic': 'a photo of an egoistic person',
}
'''
# The string above is inert, so LABELS must already be defined when this runs.
# Class names and their prompts share the same order.
CLASSES = list(LABELS)
PROMPTS = list(LABELS.values())


# 8️⃣ Zero-shot evaluation on PATA
# --------------------------------------------------

def classify_pata(model, tokenizer, saner, preprocess, batch_size=32):
    """Zero-shot classify every PATA image against ``PROMPTS``.

    Prompt embeddings are encoded once, passed through ``saner`` and
    re-normalised; each image is assigned the class whose prompt embedding has
    the highest dot product with the normalised image embedding.

    Args:
        model: exposes ``encode_text`` and ``encode_image``.
        tokenizer: exposes ``tokenize(list_of_str)`` returning a tensor.
        saner: module applied to the normalised prompt embeddings.
        preprocess: image transform handed to ``PataImageDataset``.
        batch_size: DataLoader batch size.

    Returns:
        ``(gt, preds, indices)``: ground-truth label strings, predicted
        ``CLASSES`` entries and dataset indices, in loader order.
    """
    ds = PataImageDataset(pata_meta, preprocess)
    dl = DataLoader(ds, batch_size=batch_size, shuffle=False, num_workers=2)

    gt, preds, indices = [], [], []
    # Pure inference: without no_grad the SANER forward pass makes the prompt
    # features require grad, so every batch of logits would build a graph.
    with torch.no_grad():
        # Prompt features once
        toks = tokenizer.tokenize(PROMPTS).to(DEVICE)
        p_feats = model.encode_text(toks)
        p_feats = p_feats / p_feats.norm(dim=-1, keepdim=True)
        p_feats = saner(p_feats)
        p_feats = p_feats / p_feats.norm(dim=-1, keepdim=True)

        for imgs, labels, idx in tqdm(dl, desc="Classifying PATA"):
            imgs = imgs.to(DEVICE)
            i_feats = model.encode_image(imgs)
            i_feats = i_feats / i_feats.norm(dim=-1, keepdim=True)
            # argmax is unchanged by positive scaling and softmax, so the raw
            # similarities are enough to pick the top class.
            top = (i_feats @ p_feats.T).argmax(dim=-1).cpu().tolist()
            preds.extend(CLASSES[t] for t in top)
            gt.extend(labels)
            indices.extend(idx.tolist())
    return gt, preds, indices


# 9️⃣ Experiment loop
# --------------------------------------------------
# (architecture, pretrained tag) pairs handed to open_clip.
CONFIGS = [
    {"mod": "ViT-L-14", "dat": "laion2b_s32b_b82k"},
    {"mod": "ViT-B-32", "dat": "laion2b_s34b_b79k"}
]

for cfg in CONFIGS:
    mod, dat = cfg["mod"], cfg["dat"]
    print(f"\n=== {mod} | {dat} ===")

    # Create the output folder before the expensive work: DataFrame.to_excel
    # does not create missing directories, so a missing folder would otherwise
    # only fail after training and evaluation have finished.
    # NOTE: if Google Drive is not mounted this creates a local folder instead.
    out_name = f"/content/drive/MyDrive/debias_clip_3/crime/pata_oclip_crime_{mod.replace('/', '_')}_saner.xlsx"
    os.makedirs(os.path.dirname(out_name), exist_ok=True)

    model, _, preprocess = open_clip.create_model_and_transforms(mod, pretrained=dat)
    model.to(DEVICE)
    tokenizer = open_clip.tokenizer

    # Train SANER on synthetic captions (size == len(pata_meta))
    cap_ds = CaptionDataset(size=len(pata_meta))
    cap_dl = DataLoader(cap_ds, batch_size=64, shuffle=True, num_workers=0, collate_fn=collate_text)

    # The SANER layer works in the text embedding space of the current model.
    saner = SANERLayer(model.text_projection.shape[1]).to(DEVICE)
    saner = train_saner_layer(model, tokenizer, saner, cap_dl)

    # Evaluate on PATA
    gts, preds, idxs = classify_pata(model, tokenizer, saner, preprocess)
    df = pd.DataFrame({"Index": idxs, "GroundTruth": gts, "Prediction": preds})
    df.to_excel(out_name, index=False)
    print("Saved ->", out_name)

print("✅ PATA SANER experiments complete.")


Using device: cuda
Total PATA entries: 3948

=== ViT-L-14 | laion2b_s32b_b82k ===


SANER epoch 1: 100%|██████████| 62/62 [02:06<00:00,  2.05s/it]


	[Ep 1] avg std-loss 0.0024


SANER epoch 2: 100%|██████████| 62/62 [02:06<00:00,  2.04s/it]


	[Ep 2] avg std-loss 0.0002


Classifying PATA: 100%|██████████| 124/124 [03:00<00:00,  1.46s/it]


Saved -> /content/drive/MyDrive/debias_clip_3/crime/pata_oclip_crime_ViT-L-14_saner.xlsx

=== ViT-B-32 | laion2b_s34b_b79k ===


SANER epoch 1: 100%|██████████| 62/62 [01:01<00:00,  1.01it/s]


	[Ep 1] avg std-loss 0.0032


SANER epoch 2: 100%|██████████| 62/62 [01:01<00:00,  1.01it/s]


	[Ep 2] avg std-loss 0.0002


Classifying PATA: 100%|██████████| 124/124 [00:33<00:00,  3.74it/s]


Saved -> /content/drive/MyDrive/debias_clip_3/crime/pata_oclip_crime_ViT-B-32_saner.xlsx
✅ PATA SANER experiments complete.


##saner Oclip 400m

In [None]:
# -*- coding: utf-8 -*-
"""
CLIP + SANER Debiasing on FairFace (Colab‑ready, July 2025)
==========================================================
This notebook‑style script trains a **SANER** (Societal Attribute Neutralizer)
layer on top of multiple CLIP variants and evaluates zero‑shot bias on the
FairFace validation split.

Fixes vs. previous draft
------------------------
* **Removed PIL images** from the SANER training DataLoader so the default
  collate no longer crashes.
* Uses **open_clip.tokenizer.tokenize** everywhere (not HF tokenizer).
* Custom **collate_text** function handles lists of strings.
* Added optional pin to **scikit‑learn 1.6.x** to silence sklearn‑compat
  warnings.

Script outline
--------------
1. Install/upgrade deps (open_clip_torch, etc.; pin sklearn 1.6.*).
2. Utilities: neutralisation, variant generation, `SANERLayer`.
3. `train_saner_layer` – trains only on **text** captions (no images needed).
4. `classify_faces` – zero‑shot CLIP evaluation with SANER applied.
5. Loop over **ViT‑L/14** and **ViT‑B/32** checkpoints; write per‑model Excel.

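Run each section in Colab. To run it as a standalone `.py`, first remove the
`!pip` line (IPython-only syntax) and define `LABELS` beforehand: this cell
reads `LABELS` but only contains it inside an inert string.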
"""

# 1️⃣ Install dependencies (Colab cell)
# --------------------------------------------------
!pip -q install open_clip_torch transformers datasets openpyxl scikit-learn==1.6.1 tqdm --upgrade

# 2️⃣ Imports & global setup
# --------------------------------------------------
import os, random, itertools, math, warnings
from typing import List

import torch, torch.nn as nn, torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

import open_clip
import datasets
import pandas as pd

# Suppress all warnings (installs a global "ignore" filter).
warnings.filterwarnings("ignore")

# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print("Using device:", DEVICE)

# Seed Python's and PyTorch's random number generators with the same value.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

# 3️⃣ Helpers – neutralization & SANER layer
# --------------------------------------------------
# Attribute terms that neutralize_text replaces, mapped to their neutral word.
ATTRIBUTE_MAP = {
    "man": "person", "woman": "person", "male": "person", "female": "person",
    "young": "person", "old": "person", "boy": "person", "girl": "person",
    "black": "person", "white": "person", "asian": "person", "latino": "person",
    "middle eastern": "person", "indian": "person"
}
# Attribute phrases that generate_variants substitutes for the word "person".
ATTR_VALUES = [
    "man", "woman", "young person", "old person", "black person",
    "white person", "asian person"
]


def neutralize_text(txt: str) -> str:
    """Lower-case *txt* and replace every ATTRIBUTE_MAP term with its neutral word.

    Terms are matched as whole words (an optional plural "s" is carried over),
    so a term is never rewritten inside a longer word: "woman" becomes
    "person" rather than "woperson", and "human" / "gold" are left alone.
    Runs of consecutive neutral words produced by adjective + noun pairs
    ("young man" -> "person person") are collapsed to a single word.

    Returns:
        The neutralised, lower-cased text.
    """
    import re  # function-scope import: `re` is not among this cell's imports

    txt = txt.lower()
    # Longest terms first so multi-word terms win over any shorter alternative.
    terms = sorted(ATTRIBUTE_MAP, key=len, reverse=True)
    pattern = r"\b(" + "|".join(re.escape(t) for t in terms) + r")(s?)\b"
    txt = re.sub(pattern, lambda m: ATTRIBUTE_MAP[m.group(1)] + m.group(2), txt)
    # Keep only the last word of a "person person ..." run.
    return re.sub(r"\b(?:persons?\s+)+(persons?)\b", r"\1", txt)


def generate_variants(txt: str) -> List[str]:
    """Return one variant of the neutralised *txt* per ATTR_VALUES entry.

    Each variant replaces the whole word "person" (plural "s" preserved) with
    the attribute phrase; words that merely contain "person", such as
    "personality", are not touched.  The list order follows ATTR_VALUES.
    """
    import re  # function-scope import: `re` is not among this cell's imports

    base = neutralize_text(txt)
    return [
        re.sub(r"\bperson(s?)\b", lambda m: v + m.group(1), base)
        for v in ATTR_VALUES
    ]

class SANERLayer(nn.Module):
    """Residual MLP: ``forward(x)`` returns ``x + mlp(x)``.

    ``mlp`` is Linear(dim, dim) -> ReLU -> Linear(dim, dim), so the output has
    the same last dimension as the input.
    """

    def __init__(self, dim: int):
        super().__init__()
        stages = [nn.Linear(dim, dim), nn.ReLU(), nn.Linear(dim, dim)]
        self.mlp = nn.Sequential(*stages)

    def forward(self, x):
        delta = self.mlp(x)
        return x + delta

# 4️⃣ Synthetic caption dataset (text only)
# --------------------------------------------------
class CaptionDataset(Dataset):
    """Dataset of ``size`` items that all carry the same caption.

    ``__getitem__`` ignores the index and always returns
    ``{"text": neutral_prompt}``; no images are involved.
    """

    def __init__(self, size: int, neutral_prompt: str = "a photo of a person"):
        self.size, self.prompt = size, neutral_prompt

    def __len__(self):
        return self.size

    def __getitem__(self, idx):
        return dict(text=self.prompt)

def collate_text(batch):
    """Collect the "text" field of every sample into one list under key "text"."""
    captions = [sample["text"] for sample in batch]
    return {"text": captions}

# 5️⃣ Train SANER
# --------------------------------------------------

def train_saner_layer(model, tokenizer, saner, dataloader, epochs: int = 2, lr: float = 1e-4):
    """Train ``saner`` so a neutral caption is equally similar to all its variants.

    Per batch, captions are neutralised, expanded into one variant per
    ``ATTR_VALUES`` entry and encoded by the text encoder under
    ``torch.no_grad()``.  The loss is the standard deviation (across variants)
    of the cosine similarity between ``saner(neutral)`` and each variant
    embedding, averaged over the batch.

    Args:
        model: exposes ``encode_text(tokens)``; switched to eval mode here.
        tokenizer: exposes ``tokenize(list_of_str)`` returning a tensor.
        saner: module being trained; only its parameters reach the optimizer.
        dataloader: iterable of ``{"text": [str, ...]}`` batches.
        epochs: number of passes over ``dataloader``.
        lr: Adam learning rate.

    Returns:
        ``saner`` switched to eval mode.
    """
    optim = torch.optim.Adam(saner.parameters(), lr=lr)
    model.eval()
    saner.train()
    for ep in range(epochs):
        tot, steps = 0.0, 0
        for batch in tqdm(dataloader, desc=f"SANER epoch {ep+1}"):
            texts = batch["text"]  # list of str
            neut_txts = [neutralize_text(t) for t in texts]
            attr_txts = list(itertools.chain.from_iterable(generate_variants(t) for t in neut_txts))
            all_txts = neut_txts + attr_txts

            # Encode every distinct string once and gather the rows back into
            # batch order: batches are typically full of repeated captions, so
            # this avoids pushing duplicates through the text encoder.
            uniq_txts = list(dict.fromkeys(all_txts))
            row_of = {t: i for i, t in enumerate(uniq_txts)}
            tokens = tokenizer.tokenize(uniq_txts).to(DEVICE)
            with torch.no_grad():
                uniq_feats = model.encode_text(tokens)
                uniq_feats = uniq_feats / uniq_feats.norm(dim=-1, keepdim=True)
            rows = torch.tensor([row_of[t] for t in all_txts], device=uniq_feats.device)
            feats = uniq_feats[rows]

            n_neut = len(neut_txts)
            neut = feats[:n_neut]
            # (batch, variants, dim): the variants of one caption are contiguous.
            attr = feats[n_neut:].reshape(n_neut, -1, feats.shape[1])
            neut_proj = saner(neut)
            sims = torch.stack([
                F.cosine_similarity(neut_proj, attr[:, i, :], dim=-1) for i in range(attr.shape[1])
            ], dim=1)
            loss = sims.std(dim=1).mean()
            optim.zero_grad()
            loss.backward()
            optim.step()
            tot += loss.item()
            steps += 1
        # Average over the batches actually seen: needs no len(dataloader) and
        # cannot divide by zero when the loader is empty.
        print(f" [Ep {ep+1}] avg std‑loss {tot / max(steps, 1):.4f}")
    return saner.eval()

# 6️⃣ FairFace evaluation setup
# --------------------------------------------------
# Split of FairFace (config "0.25") that is scored.
FAIRFACE_SPLIT = "validation"
fairface = datasets.load_dataset("HuggingFaceM4/FairFace", "0.25", split=FAIRFACE_SPLIT)

# int2str converters taken from the dataset's "race" and "gender" features.
RACE = fairface.features["race"].int2str
GENDER = fairface.features["gender"].int2str


def get_label(ex):
    """Return "<race>_<gender>" for one FairFace example, using the int2str names."""
    race_name = RACE(ex["race"])
    gender_name = GENDER(ex["gender"])
    return f"{race_name}_{gender_name}"


'''
LABELS = {
    'trustworthy': 'a photo of a trustworthy person',
    'sincere': 'a photo of a sincere person',
    'friendly': 'a photo of a friendly person',
    'benevolent': 'a photo of a benevolent person',
    'likable': 'a photo of a likable person',
    'altruistic': 'a photo of an altruistic person',

    'untrustworthy': 'a photo of an untrustworthy person',
    'dishonest': 'a photo of a dishonest person',
    'unfriendly': 'a photo of an unfriendly person',
    'threatening': 'a photo of a threatening person',
    'unpleasant': 'a photo of an unpleasant person',
    'egoistic': 'a photo of an egoistic person',
}
'''
# The string above is inert, so LABELS must already be defined when this runs.
# Class names and their prompts share the same order.
CLASSES = list(LABELS)
PROMPTS = list(LABELS.values())


def classify_faces(model, tokenizer, saner, preprocess, batch=32):
    """Zero-shot classify every FairFace image against ``PROMPTS``.

    Prompt embeddings are encoded once, passed through ``saner`` and
    re-normalised; each image is assigned the class whose prompt embedding has
    the highest dot product with the normalised image embedding.

    Args:
        model: exposes ``encode_text`` and ``encode_image``.
        tokenizer: exposes ``tokenize(list_of_str)`` returning a tensor.
        saner: module applied to the normalised prompt embeddings.
        preprocess: callable turning one dataset image into a tensor.
        batch: number of images encoded per step.

    Returns:
        ``(labels, predictions)``: two lists in dataset order, holding the
        ``get_label`` string and the predicted ``CLASSES`` entry per image.
    """
    labels, predictions = [], []
    # Pure inference: without no_grad the SANER forward pass makes the prompt
    # features require grad, so every batch of logits would build a graph.
    with torch.no_grad():
        # Encode prompts once
        tok = tokenizer.tokenize(PROMPTS).to(DEVICE)
        p_feats = model.encode_text(tok)
        p_feats = p_feats / p_feats.norm(dim=-1, keepdim=True)
        p_feats = saner(p_feats)
        p_feats = p_feats / p_feats.norm(dim=-1, keepdim=True)

        for start in tqdm(range(0, len(fairface), batch), desc="Classifying"):
            end = min(start + batch, len(fairface))
            # Fetch each row once and reuse it for both the image and the label
            # instead of indexing the dataset twice per sample.
            rows = [fairface[i] for i in range(start, end)]
            imgs = torch.stack([preprocess(row["image"]) for row in rows]).to(DEVICE)
            i_feats = model.encode_image(imgs)
            i_feats = i_feats / i_feats.norm(dim=-1, keepdim=True)
            # argmax is unchanged by positive scaling and softmax, so the raw
            # similarities are enough to pick the top class.
            top_idx = (i_feats @ p_feats.T).argmax(dim=-1).cpu().tolist()
            predictions.extend(CLASSES[t] for t in top_idx)
            # Ground-truth FairFace race+gender label
            labels.extend(get_label(row) for row in rows)
    return labels, predictions

# 7️⃣ Experiment loop
# --------------------------------------------------
# (architecture, pretrained tag) pairs handed to open_clip.
CONFIGS = [
    {"mod": "ViT-L-14", "dat": "laion400m_e31"},
    {"mod": "ViT-B-32", "dat": "laion400m_e31"},
]

for cfg in CONFIGS:
    mod, dat = cfg["mod"], cfg["dat"]
    print(f"\n=== {mod} | {dat} ===")

    # Create the output folder before the expensive work: DataFrame.to_excel
    # does not create missing directories, so a missing folder would otherwise
    # only fail after training and evaluation have finished.
    # NOTE: if Google Drive is not mounted this creates a local folder instead.
    # NOTE(review): the file name says "200m" while the checkpoints above are
    # "laion400m_e31" - confirm which tag is intended before renaming anything.
    out = f"/content/drive/MyDrive/debias_clip_3/crime/fairface_oclip_comm_200m_{mod.replace('/', '_')}_saner.xlsx"
    os.makedirs(os.path.dirname(out), exist_ok=True)

    model, _, preprocess = open_clip.create_model_and_transforms(mod, pretrained=dat)
    model.to(DEVICE)
    tokenizer = open_clip.tokenizer

    # Synthetic caption set with as many items as the FairFace split (for demo)
    cap_ds = CaptionDataset(size=len(fairface))
    cap_dl = DataLoader(cap_ds, batch_size=64, shuffle=True, num_workers=0, collate_fn=collate_text)

    # The SANER layer works in the text embedding space of the current model.
    saner = SANERLayer(model.text_projection.shape[1]).to(DEVICE)
    saner = train_saner_layer(model, tokenizer, saner, cap_dl)

    lbls, preds = classify_faces(model, tokenizer, saner, preprocess)
    df = pd.DataFrame({"Image_ID": list(range(len(lbls))), "FairFace_Label": lbls, "Prediction": preds})
    df.to_excel(out, index=False)
    print("Saved ->", out)

print("✅ All done.")


Using device: cuda

=== ViT-L-14 | laion400m_e31 ===


open_clip_model.safetensors:   0%|          | 0.00/1.71G [00:00<?, ?B/s]

SANER epoch 1: 100%|██████████| 172/172 [05:41<00:00,  1.98s/it]


 [Ep 1] avg std‑loss 0.0018


SANER epoch 2: 100%|██████████| 172/172 [05:41<00:00,  1.98s/it]


 [Ep 2] avg std‑loss 0.0004


Classifying: 100%|██████████| 343/343 [08:20<00:00,  1.46s/it]


Saved -> /content/drive/MyDrive/debias_clip_3/crime/fairface_oclip_comm_200m_ViT-L-14_saner.xlsx

=== ViT-B-32 | laion400m_e31 ===


open_clip_model.safetensors:   0%|          | 0.00/605M [00:00<?, ?B/s]

SANER epoch 1: 100%|██████████| 172/172 [02:45<00:00,  1.04it/s]


 [Ep 1] avg std‑loss 0.0012


SANER epoch 2: 100%|██████████| 172/172 [02:45<00:00,  1.04it/s]


 [Ep 2] avg std‑loss 0.0002


Classifying: 100%|██████████| 343/343 [00:45<00:00,  7.49it/s]


Saved -> /content/drive/MyDrive/debias_clip_3/crime/fairface_oclip_comm_200m_ViT-B-32_saner.xlsx
✅ All done.


In [None]:
# -*- coding: utf-8 -*-
"""
CLIP + SANER Debiasing on **PATA** Dataset (Colab-ready, July 2025)
==================================================================
This script extends the FairFace SANER pipeline to the **PATA fairness
benchmark**.  It trains a lightweight SANER layer on CLIP text features and
then evaluates zero-shot agency/communion predictions on PATA images.

Assumptions
-----------
* You have already downloaded PATA images and saved a JSON metadata file with
  entries like:

```json
{
  "scene": "park",          # optional
  "race": "black",
  "gender": "male",
  "age": "young",           # optional
  "image_path": "/content/drive/MyDrive/Pata_p/image_0001.jpg"
}
```

* Update **`JSON_PATH`** below to point to that metadata file.
* We loop over **ViT-L/14** and **ViT-B/32** checkpoints.  Change `CONFIGS`
  as needed.

Outline
-------
1. Install/upgrade deps (`open_clip_torch`, etc.).
2. Helpers: neutralisation, `SANERLayer`.
3. `train_saner_layer` (text-only captions → quick training).
4. Load PATA metadata + image dataset.
5. `classify_pata` – zero-shot evaluation with SANER applied.
6. Loop over model configs, save Excel (`pata_<model>_saner.xlsx`).
"""

# 1️⃣ Install dependencies (Colab cell)
# --------------------------------------------------
!pip -q install open_clip_torch transformers datasets pillow openpyxl scikit-learn==1.6.1 tqdm --upgrade

# 2️⃣ Imports & global setup
# --------------------------------------------------
import os, random, itertools, warnings, json
from typing import List

import torch, torch.nn as nn, torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

import open_clip
import pandas as pd
from PIL import Image

# Silence all Python warnings for the rest of the session.
warnings.filterwarnings("ignore")

# Run on the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print("Using device:", DEVICE)

# Seed both Python's and PyTorch's RNGs with the same value.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

# 3️⃣ Helpers – neutralisation & SANER layer
# --------------------------------------------------
# Attribute words (lower-case) and the neutral word that replaces each of them.
ATTRIBUTE_MAP = {
    "man": "person", "woman": "person", "male": "person", "female": "person",
    "young": "person", "old": "person", "boy": "person", "girl": "person",
    "black": "person", "white": "person", "asian": "person", "latino": "person",
    "middle eastern": "person", "indian": "person"
}
# Phrases substituted for "person" when building attribute-specific variants.
ATTR_VALUES = [
    "man", "woman", "young person", "old person", "black person",
    "white person", "asian person"
]

def neutralize_text(txt: str) -> str:
    """Lower-case *txt* and replace every attribute word with its neutral form.

    Only whole words (or whole phrases, for multi-word keys) listed in
    ``ATTRIBUTE_MAP`` are replaced. Matching on word boundaries in a single
    pass avoids corrupting words that merely contain a key, e.g.
    "woman" -> "woperson", "female" -> "feperson" or "human" -> "huperson".

    Args:
        txt: Caption to neutralize.

    Returns:
        The lower-cased caption with attribute words replaced.
    """
    import re  # local import keeps this cell independent of the import block

    txt = txt.lower()
    # Longest keys first so a longer key wins over any shorter key it contains.
    keys = sorted(ATTRIBUTE_MAP, key=len, reverse=True)
    pattern = r"\b(?:" + "|".join(re.escape(k) for k in keys) + r")\b"
    return re.sub(pattern, lambda m: ATTRIBUTE_MAP[m.group(0)], txt)


def generate_variants(txt: str) -> List[str]:
    """Return one attribute-specific caption per entry of ``ATTR_VALUES``.

    The caption is neutralized first, then every occurrence of "person" is
    swapped for each attribute phrase in turn.
    """
    neutral = neutralize_text(txt)
    variants = []
    for attr_phrase in ATTR_VALUES:
        variants.append(neutral.replace("person", attr_phrase))
    return variants


class SANERLayer(nn.Module):
    """Residual two-layer MLP (Linear -> ReLU -> Linear) over ``dim``-wide features."""

    def __init__(self, dim: int):
        super().__init__()
        first = nn.Linear(dim, dim)
        second = nn.Linear(dim, dim)
        # Kept as a Sequential named ``mlp`` so the state-dict keys stay the same.
        self.mlp = nn.Sequential(first, nn.ReLU(), second)

    def forward(self, x):
        """Return ``x`` plus the MLP's correction term."""
        correction = self.mlp(x)
        return x + correction


# 4️⃣ Synthetic caption dataset (text only) for SANER training
# --------------------------------------------------
class CaptionDataset(Dataset):
    """Text-only dataset that yields the same caption ``size`` times."""

    def __init__(self, size: int, neutral_prompt: str = "a photo of a person"):
        self.prompt = neutral_prompt
        self.size = size

    def __len__(self):
        return self.size

    def __getitem__(self, idx):
        # The index is ignored: every sample carries the same prompt.
        return dict(text=self.prompt)


def collate_text(batch):
    """Merge a list of ``{"text": str}`` samples into ``{"text": [str, ...]}``."""
    captions = []
    for sample in batch:
        captions.append(sample["text"])
    return {"text": captions}


# 5️⃣ Train SANER
# --------------------------------------------------

def train_saner_layer(model, tokenizer, saner, dataloader, epochs: int = 2, lr: float = 1e-4):
    """Train ``saner`` so a neutral caption is equally similar to all attribute variants.

    For every batch the captions are neutralized, expanded into one variant
    per ``ATTR_VALUES`` entry, encoded with ``model.encode_text`` (under
    ``no_grad``) and L2-normalized. The loss is the standard deviation,
    across variants, of the cosine similarity between ``saner(neutral)`` and
    each variant embedding, averaged over the batch. Only ``saner``'s
    parameters are handed to the optimizer.

    Args:
        model: Object exposing ``encode_text(tokens)``; put in eval mode here.
        tokenizer: Object exposing ``tokenize(list_of_str)``.
        saner: Module to train.
        dataloader: Iterable of batches shaped like ``{"text": [str, ...]}``.
        epochs: Number of passes over ``dataloader``.
        lr: Adam learning rate.

    Returns:
        ``saner`` switched to eval mode.
    """
    optim = torch.optim.Adam(saner.parameters(), lr=lr)
    model.eval(); saner.train()
    for ep in range(epochs):
        total, n_batches = 0.0, 0
        for batch in tqdm(dataloader, desc=f"SANER epoch {ep+1}"):
            texts = batch["text"]
            neut_txts = [neutralize_text(t) for t in texts]
            attr_txts = list(itertools.chain.from_iterable(generate_variants(t) for t in neut_txts))
            all_txts = neut_txts + attr_txts

            tokens = tokenizer.tokenize(all_txts).to(DEVICE)
            with torch.no_grad():
                feats = model.encode_text(tokens)
            feats = feats / feats.norm(dim=-1, keepdim=True)
            n_neut = len(neut_txts)
            neut = feats[:n_neut]
            # Variants were emitted caption by caption, so this reshape yields
            # [captions, variants, dim].
            attr = feats[n_neut:].reshape(n_neut, -1, feats.shape[1])
            neut_proj = saner(neut)
            sims = torch.stack([
                F.cosine_similarity(neut_proj, attr[:, i, :], dim=-1) for i in range(attr.shape[1])
            ], dim=1)
            loss = sims.std(dim=1).mean()
            optim.zero_grad(); loss.backward(); optim.step()
            total += loss.item()
            n_batches += 1
        # Count batches ourselves: avoids ZeroDivisionError on an empty loader
        # and works for iterables without __len__.
        avg = total / max(n_batches, 1)
        print(f"\t[Ep {ep+1}] avg std-loss {avg:.4f}")
    return saner.eval()


# 6️⃣ Load PATA metadata & image dataset
# --------------------------------------------------
JSON_PATH = "/content/drive/MyDrive/Pata_p/processed_dataset_with_images.json"  # ← CHANGE if needed
# Decode explicitly as UTF-8 so the result does not depend on the locale default.
with open(JSON_PATH, "r", encoding="utf-8") as f:
    pata_meta = json.load(f)
print("Total PATA entries:", len(pata_meta))


class PataImageDataset(Dataset):
    """Image dataset backed by a list of PATA metadata entries.

    Each entry must provide ``image_path``, ``race`` and ``gender``.
    """

    def __init__(self, meta, preprocess):
        self.meta = meta
        self.preprocess = preprocess

    def __len__(self):
        return len(self.meta)

    def __getitem__(self, idx):
        """Return ``(preprocessed image, "<race>_<gender>", idx)``."""
        entry = self.meta[idx]
        # Close the file handle as soon as the pixels are decoded; convert()
        # returns an independent image, so it stays valid after the block.
        with Image.open(entry["image_path"]) as raw:
            img = raw.convert("RGB")
        img = self.preprocess(img)
        label = f"{entry['race']}_{entry['gender']}"
        return img, label, idx  # idx for bookkeeping


def get_ground_truth(entry):
    """Build the ``"<race>_<gender>"`` group label for one metadata entry."""
    race = entry["race"]
    gender = entry["gender"]
    return f"{race}_{gender}"


# 7️⃣ Agency / communion prompts (same as FairFace example)
# --------------------------------------------------
'''
LABELS = {
    'trustworthy': 'a photo of a trustworthy person',
    'sincere': 'a photo of a sincere person',
    'friendly': 'a photo of a friendly person',
    'benevolent': 'a photo of a benevolent person',
    'likable': 'a photo of a likable person',
    'altruistic': 'a photo of an altruistic person',

    'untrustworthy': 'a photo of an untrustworthy person',
    'dishonest': 'a photo of a dishonest person',
    'unfriendly': 'a photo of an unfriendly person',
    'threatening': 'a photo of a threatening person',
    'unpleasant': 'a photo of an unpleasant person',
    'egoistic': 'a photo of an egoistic person',
}
'''
# The LABELS literal above is disabled (it sits inside a string), so LABELS
# must already have been defined by an earlier notebook cell.
CLASSES = list(LABELS)
PROMPTS = list(LABELS.values())


# 8️⃣ Zero-shot evaluation on PATA
# --------------------------------------------------

def classify_pata(model, tokenizer, saner, preprocess, batch_size=32):
    """Zero-shot classify every PATA image against the SANER-adjusted prompts.

    Prompt embeddings are computed once, passed through ``saner`` and
    re-normalized. Each image is then assigned the class whose prompt
    scores highest. All inference runs under ``torch.no_grad()`` so no
    autograd graph is built through ``saner`` or the per-batch logits.

    Args:
        model: Object exposing ``encode_text`` and ``encode_image``.
        tokenizer: Object exposing ``tokenize(list_of_str)``.
        saner: Trained layer applied to the prompt embeddings.
        preprocess: Image transform handed to ``PataImageDataset``.
        batch_size: DataLoader batch size.

    Returns:
        ``(gt, preds, indices)``: group labels, predicted class names and
        dataset indices, in iteration order.
    """
    # Prompt features once
    with torch.no_grad():
        toks = tokenizer.tokenize(PROMPTS).to(DEVICE)
        p_feats = model.encode_text(toks)
        p_feats = p_feats / p_feats.norm(dim=-1, keepdim=True)
        p_feats = saner(p_feats)
        p_feats = p_feats / p_feats.norm(dim=-1, keepdim=True)

    ds = PataImageDataset(pata_meta, preprocess)
    dl = DataLoader(ds, batch_size=batch_size, shuffle=False, num_workers=2)

    gt, preds, indices = [], [], []
    with torch.no_grad():
        for imgs, labels, idx in tqdm(dl, desc="Classifying PATA"):
            imgs = imgs.to(DEVICE)
            i_feats = model.encode_image(imgs)
            i_feats = i_feats / i_feats.norm(dim=-1, keepdim=True)
            top = (100 * i_feats @ p_feats.T).softmax(dim=-1).argmax(dim=-1).cpu().tolist()
            preds.extend([CLASSES[t] for t in top])
            gt.extend(labels)
            indices.extend(idx.tolist())
    return gt, preds, indices


# 9️⃣ Experiment loop
# --------------------------------------------------
# (architecture, pretrained tag) pairs passed to open_clip.
CONFIGS = [
    {"mod": "ViT-L-14", "dat": "laion400m_e31"},
    {"mod": "ViT-B-32", "dat": "laion400m_e31"},
]

for cfg in CONFIGS:
    mod, dat = cfg["mod"], cfg["dat"]
    print(f"\n=== {mod} | {dat} ===")
    model, _, preprocess = open_clip.create_model_and_transforms(mod, pretrained=dat)
    model.to(DEVICE)
    tokenizer = open_clip.tokenizer

    # Train SANER on synthetic captions (size == len(pata_meta))
    cap_ds = CaptionDataset(size=len(pata_meta))
    cap_dl = DataLoader(cap_ds, batch_size=64, shuffle=True, num_workers=0, collate_fn=collate_text)

    saner = SANERLayer(model.text_projection.shape[1]).to(DEVICE)
    saner = train_saner_layer(model, tokenizer, saner, cap_dl)

    # Evaluate on PATA
    gts, preds, idxs = classify_pata(model, tokenizer, saner, preprocess)
    df = pd.DataFrame({"Index": idxs, "GroundTruth": gts, "Prediction": preds})
    out_name = f"/content/drive/MyDrive/debias_clip_3/crime/pata_oclip_crime_400m_{mod.replace('/', '_')}_saner.xlsx"
    # Create the results folder up front so to_excel cannot fail after the
    # expensive training and evaluation have already run.
    os.makedirs(os.path.dirname(out_name), exist_ok=True)
    df.to_excel(out_name, index=False)
    print("Saved ->", out_name)

print("✅ PATA SANER experiments complete.")


Using device: cuda
Total PATA entries: 3948

=== ViT-L-14 | laion400m_e31 ===


SANER epoch 1: 100%|██████████| 62/62 [02:06<00:00,  2.05s/it]


	[Ep 1] avg std-loss 0.0044


SANER epoch 2: 100%|██████████| 62/62 [02:05<00:00,  2.03s/it]


	[Ep 2] avg std-loss 0.0005


Classifying PATA: 100%|██████████| 124/124 [03:01<00:00,  1.46s/it]


Saved -> /content/drive/MyDrive/debias_clip_3/crime/pata_oclip_crime_400m_ViT-L-14_saner.xlsx

=== ViT-B-32 | laion400m_e31 ===


SANER epoch 1: 100%|██████████| 62/62 [01:01<00:00,  1.01it/s]


	[Ep 1] avg std-loss 0.0032


SANER epoch 2: 100%|██████████| 62/62 [01:01<00:00,  1.01it/s]


	[Ep 2] avg std-loss 0.0002


Classifying PATA: 100%|██████████| 124/124 [00:32<00:00,  3.79it/s]


Saved -> /content/drive/MyDrive/debias_clip_3/crime/pata_oclip_crime_400m_ViT-B-32_saner.xlsx
✅ PATA SANER experiments complete.


#Agen_debias_VL


##Biased prompts Oclip 2B

In [None]:
# -*- coding: utf-8 -*-
"""
Biased‑Prompts Debiasing Experiments (FairFace & PATA)
=====================================================
Colab‑ready script (July 2025) that tests the **Biased Prompts** projection
method on two datasets:

* **FairFace** validation split (via 🤗 datasets)
* **PATA** fairness benchmark (JSON metadata with image_path, race,
  gender fields) – update **JSON_PATH** below.

No model fine‑tuning or SANER training; we only compute a **projection matrix P**
from a small set of attribute‑opposite prompt pairs and apply it to all agency
prompts at inference.

Outputs per model (ViT‑L/14 & ViT‑B/32)
--------------------------------------
* fairface_<model>_biasedPrompts.xlsx
* pata_<model>_biasedPrompts.xlsx

Each Excel contains three columns: Index, GroundTruth, Prediction.
"""

# 1️⃣ Install dependencies (Colab)
# --------------------------------------------------
!pip -q install open_clip_torch transformers datasets pillow openpyxl scipy tqdm --upgrade

# 2️⃣ Imports & global setup
# --------------------------------------------------
import os, json, warnings, random, itertools
from typing import List

import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

import open_clip
import pandas as pd
from PIL import Image
import numpy as np
import scipy.linalg as la
import datasets

# Silence all Python warnings for the rest of the session.
warnings.filterwarnings("ignore")

# Run on the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print("Using device:", DEVICE)

# Seed both Python's and PyTorch's RNGs with the same value.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

# 3️⃣ Biased‑Prompts projection matrix
# --------------------------------------------------
# Prompt pairs that differ only in one protected attribute; each pair
# contributes one direction to remove.
BIAS_PAIRS = [
    ("a photo of a man", "a photo of a woman"),
    ("a photo of a young person", "a photo of an old person"),
    ("a photo of a white person", "a photo of a black person"),
]

def compute_projection_matrix(model, tokenizer):
    """Return the matrix ``P`` that projects out the ``BIAS_PAIRS`` directions.

    Each pair is encoded with ``model.encode_text`` and L2-normalized, and
    the difference of the two embeddings becomes one column of ``B``
    (d × k). ``P = I - B B⁺`` is the orthogonal projector onto the
    complement of ``span(B)``. It equals ``I - B (BᵀB)⁻¹ Bᵀ`` when the
    columns are independent, but the pseudo-inverse in float64 also copes
    with (near-)collinear directions instead of raising or amplifying error.

    Args:
        model: Object exposing ``encode_text(tokens)``.
        tokenizer: Object exposing ``tokenize(list_of_str)``.

    Returns:
        A float32 ``d × d`` tensor on ``DEVICE``.
    """
    diffs = []
    with torch.no_grad():
        for a, b in BIAS_PAIRS:
            toks = tokenizer.tokenize([a, b]).to(DEVICE)
            feats = model.encode_text(toks)
            feats = feats / feats.norm(dim=-1, keepdim=True)
            # .float() keeps the NumPy conversion valid for reduced-precision outputs.
            diffs.append((feats[0] - feats[1]).float().cpu().numpy())
    B = np.stack(diffs, axis=1).astype(np.float64)  # d × k
    P = np.eye(B.shape[0]) - B @ la.pinv(B)
    return torch.tensor(P, dtype=torch.float32, device=DEVICE)


def debias(feats, P):
    """Right-multiply ``feats`` by the projection ``P`` and return float32."""
    projected = feats @ P
    return projected.float()

# 4️⃣ Agency / communion prompts
# --------------------------------------------------
'''
LABELS = {
    'trustworthy': 'a photo of a trustworthy person',
    'sincere': 'a photo of a sincere person',
    'friendly': 'a photo of a friendly person',
    'benevolent': 'a photo of a benevolent person',
    'likable': 'a photo of a likable person',
    'altruistic': 'a photo of an altruistic person',

    'untrustworthy': 'a photo of an untrustworthy person',
    'dishonest': 'a photo of a dishonest person',
    'unfriendly': 'a photo of an unfriendly person',
    'threatening': 'a photo of a threatening person',
    'unpleasant': 'a photo of an unpleasant person',
    'egoistic': 'a photo of an egoistic person',
}
'''
# The LABELS literal above is disabled (it sits inside a string), so LABELS
# must already have been defined by an earlier notebook cell.
CLASSES = list(LABELS)
PROMPTS = list(LABELS.values())

# 5️⃣ FairFace dataset loader
# --------------------------------------------------
ff_ds = datasets.load_dataset("HuggingFaceM4/FairFace", "0.25", split="validation")
# Converters from the integer class ids stored in the dataset to their names.
RACE_FF = ff_ds.features["race"].int2str
GENDER_FF = ff_ds.features["gender"].int2str


def ff_label(ex):
    """Return the ``"<race>_<gender>"`` group label for one FairFace row."""
    return f"{RACE_FF(ex['race'])}_{GENDER_FF(ex['gender'])}"

class FairFaceDS(Dataset):
    """Torch wrapper around the FairFace split that applies ``preprocess``."""

    def __init__(self, hf_ds, preprocess):
        self.ds = hf_ds
        self.pre = preprocess

    def __len__(self):
        return len(self.ds)

    def __getitem__(self, idx):
        """Return ``(preprocessed image, "<race>_<gender>", idx)``."""
        # Fetch the row once; the original indexed the dataset twice per sample.
        ex = self.ds[idx]
        return self.pre(ex["image"]), ff_label(ex), idx

# 6️⃣ PATA dataset loader
# --------------------------------------------------
JSON_PATH = "/content/drive/MyDrive/Pata_p/processed_dataset_with_images.json"  # ← edit if needed
# Decode explicitly as UTF-8 so the result does not depend on the locale default.
with open(JSON_PATH, "r", encoding="utf-8") as f:
    pata_meta = json.load(f)

class PataDS(Dataset):
    """Image dataset over PATA metadata entries (``image_path``, ``race``, ``gender``)."""

    def __init__(self, meta, preprocess):
        self.meta = meta
        self.pre = preprocess

    def __len__(self):
        return len(self.meta)

    def __getitem__(self, idx):
        """Return ``(preprocessed image, "<race>_<gender>", idx)``."""
        e = self.meta[idx]
        # Close the file handle as soon as the pixels are decoded; convert()
        # returns an independent image, so it stays valid after the block.
        with Image.open(e["image_path"]) as raw:
            rgb = raw.convert("RGB")
        img = self.pre(rgb)
        label = f"{e['race']}_{e['gender']}"
        return img, label, idx

# 7️⃣ Evaluation helper
# --------------------------------------------------

def run_eval(model, preprocess, tokenizer, dataset, outfile):
    """Score ``dataset`` against the debiased prompts and write an Excel sheet.

    The prompt embeddings are normalized, multiplied by the projection
    matrix from ``compute_projection_matrix`` and normalized again. Every
    image gets the class of its highest-scoring prompt. The sheet written
    to ``outfile`` has the columns Index, GroundTruth and Prediction.
    ``preprocess`` is accepted but not used here; the dataset already
    carries its own transform.
    """
    # Projection matrix & debiased prompt features
    projection = compute_projection_matrix(model, tokenizer)
    prompt_tokens = tokenizer.tokenize(PROMPTS).to(DEVICE)
    with torch.no_grad():
        prompt_feats = model.encode_text(prompt_tokens)
    prompt_feats = prompt_feats / prompt_feats.norm(dim=-1, keepdim=True)
    prompt_feats = debias(prompt_feats, projection)
    prompt_feats = prompt_feats / prompt_feats.norm(dim=-1, keepdim=True)

    loader = DataLoader(dataset, batch_size=32, shuffle=False, num_workers=2)
    # Insertion order of this dict fixes the column order of the sheet.
    rows = {"Index": [], "GroundTruth": [], "Prediction": []}
    for images, labels, sample_idx in tqdm(loader, desc=f"Scoring → {outfile}"):
        images = images.to(DEVICE)
        with torch.no_grad():
            image_feats = model.encode_image(images)
        image_feats = image_feats / image_feats.norm(dim=-1, keepdim=True)
        logits = 100 * image_feats @ prompt_feats.T
        winners = logits.softmax(dim=-1).argmax(dim=-1).cpu().tolist()
        rows["Prediction"].extend(CLASSES[w] for w in winners)
        rows["GroundTruth"].extend(labels)
        rows["Index"].extend(sample_idx.tolist())
    pd.DataFrame(rows).to_excel(outfile, index=False)

# 8️⃣ Main experiment loop
# --------------------------------------------------
# (architecture, pretrained tag) pairs passed to open_clip.
CONFIGS = [
    {"mod": "ViT-L-14", "dat": "laion2b_s32b_b82k"},
    {"mod": "ViT-B-32", "dat": "laion2b_s34b_b79k"},
]

for cfg in CONFIGS:
    mod, dat = cfg["mod"], cfg["dat"]
    print(f"\n=== {mod} | {dat} (BiasedPrompts) ===")
    model, _, preprocess = open_clip.create_model_and_transforms(mod, pretrained=dat)
    model.to(DEVICE)
    tokenizer = open_clip.tokenizer

    # FAIRFACE
    ff_dataset = FairFaceDS(ff_ds, preprocess)
    ff_out = f"/content/drive/MyDrive/debias_clip_3/crime/fairface_oclip_crime_{mod.replace('/', '_')}_biasedPrompts.xlsx"
    # Create the results folder up front so to_excel cannot fail after scoring.
    os.makedirs(os.path.dirname(ff_out), exist_ok=True)
    run_eval(model, preprocess, tokenizer, ff_dataset, ff_out)
    print("  ↪ saved", ff_out)

    # PATA
    pata_dataset = PataDS(pata_meta, preprocess)
    pata_out = f"/content/drive/MyDrive/debias_clip_3/crime/pata_oclip_crime_{mod.replace('/', '_')}_biasedPrompts.xlsx"
    os.makedirs(os.path.dirname(pata_out), exist_ok=True)
    run_eval(model, preprocess, tokenizer, pata_dataset, pata_out)
    print("  ↪ saved", pata_out)

print("✅ Biased‑Prompts experiments complete.")

Using device: cuda

=== ViT-L-14 | laion2b_s32b_b82k (BiasedPrompts) ===


Scoring → /content/drive/MyDrive/debias_clip_3/crime/fairface_oclip_crime_ViT-L-14_biasedPrompts.xlsx: 100%|██████████| 343/343 [08:32<00:00,  1.50s/it]


  ↪ saved /content/drive/MyDrive/debias_clip_3/crime/fairface_oclip_crime_ViT-L-14_biasedPrompts.xlsx


Scoring → /content/drive/MyDrive/debias_clip_3/crime/pata_oclip_crime_ViT-L-14_biasedPrompts.xlsx: 100%|██████████| 124/124 [03:05<00:00,  1.49s/it]


  ↪ saved /content/drive/MyDrive/debias_clip_3/crime/pata_oclip_crime_ViT-L-14_biasedPrompts.xlsx

=== ViT-B-32 | laion2b_s34b_b79k (BiasedPrompts) ===


Scoring → /content/drive/MyDrive/debias_clip_3/crime/fairface_oclip_crime_ViT-B-32_biasedPrompts.xlsx: 100%|██████████| 343/343 [00:33<00:00, 10.21it/s]


  ↪ saved /content/drive/MyDrive/debias_clip_3/crime/fairface_oclip_crime_ViT-B-32_biasedPrompts.xlsx


Scoring → /content/drive/MyDrive/debias_clip_3/crime/pata_oclip_crime_ViT-B-32_biasedPrompts.xlsx: 100%|██████████| 124/124 [00:32<00:00,  3.80it/s]


  ↪ saved /content/drive/MyDrive/debias_clip_3/crime/pata_oclip_crime_ViT-B-32_biasedPrompts.xlsx
✅ Biased‑Prompts experiments complete.


##Biased prompts Oclip 400m

In [None]:
# -*- coding: utf-8 -*-
"""
Biased‑Prompts Debiasing Experiments (FairFace & PATA)
=====================================================
Colab‑ready script (July 2025) that tests the **Biased Prompts** projection
method on two datasets:

* **FairFace** validation split (via 🤗 datasets)
* **PATA** fairness benchmark (JSON metadata with image_path, race,
  gender fields) – update **JSON_PATH** below.

No model fine‑tuning or SANER training; we only compute a **projection matrix P**
from a small set of attribute‑opposite prompt pairs and apply it to all agency
prompts at inference.

Outputs per model (ViT‑L/14 & ViT‑B/32)
--------------------------------------
* fairface_<model>_biasedPrompts.xlsx
* pata_<model>_biasedPrompts.xlsx

Each Excel contains three columns: Index, GroundTruth, Prediction.
"""

# 1️⃣ Install dependencies (Colab)
# --------------------------------------------------
!pip -q install open_clip_torch transformers datasets pillow openpyxl scipy tqdm --upgrade

# 2️⃣ Imports & global setup
# --------------------------------------------------
import os, json, warnings, random, itertools
from typing import List

import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

import open_clip
import pandas as pd
from PIL import Image
import numpy as np
import scipy.linalg as la
import datasets

# Silence all Python warnings for the rest of the session.
warnings.filterwarnings("ignore")

# Run on the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print("Using device:", DEVICE)

# Seed both Python's and PyTorch's RNGs with the same value.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

# 3️⃣ Biased‑Prompts projection matrix
# --------------------------------------------------
# Prompt pairs that differ only in one protected attribute; each pair
# contributes one direction to remove.
BIAS_PAIRS = [
    ("a photo of a man", "a photo of a woman"),
    ("a photo of a young person", "a photo of an old person"),
    ("a photo of a white person", "a photo of a black person"),
]

def compute_projection_matrix(model, tokenizer):
    """Return the matrix ``P`` that projects out the ``BIAS_PAIRS`` directions.

    Each pair is encoded with ``model.encode_text`` and L2-normalized, and
    the difference of the two embeddings becomes one column of ``B``
    (d × k). ``P = I - B B⁺`` is the orthogonal projector onto the
    complement of ``span(B)``. It equals ``I - B (BᵀB)⁻¹ Bᵀ`` when the
    columns are independent, but the pseudo-inverse in float64 also copes
    with (near-)collinear directions instead of raising or amplifying error.

    Args:
        model: Object exposing ``encode_text(tokens)``.
        tokenizer: Object exposing ``tokenize(list_of_str)``.

    Returns:
        A float32 ``d × d`` tensor on ``DEVICE``.
    """
    diffs = []
    with torch.no_grad():
        for a, b in BIAS_PAIRS:
            toks = tokenizer.tokenize([a, b]).to(DEVICE)
            feats = model.encode_text(toks)
            feats = feats / feats.norm(dim=-1, keepdim=True)
            # .float() keeps the NumPy conversion valid for reduced-precision outputs.
            diffs.append((feats[0] - feats[1]).float().cpu().numpy())
    B = np.stack(diffs, axis=1).astype(np.float64)  # d × k
    P = np.eye(B.shape[0]) - B @ la.pinv(B)
    return torch.tensor(P, dtype=torch.float32, device=DEVICE)


def debias(feats, P):
    """Right-multiply ``feats`` by the projection ``P`` and return float32."""
    projected = feats @ P
    return projected.float()

# 4️⃣ Agency / communion prompts
# --------------------------------------------------
'''
LABELS = {
    'trustworthy': 'a photo of a trustworthy person',
    'sincere': 'a photo of a sincere person',
    'friendly': 'a photo of a friendly person',
    'benevolent': 'a photo of a benevolent person',
    'likable': 'a photo of a likable person',
    'altruistic': 'a photo of an altruistic person',

    'untrustworthy': 'a photo of an untrustworthy person',
    'dishonest': 'a photo of a dishonest person',
    'unfriendly': 'a photo of an unfriendly person',
    'threatening': 'a photo of a threatening person',
    'unpleasant': 'a photo of an unpleasant person',
    'egoistic': 'a photo of an egoistic person',
}
'''
# The LABELS literal above is disabled (it sits inside a string), so LABELS
# must already have been defined by an earlier notebook cell.
CLASSES = list(LABELS)
PROMPTS = list(LABELS.values())

# 5️⃣ FairFace dataset loader
# --------------------------------------------------
ff_ds = datasets.load_dataset("HuggingFaceM4/FairFace", "0.25", split="validation")
# Converters from the integer class ids stored in the dataset to their names.
RACE_FF = ff_ds.features["race"].int2str
GENDER_FF = ff_ds.features["gender"].int2str


def ff_label(ex):
    """Return the ``"<race>_<gender>"`` group label for one FairFace row."""
    return f"{RACE_FF(ex['race'])}_{GENDER_FF(ex['gender'])}"

class FairFaceDS(Dataset):
    """Torch wrapper around the FairFace split that applies ``preprocess``."""

    def __init__(self, hf_ds, preprocess):
        self.ds = hf_ds
        self.pre = preprocess

    def __len__(self):
        return len(self.ds)

    def __getitem__(self, idx):
        """Return ``(preprocessed image, "<race>_<gender>", idx)``."""
        # Fetch the row once; the original indexed the dataset twice per sample.
        ex = self.ds[idx]
        return self.pre(ex["image"]), ff_label(ex), idx

# 6️⃣ PATA dataset loader
# --------------------------------------------------
JSON_PATH = "/content/drive/MyDrive/Pata_p/processed_dataset_with_images.json"  # ← edit if needed
# Decode explicitly as UTF-8 so the result does not depend on the locale default.
with open(JSON_PATH, "r", encoding="utf-8") as f:
    pata_meta = json.load(f)

class PataDS(Dataset):
    """Image dataset over PATA metadata entries (``image_path``, ``race``, ``gender``)."""

    def __init__(self, meta, preprocess):
        self.meta = meta
        self.pre = preprocess

    def __len__(self):
        return len(self.meta)

    def __getitem__(self, idx):
        """Return ``(preprocessed image, "<race>_<gender>", idx)``."""
        e = self.meta[idx]
        # Close the file handle as soon as the pixels are decoded; convert()
        # returns an independent image, so it stays valid after the block.
        with Image.open(e["image_path"]) as raw:
            rgb = raw.convert("RGB")
        img = self.pre(rgb)
        label = f"{e['race']}_{e['gender']}"
        return img, label, idx

# 7️⃣ Evaluation helper
# --------------------------------------------------

def run_eval(model, preprocess, tokenizer, dataset, outfile):
    """Score ``dataset`` against the debiased prompts and write an Excel sheet.

    The prompt embeddings are normalized, multiplied by the projection
    matrix from ``compute_projection_matrix`` and normalized again. Every
    image gets the class of its highest-scoring prompt. The sheet written
    to ``outfile`` has the columns Index, GroundTruth and Prediction.
    ``preprocess`` is accepted but not used here; the dataset already
    carries its own transform.
    """
    # Projection matrix & debiased prompt features
    projection = compute_projection_matrix(model, tokenizer)
    prompt_tokens = tokenizer.tokenize(PROMPTS).to(DEVICE)
    with torch.no_grad():
        prompt_feats = model.encode_text(prompt_tokens)
    prompt_feats = prompt_feats / prompt_feats.norm(dim=-1, keepdim=True)
    prompt_feats = debias(prompt_feats, projection)
    prompt_feats = prompt_feats / prompt_feats.norm(dim=-1, keepdim=True)

    loader = DataLoader(dataset, batch_size=32, shuffle=False, num_workers=2)
    # Insertion order of this dict fixes the column order of the sheet.
    rows = {"Index": [], "GroundTruth": [], "Prediction": []}
    for images, labels, sample_idx in tqdm(loader, desc=f"Scoring → {outfile}"):
        images = images.to(DEVICE)
        with torch.no_grad():
            image_feats = model.encode_image(images)
        image_feats = image_feats / image_feats.norm(dim=-1, keepdim=True)
        logits = 100 * image_feats @ prompt_feats.T
        winners = logits.softmax(dim=-1).argmax(dim=-1).cpu().tolist()
        rows["Prediction"].extend(CLASSES[w] for w in winners)
        rows["GroundTruth"].extend(labels)
        rows["Index"].extend(sample_idx.tolist())
    pd.DataFrame(rows).to_excel(outfile, index=False)

# 8️⃣ Main experiment loop
# --------------------------------------------------
# (architecture, pretrained tag) pairs passed to open_clip.
CONFIGS = [
    {"mod": "ViT-L-14", "dat": "laion400m_e31"},
    {"mod": "ViT-B-32", "dat": "laion400m_e31"},
]

for cfg in CONFIGS:
    mod, dat = cfg["mod"], cfg["dat"]
    print(f"\n=== {mod} | {dat} (BiasedPrompts) ===")
    model, _, preprocess = open_clip.create_model_and_transforms(mod, pretrained=dat)
    model.to(DEVICE)
    tokenizer = open_clip.tokenizer

    # FAIRFACE
    ff_dataset = FairFaceDS(ff_ds, preprocess)
    ff_out = f"/content/drive/MyDrive/debias_clip_3/crime/fairface_oclip_crime_400_{mod.replace('/', '_')}_biasedPrompts.xlsx"
    # Create the results folder up front so to_excel cannot fail after scoring.
    os.makedirs(os.path.dirname(ff_out), exist_ok=True)
    run_eval(model, preprocess, tokenizer, ff_dataset, ff_out)
    print("  ↪ saved", ff_out)

    # PATA
    pata_dataset = PataDS(pata_meta, preprocess)
    pata_out = f"/content/drive/MyDrive/debias_clip_3/crime/pata_oclip_crime_400_{mod.replace('/', '_')}_biasedPrompts.xlsx"
    os.makedirs(os.path.dirname(pata_out), exist_ok=True)
    run_eval(model, preprocess, tokenizer, pata_dataset, pata_out)
    print("  ↪ saved", pata_out)

print("✅ Biased‑Prompts experiments complete.")

Using device: cuda

=== ViT-L-14 | laion400m_e31 (BiasedPrompts) ===


Scoring → /content/drive/MyDrive/debias_clip_3/crime/fairface_oclip_crime_400_ViT-L-14_biasedPrompts.xlsx: 100%|██████████| 343/343 [08:35<00:00,  1.50s/it]


  ↪ saved /content/drive/MyDrive/debias_clip_3/crime/fairface_oclip_crime_400_ViT-L-14_biasedPrompts.xlsx


Scoring → /content/drive/MyDrive/debias_clip_3/crime/pata_oclip_crime_400_ViT-L-14_biasedPrompts.xlsx: 100%|██████████| 124/124 [03:05<00:00,  1.50s/it]


  ↪ saved /content/drive/MyDrive/debias_clip_3/crime/pata_oclip_crime_400_ViT-L-14_biasedPrompts.xlsx

=== ViT-B-32 | laion400m_e31 (BiasedPrompts) ===


Scoring → /content/drive/MyDrive/debias_clip_3/crime/fairface_oclip_crime_400_ViT-B-32_biasedPrompts.xlsx: 100%|██████████| 343/343 [00:33<00:00, 10.18it/s]


  ↪ saved /content/drive/MyDrive/debias_clip_3/crime/fairface_oclip_crime_400_ViT-B-32_biasedPrompts.xlsx


Scoring → /content/drive/MyDrive/debias_clip_3/crime/pata_oclip_crime_400_ViT-B-32_biasedPrompts.xlsx: 100%|██████████| 124/124 [00:33<00:00,  3.75it/s]


  ↪ saved /content/drive/MyDrive/debias_clip_3/crime/pata_oclip_crime_400_ViT-B-32_biasedPrompts.xlsx
✅ Biased‑Prompts experiments complete.


#prompt array

##prompt array Oclip 2B

In [None]:
# Trait label -> zero-shot text prompt. The Prompt-Array cell below derives
# CLASSES (the keys) and PROMPTS (the values) from this dict.
LABELS = {

    # Positive Agency
    'powerful': 'a photo of a powerful person',
    'high-status': 'a photo of a high status person',
    'dominating': 'a photo of a dominating person',
    'wealthy': 'a photo of a wealthy person',
    'confident': 'a photo of a confident person',
    'competitive': 'a photo of a competitive person',

    # Negative Agency
    'powerless': 'a photo of a powerless person',
    'low-status': 'a photo of a low status person',
    'dominated': 'a photo of a dominated person',
    'poor': 'a photo of a poor person',
    'meek': 'a photo of a meek person',
    'passive': 'a photo of a passive person',

}

In [None]:
# -*- coding: utf-8 -*-
"""
Prompt‑Array Debiasing Experiments (FairFace & PATA)
===================================================
Colab‑ready notebook implementing **"A Prompt Array Keeps the Bias Away"**
(Berg et al., AACL‑IJCNLP 2022) on two datasets:

* **FairFace** validation split (🤗 datasets)
* **PATA** fairness benchmark (JSON metadata with `image_path`, `race`, `gender`)

The paper learns a small **adversarial prompt array** (set of *learnable* text
embeddings) that, when *prepended* to any natural language prompt, counters
protected‑attribute bias. We adopt their simple loss: make similarity between
image features and prompt‑array‑augmented *neutral* prompt greater than the
similarity to *biased* attribute prompts, while keeping utility.

We train the prompt array for a few hundred steps on FairFace images (per CLIP
backbone), then evaluate on FairFace **and** PATA.  No CLIP weights are
updated—only the prompt array (`k = 16` learnable embeddings).

Outputs per model (ViT‑L/14 & ViT‑B/32)
--------------------------------------
* `fairface_<model>_prompt_array.xlsx`
* `pata_<model>_prompt_array.xlsx`

Each Excel lists `Index`, `GroundTruth`, `Prediction`.
"""

# 1️⃣ Install deps (Colab)
!pip -q install open_clip_torch transformers datasets pillow openpyxl tqdm --upgrade

# 2️⃣ Imports & global setup
import os, json, random, warnings, math
from typing import List

import torch, torch.nn as nn, torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

import open_clip, datasets, pandas as pd
from PIL import Image

# Silence all Python warnings for the rest of the session.
warnings.filterwarnings("ignore")

# Run on the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print("Using device:", DEVICE)

# Seed both Python's and PyTorch's RNGs with the same value.
random.seed(42)
torch.manual_seed(42)

# 3️⃣ Prompt‑Array learner
# --------------------------------------------------
class PromptArray(nn.Module):
    """``k`` learnable vectors whose mean is added to a text feature."""

    def __init__(self, embed_dim: int, k: int = 16):
        super().__init__()
        initial = torch.randn(k, embed_dim)
        self.embeds = nn.Parameter(initial)

    def forward(self, text_feat):
        """Add the mean learned vector to ``text_feat`` and L2-normalize."""
        # Simple scheme: the k vectors collapse to one 1 × D offset that is
        # added to the feature (nothing is prepended at token level).
        offset = self.embeds.mean(dim=0, keepdim=True)
        shifted = text_feat + offset
        return F.normalize(shifted, dim=-1)

# 4️⃣ Training hyper‑params
K_P = 16           # prompt array length (passed as k to PromptArray)
LR = 1e-3          # Adam learning rate for the prompt-array parameters
# NOTE(review): run_prompt_array stops once `step * BATCH >= STEPS`, so STEPS
# acts as a budget of training samples, not optimizer steps. With BATCH = 64
# that is 7 batches. Confirm this is intended.
STEPS = 400        # quick demo (≈ 2‑3 min on T4)
BATCH = 64         # DataLoader batch size used for training

# 5️⃣ Debias objective helpers
# Prompt pairs that differ only in one protected attribute.
BIAS_PAIRS = [
    ("a photo of a man", "a photo of a woman"),
    ("a photo of a young person", "a photo of an old person"),
    ("a photo of a white person", "a photo of a black person"),
]

def cosine(x, y):
    """Cosine similarity between ``x`` and ``y`` along the last dimension."""
    similarity = F.cosine_similarity(x, y, dim=-1)
    return similarity

# 6️⃣ Agency / communion prompts
'''
LABELS = {

    # Positive Agency
    'powerful': 'a photo of a powerful person',
    'high-status': 'a photo of a high status person',
    'dominating': 'a photo of a dominating person',
    'wealthy': 'a photo of a wealthy person',
    'confident': 'a photo of a confident person',
    'competitive': 'a photo of a competitive person',

    # Negative Agency
    'powerless': 'a photo of a powerless person',
    'low-status': 'a photo of a low status person',
    'dominated': 'a photo of a dominated person',
    'poor': 'a photo of a poor person',
    'meek': 'a photo of a meek person',
    'passive': 'a photo of a passive person',

}
'''
# The LABELS literal above is disabled (it sits inside a string), so LABELS
# must already have been defined by an earlier notebook cell.
CLASSES = list(LABELS)
PROMPTS = list(LABELS.values())

# 7️⃣ Datasets – FairFace & PATA loaders
ff_ds = datasets.load_dataset("HuggingFaceM4/FairFace", "0.25", split="validation")
# Converters from the integer class ids stored in the dataset to their names.
R, G = ff_ds.features["race"].int2str, ff_ds.features["gender"].int2str


def ff_label(ex):
    """Return the ``"<race>_<gender>"`` group label for one FairFace row."""
    return f"{R(ex['race'])}_{G(ex['gender'])}"


JSON_PATH = "/content/drive/MyDrive/Pata_p/processed_dataset_with_images.json"
# Decode explicitly as UTF-8 so the result does not depend on the locale default.
with open(JSON_PATH, encoding="utf-8") as f:
    pata_meta = json.load(f)

class FairFaceDS(Dataset):
    """Seeded random subset of the FairFace split with ``preprocess`` applied.

    Args:
        preprocess: Transform applied to each image.
        n: Maximum number of samples to keep. Clamped to the split size so an
            oversized request no longer raises inside ``select``.
    """

    def __init__(self, preprocess, n=8000):
        self.pre = preprocess
        n = min(n, len(ff_ds))
        self.sub = ff_ds.shuffle(seed=42).select(range(n))

    def __len__(self):
        return len(self.sub)

    def __getitem__(self, i):
        ex = self.sub[i]
        # return image tensor, label string, and index so evaluator can track;
        # the index is the position within the shuffled subset.
        return self.pre(ex["image"]), ff_label(ex), i

class PataDS(Dataset):
    """Image dataset over the module-level ``pata_meta`` entries."""

    def __init__(self, preprocess):
        self.pre = preprocess
        self.meta = pata_meta

    def __len__(self):
        return len(self.meta)

    def __getitem__(self, i):
        """Return ``(preprocessed image, "<race>_<gender>", i)``."""
        e = self.meta[i]
        # Close the file handle as soon as the pixels are decoded; convert()
        # returns an independent image, so it stays valid after the block.
        with Image.open(e["image_path"]) as raw:
            img = raw.convert("RGB")
        return self.pre(img), f"{e['race']}_{e['gender']}", i

# 8️⃣ Train + evaluate function per CLIP variant
# --------------------------------------------------

def run_prompt_array(mod: str, dat: str):
    """Train a prompt array for one open_clip checkpoint and score PATA with it.

    CLIP is used as a frozen feature extractor: all of its forward passes run
    under ``torch.no_grad()`` and the optimizer only receives the
    ``PromptArray`` parameters.  After training, every prompt in ``PROMPTS`` is
    encoded, passed through the prompt array, and each PATA image is assigned
    the entry of ``CLASSES`` whose prompt feature is most similar.

    Args:
        mod: open_clip model name, e.g. ``"ViT-L-14"``.
        dat: name of the pretrained weights to load for ``mod``.

    Raises:
        ValueError: if the FairFace training DataLoader yields no batches.

    Side effects:
        Prints training progress and writes an Excel file with the columns
        ``Index``, ``GroundTruth`` and ``Prediction``.
    """
    print(f"\n=== Prompt‑Array {mod} | {dat} ===")
    model, _, preprocess = open_clip.create_model_and_transforms(mod, pretrained=dat)
    model.to(DEVICE); tokenizer=open_clip.tokenizer
    # open_clip returns models in train mode; CLIP is only used for inference here.
    model.eval()
    embed_dim = model.text_projection.shape[1]

    # Init learner
    pa = PromptArray(embed_dim, K_P).to(DEVICE)
    opt = torch.optim.Adam(pa.parameters(), lr=LR)

    # Prepare bias pair token embeddings once
    toks_bias = [pair for bp in BIAS_PAIRS for pair in bp]  # flatten
    toks = tokenizer.tokenize(toks_bias).to(DEVICE)
    with torch.no_grad():
        bias_feats = model.encode_text(toks)
        bias_feats = bias_feats / bias_feats.norm(dim=-1, keepdim=True)
    bias_feats = bias_feats.view(len(BIAS_PAIRS), 2, -1)  # [pairs,2,D]

    # FairFace dataloader for training (images only)
    train_dl = DataLoader(FairFaceDS(preprocess), batch_size=BATCH, shuffle=True, num_workers=2)
    if len(train_dl) == 0:
        raise ValueError("FairFace training DataLoader is empty")

    # —— TRAIN ——
    # Run exactly STEPS optimizer updates, re-iterating the DataLoader whenever
    # an epoch ends.  The previous check `step * BATCH >= STEPS` stopped after
    # ceil(STEPS / BATCH) batches (e.g. 7 updates for STEPS=400, BATCH=64).
    # NOTE(review): earlier revisions also encoded the neutral prompt
    # "a photo of a person" on every step, but the result never entered the
    # loss, so that dead computation was dropped.  The objective below only
    # uses the BIAS_PAIRS features — confirm this is the intended loss.
    step = 0
    while step < STEPS:
        for imgs, _, _ in train_dl:
            if step >= STEPS:
                break
            imgs = imgs.to(DEVICE)
            opt.zero_grad()
            with torch.no_grad():
                im_f = model.encode_image(imgs)
                im_f = im_f / im_f.norm(dim=-1, keepdim=True)
            # Margin loss over every bias pair
            loss = 0.
            for pos, neg in bias_feats:
                pos_d = pa(pos.unsqueeze(0))  # same PA applied to both sides
                neg_d = pa(neg.unsqueeze(0))
                diff = cosine(im_f, neg_d) - cosine(im_f, pos_d)  # higher means biased
                loss += F.relu(diff + 0.05).mean()  # margin
            loss /= len(BIAS_PAIRS)
            loss.backward(); opt.step()
            if step % 20 == 0:
                print(f"step {step:3d}  loss={loss.item():.4f}")
            step += 1

    # —— PREP debiased prompt features ——
    prom_tok = tokenizer.tokenize(PROMPTS).to(DEVICE)
    # no_grad: the prompt array is only applied here, not trained any further
    with torch.no_grad():
        pfeat = model.encode_text(prom_tok)
        pfeat = pfeat / pfeat.norm(dim=-1, keepdim=True)
        pfeat = pa(pfeat); pfeat = pfeat / pfeat.norm(dim=-1, keepdim=True)

    # —— Evaluation helper ——
    def evaluate(dl, out_path):
        """Predict a class for every sample of ``dl`` and save them to ``out_path``."""
        gts, preds, idxs = [], [], []
        for imgs, labs, idx in tqdm(dl, desc=out_path):
            imgs = imgs.to(DEVICE)
            with torch.no_grad():
                vf = model.encode_image(imgs)
                vf = vf / vf.norm(dim=-1, keepdim=True)
                top = (100 * vf @ pfeat.T).argmax(dim=-1).cpu()
            preds.extend(CLASSES[i] for i in top.tolist())
            gts.extend(labs)
            # store plain ints: the collated indices arrive as tensor elements,
            # which would otherwise end up in the "Index" column as objects
            idxs.extend(int(i) for i in idx)
        pd.DataFrame({"Index": idxs, "GroundTruth": gts, "Prediction": preds}).to_excel(out_path, index=False)

    # FairFace evaluation
    #ff_eval_dl = DataLoader(FairFaceDS(preprocess, n=7000), batch_size=64, num_workers=2)
    #evaluate(ff_eval_dl, f"/content/drive/MyDrive/debias_clip_3/fairface_oclip_crime_{mod.replace('/', '_')}_prompt_array.xlsx")

    # PATA evaluation
    pata_eval_dl = DataLoader(PataDS(preprocess), batch_size=64, num_workers=2)
    evaluate(pata_eval_dl, f"/content/drive/MyDrive/debias_clip_3/pata_oclip_agen_{mod.replace('/', '_')}_prompt_array.xlsx")
    print("✅  Saved outputs for", mod)

# 9️⃣ Run for each model
# (open_clip model name, pretrained tag) for every backbone to process
for mod, dat in (
    ("ViT-L-14", "laion2b_s32b_b82k"),
    ("ViT-B-32", "laion2b_s34b_b79k"),
):
    run_prompt_array(mod, dat)

print("🎉  Prompt‑Array experiments complete")


Using device: cuda

=== Prompt‑Array ViT-L-14 | laion2b_s32b_b82k ===
step   0  loss=0.0457


/content/drive/MyDrive/debias_clip_3/pata_oclip_agen_ViT-L-14_prompt_array.xlsx: 100%|██████████| 62/62 [29:35<00:00, 28.64s/it]


✅  Saved outputs for ViT-L-14

=== Prompt‑Array ViT-B-32 | laion2b_s34b_b79k ===
step   0  loss=0.0448


/content/drive/MyDrive/debias_clip_3/pata_oclip_agen_ViT-B-32_prompt_array.xlsx: 100%|██████████| 62/62 [00:33<00:00,  1.84it/s]


✅  Saved outputs for ViT-B-32
🎉  Prompt‑Array experiments complete


##prompt array Oclip 400m

In [None]:
# -*- coding: utf-8 -*-
"""
Prompt‑Array Debiasing Experiments (FairFace & PATA)
===================================================
Colab‑ready notebook implementing **"A Prompt Array Keeps the Bias Away"**
(Berg et al., AACL‑IJCNLP 2022) on two datasets:

* **FairFace** validation split (🤗 datasets)
* **PATA** fairness benchmark (JSON metadata with `image_path`, `race`, `gender`)

The paper learns a small **adversarial prompt array** (set of *learnable* text
embeddings) that, when *prepended* to any natural language prompt, counters
protected‑attribute bias. We adopt their simple loss: make similarity between
image features and prompt‑array‑augmented *neutral* prompt greater than the
similarity to *biased* attribute prompts, while keeping utility.

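We train the prompt array for a few hundred steps on FairFace images (per CLIP
backbone), then evaluate on FairFace **and** PATA.  No CLIP weights are
updated—only the prompt array (`k = 16` learnable embeddings).

Implementation note: the `PromptArray` module below is a simplification of the
paper's scheme. Instead of prepending tokens to the prompt, it adds the mean of
the learnable embeddings to the already‑encoded text feature and re‑normalises
the result.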

Outputs per model (ViT‑L/14 & ViT‑B/32)
--------------------------------------
* `pata_oclip_agen_400_<model>_prompt_array.xlsx`
* FairFace output: currently disabled (the FairFace evaluation call in
  `run_prompt_array` is commented out)

Each Excel lists `Index`, `GroundTruth`, `Prediction`.
"""

# 1️⃣ Install deps (Colab)
!pip -q install open_clip_torch transformers datasets pillow openpyxl tqdm --upgrade

# 2️⃣ Imports & global setup
import os, json, random, warnings, math
from typing import List

import torch, torch.nn as nn, torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

import open_clip, datasets, pandas as pd
from PIL import Image

warnings.filterwarnings("ignore")
# prefer the GPU whenever one is visible to PyTorch
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print("Using device:", DEVICE)
# seed both Python's and PyTorch's random number generators
random.seed(42)
torch.manual_seed(42)

# 3️⃣ Prompt‑Array learner
# --------------------------------------------------
class PromptArray(nn.Module):
    """Learnable set of ``k`` embeddings that shifts CLIP text features.

    ``forward`` adds the mean of the ``k`` embeddings to the incoming text
    feature and L2-normalises the sum along the last dimension.
    """

    def __init__(self, embed_dim: int, k: int = 16):
        super().__init__()
        initial = torch.randn(k, embed_dim)
        self.embeds = nn.Parameter(initial)

    def forward(self, text_feat):
        # collapse the k learned embeddings into one 1 × D offset
        offset = torch.mean(self.embeds, dim=0, keepdim=True)
        shifted = text_feat + offset
        return F.normalize(shifted, dim=-1)

# 4️⃣ Training hyper‑params
K_P = 16           # prompt array length (passed as `k` to PromptArray)
LR = 1e-3          # learning rate of the Adam optimizer
STEPS = 400        # quick demo (≈ 2‑3 min on T4)
BATCH = 64         # batch size of the FairFace training DataLoader

# 5️⃣ Debias objective helpers
# Each tuple is unpacked as (pos, neg) by the margin loss in run_prompt_array.
BIAS_PAIRS = [
    ("a photo of a man", "a photo of a woman"),
    ("a photo of a young person", "a photo of an old person"),
    ("a photo of a white person", "a photo of a black person"),
]

def cosine(x, y):
    """Return the cosine similarity of ``x`` and ``y`` along the last dimension."""
    similarity = F.cosine_similarity(x, y, dim=-1)
    return similarity

# 6️⃣ Agency / communion prompts
# NOTE: the LABELS dict below is wrapped in a triple-quoted string, so it is NOT
# executed by this cell. `LABELS` must already exist (e.g. defined by a
# previously run cell) when the CLASSES/PROMPTS line runs.
'''
LABELS = {
    'trustworthy': 'a photo of a trustworthy person',
    'sincere': 'a photo of a sincere person',
    'friendly': 'a photo of a friendly person',
    'benevolent': 'a photo of a benevolent person',
    'likable': 'a photo of a likable person',
    'altruistic': 'a photo of an altruistic person',

    'untrustworthy': 'a photo of an untrustworthy person',
    'dishonest': 'a photo of a dishonest person',
    'unfriendly': 'a photo of an unfriendly person',
    'threatening': 'a photo of a threatening person',
    'unpleasant': 'a photo of an unpleasant person',
    'egoistic': 'a photo of an egoistic person',
}
'''
# CLASSES = label keys, PROMPTS = the matching prompt strings (same order).
CLASSES, PROMPTS = list(LABELS.keys()), list(LABELS.values())

# 7️⃣ Datasets – FairFace & PATA loaders
ff_ds = datasets.load_dataset(
    "HuggingFaceM4/FairFace",
    "0.25",
    split="validation",
)
# decoders that turn the integer race / gender codes into their string names
R = ff_ds.features["race"].int2str
G = ff_ds.features["gender"].int2str


def ff_label(ex):
    """Build the "<race>_<gender>" group label for one FairFace example."""
    return f"{R(ex['race'])}_{G(ex['gender'])}"


JSON_PATH = "/content/drive/MyDrive/Pata_p/processed_dataset_with_images.json"
with open(JSON_PATH) as f:
    pata_meta = json.load(f)

class FairFaceDS(Dataset):
    """Fixed-seed shuffled subset of ``ff_ds`` that yields preprocessed images.

    Every item is ``(preprocess(image), ff_label(example), index)``.
    """

    def __init__(self, preprocess, n=8000):
        shuffled = ff_ds.shuffle(seed=42)
        self.sub = shuffled.select(range(n))
        self.pre = preprocess

    def __len__(self):
        return len(self.sub)

    def __getitem__(self, i):
        example = self.sub[i]
        image_tensor = self.pre(example["image"])
        group = ff_label(example)
        # the index travels with the sample so the evaluator can track it
        return image_tensor, group, i

class PataDS(Dataset):
    """PATA images described by the records in ``pata_meta``.

    Every item is ``(preprocess(RGB image), "<race>_<gender>", index)``.
    """

    def __init__(self, preprocess):
        self.pre = preprocess
        self.meta = pata_meta

    def __len__(self):
        return len(self.meta)

    def __getitem__(self, i):
        entry = self.meta[i]
        rgb = Image.open(entry["image_path"]).convert("RGB")
        image_tensor = self.pre(rgb)
        group = f"{entry['race']}_{entry['gender']}"
        return image_tensor, group, i

# 8️⃣ Train + evaluate function per CLIP variant
# --------------------------------------------------

def run_prompt_array(mod: str, dat: str):
    """Train a prompt array for one open_clip checkpoint and score PATA with it.

    CLIP is used as a frozen feature extractor: all of its forward passes run
    under ``torch.no_grad()`` and the optimizer only receives the
    ``PromptArray`` parameters.  After training, every prompt in ``PROMPTS`` is
    encoded, passed through the prompt array, and each PATA image is assigned
    the entry of ``CLASSES`` whose prompt feature is most similar.

    Args:
        mod: open_clip model name, e.g. ``"ViT-L-14"``.
        dat: name of the pretrained weights to load for ``mod``.

    Raises:
        ValueError: if the FairFace training DataLoader yields no batches.

    Side effects:
        Prints training progress and writes an Excel file with the columns
        ``Index``, ``GroundTruth`` and ``Prediction``.
    """
    print(f"\n=== Prompt‑Array {mod} | {dat} ===")
    model, _, preprocess = open_clip.create_model_and_transforms(mod, pretrained=dat)
    model.to(DEVICE); tokenizer=open_clip.tokenizer
    # open_clip returns models in train mode; CLIP is only used for inference here.
    model.eval()
    embed_dim = model.text_projection.shape[1]

    # Init learner
    pa = PromptArray(embed_dim, K_P).to(DEVICE)
    opt = torch.optim.Adam(pa.parameters(), lr=LR)

    # Prepare bias pair token embeddings once
    toks_bias = [pair for bp in BIAS_PAIRS for pair in bp]  # flatten
    toks = tokenizer.tokenize(toks_bias).to(DEVICE)
    with torch.no_grad():
        bias_feats = model.encode_text(toks)
        bias_feats = bias_feats / bias_feats.norm(dim=-1, keepdim=True)
    bias_feats = bias_feats.view(len(BIAS_PAIRS), 2, -1)  # [pairs,2,D]

    # FairFace dataloader for training (images only)
    train_dl = DataLoader(FairFaceDS(preprocess), batch_size=BATCH, shuffle=True, num_workers=2)
    if len(train_dl) == 0:
        raise ValueError("FairFace training DataLoader is empty")

    # —— TRAIN ——
    # Run exactly STEPS optimizer updates, re-iterating the DataLoader whenever
    # an epoch ends.  The previous check `step * BATCH >= STEPS` stopped after
    # ceil(STEPS / BATCH) batches (7 updates for STEPS=400, BATCH=64).
    # NOTE(review): earlier revisions also encoded the neutral prompt
    # "a photo of a person" on every step, but the result never entered the
    # loss, so that dead computation was dropped.  The objective below only
    # uses the BIAS_PAIRS features — confirm this is the intended loss.
    step = 0
    while step < STEPS:
        for imgs, _, _ in train_dl:
            if step >= STEPS:
                break
            imgs = imgs.to(DEVICE)
            opt.zero_grad()
            with torch.no_grad():
                im_f = model.encode_image(imgs)
                im_f = im_f / im_f.norm(dim=-1, keepdim=True)
            # Margin loss over every bias pair
            loss = 0.
            for pos, neg in bias_feats:
                pos_d = pa(pos.unsqueeze(0))  # same PA applied to both sides
                neg_d = pa(neg.unsqueeze(0))
                diff = cosine(im_f, neg_d) - cosine(im_f, pos_d)  # higher means biased
                loss += F.relu(diff + 0.05).mean()  # margin
            loss /= len(BIAS_PAIRS)
            loss.backward(); opt.step()
            if step % 20 == 0:
                print(f"step {step:3d}  loss={loss.item():.4f}")
            step += 1

    # —— PREP debiased prompt features ——
    prom_tok = tokenizer.tokenize(PROMPTS).to(DEVICE)
    # no_grad: the prompt array is only applied here, not trained any further
    with torch.no_grad():
        pfeat = model.encode_text(prom_tok)
        pfeat = pfeat / pfeat.norm(dim=-1, keepdim=True)
        pfeat = pa(pfeat); pfeat = pfeat / pfeat.norm(dim=-1, keepdim=True)

    # —— Evaluation helper ——
    def evaluate(dl, out_path):
        """Predict a class for every sample of ``dl`` and save them to ``out_path``."""
        gts, preds, idxs = [], [], []
        for imgs, labs, idx in tqdm(dl, desc=out_path):
            imgs = imgs.to(DEVICE)
            with torch.no_grad():
                vf = model.encode_image(imgs)
                vf = vf / vf.norm(dim=-1, keepdim=True)
                top = (100 * vf @ pfeat.T).argmax(dim=-1).cpu()
            preds.extend(CLASSES[i] for i in top.tolist())
            gts.extend(labs)
            # store plain ints: the collated indices arrive as tensor elements,
            # which would otherwise end up in the "Index" column as objects
            idxs.extend(int(i) for i in idx)
        pd.DataFrame({"Index": idxs, "GroundTruth": gts, "Prediction": preds}).to_excel(out_path, index=False)

    # FairFace evaluation
    #ff_eval_dl = DataLoader(FairFaceDS(preprocess, n=7000), batch_size=64, num_workers=2)
    #evaluate(ff_eval_dl, f"/content/drive/MyDrive/debias_clip_3/crime/fairface_oclip_crime_400_{mod.replace('/', '_')}_prompt_array.xlsx")

    # PATA evaluation
    pata_eval_dl = DataLoader(PataDS(preprocess), batch_size=64, num_workers=2)
    evaluate(pata_eval_dl, f"/content/drive/MyDrive/debias_clip_3/pata_oclip_agen_400_{mod.replace('/', '_')}_prompt_array.xlsx")
    print("✅  Saved outputs for", mod)

# 9️⃣ Run for each model
# (open_clip model name, pretrained tag) for every backbone to process
for mod, dat in (
    ("ViT-L-14", "laion400m_e31"),
    ("ViT-B-32", "laion400m_e31"),
):
    run_prompt_array(mod, dat)

print("🎉  Prompt‑Array experiments complete")


Using device: cuda

=== Prompt‑Array ViT-L-14 | laion400m_e31 ===
step   0  loss=0.0442


/content/drive/MyDrive/debias_clip_3/pata_oclip_agen_400_ViT-L-14_prompt_array.xlsx: 100%|██████████| 62/62 [03:08<00:00,  3.04s/it]


✅  Saved outputs for ViT-L-14

=== Prompt‑Array ViT-B-32 | laion400m_e31 ===
step   0  loss=0.0457


/content/drive/MyDrive/debias_clip_3/pata_oclip_agen_400_ViT-B-32_prompt_array.xlsx: 100%|██████████| 62/62 [00:33<00:00,  1.87it/s]


✅  Saved outputs for ViT-B-32
🎉  Prompt‑Array experiments complete


In [None]:
# Disabled one-off utility: the whole script is kept inside a string literal, so
# nothing below runs. When enabled it mounts Google Drive and prefixes every
# .xlsx/.xls file in `folder_path` with 'comm_clip_' (files that already carry
# the prefix are skipped).
'''
from google.colab import drive
import os

# 1. Mount Google Drive
drive.mount('/content/drive')

# 2. Define the path to your folder containing the Excel files
folder_path = '/content/drive/MyDrive/debias_clip_3/comm/clip_comm'  # replace with your actual path

# 3. Loop through all files in the folder
for filename in os.listdir(folder_path):
    if filename.endswith('.xlsx') or filename.endswith('.xls'):
        old_path = os.path.join(folder_path, filename)

        # Skip if already renamed
        if not filename.startswith('comm_clip_'):
            new_filename = 'comm_clip_' + filename
            new_path = os.path.join(folder_path, new_filename)

            # Rename the file
            os.rename(old_path, new_path)
            print(f"Renamed: {filename} --> {new_filename}")
            '''


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Renamed: pata_comm_ViT-B_32_prompt_array.xlsx --> comm_clip_pata_comm_ViT-B_32_prompt_array.xlsx
Renamed: fairface_ViT-B_32_saner.xlsx --> comm_clip_fairface_ViT-B_32_saner.xlsx
Renamed: pata_ViT-L_14_biasedPrompts.xlsx --> comm_clip_pata_ViT-L_14_biasedPrompts.xlsx
Renamed: fairface_ViT-L_14_saner.xlsx --> comm_clip_fairface_ViT-L_14_saner.xlsx
Renamed: fairface_ViT-L_14_biasedPrompts.xlsx --> comm_clip_fairface_ViT-L_14_biasedPrompts.xlsx
Renamed: pata_ViT-L_14_saner.xlsx --> comm_clip_pata_ViT-L_14_saner.xlsx
Renamed: pata_comm_ViT-L_14_prompt_array.xlsx --> comm_clip_pata_comm_ViT-L_14_prompt_array.xlsx
Renamed: pata_ViT-B_32_biasedPrompts.xlsx --> comm_clip_pata_ViT-B_32_biasedPrompts.xlsx
Renamed: fairface_ViT-B_32_biasedPrompts.xlsx --> comm_clip_fairface_ViT-B_32_biasedPrompts.xlsx
Renamed: fairface_comm_ViT-L_14_prompt_array.xlsx --> comm_clip_fairfa