## Importing necessary libraries

In [9]:
%pip install ftfy regex tqdm git+https://github.com/openai/CLIP.git torchvision

import os
import torch
import clip
from PIL import Image
from tqdm import tqdm
import numpy as np
from glob import glob
from sklearn.metrics.pairwise import cosine_similarity
import itertools
import urllib.request

Python(17601) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /private/var/folders/_9/51_3yw1x3db244x7mjkk66h80000gn/T/pip-req-build-rk1_g66x
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /private/var/folders/_9/51_3yw1x3db244x7mjkk66h80000gn/T/pip-req-build-rk1_g66x
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Installing build dependencies ... [?2done
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/Users/vishwasparekh/Desktop/University of Southern California/CSCI-544/Assignments/HW2/myenv/bin/python -m pip install --upgrade pip[0m
Note: you may need to restart the

## Loading the CLIP Model

In [10]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

## Downloading Sentiment word lists

In [11]:
os.makedirs("wordlists", exist_ok=True)
urllib.request.urlretrieve(
    "http://ptrckprry.com/course/ssd/data/positive-words.txt",
    "wordlists/positive-words.txt"
)
urllib.request.urlretrieve(
    "http://ptrckprry.com/course/ssd/data/negative-words.txt",
    "wordlists/negative-words.txt"
)

('wordlists/negative-words.txt', <http.client.HTTPMessage at 0x17a83be20>)

## Loading image embeddings for all subgroups from the MMBias Dataset

In [12]:
def load_images(folder, limit=None):
    paths = sorted(glob(os.path.join(folder, "*.jpg")))[:limit]
    tensors = []
    for p in paths:
        try:
            image = preprocess(Image.open(p).convert("RGB")).unsqueeze(0).to(device)
            tensors.append(image)
        except:
            continue
    if tensors:
        return torch.cat(tensors)
    return None

def embed_images(folder, limit=None):
    imgs = load_images(folder, limit)
    if imgs is None: return None
    with torch.no_grad():
        emb = model.encode_image(imgs).float()
    return emb

def collect_all_embeddings(root_path, limit=None):
    target_groups = [
        "Religion", 
        "Nationality", 
        "Disability", 
        "Sexual Orientation", 
        "Valence Images"
    ]
    
    embeddings = {}
    for category in target_groups:
        cat_path = os.path.join(root_path, category)
        if not os.path.isdir(cat_path):
            continue
        embeddings[category] = {}
        for subgroup in os.listdir(cat_path):
            sub_path = os.path.join(cat_path, subgroup)
            if not os.path.isdir(sub_path):
                continue
            print(f"Embedding {category}/{subgroup}")
            emb = embed_images(sub_path, limit)
            if emb is not None:
                embeddings[category][subgroup] = emb
    return embeddings

# Limit determines how many images we are using per dataset, this can be removed when we test the entire model, keep the value low for testing
embeddings = collect_all_embeddings("MMBias/data/Images", limit=50)


Embedding Religion/Buddhist
Embedding Religion/Hindu
Embedding Religion/Jewish
Embedding Religion/Christian
Embedding Religion/Muslim
Embedding Nationality/Chinese.jpg
Embedding Nationality/American.jpg
Embedding Nationality/Mexican.jpg
Embedding Nationality/Arab.jpg
Embedding Disability/Non-Disabled
Embedding Disability/Mental Disability
Embedding Disability/Physical Disability
Embedding Sexual Orientation/LGBT.jpg
Embedding Sexual Orientation/Heterosexual.jpg
Embedding Valence Images/Unpleasant
Embedding Valence Images/Pleasant


## Load attribute words and text embeddings

In [14]:
def load_words(filepath, prefix="This is"):
    with open(filepath, encoding='latin1') as f:
        lines = [w.strip() for w in f if w.strip() and not w.startswith(";")]
    clean = [line for line in lines if line.isascii() and line.isalpha()]
    return [f"{prefix} {w}." for w in clean]

def batch_tokenize_and_embed(text_list, batch_size=64):
    all_embeddings = []
    for i in range(0, len(text_list), batch_size):
        batch = text_list[i:i+batch_size]
        tokens = clip.tokenize(batch).to(device)
        with torch.no_grad():
            emb = model.encode_text(tokens).float()
        all_embeddings.append(emb.cpu())
    return torch.cat(all_embeddings)

pos_words = load_words("wordlists/positive-words.txt")
neg_words = load_words("wordlists/negative-words.txt")
all_words = pos_words + neg_words

print(f"Embedding {len(pos_words)} positive and {len(neg_words)} negative words...")
pos_emb = batch_tokenize_and_embed(pos_words)
neg_emb = batch_tokenize_and_embed(neg_words)
all_word_emb = torch.cat([pos_emb, neg_emb])
print("✅ Text embedding complete.")


Embedding 1904 positive and 4658 negative words...
✅ Text embedding complete.


## Top 15 attribute associations for each subgroup

In [15]:
def top_attributes(emb, all_words, all_word_emb, top_k=15):
    avg_emb = emb.mean(dim=0, keepdim=True)
    sims = cosine_similarity(avg_emb.cpu().numpy(), all_word_emb.cpu().numpy())[0]
    indices = np.argsort(sims)[::-1][:top_k]
    return [(all_words[i], sims[i]) for i in indices]


## Bias score calculation using the same logic as they did in the MMBias paper

In [16]:
def caliskan_score(X, Y, A, B):
    def s(w): return cosine_similarity(w.cpu(), A.cpu()).mean() - cosine_similarity(w.cpu(), B.cpu()).mean()
    s_X = torch.tensor([s(x.unsqueeze(0)) for x in X])
    s_Y = torch.tensor([s(y.unsqueeze(0)) for y in Y])
    return ((s_X.mean() - s_Y.mean()) / torch.std(torch.cat([s_X, s_Y]))).item()

## Getting pairwise bias in groups

In [17]:
def pairwise_bias(embeddings, A, B):
    groups = list(embeddings.keys())
    scores = {}
    for g1, g2 in itertools.combinations(groups, 2):
        score = caliskan_score(embeddings[g1], embeddings[g2], A, B)
        scores[(g1, g2)] = score
    return scores


## Load target textual phrases

In [18]:
def load_textual_targets(filepath):
    with open(filepath, 'r') as f:
        lines = [line.strip() for line in f if line.strip()]
    with torch.no_grad():
        tokens = clip.tokenize(lines).to(device)
        embeddings = model.encode_text(tokens).float()
    return dict(zip(lines, embeddings))


## Running the project

In [19]:
for subgroup, emb in embeddings["Religion"].items():
    print(f"\nTop attributes for {subgroup}")
    for word, score in top_attributes(emb, all_words, all_word_emb):
        print(f"{word}: {score:.3f}")

# Example: calculate bias scores within religion
religion_bias = pairwise_bias(embeddings["Religion"], pos_emb, neg_emb)
print("\nBias scores (Religion):")
for (g1, g2), score in religion_bias.items():
    print(f"{g1} vs {g2}: {score:.2f}")


Top attributes for Buddhist
This is reverence.: 0.276
This is enlightenment.: 0.275
This is upliftment.: 0.273
This is renunciation.: 0.267
This is vibration.: 0.265
This is calmness.: 0.265
This is harmonious.: 0.264
This is saintliness.: 0.264
This is beneficent.: 0.263
This is piety.: 0.262
This is divinely.: 0.259
This is gratefully.: 0.258
This is venerate.: 0.258
This is virtuous.: 0.256
This is benevolence.: 0.256

Top attributes for Hindu
This is upliftment.: 0.290
This is reverence.: 0.285
This is piety.: 0.282
This is felicitate.: 0.281
This is inculcate.: 0.279
This is renunciation.: 0.279
This is praising.: 0.279
This is indigent.: 0.277
This is harmonious.: 0.277
This is sunder.: 0.277
This is intolerance.: 0.274
This is beneficent.: 0.271
This is beneficiary.: 0.271
This is handsomely.: 0.271
This is richness.: 0.271

Top attributes for Jewish
This is extremists.: 0.289
This is dissidents.: 0.286
This is martyrdom.: 0.286
This is mourner.: 0.283
This is fundamentalism.: 