In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from sklearn.preprocessing import normalize
from sklearn.neighbors import kneighbors_graph
from sklearn.model_selection import train_test_split
import open_clip
from transformers import BertTokenizer, BertModel
from torch_geometric.nn import GATConv

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class MisogynyDataset(Dataset):
    """Dataset yielding ``(image, caption, label)`` triples from a dataframe.

    Each row of ``data`` must provide the columns ``image_path``,
    ``image_label`` and ``image_caption``.

    Args:
        data: DataFrame holding one sample per row. Its index is reset so
            that positional lookups in ``__getitem__`` run from 0 to
            ``len(data) - 1``.
        label_map: Mapping from the raw ``image_label`` value to the integer
            class id returned by ``__getitem__``.
        transform: Callable applied to the RGB PIL image. When omitted, the
            image is resized to 224x224 and converted to a tensor.
    """

    def __init__(self, data, label_map, transform=None):
        self.data = data.reset_index(drop=True)
        self.label_map = label_map
        self.transform = transform or transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor()
        ])

    def __len__(self):
        """Return the number of samples (rows of the dataframe)."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load and transform the sample at position ``idx``.

        Returns:
            Tuple ``(image, caption, label)`` where ``image`` is the output of
            ``self.transform``, ``caption`` is the raw ``image_caption`` value
            and ``label`` is ``label_map[image_label]``.

        Raises:
            KeyError: if the row's ``image_label`` is missing from
                ``label_map``.
        """
        row = self.data.iloc[idx]
        # Open inside a context manager so the underlying file handle is
        # released immediately; ``convert`` returns an independent, fully
        # loaded image that stays valid after the file is closed.
        with Image.open(row["image_path"]) as raw_image:
            image = raw_image.convert("RGB")
        image = self.transform(image)
        label = self.label_map[row["image_label"]]
        caption = row["image_caption"]
        return image, caption, label

class MisogynyDataLoader:
    """Read the CSV, split it into stratified train/test parts, and wrap both in loaders.

    Exposes ``train_dataset``, ``test_dataset``, ``train_loader`` and
    ``test_loader``. Neither loader shuffles, so batches follow the row order
    of the corresponding dataset.

    Args:
        csv_file: Path of the CSV file passed to ``pd.read_csv``.
        batch_size: Batch size used by both loaders.
        test_size: Share of the data held out for the test split.
        random_state: Seed handed to ``train_test_split``.
        train_transform: Optional image transform for the training dataset.
        test_transform: Optional image transform for the test dataset.
        num_workers: Worker count forwarded to both ``DataLoader`` objects.
        pin_memory: ``pin_memory`` flag forwarded to both ``DataLoader`` objects.
    """

    def __init__(self, csv_file="data_csv.csv", batch_size=16, test_size=0.2, random_state=42,
                 train_transform=None, test_transform=None, num_workers=0, pin_memory=False):
        frame = pd.read_csv(csv_file)
        class_to_index = {"kitchen":0, "shopping":1, "working":2, "leadership":3}

        # Stratify on the label column so both parts keep the class balance.
        split_options = dict(
            test_size=test_size,
            random_state=random_state,
            shuffle=True,
            stratify=frame["image_label"],
        )
        train_part, test_part = train_test_split(frame, **split_options)

        self.train_dataset = MisogynyDataset(train_part, class_to_index, transform=train_transform)
        self.test_dataset = MisogynyDataset(test_part, class_to_index, transform=test_transform)

        # Both loaders share the same settings and deliberately do not shuffle.
        loader_options = dict(
            batch_size=batch_size,
            shuffle=False,
            num_workers=num_workers,
            pin_memory=pin_memory,
        )
        self.train_loader = DataLoader(self.train_dataset, **loader_options)
        self.test_loader = DataLoader(self.test_dataset, **loader_options)


In [3]:
class BERTEmbedder(nn.Module):
    """Sentence encoder built on ``bert-base-uncased`` with masked mean pooling.

    ``forward`` tokenizes raw text, runs BERT without gradient tracking,
    averages the token embeddings over the attended positions and returns
    L2-normalised sentence vectors.
    """

    def __init__(self):
        super().__init__()
        self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
        self.model_bert = BertModel.from_pretrained("bert-base-uncased")
        self.model_bert.eval()

    def forward(self, input_text):
        """Embed a string or a list of strings.

        Args:
            input_text: Text (or batch of texts) handed to the tokenizer.

        Returns:
            Tensor with one unit-norm row per input text, located on the same
            device as the BERT weights.
        """
        inputs = self.tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
        # The tokenizer always produces CPU tensors; move them to wherever the
        # model lives so the module also works after ``.to("cuda")``.
        model_device = next(self.model_bert.parameters()).device
        inputs = {name: tensor.to(model_device) for name, tensor in inputs.items()}
        with torch.no_grad():
            outputs = self.model_bert(**inputs)
        token_embeddings = outputs.last_hidden_state
        attention_mask = inputs["attention_mask"]
        mask = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        # Clamp the token count so an all-zero mask yields a zero vector
        # instead of a 0/0 NaN.
        token_counts = mask.sum(dim=1).clamp(min=1e-9)
        sentence_embeddings = (token_embeddings * mask).sum(dim=1) / token_counts
        embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
        return embeddings

class OpenClipVitEmbedder(nn.Module):
    """Frozen OpenCLIP ViT-B-32 image encoder returning unit-norm features.

    Args:
        device: Device on which the CLIP model runs. When falsy, CUDA is used
            if available, otherwise the CPU.

    NOTE(review): ``self.preprocess`` is stored but ``forward`` does not apply
    it, so callers are presumably expected to hand over already-prepared image
    tensors - confirm this matches the dataset transform.
    """

    def __init__(self, device=None):
        super().__init__()
        if device:
            self.device = device
        else:
            self.device = "cuda" if torch.cuda.is_available() else "cpu"

        clip_model, _, self.preprocess = open_clip.create_model_and_transforms(
            model_name="ViT-B-32", pretrained="openai"
        )
        self.model = clip_model.to(self.device)
        self.model.eval()
        # Freeze every weight: the encoder is used for feature extraction only.
        self.model.requires_grad_(False)

    def forward(self, image_tensor):
        """Encode a batch of image tensors into L2-normalised feature rows."""
        with torch.no_grad():
            features = self.model.encode_image(image_tensor.to(self.device))
        return F.normalize(features, p=2, dim=-1)

In [4]:
class LDALayer(nn.Module):
    """Fixed affine projection: subtract a stored mean, then multiply by ``weight.T``.

    Both arrays are registered as float32 buffers, so they follow the module
    across devices and are saved in its state dict but are never trained.

    Args:
        mean: Values subtracted from every input row.
        coef: Projection matrix; the input is multiplied by its transpose.
    """

    def __init__(self, mean, coef):
        super().__init__()
        for buffer_name, values in (("mean", mean), ("weight", coef)):
            self.register_buffer(buffer_name, torch.tensor(values, dtype=torch.float32))

    def forward(self, x):
        """Centre ``x`` with the stored mean and project it with the stored weights."""
        centered = x - self.mean
        return centered @ self.weight.T

In [5]:
class GraphModule(nn.Module):
    """
    Graph Module: k-NN graph creation, GAT layers, training, and save/load weights.

    The network is two stacked ``GATConv`` layers: the first uses ``heads``
    attention heads whose outputs are concatenated, the second uses a single
    head and produces ``out_dim`` values per node.

    Args:
        in_dim: Size of the input node features.
        hidden_dim: Per-head output size of the first GAT layer.
        out_dim: Output size per node of the second GAT layer.
        heads: Number of attention heads in the first GAT layer.
        dropout: Dropout rate passed to both GAT layers.
    """
    def __init__(self, in_dim, hidden_dim=32, out_dim=64, heads=4, dropout=0.2):
        super().__init__()
        self.gat1 = GATConv(in_dim, hidden_dim, heads=heads, concat=True, dropout=dropout)
        self.gat2 = GATConv(hidden_dim*heads, out_dim, heads=1, concat=False, dropout=dropout)

    def forward(self, x, edge_index):
        """Run both GAT layers (with an ELU in between) over the graph."""
        x = self.gat1(x, edge_index)
        x = F.elu(x)
        x = self.gat2(x, edge_index)
        return x

    @staticmethod
    def create_knn_graph(embeddings, k=20):
        """Build a symmetric k-nearest-neighbour edge list.

        Rows of ``embeddings`` are normalised first, then each node is linked
        to its ``k`` nearest neighbours (the node itself counts as one of
        them). The adjacency matrix is symmetrised so every edge appears in
        both directions.

        Args:
            embeddings: 2-D array-like with one row per node.
            k: Number of neighbours per node, including the node itself.

        Returns:
            ``torch.long`` tensor of shape ``(2, num_edges)`` holding the row
            and column indices of the non-zero adjacency entries.
        """
        embeddings_norm = normalize(embeddings, axis=1)
        knn = kneighbors_graph(embeddings_norm, n_neighbors=k, mode='connectivity', include_self=True)
        knn = 0.5 * (knn + knn.T)
        coo = knn.tocoo()
        # Stack into a single ndarray before converting: building a tensor
        # from a Python list of ndarrays is very slow and triggers a
        # UserWarning in PyTorch.
        edge_index = torch.as_tensor(np.vstack((coo.row, coo.col)), dtype=torch.long)
        return edge_index

    def save_weights(self, path):
        """Write the module's state dict to ``path``."""
        torch.save(self.state_dict(), path)
        print(f"GAT weights saved at {path}")

    def load_weights(self, path, map_location=None):
        """Load a state dict previously written by ``save_weights``.

        Args:
            path: File to read.
            map_location: Forwarded to ``torch.load`` to remap storages.
        """
        # NOTE(security): torch.load unpickles the file; only load checkpoints
        # from trusted sources.
        self.load_state_dict(torch.load(path, map_location=map_location))
        print(f"GAT weights loaded from {path}")

    def train_gat(self, x, edge_index, labels, mask=None, lr=0.005, weight_decay=5e-4, epochs=200, verbose=True):
        """Train the GAT with full-batch cross-entropy and return the final node outputs.

        Args:
            x: Node feature tensor.
            edge_index: Edge list tensor as produced by ``create_knn_graph``.
            labels: Tensor of integer class ids, one per node.
            mask: Optional boolean tensor selecting the nodes that contribute
                to the loss; defaults to all nodes.
            lr: Adam learning rate.
            weight_decay: Adam weight decay.
            epochs: Number of full-batch optimisation steps.
            verbose: When true, print loss and training accuracy every 20 epochs.

        Returns:
            Node outputs computed after training, in evaluation mode (dropout
            disabled) and without gradient tracking. The module is left in
            evaluation mode.
        """
        optimizer = torch.optim.Adam(self.parameters(), lr=lr, weight_decay=weight_decay)
        if mask is None:
            # Create the default mask on the labels' device so indexing also
            # works when training on the GPU.
            mask = torch.ones(len(labels), dtype=torch.bool, device=labels.device)

        self.train()
        for epoch in range(epochs):
            optimizer.zero_grad()
            out = self.forward(x, edge_index)
            loss = F.cross_entropy(out[mask], labels[mask])
            loss.backward()
            optimizer.step()

            if verbose and epoch % 20 == 0:
                pred = out.argmax(dim=1)
                acc = (pred[mask] == labels[mask]).float().mean().item()
                print(f"Epoch {epoch}, Loss: {loss.item():.4f}, Train Acc: {acc:.4f}")

        # Switch off dropout and autograd for the final pass so the returned
        # outputs are deterministic and do not keep a computation graph alive.
        self.eval()
        with torch.no_grad():
            return self.forward(x, edge_index)

In [6]:
def collect_embeddings(dataloader, text_model, image_model, device):
    """Embed every batch of a loader and stack the results into two arrays.

    Args:
        dataloader: Iterable yielding ``(images, captions, labels)`` batches;
            the labels are ignored here.
        text_model: Callable mapping a list of captions to a tensor of text
            embeddings.
        image_model: Callable mapping the image batch to a tensor of image
            embeddings.
        device: Accepted for signature compatibility; not used in the body.

    Returns:
        Tuple ``(text_embeddings, image_embeddings)`` of NumPy arrays whose
        rows follow the iteration order of ``dataloader``.
    """
    per_batch = []
    for batch_images, batch_captions, _ in dataloader:
        caption_list = list(batch_captions)
        with torch.no_grad():
            text_vecs = text_model(caption_list).to("cpu")
            image_vecs = image_model(batch_images).to("cpu")
        per_batch.append((text_vecs.numpy(), image_vecs.numpy()))

    text_chunks = [text_part for text_part, _ in per_batch]
    image_chunks = [image_part for _, image_part in per_batch]
    return np.vstack(text_chunks), np.vstack(image_chunks)

In [7]:
# Build the train/test datasets and loaders with the default settings
# (reads "data_csv.csv", batch size 16, 80/20 stratified split).
dataloaders = MisogynyDataLoader()
train_loader = dataloaders.train_loader

# Instantiate both encoders on the selected device.
text_model = BERTEmbedder().to(device)
image_model = OpenClipVitEmbedder(device=device)

Loading weights: 100%|██████████| 199/199 [00:00<00:00, 2164.86it/s, Materializing param=pooler.dense.weight]                               
BertModel LOAD REPORT from: bert-base-uncased
Key                                        | Status     |  | 
-------------------------------------------+------------+--+-
cls.seq_relationship.weight                | UNEXPECTED |  | 
cls.predictions.transform.LayerNorm.weight | UNEXPECTED |  | 
cls.predictions.bias                       | UNEXPECTED |  | 
cls.predictions.transform.dense.weight     | UNEXPECTED |  | 
cls.seq_relationship.bias                  | UNEXPECTED |  | 
cls.predictions.transform.dense.bias       | UNEXPECTED |  | 
cls.predictions.transform.LayerNorm.bias   | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


In [8]:
# Embed every training caption and image; rows follow the (unshuffled) loader order.
text_train_emb, image_train_emb = collect_embeddings(train_loader, text_model, image_model, device)

# Read the labels straight from the dataset's dataframe instead of iterating
# the dataset, which would re-open and re-transform every image just to fetch
# an integer. Row order matches the unshuffled loader used above.
labels = (
    dataloaders.train_dataset.data["image_label"]
    .map(dataloaders.train_dataset.label_map)
    .to_numpy(dtype=np.int64)
)
assert len(labels) == len(text_train_emb) == len(image_train_emb), "labels/embeddings misaligned"
num_classes = len(np.unique(labels))

In [9]:
# Load the stored LDA mean and coefficient arrays and wrap them in a fixed
# projection layer.
lda_mean = np.load("weights/combined_lda_mean.npy")
lda_coef = np.load("weights/combined_lda_coef.npy")
combined_lda_layer = LDALayer(lda_mean, lda_coef)

In [10]:
# Concatenate text and image embeddings along the feature axis, then project
# the joint vectors with the LDA layer to obtain the node features.
combined_raw = np.concatenate([text_train_emb, image_train_emb], axis=1)
combined_tensor = torch.tensor(combined_raw, dtype=torch.float32)
combined_lda_emb = combined_lda_layer(combined_tensor).numpy()

In [11]:
# Number of neighbours per node (the node itself is counted as one of them).
k = 20
# Symmetric k-NN edge list over the LDA-projected node features.
edge_index = GraphModule.create_knn_graph(combined_lda_emb, k=k)

  edge_index = torch.tensor([coo.row, coo.col], dtype=torch.long)


In [12]:
# Persist the graph pieces (node features, edge list, labels) as .npy files.
graph_artifacts = {
    "graph_node_features.npy": combined_lda_emb,
    "graph_edge_index.npy": edge_index.numpy(),
    "graph_labels.npy": labels,
}
for artifact_path, artifact_array in graph_artifacts.items():
    np.save(artifact_path, artifact_array)

# Short summary of what was written.
print("Graph created successfully!")
print("Node features shape:", combined_lda_emb.shape)
print("Edge index shape:", edge_index.shape)

Graph created successfully!
Node features shape: (1704, 4)
Edge index shape: torch.Size([2, 40146])
