In [8]:
!pip install -q torch-geometric transformers

import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from transformers import BertTokenizer, BertModel

In [10]:
# Replace with actual filename
df = pd.read_csv("/kaggle/input/personality-vad/Dyadic_PELD.tsv", sep="\t")

# Emotion map
emotion2id = {'anger': 0, 'disgust': 1, 'fear': 2, 'joy': 3,
              'neutral': 4, 'sadness': 5, 'surprise': 6}
df["emotion_id"] = df["Emotion_3"].str.lower().map(emotion2id)

# Stratified train/test split
train_df, test_df = train_test_split(df, test_size=0.2, stratify=df["emotion_id"], random_state=42)

In [11]:
# Load BERT
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
bert_model = BertModel.from_pretrained("bert-base-uncased")
bert_model.eval()

def get_bert_embedding(text):
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=64)
    with torch.no_grad():
        return bert_model(**inputs).last_hidden_state.mean(dim=1).squeeze(0)

In [12]:
def personality_to_vad(P):
    O, C, E, A, N = P
    return np.array([
        0.21 * E + 0.59 * A + 0.19 * N,
        0.15 * O + 0.30 * A - 0.57 * N,
        0.25 * O + 0.17 * C + 0.60 * E - 0.32 * A
    ])

In [13]:
def build_graph_version_a(row, verbose=False):
    try:
        P = eval(row['Personality']) if isinstance(row['Personality'], str) else row['Personality']
        if not isinstance(P, (list, tuple)) or len(P) != 5:
            if verbose: print(" Invalid Personality:", row['Personality'])
            return None

        U1 = get_bert_embedding(row['Utterance_1'])
        U2 = get_bert_embedding(row['Utterance_2'])
        M3 = get_bert_embedding(row['Utterance_3'])

        if U1.shape[0] != 768 or U2.shape[0] != 768 or M3.shape[0] != 768:
            if verbose: print(" BERT shape issue")
            return None

        P_node = F.pad(torch.tensor(personality_to_vad(P), dtype=torch.float), (0, 765), value=0)
        x = torch.stack([U1, U2, P_node], dim=0)
        edge_index = torch.tensor([[0, 2, 2], [1, 0, 1]], dtype=torch.long)
        emotion = torch.tensor(emotion2id[row["Emotion_3"].lower()], dtype=torch.long)

        return Data(x=x, edge_index=edge_index, y=emotion, mood=M3)
    except Exception as e:
        if verbose: print(f" Exception: {e}")
        return None


In [14]:
# Train graphs
graph_list_a = []
for i, (_, row) in enumerate(tqdm(train_df.iterrows(), total=len(train_df))):
    g = build_graph_version_a(row, verbose=(i < 3))  # debug first 3
    if g: graph_list_a.append(g)

# Test graphs
test_list_a = []
for i, (_, row) in enumerate(tqdm(test_df.iterrows(), total=len(test_df))):
    g = build_graph_version_a(row, verbose=(i < 3))
    if g: test_list_a.append(g)

print(f" {len(graph_list_a)} training graphs |  {len(test_list_a)} test graphs")

# Loaders
train_loader_a = DataLoader(graph_list_a, batch_size=16, shuffle=True)
test_loader_a = DataLoader(test_list_a, batch_size=16)

100%|██████████| 5208/5208 [13:28<00:00,  6.44it/s]
100%|██████████| 1302/1302 [03:19<00:00,  6.53it/s]

 5208 training graphs |  1302 test graphs





In [19]:
from torch_geometric.nn import GCNConv, global_mean_pool, BatchNorm
import torch.nn as nn

class PersonalityBERTGNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(768, 256)
        self.bn1 = BatchNorm(256)
        self.conv2 = GCNConv(256, 128)
        self.bn2 = BatchNorm(128)
        self.conv3 = GCNConv(128, 64)
        self.bn3 = BatchNorm(64)
        self.dropout = nn.Dropout(0.3)
        self.mood_head = nn.Linear(64, 768)
        self.emotion_head = nn.Linear(64, 7)

    def forward(self, x, edge_index, batch):
        x = F.relu(self.bn1(self.conv1(x, edge_index)))
        x = self.dropout(x)
        x = F.relu(self.bn2(self.conv2(x, edge_index)))
        x = self.dropout(x)
        x = F.relu(self.bn3(self.conv3(x, edge_index)))
        x = global_mean_pool(x, batch)
        return self.mood_head(x), self.emotion_head(x)


In [20]:
def train_model(model, train_loader, save_path="version_a_gnn.pt"):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    loss_mood = nn.MSELoss()
    loss_emotion = nn.CrossEntropyLoss()

    print(" Training")
    for epoch in range(1, 251):
        model.train()
        total_loss = 0
        for batch in train_loader:
            batch = batch.to(device)
            optimizer.zero_grad()
            mood_pred, emotion_pred = model(batch.x, batch.edge_index, batch.batch)
            mood_targets = torch.stack([g.mood for g in batch.to_data_list()]).to(device)
            loss = loss_emotion(emotion_pred, batch.y) + loss_mood(mood_pred, mood_targets)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch:03d} | Total Loss: {total_loss:.4f}")

    torch.save(model.state_dict(), save_path)
    print(f" Model saved to {save_path}")

In [21]:
from sklearn.metrics import accuracy_score, f1_score, classification_report

def evaluate_model(model_class, model_path, test_loader):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model_class()
    model.load_state_dict(torch.load(model_path, map_location=device))
    model = model.to(device)
    model.eval()

    preds, labels, mood_losses = [], [], []
    with torch.no_grad():
        for batch in test_loader:
            batch = batch.to(device)
            mood_pred, emotion_pred = model(batch.x, batch.edge_index, batch.batch)
            preds.extend(torch.argmax(emotion_pred, dim=1).cpu().tolist())
            labels.extend(batch.y.cpu().tolist())
            mood_targets = torch.stack([g.mood for g in batch.to_data_list()]).to(device)
            mood_losses.append(F.mse_loss(mood_pred, mood_targets).item())

    print("\n Evaluation — VERSION A")
    print("Mood MSE:", np.mean(mood_losses))
    print("Accuracy:", accuracy_score(labels, preds))
    print("F1 Macro:", f1_score(labels, preds, average='macro'))
    print("F1 Micro:", f1_score(labels, preds, average='micro'))
    print("F1 Weighted:", f1_score(labels, preds, average='weighted'))
    print("\nClassification Report:")
    print(classification_report(labels, preds, target_names=list(emotion2id.keys())))


In [18]:
model = PersonalityBERTGNN()
train_model(model, train_loader_a)

 Training
Epoch 001 | Total Loss: 563.3496
Epoch 002 | Total Loss: 548.8077
Epoch 003 | Total Loss: 542.7957
Epoch 004 | Total Loss: 538.1893
Epoch 005 | Total Loss: 533.3124
Epoch 006 | Total Loss: 531.7167
Epoch 007 | Total Loss: 524.8073
Epoch 008 | Total Loss: 519.7132
Epoch 009 | Total Loss: 514.9044
Epoch 010 | Total Loss: 509.4402
Epoch 011 | Total Loss: 501.1593
Epoch 012 | Total Loss: 494.1062
Epoch 013 | Total Loss: 481.7312
Epoch 014 | Total Loss: 474.6150
Epoch 015 | Total Loss: 461.1701
Epoch 016 | Total Loss: 450.7156
Epoch 017 | Total Loss: 439.2836
Epoch 018 | Total Loss: 427.1819
Epoch 019 | Total Loss: 415.9321
Epoch 020 | Total Loss: 401.3554
Epoch 021 | Total Loss: 389.9171
Epoch 022 | Total Loss: 377.3811
Epoch 023 | Total Loss: 366.7532
Epoch 024 | Total Loss: 356.4189
Epoch 025 | Total Loss: 342.2206
Epoch 026 | Total Loss: 332.3707
Epoch 027 | Total Loss: 325.7061
Epoch 028 | Total Loss: 309.3976
Epoch 029 | Total Loss: 303.7727
Epoch 030 | Total Loss: 292.3790


In [23]:
evaluate_model(PersonalityBERTGNN, "version_a_gnn.pt", test_loader_a)

  model.load_state_dict(torch.load(model_path, map_location=device))



 Evaluation — VERSION A
Mood MSE: 0.05350643637158522
Accuracy: 0.32181259600614437
F1 Macro: 0.18896991381430697
F1 Micro: 0.32181259600614437
F1 Weighted: 0.29996897325417193

Classification Report:
              precision    recall  f1-score   support

       anger       0.26      0.24      0.25       171
     disgust       0.08      0.03      0.05        29
        fear       0.14      0.10      0.12        97
         joy       0.18      0.16      0.17       225
     neutral       0.43      0.56      0.49       554
     sadness       0.25      0.14      0.18        99
    surprise       0.08      0.06      0.07       127

    accuracy                           0.32      1302
   macro avg       0.20      0.19      0.19      1302
weighted avg       0.29      0.32      0.30      1302

