In [1]:
import os
import torch
import random
import numpy as np
import pandas as pd
from tqdm import tqdm
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from torch.utils.data import DataLoader, TensorDataset

In [2]:
def set_seed(seed=7):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

set_seed(7)

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cuda


In [4]:
cls_embeddings = torch.load("models/cls_embeddings.pt")          # shape: [1267, 768]
knowledge_tensor = torch.load("models/knowledge_tensor.pt")      # shape: [1267, 128]

print("✅ CLS shape:", cls_embeddings.shape)
print("✅ Knowledge shape:", knowledge_tensor.shape)

✅ CLS shape: torch.Size([1267, 768])
✅ Knowledge shape: torch.Size([1267, 128])


  cls_embeddings = torch.load("models/cls_embeddings.pt")          # shape: [1267, 768]
  knowledge_tensor = torch.load("models/knowledge_tensor.pt")      # shape: [1267, 128]


In [5]:
df = pd.read_csv("Data/liar_dataset/test.tsv", sep='\t', header=None)
raw_labels = df[1].tolist()  # Column 1: truth labels

label_map = {
    "true": 1, "mostly-true": 1, "half-true": 1,
    "barely-true": 0, "false": 0, "pants-fire": 0
}
labels = [label_map[label.strip().lower()] for label in raw_labels]
labels = torch.tensor(labels, dtype=torch.long)
print("✅ Labels shape:", labels.shape)

✅ Labels shape: torch.Size([1267])


In [6]:
X_train, X_val, K_train, K_val, y_train, y_val = train_test_split(
    cls_embeddings, knowledge_tensor, labels, test_size=0.2, random_state=7
)

train_dataset = TensorDataset(X_train, K_train, y_train)
val_dataset = TensorDataset(X_val, K_val, y_val)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

In [7]:
class FusionClassifier(nn.Module):
    def __init__(self, bert_dim=768, knowledge_dim=128, hidden_dim=256, num_classes=2):
        super(FusionClassifier, self).__init__()
        self.fusion = nn.Sequential(
            nn.Linear(bert_dim + knowledge_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim, num_classes)
        )

    def forward(self, cls_embedding, knowledge_vector):
        fused = torch.cat((cls_embedding, knowledge_vector), dim=1)
        return self.fusion(fused)

In [8]:
fusion_model = FusionClassifier().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(fusion_model.parameters(), lr=1e-4)

best_f1 = 0
for epoch in range(10):  # You can increase this if needed
    fusion_model.train()
    total_loss = 0

    for cls_batch, k_batch, y_batch in train_loader:
        cls_batch = cls_batch.to(device)
        k_batch = k_batch.to(device)
        y_batch = y_batch.to(device)

        optimizer.zero_grad()
        outputs = fusion_model(cls_batch, k_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    fusion_model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for cls_batch, k_batch, y_batch in val_loader:
            cls_batch = cls_batch.to(device)
            k_batch = k_batch.to(device)
            outputs = fusion_model(cls_batch, k_batch)
            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(y_batch.tolist())

    acc = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)
    print(f"Epoch {epoch+1} | Loss: {total_loss:.4f} | Val Acc: {acc:.4f} | F1: {f1:.4f}")

    if f1 > best_f1:
        best_f1 = f1
        torch.save(fusion_model.state_dict(), "models/fusion_model.pt")
        print(f"✅ Saved best fusion model with F1: {best_f1:.4f}")


Epoch 1 | Loss: 22.0279 | Val Acc: 0.6339 | F1: 0.7715
✅ Saved best fusion model with F1: 0.7715
Epoch 2 | Loss: 21.5770 | Val Acc: 0.6220 | F1: 0.7419
Epoch 3 | Loss: 21.3401 | Val Acc: 0.6220 | F1: 0.7273
Epoch 4 | Loss: 20.9746 | Val Acc: 0.6260 | F1: 0.7383
Epoch 5 | Loss: 21.0085 | Val Acc: 0.5748 | F1: 0.6516
Epoch 6 | Loss: 20.5154 | Val Acc: 0.6063 | F1: 0.7059
Epoch 7 | Loss: 20.6199 | Val Acc: 0.6142 | F1: 0.7101
Epoch 8 | Loss: 20.1964 | Val Acc: 0.6102 | F1: 0.6935
Epoch 9 | Loss: 20.0184 | Val Acc: 0.6260 | F1: 0.7383
Epoch 10 | Loss: 19.8036 | Val Acc: 0.5866 | F1: 0.6729
