In [1]:
from datasets import load_dataset_builder
from datasets import load_dataset
import torch
from torch_geometric.data import Data
import matplotlib.pyplot as plt

from datasets import ClassLabel, Sequence
import datasets
import random
import pandas as pd
from IPython.display import display, HTML

from transformers import AutoTokenizer, GPT2Tokenizer, GPT2LMHeadModel 

import numpy as np
import time

from collections import OrderedDict

from torch_geometric.nn import GCNConv, SimpleConv
from tqdm.auto import tqdm

from torch_geometric.data import Batch

from sklearn.manifold import TSNE

## Helper functions

In [6]:
def visualize(h, color):
    """Show a scatter plot of a 2-D t-SNE projection of ``h``.

    ``h`` is detached, moved to the CPU and converted to a NumPy array before
    it is handed to ``TSNE``. ``color`` is forwarded unchanged to
    ``plt.scatter`` as the ``c`` argument.
    """
    points = h.detach().cpu().numpy()
    embedding = TSNE(n_components=2).fit_transform(points)
    xs, ys = embedding[:, 0], embedding[:, 1]

    plt.figure(figsize=(10, 10))
    # Remove the tick marks from both axes.
    plt.xticks([])
    plt.yticks([])

    plt.scatter(xs, ys, s=70, c=color, cmap="Set2")
    plt.show()
    

class ModuleHook:
    """Capture the outputs a module produces during forward passes.

    A forward hook is registered on ``module`` at construction time. Every
    forward pass appends the detached output to ``features`` and stores the
    module that fired in ``module``. Call ``close()`` to remove the hook, or
    use the instance as a context manager so the hook is removed even when
    the body raises::

        with ModuleHook(layer) as hook:
            layer(x)
        captured = hook.features
    """

    def __init__(self, module):
        # Set up the instance state first so the callback never runs against
        # a partially initialised object.
        self.module = None
        self.features = []
        self.hook = module.register_forward_hook(self.hook_fn)

    def hook_fn(self, module, input, output):
        """Forward-hook callback: remember the module and its detached output."""
        self.module = module
        self.features.append(output.detach())

    def close(self):
        """Remove the forward hook; outputs captured so far are kept."""
        self.hook.remove()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        # Never swallow an exception raised inside the ``with`` body.
        return False
        
        
def combine_data(examples):
    """Concatenate each premise with its hypothesis.

    Args:
        examples: a batch in column form, i.e. a mapping whose ``"premise"``
            and ``"hypothesis"`` entries are equally long sequences of strings.

    Returns:
        ``{"sentence": [...]}`` with one concatenated string per example.
    """
    # Iterate over the rows of the batch. ``len(examples)`` would count the
    # columns of the mapping instead, which is not the number of examples.
    # NOTE(review): no separator is inserted between premise and hypothesis;
    # confirm that this is intended.
    premises = examples["premise"]
    hypotheses = examples["hypothesis"]
    return {"sentence": [premise + hypothesis for premise, hypothesis in zip(premises, hypotheses)]}
        
def tokenize_data(examples):
    """Build one prompt per example and tokenize the whole batch.

    Each prompt is the example's question followed by its sentence, joined by
    fixed instruction text. The prompts are passed to the module-level
    ``tokenizer`` with ``truncation=True`` and its result is returned.
    """
    prompts = []
    for i, passage in enumerate(examples["sentence"]):
        prompt = "Question: " + examples["question"][i] + "\nIs answer in the following sentences: " + passage
        prompts.append(prompt)

    return tokenizer(prompts, truncation=True)

## Load the QNLI dataset from the GLUE benchmark

In [7]:
from datasets import load_dataset, load_metric

# GLUE task used for probing. Alternative: task = "mnli"
task = "qnli"

# "mnli-mm" is loaded under the name "mnli"; every other task name is used as is.
actual_task = task if task != "mnli-mm" else "mnli"

dataset = load_dataset("glue", actual_task)
metric = load_metric('glue', actual_task)

Reusing dataset glue (/n/home04/yidachen/.cache/huggingface/datasets/glue/qnli/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


  0%|          | 0/3 [00:00<?, ?it/s]

## Tokenize the Question and Document into sequences of tokens

In [8]:
# Name of the pretrained checkpoint whose tokenizer is loaded below.
model_checkpoint = "gpt2"
# NOTE(review): not referenced by any cell visible in this notebook section.
batch_size = 16

# `tokenize_data` reads this module-level tokenizer, so it must exist before `map` runs.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# Apply `tokenize_data` to every split, one batch of examples at a time.
tokenized_datasets = dataset.map(tokenize_data, batched=True)

{'question': 'When did the third Digimon series begin?',
 'sentence': 'Unlike the two seasons before it and most of the seasons that followed, Digimon Tamers takes a darker and more realistic approach to its story featuring Digimon who do not reincarnate after their deaths and more complex character development in the original Japanese.',
 'label': 1,
 'idx': 0}

## Create Train & Test Split for Probing Evaluation

In [None]:
# Seed NumPy's global RNG so the sampled subset and the split are repeatable.
np.random.seed(123)

# Alternative split name: "validation_matched".
split = "validation"
# Number of examples to sample; len(dataset[split]) would use the whole split.
num_samples = 1000
train_portion = 0.8

# Draw `num_samples` distinct row indices from the chosen split.
split_size = len(tokenized_datasets[split])
sampled_indices = np.random.choice(np.arange(split_size), num_samples, replace=False)

# Positions into `sampled_indices`: a random subset goes to training and
# everything else goes to testing.
positions = np.arange(len(sampled_indices))
num_train = int(num_samples * train_portion)
sampled_train = np.random.choice(positions, num_train, replace=False)
sampled_test = np.setdiff1d(positions, sampled_train)

# Boolean masks over positions 0 .. num_samples - 1.
train_mask = np.ones(num_samples, dtype=bool)
train_mask[sampled_test] = False

test_mask = np.ones(num_samples, dtype=bool)
test_mask[sampled_train] = False

## Create Unweighted Directed Graph Representation of Text from GPT-2 Model

In [12]:
model = GPT2LMHeadModel.from_pretrained("gpt2").to("cuda")
model.eval()  # inference only: no parameter is updated in this cell

# An attention weight must exceed this value to become an edge.
threshold = 0.2

# Read the architecture sizes from the checkpoint instead of hard-coding them.
num_layers = model.config.n_layer
num_heads = model.config.n_head
hidden_size = model.config.n_embd
head_dim = hidden_size // num_heads

# all_graphs[layer][head] is a list with one graph per sampled example, in the
# same order as `sampled_indices`.
all_graphs = {}
for layer in range(num_layers):
    all_graphs[layer] = {}
    for head in range(num_heads):
        all_graphs[layer][head] = []

# Register the forward hooks once. Each one records the output of a `c_attn`
# module (the fused projection that is split three ways below).
features = OrderedDict()
for name, module in model.named_modules():
    if "c_attn" in name:
        features[name] = ModuleHook(module)

probe_split = tokenized_datasets[split]

try:
    for i in tqdm(sampled_indices):
        # Drop what the hooks captured for the previous example.
        for hook in features.values():
            hook.features.clear()

        # Fetch one row. Indexing a column first (`ds["input_ids"][i]`) would
        # materialise the entire column on every iteration.
        example = probe_split[int(i)]

        # Explicit batch dimension of size 1.
        input_ids = torch.tensor(example["input_ids"], dtype=torch.long, device="cuda").unsqueeze(0)
        attention_mask = torch.tensor(example["attention_mask"], dtype=torch.float, device="cuda").unsqueeze(0)

        with torch.no_grad():
            output = model(input_ids=input_ids,
                           attention_mask=attention_mask,
                           output_attentions=True,
                           output_hidden_states=True)

        y = example["label"]
        for layer in range(num_layers):
            # The last of the three equal chunks of the `c_attn` output is used
            # as node features (presumably the value projection; verify against
            # the model implementation). `[0]` selects the only batch element.
            fused = features[f'transformer.h.{layer}.attn.c_attn'].features[0]
            value_features = fused.split(hidden_size, dim=2)[-1][0].clone()
            for head in range(num_heads):
                weighted_adj_matrix = output["attentions"][layer][0][head].detach().cpu()

                # Columns of the chunk that belong to this head.
                node_features = value_features[:, head * head_dim: (head + 1) * head_dim]

                # Unweighted, directed graph: keep entry (row, col) when its
                # attention weight is above the threshold. The row index becomes
                # the edge source and the column index the edge target.
                adj_matrix = weighted_adj_matrix > threshold
                edge_index = adj_matrix.nonzero().t().contiguous()

                data = Data(x=node_features, edge_index=edge_index, y=y)

                all_graphs[layer][head].append(data)
finally:
    # Always detach the hooks, even if a forward pass fails part-way through.
    for hook in features.values():
        hook.close()

  0%|          | 0/1000 [00:00<?, ?it/s]

## Graph Neural Network

In [13]:
from gcn_probe import GCNProbe, GCNNonlinearProbe, MLPProbe

## Create Train and Test Dataset

In [16]:
from torch_geometric.loader import DataLoader

metric = load_metric('glue', actual_task)

# NOTE(review): `layer` and `head` are not assigned in this cell; they hold
# whatever values earlier cells left behind. Confirm which head is intended.
selected_graphs = all_graphs[layer][head]

# Move every graph to the GPU and route it by the position masks.
train_dataset = [graph.to("cuda") for idx, graph in enumerate(selected_graphs) if train_mask[idx]]
test_dataset = [graph.to("cuda") for idx, graph in enumerate(selected_graphs) if test_mask[idx]]

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

## Train and Test Helper functions

In [17]:
def train(model, optimizer, criterion, loader):
    """Run one optimisation pass over ``loader``.

    Args:
        model: module called as ``model(x, edge_index, batch)``.
        optimizer: optimizer that updates ``model``'s parameters.
        criterion: loss called as ``criterion(output, labels)``.
        loader: iterable of batches exposing ``x``, ``edge_index``, ``batch``
            and ``y``.

    Returns:
        The loss of the last batch as a detached tensor, so holding on to it
        does not keep the autograd graph alive.

    Raises:
        ValueError: if ``loader`` yields no batches.
    """
    model.train()
    loss = None
    for data in loader:  # Iterate in batches over the training dataset.
        optimizer.zero_grad()  # Start every step from clean gradients.
        out = model(data.x, data.edge_index, data.batch)  # Forward pass.
        # Put the labels on whatever device the model produced its output on.
        loss = criterion(out, data.y.to(out.device))

        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.

    if loss is None:
        raise ValueError("train() received an empty loader")
    return loss.detach()
        
        
def test(model, loader):
    model.eval()
    correct = 0
    preds = torch.Tensor([]).to("cuda")
    refs = torch.Tensor([]).to("cuda")
    for data in loader:  # Iterate in batches over the training/test dataset.
        out = model(data.x, data.edge_index, data.batch)  
        pred = out.argmax(dim=1)  # Use the class with highest probability.
        correct += int((pred == data.y.to("cuda")).sum())  # Check against ground-truth labels.
        preds = torch.concat([preds, pred])
        refs = torch.concat([refs, data.y.to("cuda")])
        
    return correct / len(loader.dataset)  # Derive ratio of correct predictions.

## Split the dataset into mini-batches

In [20]:
from torch_geometric.loader import DataLoader

# NOTE(review): `layer` and `head` are not assigned in this cell; they hold
# whatever values earlier cells left behind. Confirm which head is intended.
preview_graphs = all_graphs[layer][head]

# Move every graph to the GPU and route it by the position masks.
train_dataset = [graph.to("cuda") for idx, graph in enumerate(preview_graphs) if train_mask[idx]]
test_dataset = [graph.to("cuda") for idx, graph in enumerate(preview_graphs) if test_mask[idx]]

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

# Show how the test graphs are collated into mini-batches.
for step, data in enumerate(test_loader):
    print(f'Step {step + 1}:')
    print('=======')
    print(f'Number of graphs in the current batch: {data.num_graphs}')
    print(data)
    print()

Step 1:
Number of graphs in the current batch: 16
DataBatch(x=[907, 64], edge_index=[2, 1057], y=[16], batch=[907], ptr=[17])

Step 2:
Number of graphs in the current batch: 16
DataBatch(x=[976, 64], edge_index=[2, 1138], y=[16], batch=[976], ptr=[17])

Step 3:
Number of graphs in the current batch: 16
DataBatch(x=[942, 64], edge_index=[2, 1086], y=[16], batch=[942], ptr=[17])

Step 4:
Number of graphs in the current batch: 16
DataBatch(x=[865, 64], edge_index=[2, 1023], y=[16], batch=[865], ptr=[17])

Step 5:
Number of graphs in the current batch: 16
DataBatch(x=[928, 64], edge_index=[2, 1089], y=[16], batch=[928], ptr=[17])

Step 6:
Number of graphs in the current batch: 16
DataBatch(x=[952, 64], edge_index=[2, 1113], y=[16], batch=[952], ptr=[17])

Step 7:
Number of graphs in the current batch: 16
DataBatch(x=[967, 64], edge_index=[2, 1129], y=[16], batch=[967], ptr=[17])

Step 8:
Number of graphs in the current batch: 16
DataBatch(x=[940, 64], edge_index=[2, 1078], y=[16], batch=[9

## Train a probe on the graph representation of text for each of the 144 attention heads

In [22]:
# train_accs[layer][head] / test_accs[layer][head] hold the final accuracies.
train_accs = {}
test_accs = {}

# Start probe initialisation and batch shuffling from a fixed RNG state.
torch.manual_seed(123)

for layer in tqdm(range(12)):
    train_accs[layer] = {}
    test_accs[layer] = {}
    for head in range(12):
        # A fresh probe per head. The arguments are presumably
        # (input feature size, number of classes); see gcn_probe.
        probe = GCNProbe(64, 2).to("cuda")

        optimizer = torch.optim.Adam(probe.parameters(), lr=0.01)
        criterion = torch.nn.CrossEntropyLoss()

        head_graphs = all_graphs[layer][head]
        train_dataset = [graph.to("cuda") for idx, graph in enumerate(head_graphs) if train_mask[idx]]
        test_dataset = [graph.to("cuda") for idx, graph in enumerate(head_graphs) if test_mask[idx]]

        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
        test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

        for epoch in range(1, 201):
            loss = train(probe, optimizer, criterion, train_loader)

        # Only the final accuracies are reported, so evaluate once after
        # training instead of after every epoch. For per-epoch monitoring,
        # call test(probe, train_loader) inside the loop above.
        train_acc = test(probe, train_loader)
        test_acc = test(probe, test_loader)

        print("-" * 25 + f" layer {layer} head {head} " + "-" * 25)
        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')
        print(f'Train Accuracy: {train_acc:.4f}')
        print(f'Test Accuracy: {test_acc:.4f}')

        train_accs[layer][head] = train_acc
        test_accs[layer][head] = test_acc

  0%|          | 0/12 [00:00<?, ?it/s]

------------------------- layer 0 head 0 -------------------------
Epoch: 200, Loss: 0.7218
Train Accuracy: 0.6175
Test Accuracy: 0.5250
------------------------- layer 0 head 1 -------------------------
Epoch: 200, Loss: 0.6160
Train Accuracy: 0.6325
Test Accuracy: 0.5800
------------------------- layer 0 head 2 -------------------------
Epoch: 200, Loss: 0.6767
Train Accuracy: 0.6150
Test Accuracy: 0.4950
------------------------- layer 0 head 3 -------------------------
Epoch: 200, Loss: 0.6283
Train Accuracy: 0.6112
Test Accuracy: 0.5300
------------------------- layer 0 head 4 -------------------------
Epoch: 200, Loss: 0.6240
Train Accuracy: 0.6388
Test Accuracy: 0.5750
------------------------- layer 0 head 5 -------------------------
Epoch: 200, Loss: 0.6864
Train Accuracy: 0.6112
Test Accuracy: 0.5900
------------------------- layer 0 head 6 -------------------------
Epoch: 200, Loss: 0.6803
Train Accuracy: 0.6312
Test Accuracy: 0.5550
------------------------- layer 0 head 7 

In [None]:
from visualization_utils import heatmap, annotate_heatmap
import matplotlib.pyplot as plt
import numpy as np

# Arrange the test accuracies as a grid: one row per layer, one column per head.
score_matrix = []
for layer in range(12):
    row = []
    for head in range(12):
        row.append(test_accs[layer][head])
    score_matrix.append(row)

plt.figure(figsize=(12, 9))

row_labels = [f"Layer {i + 1}" for i in range(12)]
col_labels = [f"H {i + 1}" for i in range(12)]
im, cbar = heatmap(np.array(score_matrix), row_labels, col_labels, cmap="bwr")

# Enlarge the tick labels of the colour bar.
for tick_label in cbar.ax.get_yticklabels():
    tick_label.set_fontsize(18)

texts = annotate_heatmap(im, valfmt="{x:.2f} ", fontsize=13)