In [21]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class GATLayer(nn.Module):
    """Single-head graph attention layer over a dense adjacency matrix.

    Args:
        in_features: Width of each input node feature vector.
        out_features: Width of each projected node feature vector.
        dropout: Dropout probability applied to the attention weights.
        alpha: Negative slope of the LeakyReLU applied to the raw scores.
        concat: If True the aggregated features are passed through ELU;
            if False they are returned without a non-linearity.
    """

    def __init__(self, in_features, out_features, dropout, alpha, concat=True):
        super(GATLayer, self).__init__()
        self.dropout = dropout
        self.in_features = in_features
        self.out_features = out_features
        self.alpha = alpha
        self.concat = concat

        # Linear projection applied to every node.
        self.W = nn.Parameter(torch.zeros(size=(in_features, out_features)))
        nn.init.xavier_uniform_(self.W.data, gain=1.414)
        # Attention vector: the first `out_features` rows score the source
        # node, the remaining rows score the target node.
        self.a = nn.Parameter(torch.zeros(size=(2*out_features, 1)))
        nn.init.xavier_uniform_(self.a.data, gain=1.414)

        self.leakyrelu = nn.LeakyReLU(self.alpha)

    def forward(self, input, adj):
        """Aggregate projected node features with masked attention.

        Args:
            input: 2-D tensor of shape (N, in_features).
            adj: Tensor broadcastable to (N, N); entries > 0 mark the pairs
                that are allowed to attend to each other.

        Returns:
            Tensor of shape (N, out_features).
        """
        h = torch.mm(input, self.W)

        # a^T [h_i || h_j] == a_src^T h_i + a_dst^T h_j, so the full (N, N)
        # score matrix is a broadcast sum of two (N, 1) projections. This
        # avoids building the (N, N, 2 * out_features) tensor of node pairs.
        src_score = torch.matmul(h, self.a[:self.out_features])
        dst_score = torch.matmul(h, self.a[self.out_features:])
        e = self.leakyrelu(src_score + dst_score.t())

        # Non-edges get a very large negative score so softmax sends them to
        # ~0; a row without any edge degrades to a uniform distribution.
        masked = torch.full_like(e, -9e15)
        attention = torch.where(adj > 0, e, masked)
        attention = F.softmax(attention, dim=1)
        attention = F.dropout(attention, self.dropout, training=self.training)
        h_prime = torch.matmul(attention, h)

        if self.concat:
            return F.elu(h_prime)
        return h_prime


class GAT(nn.Module):
    """Multi-head graph attention network on top of an embedding lookup.

    Args:
        nfeat: Width of the embedding vectors fed to the attention heads.
        nhid: Output width of each attention head.
        nclass: Output width of the final attention layer.
        dropout: Dropout probability used on features and attention weights.
        alpha: Negative slope of the LeakyReLU inside each attention layer.
        nheads: Number of parallel attention heads.
        vocab_size: Number of rows in the embedding table. Defaults to
            ``nfeat`` (the table size used before this argument existed).
            Every index passed to ``forward`` must be smaller than this value.
    """

    def __init__(self, nfeat, nhid, nclass, dropout, alpha, nheads, vocab_size=None):
        super(GAT, self).__init__()
        self.dropout = dropout
        if vocab_size is None:
            vocab_size = nfeat
        # The embedding must emit `nfeat`-wide vectors because every head is
        # built with in_features=nfeat; emitting `nhid` made torch.mm fail
        # whenever nfeat != nhid.
        self.embedding = nn.Embedding(vocab_size, nfeat)
        self.attentions = [GATLayer(nfeat, nhid, dropout=dropout, alpha=alpha, concat=True) for _ in range(nheads)]
        # Register each head under 'attention_<i>' so its parameters are
        # tracked by the module (a plain Python list is not).
        for i, attention in enumerate(self.attentions):
            self.add_module('attention_{}'.format(i), attention)

        self.out_att = GATLayer(nhid * nheads, nclass, dropout=dropout, alpha=alpha, concat=False)

    def forward(self, x, adj):
        """Return per-node log-probabilities over ``nclass`` outputs.

        Args:
            x: 1-D integer tensor of N embedding indices (the attention
                layers use torch.mm, so the embedded input must be 2-D).
            adj: Adjacency tensor forwarded unchanged to every attention layer.

        Returns:
            Tensor of shape (N, nclass) holding log-softmax scores.
        """
        x = self.embedding(x)
        x = F.dropout(x, self.dropout, training=self.training)
        # Concatenate the head outputs along the feature axis: (N, nhid * nheads).
        x = torch.cat([att(x, adj) for att in self.attentions], dim=1)
        x = F.dropout(x, self.dropout, training=self.training)
        x = F.elu(self.out_att(x, adj))
        return F.log_softmax(x, dim=1)

class LSTM(nn.Module):
    """Stacked LSTM followed by a linear layer on the last time step.

    Args:
        input_dim: Feature width of each time step.
        hidden_dim: Hidden state width of every LSTM layer.
        num_layers: Number of stacked LSTM layers.
        output_dim: Width of the final linear projection.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Encode a batch of sequences and project the final time step.

        Args:
            x: Tensor of shape (batch, seq_len, input_dim).

        Returns:
            Tensor of shape (batch, output_dim).
        """
        # nn.LSTM starts from zero hidden/cell states when none are given and
        # allocates them on x's device, so no module-level `device` global is
        # needed here.
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])
        return out

# GAT hyperparameters:
#   nfeat   - dimension of the word embeddings
#   nhid    - dimension of the hidden layer (per attention head)
#   nclass  - number of classes (task-dependent)
#   dropout - dropout probability
#   alpha   - negative slope for the leaky_relu
#   nheads  - number of attention heads
nfeat, nhid, nclass = 300, 16, 10
dropout, alpha, nheads = 0.6, 0.2, 8

# LSTM hyperparameters.
# NOTE(review): GAT.forward returns nclass-wide log-probabilities, while
# input_dim below is nhid * nheads - confirm which tensor the LSTM is meant
# to consume.
input_dim, hidden_dim, num_layers = nhid * nheads, 32, 2
output_dim = nclass  # task-dependent, same as the GAT output width

# Build both models from the settings above.
gat = GAT(nfeat, nhid, nclass, dropout, alpha, nheads)
lstm = LSTM(input_dim, hidden_dim, num_layers, output_dim)


In [22]:
import pandas as pd
from datasets import load_dataset

In [23]:
# Load configuration '3.0.0' of the 'cnn_dailymail' dataset.
dataset = load_dataset(path='cnn_dailymail', name='3.0.0')

Found cached dataset cnn_dailymail (C:/Users/vijay/.cache/huggingface/datasets/cnn_dailymail/3.0.0/3.0.0/1b3c71476f6d152c31c1730e83ccb08bcf23e348233f4fcc11e182248e6bf7de)


  0%|          | 0/3 [00:00<?, ?it/s]

In [24]:
train_data = dataset['train']
# Slice before converting: only the first 100 rows are turned into a
# DataFrame instead of converting the whole split and discarding the rest.
train_df = pd.DataFrame(train_data[:100])

In [25]:
test_data = dataset['test']
# Slice before converting: only the first 100 rows are turned into a
# DataFrame instead of converting the whole split and discarding the rest.
test_df = pd.DataFrame(test_data[:100])

In [26]:
# Inputs are the full articles; targets are the reference highlights.
X_train = train_df['article']
X_test = test_df['article']
y_train = train_df['highlights']
y_test = test_df['highlights']

In [27]:
from torch.utils.data import Dataset, DataLoader

from transformers import BertTokenizer
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import Dataset

class TextSummarizationDataset(Dataset):
    """Dataset that tokenizes the (article, highlights) pair of each row.

    Args:
        df: DataFrame with 'article' and 'highlights' columns.
        tokenizer: Object exposing ``encode_plus(text, max_length=...,
            padding=..., truncation=..., return_tensors=...)`` and returning
            a mapping with 'input_ids' and 'attention_mask' tensors.
        max_length: Length every sequence is padded/truncated to.
    """

    def __init__(self, df, tokenizer, max_length=512):
        self.df = df
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Number of rows in the wrapped DataFrame."""
        return len(self.df)

    def _encode(self, text):
        """Tokenize one string; return its (input_ids, attention_mask)."""
        encoded = self.tokenizer.encode_plus(text,
                                             max_length=self.max_length,
                                             padding='max_length',
                                             truncation=True,
                                             return_tensors='pt')
        # Drop only the leading batch axis added by return_tensors='pt'.
        # A bare squeeze() would also collapse the sequence axis when
        # max_length == 1 and hand back 0-d tensors.
        return encoded['input_ids'].squeeze(0), encoded['attention_mask'].squeeze(0)

    def __getitem__(self, idx):
        """Return tensors for row ``idx``.

        Returns:
            Tuple ``(article_input_ids, article_attention_mask,
            highlight_input_ids, highlight_attention_mask)``.
        """
        row = self.df.iloc[idx]
        article_input_ids, article_attention_mask = self._encode(row['article'])
        highlight_input_ids, highlight_attention_mask = self._encode(row['highlights'])

        return article_input_ids, article_attention_mask, highlight_input_ids, highlight_attention_mask

from transformers import BertTokenizer

# Tokenizer loaded from the 'bert-base-uncased' checkpoint
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Wrap the training dataframe so every row is tokenized on access
ts_dataset = TextSummarizationDataset(df=train_df, tokenizer=tokenizer)

# Shuffled mini-batches of 64 examples
train_loader = DataLoader(dataset=ts_dataset, batch_size=64, shuffle=True)


In [28]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the models to the device
gat = gat.to(device)
lstm = lstm.to(device)

# Define a loss function and an optimizer (one optimizer updates both models)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(list(gat.parameters()) + list(lstm.parameters()), lr=0.001)


num_epochs = 5

# Set training mode explicitly: if this cell is re-run after the evaluation
# cell, the models would otherwise still be in eval mode (dropout disabled).
gat.train()
lstm.train()

# NOTE(review): the recorded run of this cell ended with
# "IndexError: index out of range in self". Presumably the tokenizer emits
# ids larger than the GAT embedding table - confirm the table size against
# the tokenizer vocabulary.
# Training loop
for epoch in range(num_epochs):
    for article_input_ids, article_attention_mask, highlight_input_ids, highlight_attention_mask in train_loader:
        article_input_ids = article_input_ids.to(device)
        article_attention_mask = article_attention_mask.to(device)
        highlight_input_ids = highlight_input_ids.to(device)
        highlight_attention_mask = highlight_attention_mask.to(device)

        # Forward pass
        # NOTE(review): the tokenizer attention mask is passed where
        # GAT.forward expects `adj` - confirm this is intended.
        gat_output = gat(article_input_ids, article_attention_mask)
        lstm_output = lstm(gat_output)
        loss = criterion(lstm_output, highlight_input_ids)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

IndexError: index out of range in self

In [None]:
# Evaluation
# NOTE(review): `test_loader` and `adjacency_matrix` are not defined in any
# visible cell, and the training loader yields 4-tuples while this loop
# unpacks pairs - confirm where these come from before running.
gat.eval()
lstm.eval()
correct = 0
total = 0
with torch.no_grad():
    for articles, highlights in test_loader:
        articles = articles.to(device)
        highlights = highlights.to(device)
        gat_output = gat(articles, adjacency_matrix)
        lstm_output = lstm(gat_output)
        # Index of the highest score per row; gradients are already disabled
        # by no_grad(), so the legacy `.data` access is not needed.
        predicted = torch.max(lstm_output, 1).indices
        total += highlights.size(0)
        correct += (predicted == highlights).sum().item()

# Guard against an empty loader instead of dividing by zero.
if total:
    print('Test Accuracy: {} %'.format(100 * correct / total))
else:
    print('Test Accuracy: n/a (test_loader yielded no samples)')