## Prompt Tuning For E2E Table-to-Text Generation

Fine-tune a pre-trained GPT2 model for E2E table-to-text natrual language generation task using prompt tuning. E2E Dataset is used to measure the model's ability to generate coherent natural language from structured data (like tables or key-value pairs).

#### Load E2E Dataset

In [1]:
from datasets import load_dataset

In [2]:
dataset = load_dataset('e2e_nlg')

In [3]:
dataset

DatasetDict({
    train: Dataset({
        features: ['meaning_representation', 'human_reference'],
        num_rows: 42061
    })
    validation: Dataset({
        features: ['meaning_representation', 'human_reference'],
        num_rows: 4672
    })
    test: Dataset({
        features: ['meaning_representation', 'human_reference'],
        num_rows: 4693
    })
})

Each record contains two attributes: meaning_representation(MR) and human_reference(HR).

In [4]:
sample = dataset['train'][0]
print(sample)

{'meaning_representation': 'name[The Vaults], eatType[pub], priceRange[more than £30], customer rating[5 out of 5], near[Café Adriatic]', 'human_reference': 'The Vaults pub near Café Adriatic has a 5 star rating.  Prices start at £30.'}


#### Load Model

In [5]:
from transformers import AutoTokenizer
from model_tuning import PromptTuningGPT2

tokenizer = AutoTokenizer.from_pretrained('gpt2')

model = PromptTuningGPT2(
    gpt2_model='gpt2',
    num_prompt_tokens=30
)



In [6]:
print(model)

PromptTuningGPT2(
  (gpt2): GPT2LMHeadModel(
    (transformer): GPT2Model(
      (wte): Embedding(50257, 768)
      (wpe): Embedding(1024, 768)
      (drop): Dropout(p=0.1, inplace=False)
      (h): ModuleList(
        (0-11): 12 x GPT2Block(
          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (attn): GPT2Attention(
            (c_attn): Conv1D(nf=2304, nx=768)
            (c_proj): Conv1D(nf=768, nx=768)
            (attn_dropout): Dropout(p=0.1, inplace=False)
            (resid_dropout): Dropout(p=0.1, inplace=False)
          )
          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (mlp): GPT2MLP(
            (c_fc): Conv1D(nf=3072, nx=768)
            (c_proj): Conv1D(nf=768, nx=3072)
            (act): NewGELUActivation()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    )
    (lm_head): Linear(in_features=768, out_featu

#### Preprocess Dataset
Tokenize input sequences and generate labels for training.

In [7]:
def preprocess(sample):
    model_inputs = tokenizer(sample['meaning_representation'])
    labels = tokenizer(sample['meaning_representation'] + sample['human_reference'], padding="max_length", truncation=True, max_length=512)
    
    # print(len(model_inputs['input_ids'][0]))
    # labels["labels"] = labels['input_ids']
    labels['labels'] = labels['input_ids'].copy()
    labels['labels'][:len(model_inputs['input_ids'])] = [-100] * len(model_inputs['input_ids'])
    labels['labels'] = [-100 if token == tokenizer.pad_token_id else token for token in labels['labels']]
    return labels

# tokenizer.add_special_tokens({'pad_token': '[PAD]'})
tokenizer.pad_token = tokenizer.eos_token
tokenized_dataset = dataset.map(preprocess)

train_dataset = tokenized_dataset['train']
val_dataset = tokenized_dataset['validation']
test_dataset = tokenized_dataset['test']

Map:   0%|          | 0/4693 [00:00<?, ? examples/s]

In [8]:
# print(train_dataset[0])

#### Test Forward Prop

In [9]:
import torch
import torch.nn as nn

input_ids = torch.tensor(train_dataset[0]['input_ids']).unsqueeze(0)
labels = torch.tensor(train_dataset[0]['labels']).unsqueeze(0)
attention_mask = torch.tensor(train_dataset[0]['attention_mask']).unsqueeze(0)

In [10]:
print(labels.shape)

torch.Size([1, 512])


In [12]:
outputs = model(input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels
               )

# print(outputs.logits)
# print(labels)
loss_func = nn.CrossEntropyLoss(ignore_index=-100, reduction="mean")
logits = outputs.logits[:,30:]
print(logits.shape)
# loss = loss_func(logits.view(-1, logits.shape[-1]), labels.view(-1))
# Shift so that tokens < n predict n
shift_logits = logits[..., :-1, :].contiguous()
shift_labels = labels[..., 1:].contiguous()
# Flatten the tokens
loss_fct = nn.CrossEntropyLoss(ignore_index=-100)
loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))
print(loss)
print(outputs.loss)

torch.Size([1, 512, 50257])
tensor(3.7670, grad_fn=<NllLossBackward0>)
tensor(3.7670, grad_fn=<NllLossBackward0>)


In [13]:
print(outputs.logits[:, 30:].shape)

torch.Size([1, 512, 50257])


#### Training

In [14]:
from transformers import TrainingArguments, Trainer

model.gpt2.tie_weights()
training_args = TrainingArguments(
    output_dir='./result',
    evaluation_strategy="epoch",
    save_strategy="no",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    weight_decay=0.01
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset
)

Detected kernel version 4.19.118, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [15]:
trainer.train()

Epoch,Training Loss,Validation Loss
1,3.259,2.954715
2,3.055,2.760785
3,2.9022,2.608299
4,2.8373,2.526614
5,2.8064,2.508093


TrainOutput(global_step=26290, training_loss=3.041134145569103, metrics={'train_runtime': 8613.4121, 'train_samples_per_second': 24.416, 'train_steps_per_second': 3.052, 'total_flos': 0.0, 'train_loss': 3.041134145569103, 'epoch': 5.0})

#### Save Model Weights
Only learned prompt embeddings are saved.

In [16]:
save_path = "./result/learned_prompts.model"

model.save_learned_prompts(save_path)

In [20]:
for idx in range(10):
    input_text = test_dataset[idx]['meaning_representation']
    # print(input_text)
    inputs = tokenizer(input_text, return_tensors="pt").to('cuda')
    # print(inputs)
    
    for _ in range(50):
        outputs = model(**inputs, labels=inputs['input_ids'])
        logits = outputs.logits[0][-1,:]
        probs = nn.functional.softmax(logits, dim=-1)
        _, next_chr = torch.topk(probs, k=1, dim=-1)
        inputs['input_ids'] = torch.cat((inputs['input_ids'], next_chr.unsqueeze(0)), dim=-1)
        inputs['attention_mask'] = torch.cat((inputs['attention_mask'], torch.tensor([[1]]).to('cuda')), dim=-1)
    
    output_text = tokenizer.decode(inputs['input_ids'][0], skip_special_tokens=True)
    print({"meaning_representation": input_text, 
           "output_text": output_text,
           "ground_truth": test_dataset[idx]['human_reference']}
         )

{'meaning_representation': 'name[Blue Spice], eatType[coffee shop], area[city centre]', 'output_text': 'name[Blue Spice], eatType[coffee shop], area[city centre]Blue Spice is a coffee shop located in Blue Spice. Blue Spice is a coffee shop located in Blue Spice. Blue Spice is a coffee shop located in Blue Spice. Blue Spice is a coffee shop located in Blue Spice. Blue Spice is a coffee shop', 'ground_truth': 'A coffee shop in the city centre area called Blue Spice.'}
{'meaning_representation': 'name[Blue Spice], eatType[coffee shop], area[city centre]', 'output_text': 'name[Blue Spice], eatType[coffee shop], area[city centre]Blue Spice is a coffee shop located in Blue Spice. Blue Spice is a coffee shop located in Blue Spice. Blue Spice is a coffee shop located in Blue Spice. Blue Spice is a coffee shop located in Blue Spice. Blue Spice is a coffee shop', 'ground_truth': 'Blue Spice is a coffee shop in city centre.'}
{'meaning_representation': 'name[Blue Spice], eatType[coffee shop], are

#### Compare Finetuned Model with Pre-trained Model

In [18]:
from transformers import GPT2LMHeadModel

GPT2Model = GPT2LMHeadModel.from_pretrained('gpt2')

for idx in range(10):
    input_text = test_dataset[idx]['meaning_representation']
    inputs = tokenizer(input_text, return_tensors="pt")
    
    output_tokens = GPT2Model.generate(**inputs, max_length=150)
    
    output_text = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
    
    print({"meaning_representation": input_text, 
           "output_text": output_text,
           "ground_truth": test_dataset[idx]['human_reference']}
         )

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


{'meaning_representation': 'name[Blue Spice], eatType[coffee shop], area[city centre]', 'output_text': 'name[Blue Spice], eatType[coffee shop], area[city centre] = "blue"\n\nif not area[city centre] then\n\nreturn\n\nelseif not area[city centre] then\n\nreturn\n\nend\n\nend\n\nend\n\nend\n\nend\n\nfunction getArea(area)\n\nlocal area = area[0]\n\nlocal area = area[1]\n\nlocal area = area[2]\n\nlocal area = area[3]\n\nlocal area = area[4]\n\nlocal area = area[5]\n\nlocal area = area[6]\n\nlocal area = area[7]\n\nlocal area = area[', 'ground_truth': 'A coffee shop in the city centre area called Blue Spice.'}


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


{'meaning_representation': 'name[Blue Spice], eatType[coffee shop], area[city centre]', 'output_text': 'name[Blue Spice], eatType[coffee shop], area[city centre] = "blue"\n\nif not area[city centre] then\n\nreturn\n\nelseif not area[city centre] then\n\nreturn\n\nend\n\nend\n\nend\n\nend\n\nend\n\nfunction getArea(area)\n\nlocal area = area[0]\n\nlocal area = area[1]\n\nlocal area = area[2]\n\nlocal area = area[3]\n\nlocal area = area[4]\n\nlocal area = area[5]\n\nlocal area = area[6]\n\nlocal area = area[7]\n\nlocal area = area[', 'ground_truth': 'Blue Spice is a coffee shop in city centre.'}


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


{'meaning_representation': 'name[Blue Spice], eatType[coffee shop], area[riverside]', 'output_text': 'name[Blue Spice], eatType[coffee shop], area[riverside] = "blue"\n\nif not area[riverside] then\n\nbreak\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend', 'ground_truth': 'There is a coffee shop Blue Spice in the riverside area.'}


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


{'meaning_representation': 'name[Blue Spice], eatType[coffee shop], area[riverside]', 'output_text': 'name[Blue Spice], eatType[coffee shop], area[riverside] = "blue"\n\nif not area[riverside] then\n\nbreak\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend\n\nend', 'ground_truth': 'At the riverside, there is a coffee shop called The Blue Spice.'}


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


{'meaning_representation': 'name[Blue Spice], eatType[coffee shop], customer rating[5 out of 5], near[Crowne Plaza Hotel]', 'output_text': 'name[Blue Spice], eatType[coffee shop], customer rating[5 out of 5], near[Crowne Plaza Hotel]\n\n[Blue Spice], eatType[coffee shop], customer rating[5 out of 5], near[Crowne Plaza Hotel] [Blue Spice], eatType[coffee shop], customer rating[5 out of 5], near[Crowne Plaza Hotel] [Blue Spice], eatType[coffee shop], customer rating[5 out of 5], near[Crowne Plaza Hotel] [Blue Spice], eatType[coffee shop], customer rating[5 out of 5], near[Crowne Plaza Hotel] [Blue Spice], eatType[coffee shop], customer', 'ground_truth': 'The coffee shop Blue Spice is based near Crowne Plaza Hotel and has a high customer rating of 5 out of 5.'}


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


{'meaning_representation': 'name[Blue Spice], eatType[coffee shop], customer rating[5 out of 5], near[Crowne Plaza Hotel]', 'output_text': 'name[Blue Spice], eatType[coffee shop], customer rating[5 out of 5], near[Crowne Plaza Hotel]\n\n[Blue Spice], eatType[coffee shop], customer rating[5 out of 5], near[Crowne Plaza Hotel] [Blue Spice], eatType[coffee shop], customer rating[5 out of 5], near[Crowne Plaza Hotel] [Blue Spice], eatType[coffee shop], customer rating[5 out of 5], near[Crowne Plaza Hotel] [Blue Spice], eatType[coffee shop], customer rating[5 out of 5], near[Crowne Plaza Hotel] [Blue Spice], eatType[coffee shop], customer', 'ground_truth': 'The Blue Spice coffee shop, near Crowne Plaza Hotel, has a customer rating of 5 out of 5.'}


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


{'meaning_representation': 'name[Blue Spice], eatType[coffee shop], customer rating[5 out of 5], near[Crowne Plaza Hotel]', 'output_text': 'name[Blue Spice], eatType[coffee shop], customer rating[5 out of 5], near[Crowne Plaza Hotel]\n\n[Blue Spice], eatType[coffee shop], customer rating[5 out of 5], near[Crowne Plaza Hotel] [Blue Spice], eatType[coffee shop], customer rating[5 out of 5], near[Crowne Plaza Hotel] [Blue Spice], eatType[coffee shop], customer rating[5 out of 5], near[Crowne Plaza Hotel] [Blue Spice], eatType[coffee shop], customer rating[5 out of 5], near[Crowne Plaza Hotel] [Blue Spice], eatType[coffee shop], customer', 'ground_truth': 'If you want a coffee shop rated 5 out of 5 pick Blue Spice. It is located near Crowne Plaza Hotel.'}


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


{'meaning_representation': 'name[Blue Spice], eatType[coffee shop], customer rating[average], near[Burger King]', 'output_text': 'name[Blue Spice], eatType[coffee shop], customer rating[average], near[Burger King]\n\nThe following table lists the average rating of each of the three categories.\n\nAverage rating of the three categories\n\nAverage rating of the three categories\n\nAverage rating of the three categories\n\nAverage rating of the three categories\n\nAverage rating of the three categories\n\nAverage rating of the three categories\n\nAverage rating of the three categories\n\nAverage rating of the three categories\n\nAverage rating of the three categories\n\nAverage rating of the three categories\n\nAverage rating of the three categories\n\nAverage rating of the three categories\n\nAverage rating of the three categories\n\nAverage rating of the three', 'ground_truth': 'Burger King is near the coffee shop Blue Spice which has an average customer rating.'}


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


{'meaning_representation': 'name[Blue Spice], eatType[coffee shop], customer rating[average], near[Burger King]', 'output_text': 'name[Blue Spice], eatType[coffee shop], customer rating[average], near[Burger King]\n\nThe following table lists the average rating of each of the three categories.\n\nAverage rating of the three categories\n\nAverage rating of the three categories\n\nAverage rating of the three categories\n\nAverage rating of the three categories\n\nAverage rating of the three categories\n\nAverage rating of the three categories\n\nAverage rating of the three categories\n\nAverage rating of the three categories\n\nAverage rating of the three categories\n\nAverage rating of the three categories\n\nAverage rating of the three categories\n\nAverage rating of the three categories\n\nAverage rating of the three categories\n\nAverage rating of the three', 'ground_truth': 'The Blue Spice coffee shop near Burger King has good customer ratings with excellent food and service, with a

In [19]:
model_load = PromptTuningGPT2('gpt2', num_prompt_tokens=10)

model_load.load_learned_prompts(save_path)
model_load.to('cuda')

import random
sampled_idx = random.sample(range(0, len(test_dataset)), 20)
                            
for idx in sampled_idx:
    input_text = test_dataset[idx]['meaning_representation']
    # print(input_text)
    inputs = tokenizer(input_text, return_tensors="pt").to('cuda')
    # print(inputs)
    
    for _ in range(50):
        outputs = model_load(**inputs, labels=inputs['input_ids'])
        logits = outputs.logits[0][-1,:]
        probs = nn.functional.softmax(logits, dim=-1)
        _, next_chr = torch.topk(probs, k=1, dim=-1)
        inputs['input_ids'] = torch.cat((inputs['input_ids'], next_chr.unsqueeze(0)), dim=-1)
        inputs['attention_mask'] = torch.cat((inputs['attention_mask'], torch.tensor([[1]]).to('cuda')), dim=-1)
    
    output_text = tokenizer.decode(inputs['input_ids'][0], skip_special_tokens=True)
    print({"meaning_representation": input_text, 
           "output_text": output_text,
           "ground_truth": test_dataset[idx]['human_reference']}
         )


{'meaning_representation': 'name[The Cricketers], eatType[coffee shop], customer rating[average], familyFriendly[yes], near[Café Sicilia]', 'output_text': 'name[The Cricketers], eatType[coffee shop], customer rating[average], familyFriendly[yes], near[Café Sicilia]The Cricketers are a café located in the city of Cagliari. They are a popular café for the customers who are looking for a good coffee. They are located in the city of Cagliari. They are a popular café', 'ground_truth': 'The Cricketers comes customer approved, they are located near Café Sicilia and offer family friendly dining.'}
{'meaning_representation': 'name[Blue Spice], eatType[restaurant], food[English], area[riverside], familyFriendly[yes], near[Rainbow Vegetarian Café]', 'output_text': 'name[Blue Spice], eatType[restaurant], food[English], area[riverside], familyFriendly[yes], near[Rainbow Vegetarian Café]Blue Spice is a restaurant located in the city of Blue Springs. It is located in the city of Blue Springs. Blue Sp