In [24]:
import torch
import pandas as pd
from transformers import pipeline, set_seed, GPT2Tokenizer, GPT2LMHeadModel
from torch import tensor, numel
from bertviz import model_view
set_seed(42)

tokenizer=GPT2Tokenizer.from_pretrained("gpt2")
model=GPT2LMHeadModel.from_pretrained("gpt2")
generator = pipeline( 'text-generation', model='gpt2' ,device="mps")

In [25]:
phrase = 'My friend was right about this class. It is so fun!'
encoded_phrase = tokenizer(phrase, return_tensors='pt')

response = model(**encoded_phrase, output_attentions=True, output_hidden_states=True)

len(response.attentions)

12

In [26]:
encoded_phrase

{'input_ids': tensor([[3666, 1545,  373,  826,  546,  428, 1398,   13,  632,  318,  523, 1257,
            0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [27]:
response.attentions[-1].shape

torch.Size([1, 12, 13, 13])

In [28]:
encoded_phrase['input_ids'].shape

torch.Size([1, 13])

In [29]:
tokens = tokenizer.convert_ids_to_tokens(encoded_phrase['input_ids'][0])

tokens

['My',
 'Ġfriend',
 'Ġwas',
 'Ġright',
 'Ġabout',
 'Ġthis',
 'Ġclass',
 '.',
 'ĠIt',
 'Ġis',
 'Ġso',
 'Ġfun',
 '!']

In [30]:
arr = response.attentions[9][0][0]

n_digits = 3

attention_df = pd.DataFrame((torch.round(arr * 10**n_digits) / (10**n_digits)).detach()).applymap(float)

attention_df.columns = tokens
attention_df.index = tokens

attention_df

Unnamed: 0,My,Ġfriend,Ġwas,Ġright,Ġabout,Ġthis,Ġclass,.,ĠIt,Ġis,Ġso,Ġfun,!
My,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Ġfriend,0.968,0.032,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Ġwas,0.824,0.145,0.031,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Ġright,0.979,0.008,0.007,0.005,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Ġabout,0.979,0.008,0.004,0.005,0.005,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Ġthis,0.924,0.031,0.007,0.006,0.016,0.016,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Ġclass,0.946,0.005,0.001,0.001,0.001,0.002,0.044,0.0,0.0,0.0,0.0,0.0,0.0
.,0.691,0.013,0.003,0.003,0.002,0.006,0.269,0.013,0.0,0.0,0.0,0.0,0.0
ĠIt,0.318,0.003,0.003,0.003,0.006,0.018,0.599,0.018,0.032,0.0,0.0,0.0,0.0
Ġis,0.331,0.006,0.002,0.002,0.003,0.018,0.533,0.013,0.062,0.03,0.0,0.0,0.0


In [31]:
tokens = tokenizer.convert_ids_to_tokens(encoded_phrase['input_ids'][0]) 
model_view(response.attentions, tokens)

<IPython.core.display.Javascript object>

In [32]:
response.hidden_states[-1].shape

torch.Size([1, 13, 768])

In [33]:
response.logits

tensor([[[ -33.0735,  -32.3349,  -35.2380,  ...,  -38.3577,  -38.4758,
           -33.0943],
         [ -98.8075,  -98.9729, -105.2267,  ..., -109.0421, -104.7380,
          -101.0801],
         [-131.2764, -130.8178, -135.2054,  ..., -138.3585, -134.9846,
          -134.1658],
         ...,
         [-120.6479, -122.7568, -127.1870,  ..., -130.4343, -129.5816,
          -125.1274],
         [ -73.8157,  -78.6168,  -85.3994,  ...,  -91.8286,  -91.7845,
           -81.4569],
         [-134.7545, -134.9494, -136.2912,  ..., -147.1822, -148.1732,
          -130.7045]]], grad_fn=<UnsafeViewBackward0>)

In [34]:
response.logits.shape

torch.Size([1, 13, 50257])

In [35]:
pd.DataFrame(
    zip(tokens, tokenizer.convert_ids_to_tokens(response.logits.argmax(2)[0])), 
    columns=['Sequence up until', 'Next token with highest probability']
)

Unnamed: 0,Sequence up until,Next token with highest probability
0,My,Ċ
1,Ġfriend,","
2,Ġwas,Ġa
3,Ġright,.
4,Ġabout,Ġthat
5,Ġthis,.
6,Ġclass,.
7,.,ĠI
8,ĠIt,'s
9,Ġis,Ġa


In [38]:
generator('My friend was right', max_length=20, num_return_sequences=5)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[{'generated_text': 'My friend was right – I had no excuse not to come," he said.\n\n\nWith the'},
 {'generated_text': 'My friend was right down the street on this side of town," he said. "In that time'},
 {'generated_text': 'My friend was right, and the next day people had sent a bunch of pictures of us, and'},
 {'generated_text': "My friend was right to believe her son had been bullied at such a young age. It wouldn't"},
 {'generated_text': 'My friend was right, and it was pretty obvious that he did as well." I\'d had his'}]

In [39]:
generator(phrase, max_length=20, num_return_sequences=1, do_sample=False)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[{'generated_text': 'My friend was right about this class. It is so fun! I love it! I love the'}]

In [40]:
generator(phrase, max_length=20, num_return_sequences=1, do_sample=True)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[{'generated_text': 'My friend was right about this class. It is so fun! I went through so many classes on'}]