In [1]:
from transformer_lens import HookedTransformer

##### Example 1

In [2]:
# Load the pretrained "gpt2-small" checkpoint wrapped as a HookedTransformer
# (the loader confirms this in the log line printed below).
model = HookedTransformer.from_pretrained("gpt2-small")

Using pad_token, but it is not set yet.


Loaded pretrained model gpt2-small into HookedTransformer


In [4]:
model.W_Q.shape

torch.Size([12, 12, 768, 64])

In [7]:
model.W_in.shape

torch.Size([12, 768, 3072])

##### Example 2

In [11]:
def decode(x):
    """Convert token ids back into text.

    Thin wrapper that forwards ``x`` unchanged to the notebook-level
    ``model.to_string`` and returns whatever that call returns. For the
    ``[1, 8]`` token tensor used in this example the result is a one-element
    list of strings.
    """
    decoded = model.to_string(x)
    return decoded

In [8]:
text = "Persistence is all you need."

In [20]:
type(model)

transformer_lens.HookedTransformer.HookedTransformer

In [21]:
text

'Persistence is all you need.'

Convert the text into tokens with a beginning-of-sequence token

In [22]:
# Tokenize `text`, explicitly asking for the beginning-of-sequence token to be
# prepended; it shows up as id 50256 at index 0 of the tensor printed below.
tokens = model.to_tokens(text, prepend_bos=True)

In [23]:
tokens

tensor([[50256, 30946, 13274,   318,   477,   345,   761,    13]])

In [24]:
decode(tokens)

['<|endoftext|>Persistence is all you need.']

##### Example 2

In [26]:
from transformer_lens.utils import get_act_name

In [53]:
# Forward pass that returns the logits together with `cache`, which is indexed
# by hook name further down to read intermediate activations.
# `logits` is [1, 8, 50257] here (see the shape printed below).
logits, cache = model.run_with_cache(tokens)

In [54]:
logits.shape

torch.Size([1, 8, 50257])

In [33]:
# Hook name for the attention pattern of layer 0.
# NOTE(review): presumably resolves to 'blocks.0.attn.hook_pattern' like the
# three-argument call used in Example 3 — confirm. The value is not read again
# in the visible cells of this example.
hook_name = get_act_name("pattern", 0)

In [46]:
layer_idx = 0

In [50]:
# Resolve the hook names of the query ("q") and key ("k") activations for the
# layer selected by `layer_idx`.
q_hook_name, k_hook_name = (
    get_act_name(kind, layer_idx) for kind in ("q", "k")
)

In [51]:
# Pull the cached query and key activations out of the forward-pass cache.
# `q` is [1, 8, 12, 64] here — presumably (batch, position, head, d_head).
q, k = cache[q_hook_name], cache[k_hook_name]

In [52]:
q.shape

torch.Size([1, 8, 12, 64])

##### Example 3

In [1]:
import torch
from transformer_lens import HookedTransformer

# Turn off gradient tracking globally: the cells below only run forward passes
# and read activations, nothing calls backward().
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x7f96a94f7e80>

In [2]:
# Load the pretrained "gpt2" checkpoint wrapped as a HookedTransformer.
model = HookedTransformer.from_pretrained("gpt2")

Using pad_token, but it is not set yet.


Loaded pretrained model gpt2 into HookedTransformer


In [3]:
text = "Persistence is all you need."

In [4]:
# Tokenize `text`. No `prepend_bos` is passed, yet the tensor printed below
# still starts with 50256, so the BOS token is prepended by default.
tokens = model.to_tokens(text)

In [119]:
tokens.shape[-1], type(model)

(8, transformer_lens.HookedTransformer.HookedTransformer)

In [120]:
tokens

tensor([[50256, 30946, 13274,   318,   477,   345,   761,    13]])

Visualize the attention pattern of `tokens` using `circuitsvis`

**Hint**: `"pattern", 0, "attn"`

In [122]:
from transformer_lens.utils import get_act_name
import circuitsvis as cv
from einops import rearrange

In [121]:
# Forward pass; the logits are discarded (`_`) and only the activation cache
# is kept for the attention-pattern lookup below.
_, cache = model.run_with_cache(tokens)

In [123]:
# Build the hook name for the layer-0 attention pattern; this evaluates to
# 'blocks.0.attn.hook_pattern' (shown in the next cell's output).
hook_name = get_act_name("pattern", 0, "attn")

In [124]:
hook_name

'blocks.0.attn.hook_pattern'

In [125]:
# Cached attention pattern for layer 0: [1, 12, 8, 8] here, i.e. it still
# carries a leading batch axis of size 1.
attention_pattern = cache[hook_name]
# One string per token (including the leading '<|endoftext|>'), used as the
# labels in the visualization.
str_tokens = model.to_str_tokens(tokens)

In [126]:
attention_pattern.shape

torch.Size([1, 12, 8, 8])

In [127]:
# Squeeze the singleton batch axis: [1, 12, 8, 8] -> [12, 8, 8].
# Read from `cache[hook_name]` rather than from `attention_pattern` itself so
# that this cell is idempotent: once the batch axis is gone, the literal `1`
# in the pattern no longer matches and a re-run would raise.
attention_pattern = rearrange(cache[hook_name], "1 ... -> ...")

In [128]:
str_tokens

['<|endoftext|>', 'Pers', 'istence', ' is', ' all', ' you', ' need', '.']

In [129]:
# Render the layer-0 attention patterns with circuitsvis.
# `attention_pattern` is [12, 8, 8] at this point (batch axis already removed)
# and `str_tokens` holds the 8 token strings used as labels.
# NOTE(review): presumably the axes are (head, destination, source) — confirm
# against the circuitsvis documentation.
cv.attention.attention_patterns(
    tokens=str_tokens,
    attention=attention_pattern,
)

##### Example 4

In [164]:
# Hyperparameters for the custom model built below; the keys are passed
# verbatim as keyword arguments to HookedTransformerConfig.
params = dict(
    d_model=768,
    d_head=64,
    n_heads=12,
    n_layers=2,
    n_ctx=2048,
    attn_only=True,  # defaults to False
    d_vocab=50278,
)

In [172]:
params

{'d_model': 768,
 'd_head': 64,
 'n_heads': 12,
 'n_layers': 2,
 'n_ctx': 2048,
 'attn_only': True,
 'd_vocab': 50278}

Create a new custom model based on the configurations in `params`

In [173]:
from transformer_lens import HookedTransformer, HookedTransformerConfig

In [174]:
# Unpack the `params` dict into keyword arguments of the config object.
cfg = HookedTransformerConfig(**params)

In [175]:
# Instantiate a HookedTransformer directly from the config. No checkpoint is
# loaded in this example, so the weights are presumably freshly initialised
# rather than pretrained — TODO confirm.
model = HookedTransformer(cfg=cfg)

In [176]:
type(model)

transformer_lens.HookedTransformer.HookedTransformer

##### Example 5

In [40]:
import torch
from transformer_lens import HookedTransformer, HookedTransformerConfig

# Turn off gradient tracking globally: this example only reads weights and
# tokenizes text, nothing calls backward().
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x7fe41b6d62b0>

In [41]:
# Load the pretrained "gpt2" checkpoint wrapped as a HookedTransformer.
model = HookedTransformer.from_pretrained("gpt2")

Using pad_token, but it is not set yet.


Loaded pretrained model gpt2 into HookedTransformer


In [92]:
text = "Persistence is all you need."

In [119]:
type(model)

transformer_lens.HookedTransformer.HookedTransformer

In [120]:
text

'Persistence is all you need.'

Extract the unembedding vectors corresponding to the target tokens at each position in the `text`

In [121]:
# Tokenize `text`; the result keeps a leading batch axis, [1, 8] here.
tokens = model.to_tokens(text)
tokens.shape

torch.Size([1, 8])

In [122]:
# Tokenize and drop the batch axis in one step ([1, 8] -> [8]).
# Deriving the value from `text` instead of re-indexing `tokens` keeps this
# cell idempotent: re-running `tokens = tokens[0]` would silently index the
# already 1-D tensor down to a 0-d scalar and break the slicing below.
tokens = model.to_tokens(text)[0]
tokens.shape

torch.Size([8])

In [123]:
# Unembedding matrix of the model; [768, 50257] here, so each column is the
# unembedding vector of one vocabulary entry.
W_U = model.W_U
W_U.shape

torch.Size([768, 50257])

In [124]:
# The target at each position is the token that follows it, so drop the first
# token (the BOS, id 50256): 8 tokens -> 7 targets.
target_tokens = tokens[1:]
target_tokens.shape

torch.Size([7])

In [125]:
# Select the column of W_U belonging to each target token id, keeping every
# row: [768, 50257] -> [768, 7].
W_U_correct_tokens = W_U[:, target_tokens]

In [126]:
W_U_correct_tokens.shape

torch.Size([768, 7])

##### Example 6

In [1]:
import torch
from transformer_lens import HookedTransformer, HookedTransformerConfig
from transformer_lens.utils import get_act_name

# Turn off gradient tracking globally: this example only runs a forward pass
# and reads cached activations, nothing calls backward().
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x7fef4136ca90>

In [2]:
# Load the pretrained "gpt2" checkpoint wrapped as a HookedTransformer.
model = HookedTransformer.from_pretrained("gpt2")

Using pad_token, but it is not set yet.


Loaded pretrained model gpt2 into HookedTransformer


In [3]:
text = "Persistence is all you need."

In [4]:
# Tokenize `text` and take the first (only) batch entry, giving a 1-D tensor
# of 8 token ids that starts with the BOS id 50256.
tokens = model.to_tokens(text)[0]

In [5]:
# Hook name of the token-embedding activation; evaluates to 'hook_embed'
# (shown in a later cell's output).
hook_name = get_act_name("embed")

In [24]:
type(model)

transformer_lens.HookedTransformer.HookedTransformer

In [25]:
tokens

tensor([50256, 30946, 13274,   318,   477,   345,   761,    13])

In [26]:
hook_name, tokens.shape

('hook_embed', torch.Size([8]))

Compute the logit attribution for the embedding layer and explain the result.

**Hints**: `weight @ component`, `embed.shape = (seq, emb)`, `weight.shape = (emb, seq)`

In [27]:
from einops import einsum

In [28]:
# Forward pass that records activations; the logits are discarded (`_`).
# remove_batch_dim=True is passed so cached activations come back without a
# batch axis (`embed_activations` is [8, 768] in the output below).
_, cache = model.run_with_cache(tokens, remove_batch_dim=True)
# Unembedding matrix, [768, 50257] in the output below.
W_U = model.W_U
# Activation recorded at `hook_name` ('hook_embed'): one 768-d vector per
# token position.
embed_activations = cache[hook_name]

In [29]:
W_U.shape, embed_activations.shape

(torch.Size([768, 50257]), torch.Size([8, 768]))

In [30]:
# The target at each position is the token that follows it, so drop the
# leading BOS token: 8 tokens -> 7 targets.
target_tokens = tokens[1:]
# Select the unembedding column of every target token: [768, 50257] -> [768, 7].
# Index `W_U` (bound from `model.W_U` in the previous code cell); no variable
# named `unembed` exists in this notebook, so using it raises NameError on a
# fresh kernel.
W_U_target_tokens = W_U[:, target_tokens]

In [31]:
# Sanity-check the shapes: 7 targets and one 768-d unembedding column each.
# The matrix is named `W_U_target_tokens` (the name the einsum cell below
# consumes); `unembed_target_tokens` is never defined in this notebook.
target_tokens.shape, W_U_target_tokens.shape

(torch.Size([7]), torch.Size([768, 7]))

In [32]:
# Logit attribution of the token embedding, one scalar per position.
#   W_U_target_tokens       [768, 7]  (emb, seq): unembedding column of each target token
#   embed_activations[:-1]  [7, 768]  (seq, emb): embedding at every position except the
#                           last one, which has no following token to predict
# The einsum multiplies matching entries and sums over `emb`, i.e. for each
# position i it takes the dot product between the embedding at i and the
# unembedding vector of the token at i + 1. The result is how much the
# embedding alone pushes the logit of the correct next token.
# NOTE(review): this skips any final normalisation the model applies before
# unembedding — confirm that is acceptable for this exercise.
embed_attributions = einsum(W_U_target_tokens, embed_activations[:-1],"emb seq, seq emb -> seq")

In [33]:
embed_attributions.shape

torch.Size([7])