In [1]:
import torch
# Turn off autograd globally: this notebook only runs forward passes, no training.
torch.set_grad_enabled(False) 

<torch.autograd.grad_mode.set_grad_enabled at 0x7fb988a78850>

In [2]:
from transformers import AutoModel, AutoTokenizer

In [3]:
# Load the pretrained tokenizer and model weights for the "gpt2" checkpoint.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
# AutoModel resolves to GPT2Model for this checkpoint (see the `type(model)` output below).
model = AutoModel.from_pretrained("gpt2")

##### Example 0.1

In [27]:
prompt = "Persistence is all you need."
# NOTE(review): `model` was already loaded in an earlier cell; this reload replaces it
# with a new instance (presumably to start from a model with no hooks attached).
model = AutoModel.from_pretrained("gpt2")

In [28]:
# Despite the name, this holds the tokenizer's whole output (both `input_ids` and
# `attention_mask`, as displayed below), returned as PyTorch tensors.
input_ids = tokenizer(prompt, return_tensors="pt")

In [29]:
type(model)

transformers.models.gpt2.modeling_gpt2.GPT2Model

In [30]:
input_ids

{'input_ids': tensor([[30946, 13274,   318,   477,   345,   761,    13]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1]])}

Print the shape of the input to the 2nd block of `model` (`model.h[1]`) when the model is run on `prompt`.

**Hint**: `input[0].shape`

In [31]:
def extract_the_shape(module, input):
    """Print the shape of the first positional input passed to ``module``.

    Written with the ``(module, input)`` signature so it can be registered as a
    forward pre-hook; ``input`` is indexed at position 0 and nothing is returned.
    """
    first_arg = input[0]
    print(first_arg.shape)

In [32]:
# Attach the hook to the second block (index 1) so it fires just before that block runs.
# NOTE(review): the returned handle is not stored, so this hook cannot be removed later
# and stays registered on this `model` instance.
model.h[1].register_forward_pre_hook(extract_the_shape)

<torch.utils.hooks.RemovableHandle at 0x7fb989c20580>

In [33]:
# Run one forward pass to trigger the hook; the model output itself is not needed,
# so it is bound to `_`.
_ = model(**input_ids)

torch.Size([1, 7, 768])


##### Example 0.2

In [6]:
prompt = "Persistence is all you need."

In [7]:
input_ids = tokenizer(prompt, return_tensors="pt")

In [8]:
def hook1(module, input):
    """Pre-hook stub that ignores its arguments and prints ``Hook 1``."""
    print("Hook 1")


def hook2(module, input):
    """Pre-hook stub that ignores its arguments and prints ``Hook 2``."""
    print("Hook 2")


def hook3(module, input):
    """Pre-hook stub that ignores its arguments and prints ``Hook 3``."""
    print("Hook 3")

In [9]:
hook1, hook2, hook3

(<function __main__.hook1(module, input)>,
 <function __main__.hook2(module, input)>,
 <function __main__.hook3(module, input)>)

In [10]:
hooks = [hook1, hook2, hook3]

In [11]:
input_ids["input_ids"]

tensor([[30946, 13274,   318,   477,   345,   761,    13]])

In [12]:
tokens = input_ids["input_ids"]

In [13]:
hooks

[<function __main__.hook1(module, input)>,
 <function __main__.hook2(module, input)>,
 <function __main__.hook3(module, input)>]

In [14]:
# Call each hook by hand (with dummy arguments) to confirm what it prints.
# A plain loop is used because the calls are made only for their side effect;
# there is no result worth collecting into a list.
for hook in hooks:
    hook(None, None)

Hook 1
Hook 2
Hook 3


In [15]:
type(model)

transformers.models.gpt2.modeling_gpt2.GPT2Model

In [16]:
tokens

tensor([[30946, 13274,   318,   477,   345,   761,    13]])

Register `hook1`, `hook2`, and `hook3` as forward pre-hooks on the layer norm of `model`, then remove the second hook before running the forward pass.

**Hint**: `model.ln_f`

In [17]:
handles = []

In [18]:
# Register every hook on `model.ln_f` and keep its handle. `handles` ends up in the
# same order as `hooks`, so a specific hook can later be removed by position.
for hook_func in hooks:
    handle = model.ln_f.register_forward_pre_hook(hook_func)
    handles.append(handle)

In [19]:
# Index 1 is the handle for `hook2`; removing it leaves `hook1` and `hook3` registered.
handles[1].remove()

In [20]:
# Forward pass on the token ids only; the remaining hooks on `model.ln_f` print as they fire.
_ = model(tokens)

Hook 1
Hook 3


##### Example 0.3

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer

In [2]:
prompt = "Persistence is all you need."

In [3]:
# Load the pretrained tokenizer and model weights for the "gpt2" checkpoint.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
# AutoModelForCausalLM resolves to GPT2LMHeadModel here (see the `type(model)` output
# below); this example needs it because it calls `model.generate`.
model = AutoModelForCausalLM.from_pretrained("gpt2")

In [4]:
input_ids = tokenizer(prompt, return_tensors="pt")

In [5]:
tokens = input_ids["input_ids"]

In [6]:
def hook1(module, input):
    """Print ``Hook 1``; both arguments are accepted but unused."""
    print("Hook 1")


def hook2(module, input):
    """Print ``Hook 2``; both arguments are accepted but unused."""
    print("Hook 2")


def hook3(module, input):
    """Print ``Hook 3``; both arguments are accepted but unused."""
    print("Hook 3")

In [7]:
hook1, hook2, hook3

(<function __main__.hook1(module, input)>,
 <function __main__.hook2(module, input)>,
 <function __main__.hook3(module, input)>)

In [8]:
hooks = [hook1, hook2, hook3]

In [9]:
input_ids

{'input_ids': tensor([[30946, 13274,   318,   477,   345,   761,    13]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1]])}

In [10]:
type(model)

transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel

In [11]:
hooks

[<function __main__.hook1(module, input)>,
 <function __main__.hook2(module, input)>,
 <function __main__.hook3(module, input)>]

First, add all the hooks in `hooks` to the second block of `model` (`model.transformer.h[1]`). Then, generate text based on `input_ids` with `max_new_tokens=1`. Finally, make sure all the hooks are removed automatically once generation is done (e.g. with a context manager).

In [12]:
from contextlib import contextmanager

In [13]:
@contextmanager
def use_hooks(model, hooks):
    """Temporarily attach forward pre-hooks to the second transformer block.

    Every callable in ``hooks`` is registered on ``model.transformer.h[1]`` with
    ``register_forward_pre_hook``. All registered hooks are removed when the
    ``with`` body exits, whether it finishes normally or raises.

    Args:
        model: Object exposing ``transformer.h`` as an indexable sequence of
            blocks that provide ``register_forward_pre_hook``.
        hooks: Iterable of pre-hook callables, registered in iteration order.

    Yields:
        list: The handles returned by registration, in the same order as ``hooks``.

    Raises:
        Whatever the block lookup or a registration raises; hooks registered
        before the failure are removed first.
    """
    # Bind before the ``try`` so the ``finally`` clause can always iterate it,
    # even when the block lookup or a registration fails.
    handles = []
    try:
        second_block = model.transformer.h[1]
        # Append one handle at a time so that hooks registered before a failure
        # are still tracked and cleaned up.
        for hook in hooks:
            handles.append(second_block.register_forward_pre_hook(hook))
        yield handles
    finally:
        for handle in handles:
            handle.remove()

In [14]:
# The hooks are attached only for the duration of this `with` block; `use_hooks`
# removes them on exit.
with use_hooks(model, hooks):
    # `input_ids` is the tokenizer output mapping, unpacked into keyword arguments.
    generated_tokens = model.generate(**input_ids, max_new_tokens=1)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Hook 1
Hook 2
Hook 3


In [17]:
# Sanity check: look at the block's pre-hook registry (a private attribute) to confirm
# that nothing is left registered after the `with` block.
model.transformer.h[1]._forward_pre_hooks

OrderedDict()

##### Example 1

In [1]:
from transformers import AutoModel, AutoTokenizer

In [2]:
steer_positive_prompt = "Love"

In [3]:
steer_negative_prompt = "Hate"

In [4]:
prompts = [steer_positive_prompt, steer_negative_prompt]

In [None]:
# NOTE(review): relies on a `tokenizer` created in an earlier cell; none is created in
# this example's visible cells.
# The GPT-2 tokenizer has no padding token by default, and `padding=True` raises a
# ValueError in that case. Reusing the EOS token as padding is the usual workaround.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Build from the two source prompts rather than from `prompts` itself, so re-running
# this cell does not prepend the BOS token a second time.
prompts = [tokenizer.bos_token + p for p in (steer_positive_prompt, steer_negative_prompt)]
# Tokenize both prompts as one batch of PyTorch tensors, padded to a common length.
tokenized_prompts = tokenizer(prompts, return_tensors='pt', padding=True)