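Here's an example of training a vector on the difference between two models given the same underlying text (fed raw to the base model and wrapped in the chat template for the instruct model), instead of on the difference between two prompts given to the same model.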

Needs `datasets`: `pip install datasets`

In [2]:
import datasets
import numpy as np
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from repeng import ControlModel, ControlVector, DatasetEntry
from repeng.extract import batched_get_hiddens

In [3]:
# Hub identifiers for the two checkpoints being compared and for the text corpus.
MODEL_A = "Qwen/Qwen2.5-7B"
MODEL_B = "Qwen/Qwen2.5-7B-Instruct"
DATASET = "agentlans/wikipedia-paragraphs"

# Both checkpoints are loaded with exactly the same settings.
load_kwargs = {"dtype": torch.bfloat16, "device_map": "cuda"}

# Only MODEL_A's tokenizer is loaded; the rest of the notebook uses it for
# both models.
tokenizer = AutoTokenizer.from_pretrained(MODEL_A)
model_a = AutoModelForCausalLM.from_pretrained(MODEL_A, **load_kwargs)
model_b = AutoModelForCausalLM.from_pretrained(MODEL_B, **load_kwargs)
dataset = datasets.load_dataset(DATASET)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [4]:
# Build the contrastive pairs. For each paragraph:
#   positive = the first MAX_WORDS space-separated words, as raw text
#   negative = that same text wrapped in the chat template as a single user
#              turn, with the generation prompt appended
NUM_PARAGRAPHS = 100
MAX_WORDS = 100

train_dataset = []
for paragraph in dataset["train"].take(NUM_PARAGRAPHS)["text"]:
    snippet = " ".join(paragraph.split(" ")[:MAX_WORDS])
    messages = [{"role": "user", "content": snippet}]
    templated = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    train_dataset.append(DatasetEntry(positive=snippet, negative=templated))

In [5]:
def compute_hiddens(train_strs, hidden_layers, batch_size, **kwargs):
    """Collect hidden states from two models for alternating training strings.

    Intended as the ``compute_hiddens`` hook of ``ControlVector.train``. The
    trainer also passes ``model``, ``tokenizer``, ...; those are not needed
    here and are swallowed by ``**kwargs``, because this hook reads the
    module-level ``model_a``, ``model_b`` and ``tokenizer`` instead.

    Args:
        train_strs: Flat sequence of strings. Even positions are run through
            ``model_a`` and odd positions through ``model_b``.
        hidden_layers: Layer keys to extract; forwarded to
            ``batched_get_hiddens`` and used to index its result.
        batch_size: Forwarded unchanged to ``batched_get_hiddens``.
        **kwargs: Ignored.

    Returns:
        A dict mapping each layer to an array of shape
        ``(len(train_strs), hidden_dim)`` whose rows alternate
        model_a, model_b, model_a, model_b, ...

    Raises:
        AssertionError: If ``train_strs`` has an odd number of entries.
    """
    print("Hooked compute_hiddens")

    strs_for_a = train_strs[0::2]
    strs_for_b = train_strs[1::2]
    assert len(strs_for_a) == len(strs_for_b)

    hiddens_a = batched_get_hiddens(
        model_a, tokenizer, strs_for_a, hidden_layers, batch_size
    )
    hiddens_b = batched_get_hiddens(
        model_b, tokenizer, strs_for_b, hidden_layers, batch_size
    )

    def _interleave(rows_a, rows_b):
        # Pair row k of A with row k of B along a new middle axis, then
        # flatten the pairs so the rows read A0, B0, A1, B1, ...
        paired = np.stack((rows_a, rows_b), axis=1)
        return paired.reshape((rows_a.shape[0] + rows_b.shape[0], rows_a.shape[1]))

    return {
        layer: _interleave(hiddens_a[layer], hiddens_b[layer])
        for layer in hidden_layers
    }


# Train the control vector, handing our two-model compute_hiddens to the
# trainer as its hook. `model` and `tokenizer` are still passed here, but
# compute_hiddens ignores them (via **kwargs) and reads model_a / model_b
# directly.
completion_vector = ControlVector.train(
    model=model_a,
    tokenizer=tokenizer,
    dataset=train_dataset,
    compute_hiddens=compute_hiddens,
    method="pca_center",
)

Hooked compute_hiddens


100%|█████████████████████████████████████████████| 4/4 [00:01<00:00,  2.35it/s]
100%|█████████████████████████████████████████████| 4/4 [00:01<00:00,  2.48it/s]
100%|███████████████████████████████████████████| 27/27 [00:00<00:00, 57.41it/s]


In [6]:
from transformers import TextStreamer


class TokenStreamer(TextStreamer):
    """``TextStreamer`` whose ``_is_chinese_char`` check always answers True.

    NOTE(review): presumably this makes the streamer print each decoded chunk
    right away instead of holding text back until a word boundary -- confirm
    against the ``TextStreamer`` implementation in the installed transformers.
    """

    def _is_chinese_char(*_args, **_kwargs):
        # Accepts and ignores whatever it is called with (including self).
        return True


def generate_with_vector(
    prompt: str,
    vectors,
    model=model_a,
    max_new_tokens: int = 128,
):
    """Stream greedy completions of ``prompt``: unsteered, then once per vector.

    The model is wrapped in a ``ControlModel`` for the duration of the call.
    The control is always cleared and the model unwrapped before returning,
    even if tokenization or generation raises or is interrupted, so the
    shared ``model`` is never left wrapped or steered for later cells.

    Args:
        prompt: Text to complete.
        vectors: Iterable of ``(label, vector)`` pairs. Each label is printed
            as a heading and each vector is applied with ``set_control``
            before generating.
        model: Model to wrap. Defaults to ``model_a`` (bound when this
            function is defined).
        max_new_tokens: Generation length limit passed to ``generate``.

    Returns:
        None. Completions are streamed to stdout through ``TokenStreamer``.
    """
    # NOTE(review): layers 1..27 are hard-coded; presumably chosen for the
    # Qwen2.5-7B checkpoints loaded in this notebook -- confirm before reusing
    # this helper with a different model.
    ctl = ControlModel(model, list(range(1, 28)))
    try:
        encoded = tokenizer(prompt, return_tensors="pt")
        settings = {
            "pad_token_id": tokenizer.eos_token_id,  # silence warning
            "do_sample": False,  # temperature=0
            "max_new_tokens": max_new_tokens,
        }

        def generate():
            ctl.generate(
                streamer=TokenStreamer(tokenizer), **encoded.to(ctl.device), **settings
            )

        ctl.reset()
        print("# baseline:")
        generate()
        for label, v in vectors:
            print(f"\n# {label}")
            ctl.set_control(v)
            generate()
    finally:
        # Runs on success and on failure alike: drop any active control and
        # restore the original, unwrapped model.
        ctl.reset()
        ctl.unwrap()


# Compare greedy completions of a short prompt: unsteered first, then with the
# trained vector scaled in each direction. Per the labels below, a negative
# coefficient steers towards the instruct model and a positive one away from it.
generate_with_vector(
    "Hurt-",
    [
        ("steered towards instruct", completion_vector * -1.5),
        ("steered away from instruct", completion_vector * 2.0),
    ],
)

# baseline:
Hurt-Proofing Your Child
By: Dr. Michael R. Thompson
The most important thing you can do to help your child is to help him or her develop a sense of self-worth. This is the most important ingredient in a child's ability to cope with the world. A child who feels good about himself or herself is more likely to be able to handle the challenges of life. A child who feels good about himself or herself is more likely to be able to handle the challenges of life. A child who feels good about himself or herself is more likely to be able to handle the challenges of life. A child who feels good about

# steered towards instruct
Hurt-Proofing Your Child: How to Help Your Child Build Resilience
Resilience is the ability to bounce back from adversity. It’s the ability to adapt to change and to learn from mistakes.It’s the ability to handle stress and to cope with difficult situations.
Resilience is a skill that can be learned and developed over time. It’s not something that you’re born w