In [1]:
from transformer_lens import HookedTransformer
import torch
import circuitsvis as cv
import einops
from IPython.display import display
import numpy as np
from pprint import pprint
from datasets import load_dataset
import random
from tqdm import tqdm

2025-02-26 11:42:12.233066: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1740570132.255266   72624 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1740570132.262072   72624 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# Pick the first available accelerator backend (MPS, then CUDA), falling back to the CPU.
# The availability checks are evaluated lazily, in priority order.
device = torch.device(
    next(
        (
            name
            for name, is_available in (
                ("mps", torch.backends.mps.is_available),
                ("cuda", torch.cuda.is_available),
            )
            if is_available()
        ),
        "cpu",
    )
)
print('Got device:', device)

Got device: cuda


For this notebook to work, please add 

```python
deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
```

to the `OFFICIAL_MODEL_NAMES` list in the `loading_from_pretrained.py` file of your local copy of the `TransformerLens` library (i.e. after you have downloaded or installed the library locally).

I also increased `n_ctx` for the `"QWenLMHeadModel"` and `"Qwen2ForCausalLM"` architectures from 2048 to 4096; according to the documentation, the 2048 cap is there only because of memory constraints.

In [7]:
# Load the pretrained weights into a HookedTransformer and move the model onto `device`.
model = HookedTransformer.from_pretrained(
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
).to(device)

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


Loaded pretrained model deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B into HookedTransformer
Moving model to device:  cuda


In [8]:
# Summarise the key hyperparameters of the loaded model, one per line.
print(
    "\n".join(
        f"{label}: {getattr(model.cfg, attribute)}"
        for label, attribute in (
            ("📏 Model context length", "n_ctx"),
            ("🧠 Model layers", "n_layers"),
            ("🔤 Vocabulary size", "d_vocab"),
            ("📊 Hidden dimension", "d_model"),
            ("🧩 Attention heads", "n_heads"),
            ("🏷️ Model name", "model_name"),
        )
    )
)

📏 Model context length: 2048
🧠 Model layers: 28
🔤 Vocabulary size: 151936
📊 Hidden dimension: 1536
🧩 Attention heads: 12
🏷️ Model name: DeepSeek-R1-Distill-Qwen-1.5B


In [None]:
def lambda_prompt(problem):
    """Wrap a problem statement in the step-by-step solving prompt, ending with an opened <think> tag."""
    return f"""Solve this math problem step by step. Put your final answer in \\boxed{{}}. Problem: {problem} Solution: \n<think>\n"""


# Generate completions for a handful of prompts in a single batched call.
result = model.generate(
    list(
        map(
            lambda_prompt,
            ["2x+3=11", "What is the meaning of life?", "4x^2 + 3y^2 + 2x + 3y = 11", "Riemann Hypothesis"],
        )
    ),
    temperature=0,
    max_new_tokens=1400,
    top_p=0.92,
)

In [None]:
# Display the output of the batched generation call from the previous cell.
result 

In [None]:
# Run a forward pass that also records intermediate activations, then show both results.
logits, activations = model.run_with_cache("Hello World")

print(logits, activations, sep="\n")

In [10]:
# Sample paragraph used as model input by the cells that follow (loss, tokenisation,
# next-token accuracy). It is only input text: it talks about GPT-2 Small, which is
# not necessarily the model loaded in this notebook.
prompt = """
## Loading Models

HookedTransformer comes loaded with >40 open source GPT-style models. You can load any of them in with `HookedTransformer.from_pretrained(MODEL_NAME)`. Each model is loaded into the consistent HookedTransformer architecture, designed to be clean, consistent and interpretability-friendly.

For this demo notebook we'll look at GPT-2 Small, an 80M parameter model. To try the model the model out, let's find the loss on this paragraph!
"""

In [None]:
# Ask the model for its loss on the sample paragraph instead of logits.
loss = model(prompt, return_type="loss")
print(loss)

In [None]:
# Show how the sample paragraph is split into string tokens.
model.to_str_tokens(prompt)

In [None]:
# Highest-scoring entry along the last axis of the logits, per position.
logits = model(prompt, return_type="logits")
# squeeze() drops size-1 axes; the final position is discarded so that the predictions
# can be compared against the prompt tokens shifted by one (see the next cell).
prediction = logits.argmax(dim=-1).squeeze()[:-1]
print(prediction)

In [None]:
# Compare each prediction with the token that actually follows it in the prompt:
# the first token is dropped because nothing predicts it.
true_tokens = model.to_tokens(prompt).squeeze()[1:]
is_correct = prediction == true_tokens

# .item() turns the 0-d tensor into a plain int so the message reads "33/111"
# rather than "tensor(33, device='cuda:0')/111".
print(f"Model accuracy: {is_correct.sum().item()}/{len(true_tokens)}")
print(f"Correct tokens: {model.to_str_tokens(prediction[is_correct])}")

In [None]:
# Inspect the query projection weights of the first block's attention layer.
model.blocks[0].attn.W_Q

In [None]:
# Tokenise a sample sentence and run it through the model while caching activations.
text = "Natural language processing tasks, such as question answering, machine translation, reading comprehension, and summarization, are typically approached with supervised learning on taskspecific datasets."
tokens = model.to_tokens(text)
# remove_batch_dim=True: later cells index the cached activations without a batch axis.
logits, cache = model.run_with_cache(tokens, remove_batch_dim=True)

print(type(logits), type(cache))

In [19]:
# The cache can be indexed either with a (name, layer) shorthand or with the full hook name.
attn_patterns_from_shorthand = cache["pattern", 0]
attn_patterns_from_full_name = cache["blocks.0.attn.hook_pattern"]

# Raises if the two lookups do not return matching tensors.
torch.testing.assert_close(attn_patterns_from_shorthand, attn_patterns_from_full_name)

In [None]:
# Recompute the layer-0 attention pattern by hand and compare it with the cached one.
#
# The GPT-2 version of this check (raw `q`/`k` straight from the cache) does not carry
# over to this model for two reasons:
#   1. With rotary positional embeddings, the cached `q`/`k` are recorded before the
#      rotation is applied; the attention scores are computed from the rotated
#      tensors exposed at `hook_rot_q` / `hook_rot_k`.
#   2. With grouped-query attention there are fewer key heads than query heads, so
#      each key head has to be repeated to line up with the query heads sharing it.
layer0_pattern_from_cache = cache["pattern", 0]

if model.cfg.positional_embedding_type == "rotary":
    q, k = cache["blocks.0.attn.hook_rot_q"], cache["blocks.0.attn.hook_rot_k"]
else:
    q, k = cache["q", 0], cache["k", 0]
seq, nhead, headsize = q.shape
if k.shape[1] != nhead:
    # Consecutive query heads share one key head, hence repeat_interleave (not tiling).
    k = torch.repeat_interleave(k, nhead // k.shape[1], dim=1)
layer0_attn_scores = einops.einsum(q, k, "seqQ n h, seqK n h -> n seqQ seqK")
# Causal mask: True strictly above the diagonal, i.e. for keys that lie in the future.
mask = torch.triu(
    torch.ones((seq, seq), dtype=torch.bool, device=layer0_attn_scores.device),
    diagonal=1,
)
layer0_attn_scores.masked_fill_(mask, -1e9)
layer0_pattern_from_q_and_k = (layer0_attn_scores / headsize**0.5).softmax(-1)

torch.testing.assert_close(layer0_pattern_from_cache, layer0_pattern_from_q_and_k)
print("Tests passed!")

In [None]:
# Visualise the layer-0 attention patterns for the sample sentence.
print(type(cache))
attention_pattern = cache["pattern", 0]
print(attention_pattern.shape)
# String form of the tokens, used as labels in the visualisation.
str_tokens = model.to_str_tokens(text)

print("Layer 0 Head Attention Patterns:")
display(
    cv.attention.attention_patterns(
        tokens=str_tokens,
        attention=attention_pattern,
        #attention_head_names=[f"L0H{i}" for i in range(12)],
    )
)

In [None]:
# Collect the cached "post" activations of every layer and stack them along a new axis 1.
neuron_activations_for_all_layers = torch.stack([cache["post", layer] for layer in range(model.cfg.n_layers)], dim=1)
# shape = (seq_pos, layers, neurons)

# Interactive view of the activations over the tokens of the sample sentence.
cv.activations.text_neuron_activations(
    tokens=str_tokens,
    activations=neuron_activations_for_all_layers
)

In [33]:
def to_numpy(tensor):
    """Return the tensor's data as a NumPy array, detached from autograd and copied to host memory."""
    host_tensor = tensor.detach().cpu()
    return host_tensor.numpy()

In [None]:
# Add a leading batch axis of size 1 and put layers first, as expected by topk_tokens.
neuron_activations_for_all_layers_rearranged = to_numpy(einops.rearrange(neuron_activations_for_all_layers, "seq layers neurons -> 1 layers seq neurons"))

cv.topk_tokens.topk_tokens(
    # Some weird indexing required here ¯\_(ツ)_/¯
    tokens=[str_tokens],
    activations=neuron_activations_for_all_layers_rearranged,
    max_k=7,
    first_dimension_name="Layer",
    third_dimension_name="Neuron",
    # One label per stacked layer; a hard-coded 12 only matched GPT-2 Small.
    first_dimension_labels=list(range(model.cfg.n_layers))
)

In [None]:
# Quick generation sanity check, capped at 20 new tokens.
model.generate("What is 5+5?", max_new_tokens=20)

In [47]:
# Re-run with caching on an arithmetic prompt; note this overwrites `logits` and `cache`.
logits, cache = model.run_with_cache("What is 5+5 divided 16?", remove_batch_dim=True)

In [None]:
# Free-form "step by step" prompt; the model continues from the final line.
prompt = """Let's solve this step by step:

Question: What is Einstein's law of general relativity?

Let's break this down:
"""

# Sample a continuation of up to 500 tokens and pretty-print it.
result = model.generate(
    prompt,
    temperature=0.6,
    max_new_tokens=500,
    top_p=0.95,
)
pprint(result)

In [3]:
# Load the MATH dataset from the "fdyrd/math" dataset repository
math_dataset = load_dataset("fdyrd/math")
print(f"Dataset structure: {math_dataset}")

# Examine the dataset structure: available splits and their sizes
print(f"Available splits: {math_dataset.keys()}")
print(f"Number of examples in train: {len(math_dataset['train'])}")
print(f"Number of examples in test: {len(math_dataset['test'])}")

# Look at the first training example to understand the record format
# (the fields read below are 'problem', 'level', 'type' and 'solution')
print("\nExample from the dataset:")
example = math_dataset['train'][0]
print(f"Problem: {example['problem']}")
print(f"Level: {example['level']}")
print(f"Type: {example['type']}")
print(f"Solution: {example['solution']}")

Dataset structure: DatasetDict({
    train: Dataset({
        features: ['problem', 'level', 'type', 'solution'],
        num_rows: 7500
    })
    test: Dataset({
        features: ['problem', 'level', 'type', 'solution'],
        num_rows: 5000
    })
})
Available splits: dict_keys(['train', 'test'])
Number of examples in train: 7500
Number of examples in test: 5000

Example from the dataset:
Problem: Let \[f(x) = \left\{
\begin{array}{cl} ax+3, &\text{ if }x>2, \\
x-5 &\text{ if } -2 \le x \le 2, \\
2x-b &\text{ if } x <-2.
\end{array}
\right.\]Find $a+b$ if the piecewise function is continuous (which means that its graph can be drawn without lifting your pencil from the paper).
Level: Level 5
Type: Algebra
Solution: For the piecewise function to be continuous, the cases must "meet" at $2$ and $-2$. For example, $ax+3$ and $x-5$ must be equal when $x=2$. This implies $a(2)+3=2-5$, which we solve to get $2a=-6 \Rightarrow a=-3$. Similarly, $x-5$ and $2x-b$ must be equal when $x=-2$. 

In [4]:
# Function to sample problems from the dataset
def sample_math_problems(dataset, n=5, level=None, problem_type=None, split="train"):
    """
    Sample up to n problems from one split of the dataset, optionally filtering
    by level and/or type.

    Args:
        dataset: Mapping from split name to an iterable of problem records;
            each record must provide the 'level' and 'type' keys
        n: Number of problems to sample; capped at the number of matching problems
        level: Optional filter for problem difficulty (e.g., "Level 1")
        problem_type: Optional filter for problem type (e.g., "Algebra")
        split: Name of the split to sample from (defaults to "train")

    Returns:
        List of sampled problems, drawn without replacement (empty when
        nothing matches the filters)

    Raises:
        ValueError: If n is negative (raised by random.sample)
    """
    # Filter in a single pass; an unset (falsy) filter matches every record.
    candidates = [
        ex
        for ex in dataset[split]
        if (not level or ex['level'] == level)
        and (not problem_type or ex['type'] == problem_type)
    ]
    return random.sample(candidates, min(n, len(candidates)))

In [17]:
# Draw three Level 3 problems and print a short summary of each.
sampled_problems = sample_math_problems(math_dataset, n=3, level="Level 3")
print("\nSampled problems for testing:")
for number, sampled in enumerate(sampled_problems, start=1):
    print(f"\nProblem {number}:")
    print(f"Type: {sampled['type']}, Level: {sampled['level']}")
    print(f"Problem statement: {sampled['problem']}")


Sampled problems for testing:

Problem 1:
Type: Geometry, Level: Level 3
Problem statement: Two triangles are similar. The ratio of their areas is 1:4. If the height of the smaller triangle is 3 cm, how long is the corresponding height of the larger triangle, in centimeters?

Problem 2:
Type: Counting & Probability, Level: Level 3
Problem statement: I have 6 shirts, 4 pairs of pants, and 6 hats. The pants come in tan, black, blue, and gray. The shirts and hats come in those colors, and also white and yellow. I refuse to wear an outfit in which all 3 items are the same color. How many choices for outfits, consisting of one shirt, one hat, and one pair of pants, do I have?

Problem 3:
Type: Algebra, Level: Level 3
Problem statement: Lulu has a quadratic of the form $x^2+bx+44$, where $b$ is a specific positive number. Using her knowledge of how to complete the square, Lulu is able to rewrite this quadratic in the form $(x+m)^2+8$. What is $b$?


In [18]:
# Function to generate CoT using the model
def generate_cot_for_problem(
    model: HookedTransformer, 
    problem: str, 
    temperature: float = 0.4, 
    max_new_tokens: int = 1500, 
    top_p: float = 0.92
):
    """
    Produce a chain-of-thought solution for a single math problem.

    The problem is embedded in a fixed instruction prompt that asks for a
    step-by-step solution with the final answer in \\boxed{} and ends with an
    opened <think> tag; the model's generate() output is returned unchanged.

    Args:
        model: The HookedTransformer model used for generation
        problem: The math problem text
        temperature: Sampling temperature passed to generate()
        max_new_tokens: Upper bound on the number of generated tokens
        top_p: Top-p value passed to generate()
    Returns:
        Whatever model.generate() returns for the built prompt
    """
    sampling_options = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
    )
    cot_prompt = f"""Solve this math problem step by step. Put your final answer in \\boxed{{}}. Problem: {problem} Solution: \n<think>\n"""
    return model.generate(cot_prompt, **sampling_options)

In [19]:
# Generate and print a chain-of-thought solution for the first sampled problem
problem_text = sampled_problems[0]['problem']
cot_solution = generate_cot_for_problem(
    model,
    problem_text,
    temperature=0.6,
    max_new_tokens=1500,
    top_p=0.92,
)
print("\nGenerated Chain-of-Thought solution:", cot_solution, sep="\n")

  0%|          | 0/1500 [00:00<?, ?it/s]


Generated Chain-of-Thought solution:
Solve this math problem step by step. Put your final answer in \boxed{}. Problem: Two triangles are similar. The ratio of their areas is 1:4. If the height of the smaller triangle is 3 cm, how long is the corresponding height of the larger triangle, in centimeters? Solution: 
<think>
First, I recognize that the ratio of the areas of similar triangles is 1:4. This means that the square of the ratio of their corresponding heights is also 1:4.

Let h be the height of the larger triangle. According to the ratio, (h / 3)^2 = 1/4.

Taking the square root of both sides gives h / 3 = 1/2.

Finally, solving for h gives h = 3 * (1/2) = 1.5 cm.
</think>

**Solution:**

We are given two similar triangles with the following information:
- The ratio of their areas is 1:4.
- The height of the smaller triangle is 3 cm.

We need to find the corresponding height of the larger triangle.

**Step 1: Understand the relationship between the areas and heights of similar t

In [35]:
# Function to batch process multiple problems
def batch_generate_cot(
    model, 
    problems, 
    batch_size=4,  # Process this many problems in parallel
    temperature=0.6, 
    max_new_tokens=1500, 
    top_p=0.92, 
    save_every=5,
    save_path=None
):
    """
    Generate CoT solutions for multiple problems in parallel batches.

    A batch whose generation raises is reported and skipped, so the returned
    list may contain fewer entries than `problems`.

    Args:
        model: The HookedTransformer model
        problems: List of problem dictionaries with the keys 'problem', 'type',
            'level' and 'solution'
        batch_size: Number of problems to process in parallel (must be >= 1)
        temperature: The temperature for the model
        max_new_tokens: The maximum number of tokens to generate
        top_p: The top-p value for the model
        save_every: Save intermediate results whenever the zero-based batch
            index is a multiple of this value; 0 or None disables
            intermediate saves
        save_path: Optional path of the JSON file to save results to

    Returns:
        List of dictionaries containing problems and their CoT solutions

    Raises:
        ValueError: If batch_size is smaller than 1
    """
    import json
    import os

    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    def _write_results(records):
        # Write to a sibling temp file and swap it in, so that an interruption
        # mid-write never leaves a truncated JSON file at save_path.
        tmp_path = f"{save_path}.tmp"
        with open(tmp_path, 'w') as f:
            json.dump(records, f, indent=2)
        os.replace(tmp_path, save_path)

    results = []

    # Process problems in batches
    for i in tqdm(range(0, len(problems), batch_size), desc="Processing batches"):
        batch_problems = problems[i:i+batch_size]

        # Prepare prompts for the batch
        prompts = [
            f"""Solve this math problem step by step. Put your final answer in \\boxed{{}}. Problem: {problem['problem']} Solution: \n<think>\n"""
            for problem in batch_problems
        ]

        # Generate solutions for the batch in parallel
        try:
            batch_solutions = model.generate(
                prompts,
                temperature=temperature,
                max_new_tokens=max_new_tokens,
                top_p=top_p,
            )
        except Exception as e:
            # Best effort: report the failure and carry on with the next batch.
            print(f"Error generating solutions for batch {i}: {e}")
            continue

        # generate() hands back a bare string when the batch holds a single
        # prompt. That also happens for a trailing partial batch when
        # batch_size > 1, so check the returned value, not batch_size:
        # zipping over a string would pair the problem with one character.
        if isinstance(batch_solutions, str):
            batch_solutions = [batch_solutions]

        # Process and store results
        for j, (problem, solution) in enumerate(zip(batch_problems, batch_solutions)):
            results.append({
                "problem_id": i + j,
                "problem_text": problem['problem'],
                "problem_type": problem['type'],
                "problem_level": problem['level'],
                "ground_truth_solution": problem['solution'],
                "generated_cot": solution
            })

        # Save intermediate results (save_path / save_every are checked first so
        # that save_every=0 cannot trigger a modulo-by-zero)
        if save_path and save_every and (i // batch_size) % save_every == 0:
            print(f"Saving results to {save_path}...")
            _write_results(results)

    # Save final results
    if save_path:
        _write_results(results)
        print(f"Results saved to {save_path}")

    return results

In [None]:
# Sample 1000 training problems to run through the model.
# Seed every RNG involved so the run is reproducible: `random` drives the problem
# sampling, and torch drives token sampling during generation (temperature > 0).
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
test_problems = sample_math_problems(math_dataset, n=1000)

temperature = 0.6
max_new_tokens = 1500
top_p = 0.92
batch_size = 1

# Generate CoT solutions for the sampled problems; the sampling settings are
# encoded in the output file name, and results are checkpointed after every batch.
cot_results = batch_generate_cot(
    model,
    test_problems,
    batch_size=batch_size,
    temperature=temperature,
    max_new_tokens=max_new_tokens,
    top_p=top_p,
    save_path=f"math_cot_results_t={temperature}_mnt={max_new_tokens}_tp={top_p}.json",
    save_every=1
)