In [1]:
import os
# Point pip's download cache at a directory under /workspace.
os.environ["PIP_CACHE_DIR"] = "/workspace/.cache/pip"
# Point the Hugging Face cache root at /workspace as well. This runs in the first cell,
# before any Hugging Face library is imported, so later imports see the variable.
os.environ["HF_HOME"] = "/workspace/.cache/huggingface"

In [2]:
!pip install --no-cache-dir -U "transformers>=4.51.0" accelerate datasets torch pandas tqdm nnsight

Collecting transformers>=4.51.0
  Downloading transformers-4.57.3-py3-none-any.whl.metadata (43 kB)
Collecting accelerate
  Downloading accelerate-1.12.0-py3-none-any.whl.metadata (19 kB)
Collecting datasets
  Downloading datasets-4.4.2-py3-none-any.whl.metadata (19 kB)
Collecting torch
  Downloading torch-2.9.1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (30 kB)
Collecting pandas
  Downloading pandas-2.3.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (91 kB)
Collecting tqdm
  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting nnsight
  Downloading nnsight-0.5.13-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl.metadata (15 kB)
Collecting huggingface-hub<1.0,>=0.34.0 (from transformers>=4.51.0)
  Downloading huggingface_hub-0.36.0-py3-none-any.whl.metadata (14 kB)
Collecting regex!=2019.12.17 (from transformers>=4.51.0)
  Downloading regex-2025.11.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_

In [29]:
!pip install tqdm

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [3]:
!pip install --no-cache-dir typing-extensions --upgrade

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [4]:
!pip uninstall -y torchvision

Found existing installation: torchvision 0.19.1+cu124
Uninstalling torchvision-0.19.1+cu124:
  Successfully uninstalled torchvision-0.19.1+cu124
[0m

In [1]:
# Post-install sanity check: both packages must import cleanly, and the active
# torch build is printed so the run is traceable to a specific version.
import typing_extensions, torch
# print("typing_extensions:", typing_extensions.__version__)
print("torch:", torch.__version__)


torch: 2.9.1+cu128


In [38]:
from datasets import load_dataset
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from tqdm import tqdm
import json

In [3]:
# Load the "raw" configuration of the deepmind/aqua_rat dataset and keep a handle on
# its train split (97,467 examples per the split-generation log).
ds = load_dataset("deepmind/aqua_rat", "raw")
train = ds["train"]

def format_aqua_example(ex):
    """Pull the prompt-relevant fields out of one dataset example.

    Args:
        ex: Mapping with a ``"question"`` string, an ``"options"`` iterable of
            strings, and a ``"correct"`` string.

    Returns:
        A ``(question, options, gold)`` tuple: the whitespace-stripped question,
        the options joined one per line, and the whitespace-stripped gold answer.
    """
    return (
        ex["question"].strip(),
        "\n".join(ex["options"]),
        ex["correct"].strip(),
    )

def make_user(q, opts):
    """Build the user-turn text: a question section, a blank line, then an options section."""
    question_section = f"Question:\n{q}\n"
    options_section = f"Options:\n{opts}\n"
    return question_section + "\n" + options_section

README.md: 0.00B [00:00, ?B/s]

raw/train-00000-of-00001.parquet:   0%|          | 0.00/25.4M [00:00<?, ?B/s]

raw/test-00000-of-00001.parquet:   0%|          | 0.00/74.0k [00:00<?, ?B/s]

raw/validation-00000-of-00001.parquet:   0%|          | 0.00/76.1k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/97467 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/254 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/254 [00:00<?, ? examples/s]

In [4]:
# Keep the first training problem around as a fixed example for single-prompt smoke tests.
q, opts, correct = format_aqua_example(train[0])

In [5]:
# System prompt used for every problem: requests step-by-step reasoning followed by a
# single final "Answer: X" line naming one of the five option letters.
SYSTEM = (
  "Solve the multiple-choice question. "
  "Think step by step. "
  "At the end, output exactly one line: Answer: X "
  "where X is one of A, B, C, D, E."
)

In [25]:
def build_chat_input(tokenizer, q, opts, enable_thinking=True):
    """Render the system and user turns into one prompt string via the chat template.

    Args:
        tokenizer: Object exposing ``apply_chat_template``.
        q: Question text.
        opts: Answer options as a single string.
        enable_thinking: Passed through to the chat template unchanged.

    Returns:
        Whatever ``apply_chat_template`` returns for ``tokenize=False`` with the
        generation prompt appended (the untokenized prompt text).
    """
    conversation = [
        dict(role="system", content=SYSTEM),
        dict(role="user", content=make_user(q, opts)),
    ]
    return tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=enable_thinking,  # template-level switch for the model's thinking mode
    )


In [35]:
# Checkpoint used for all rollouts in this notebook.
MODEL = "Qwen/Qwen3-0.6B"  # later swap to 1.7B
tok = AutoTokenizer.from_pretrained(MODEL)
# Left padding keeps every prompt flush against its continuation, which generate_batch
# relies on when it removes the prompt by slicing at a single column index.
tok.padding_side = "left"
# Fall back to the EOS token for padding when the tokenizer defines no pad token.
if tok.pad_token_id is None:
    tok.pad_token = tok.eos_token
# NOTE(review): recent transformers releases may prefer `dtype=` over `torch_dtype=`;
# kept as-is because the install cell allows versions from 4.51.0 — confirm before changing.
model = AutoModelForCausalLM.from_pretrained(MODEL, torch_dtype="auto", device_map="auto")
model.eval()

@torch.inference_mode()
def generate_one(q, opts, max_new_tokens=1024, *, enable_thinking=True,
                 temperature=0.6, top_p=0.95, top_k=20):
    """Sample one completion for a single question.

    Uses the module-level ``tok`` and ``model``.

    Args:
        q: Question text.
        opts: Answer options as a single string.
        max_new_tokens: Upper bound on the number of generated tokens.
        enable_thinking: Forwarded to ``build_chat_input``.
        temperature, top_p, top_k: Sampling parameters forwarded to
            ``model.generate``; the defaults match ``generate_batch``.

    Returns:
        ``(prompt, gen_text)``: the rendered prompt string and the decoded
        continuation. Special tokens are kept in ``gen_text``.
    """
    prompt = build_chat_input(tok, q, opts, enable_thinking=enable_thinking)
    inputs = tok([prompt], return_tensors="pt").to(model.device)
    out = model.generate(
        **inputs,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        max_new_tokens=max_new_tokens,
        eos_token_id=tok.eos_token_id,
        pad_token_id=tok.pad_token_id,
    )
    # generate() returns prompt + continuation; decode only the continuation.
    # (The previous version also decoded the full sequence into an unused variable.)
    prompt_len = inputs["input_ids"].shape[1]
    gen_ids = out[0][prompt_len:]
    gen_text = tok.decode(gen_ids, skip_special_tokens=False)
    return prompt, gen_text

@torch.inference_mode()
def generate_batch(q_list, opts_list, *, max_new_tokens=512, enable_thinking=True,
                   temperature=0.6, top_p=0.95, top_k=20, seed=None):
    """Sample one completion per question for a batch of questions.

    Uses the module-level ``tok`` and ``model``. Prompts are padded to a common
    length by the tokenizer, so every prompt occupies the same number of columns
    and the continuation can be sliced off at a single index.

    Args:
        q_list: Question texts.
        opts_list: Option strings, parallel to ``q_list``.
        max_new_tokens: Upper bound on generated tokens per sequence.
        enable_thinking: Forwarded to ``build_chat_input``.
        temperature, top_p, top_k: Sampling parameters for ``model.generate``.
        seed: If given, the torch (and CUDA) RNGs are re-seeded right before
            generation so the batch is reproducible.

    Returns:
        ``(prompts, gen_texts)``: the rendered prompt strings and the decoded
        continuations, in input order. Special tokens are kept, but each
        continuation is cut right after its first EOS token so the padding that
        ``generate`` appends to rows finishing early is not written out.

    Raises:
        ValueError: If ``q_list`` and ``opts_list`` differ in length.
    """
    q_list, opts_list = list(q_list), list(opts_list)
    if len(q_list) != len(opts_list):
        raise ValueError(
            f"q_list and opts_list must have the same length "
            f"(got {len(q_list)} and {len(opts_list)})"
        )
    if not q_list:
        # Nothing to generate; avoid handing an empty batch to the tokenizer/model.
        return [], []

    prompts = [build_chat_input(tok, q, o, enable_thinking=enable_thinking)
               for q, o in zip(q_list, opts_list)]

    inputs = tok(
        prompts,
        return_tensors="pt",
        padding=True,
        truncation=False,
    ).to(model.device)

    # Deterministic sampling (works on older transformers)
    if seed is not None:
        torch.manual_seed(seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(seed)

    out = model.generate(
        **inputs,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        max_new_tokens=max_new_tokens,
        eos_token_id=tok.eos_token_id,
        pad_token_id=tok.pad_token_id,
    )

    input_len = inputs["input_ids"].shape[1]
    eos_id = tok.eos_token_id
    gen_texts = []
    for row in out[:, input_len:].tolist():
        # Rows that hit EOS before the longest row are filled with pad tokens up to the
        # batch length. Keep everything through the first EOS and drop that filler; this
        # also works when the pad token is the EOS token itself.
        if eos_id is not None and eos_id in row:
            row = row[: row.index(eos_id) + 1]
        gen_texts.append(tok.decode(row, skip_special_tokens=False))
    return prompts, gen_texts


In [40]:
def run_and_save_jsonl(
    train_split,
    out_path="aqua_rollouts.jsonl",
    n_problems=100,
    n_rollouts=10,
    batch_size=16,
    max_new_tokens=2048,
    base_seed=42,
    temperature=0.6,
    top_p=0.95,
    top_k=20,
):
    """Generate several sampled rollouts per problem and write them as JSON Lines.

    For each rollout index ``r`` the first ``n_problems`` examples are processed in
    batches through ``generate_batch`` with seed ``base_seed + r``. One JSON object
    per (problem, rollout) pair is appended to ``out_path``, which is overwritten
    at the start of the call.

    Args:
        train_split: Indexable, sized collection of dataset examples.
        out_path: Destination JSONL file.
        n_problems: Number of leading examples to use; clamped to the split size.
        n_rollouts: Number of independent samples per problem.
        batch_size: Number of prompts per ``generate_batch`` call.
        max_new_tokens: Generation budget per sample.
        base_seed: Seed of rollout 0; rollout ``r`` uses ``base_seed + r``.
        temperature, top_p, top_k: Sampling parameters. They are passed to
            ``generate_batch`` and recorded in each record's ``gen_cfg``, so the
            stored metadata always matches what was actually used.
    """
    # Clamp so a small split yields fewer problems instead of an IndexError.
    n_problems = min(n_problems, len(train_split))

    # Pick first N; replace with random sampling if you want
    parsed = [format_aqua_example(train_split[i]) for i in range(n_problems)]  # (q, opts, gold)

    with open(out_path, "w", encoding="utf-8") as f:
        for r in range(n_rollouts):
            rollout_seed = base_seed + r
            gen_cfg = {
                "max_new_tokens": max_new_tokens,
                "temperature": temperature,
                "top_p": top_p,
                "top_k": top_k,
                "seed": rollout_seed,
            }

            for start in tqdm(range(0, n_problems, batch_size),
                              desc=f"rollout {r+1}/{n_rollouts}"):
                batch = parsed[start:start + batch_size]
                q_list = [x[0] for x in batch]
                o_list = [x[1] for x in batch]
                golds = [x[2] for x in batch]

                # Note: every batch of a rollout is generated with the same seed, so the
                # RNG is reset to the same state before each batch.
                prompts, gen_texts = generate_batch(
                    q_list, o_list,
                    max_new_tokens=max_new_tokens,
                    enable_thinking=True,
                    temperature=temperature,
                    top_p=top_p,
                    top_k=top_k,
                    seed=rollout_seed,
                )

                for i, (q, opts, gold, prompt, gen) in enumerate(
                    zip(q_list, o_list, golds, prompts, gen_texts)
                ):
                    record = {
                        "problem_idx": start + i,
                        "rollout_idx": r,
                        "question": q,
                        "options": opts,
                        "gold": gold,
                        "prompt": prompt,
                        "gen_text": gen,
                        "gen_cfg": gen_cfg,
                    }
                    f.write(json.dumps(record) + "\n")

                # Push finished batches to disk so an interrupted long run keeps them.
                f.flush()

    print(f"Wrote: {out_path}")


In [41]:
# Full run with the defaults (100 problems x 10 rollouts). The recorded progress bars
# show roughly 9m43s per rollout, so expect well over an hour and a half in total.
run_and_save_jsonl(train)

rollout 1/10: 100%|██████████| 7/7 [09:44<00:00, 83.47s/it]
rollout 2/10: 100%|██████████| 7/7 [09:43<00:00, 83.41s/it]
rollout 3/10: 100%|██████████| 7/7 [09:44<00:00, 83.48s/it]
rollout 4/10: 100%|██████████| 7/7 [09:42<00:00, 83.28s/it]
rollout 5/10: 100%|██████████| 7/7 [09:43<00:00, 83.33s/it]
rollout 6/10: 100%|██████████| 7/7 [09:43<00:00, 83.40s/it]
rollout 7/10: 100%|██████████| 7/7 [09:44<00:00, 83.45s/it]
rollout 8/10: 100%|██████████| 7/7 [09:43<00:00, 83.41s/it]
rollout 9/10: 100%|██████████| 7/7 [09:42<00:00, 83.25s/it]
rollout 10/10: 100%|██████████| 7/7 [09:42<00:00, 83.23s/it]

Wrote: aqua_rollouts.jsonl





In [22]:
# Single-prompt smoke test on the first training problem; prints the (prompt, gen_text) tuple.
print(generate_one(q, opts, max_new_tokens=2000))

("<|im_start|>system\nSolve the multiple-choice question. Think step by step. At the end, output exactly one line: Answer: X where X is one of A, B, C, D, E.<|im_end|>\n<|im_start|>user\nQuestion:\nTwo friends plan to walk along a 43-km trail, starting at opposite ends of the trail at the same time. If Friend P's rate is 15% faster than Friend Q's, how many kilometers will Friend P have walked when they pass each other?\n\nOptions:\nA)21\nB)21.5\nC)22\nD)22.5\nE)23\n<|im_end|>\n<|im_start|>assistant\n", "<think>\nOkay, let's see. So there are two friends walking along a 43-km trail, starting from opposite ends at the same time. Friend P is faster than Friend Q by 15%. The question is asking how many kilometers Friend P has walked when they meet. The options are A to E.\n\nFirst, I need to figure out their speeds. Let me denote Friend Q's speed as a certain value. Since Friend P is 15% faster, his speed would be 1.15 times Q's speed. Let me write that down: P's speed = 1.15 * Q's speed.

In [19]:
# Inspect the tokenizer's special tokens, in particular which tokens serve as EOS and pad.
tok.special_tokens_map

{'eos_token': '<|im_end|>',
 'pad_token': '<|endoftext|>',
 'additional_special_tokens': ['<|im_start|>',
  '<|im_end|>',
  '<|object_ref_start|>',
  '<|object_ref_end|>',
  '<|box_start|>',
  '<|box_end|>',
  '<|quad_start|>',
  '<|quad_end|>',
  '<|vision_start|>',
  '<|vision_end|>',
  '<|vision_pad|>',
  '<|image_pad|>',
  '<|video_pad|>']}

In [21]:
# `eos_ids` was not assigned in any remaining cell, so this cell raised NameError on a
# fresh kernel. It is rebuilt here from the tokenizer's EOS id and the id of the
# "<|im_end|>" token, which reproduces the recorded output because "<|im_end|>" is this
# tokenizer's EOS token.
# NOTE(review): the original defining cell is gone — confirm this is the intended definition.
eos_ids = [tok.eos_token_id, tok.convert_tokens_to_ids("<|im_end|>")]
eos_ids

[151645, 151645]