In [None]:
import os
os.environ["PIP_CACHE_DIR"] = "/workspace/.cache/pip"
os.environ["HF_HOME"] = "/workspace/.cache/huggingface"

import sys
sys.path.append('.')
from src import (
    load_model_and_tokenizer, load_gsm8k_dataset,
    build_chat_input, generate_batch, save_results_jsonl,
    MODEL_NAME
)
from tqdm import tqdm
import json

In [2]:
!pip install --no-cache-dir -U "transformers>=4.51.0" accelerate datasets torch pandas tqdm nnsight

Collecting transformers>=4.51.0
  Downloading transformers-4.57.3-py3-none-any.whl.metadata (43 kB)
Collecting accelerate
  Downloading accelerate-1.12.0-py3-none-any.whl.metadata (19 kB)
Collecting datasets
  Downloading datasets-4.4.2-py3-none-any.whl.metadata (19 kB)
Collecting torch
  Downloading torch-2.9.1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (30 kB)
Collecting pandas
  Downloading pandas-2.3.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (91 kB)
Collecting tqdm
  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting nnsight
  Downloading nnsight-0.5.13-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl.metadata (15 kB)
Collecting huggingface-hub<1.0,>=0.34.0 (from transformers>=4.51.0)
  Downloading huggingface_hub-0.36.0-py3-none-any.whl.metadata (14 kB)
Collecting regex!=2019.12.17 (from transformers>=4.51.0)
  Downloading regex-2025.11.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_

In [29]:
!pip install tqdm

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [3]:
!pip install --no-cache-dir typing-extensions --upgrade

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [4]:
!pip uninstall -y torchvision

Found existing installation: torchvision 0.19.1+cu124
Uninstalling torchvision-0.19.1+cu124:
  Successfully uninstalled torchvision-0.19.1+cu124
[0m

In [1]:
import typing_extensions, torch
# print("typing_extensions:", typing_extensions.__version__)
print("torch:", torch.__version__)


torch: 2.9.1+cu128


In [None]:
train_problems = load_gsm8k_dataset(split="train", n_problems=500)


In [None]:
model, tok, config = load_model_and_tokenizer(MODEL_NAME)

In [None]:
def run_and_save_jsonl(
    problems,
    out_path="data/rollouts/gsm8k_rollouts.jsonl",
    n_problems=500,
    n_rollouts=10,
    batch_size=16,
    max_new_tokens=800,
    base_seed=42,
):
    """Generate rollouts for GSM8K problems."""
    problems = problems[:n_problems]
    records = []

    for r in range(n_rollouts):
        rollout_seed = base_seed + r

        for start in tqdm(range(0, n_problems, batch_size),
                          desc=f"rollout {r+1}/{n_rollouts}"):
            batch = problems[start:start+batch_size]
            q_list = [(p["question"], "baseline", None) for p in batch]

            prompts, gen_texts, _ = generate_batch(
                tok, model, q_list,
                max_new_tokens=max_new_tokens,
                enable_thinking=True,
                seed=rollout_seed,
            )

            for i, (p, prompt, gen) in enumerate(zip(batch, prompts, gen_texts)):
                records.append({
                    "problem_idx": p["idx"],
                    "rollout_idx": r,
                    "question": p["question"],
                    "answer": p["answer"],
                    "prompt": prompt,
                    "gen_text": gen,
                    "gen_cfg": {
                        "max_new_tokens": max_new_tokens,
                        "temperature": 0.6,
                        "top_p": 0.95,
                        "top_k": 20,
                        "seed": rollout_seed,
                    }
                })

    save_results_jsonl(records, out_path)


In [None]:
run_and_save_jsonl(
    train_problems,
    out_path="gsm8k_rollouts.jsonl",
    n_problems=500,
    n_rollouts=10,
    batch_size=16,
    max_new_tokens=800,
    base_seed=42,
)


rollout 1/10: 100%|██████████| 7/7 [09:44<00:00, 83.47s/it]
rollout 2/10: 100%|██████████| 7/7 [09:43<00:00, 83.41s/it]
rollout 3/10: 100%|██████████| 7/7 [09:44<00:00, 83.48s/it]
rollout 4/10: 100%|██████████| 7/7 [09:42<00:00, 83.28s/it]
rollout 5/10: 100%|██████████| 7/7 [09:43<00:00, 83.33s/it]
rollout 6/10: 100%|██████████| 7/7 [09:43<00:00, 83.40s/it]
rollout 7/10: 100%|██████████| 7/7 [09:44<00:00, 83.45s/it]
rollout 8/10: 100%|██████████| 7/7 [09:43<00:00, 83.41s/it]
rollout 9/10: 100%|██████████| 7/7 [09:42<00:00, 83.25s/it]
rollout 10/10: 100%|██████████| 7/7 [09:42<00:00, 83.23s/it]

Wrote: aqua_rollouts.jsonl





In [None]:
from src import generate_one
prompt, gen_text, _ = generate_one(tok, model, train_problems[2]["question"], "baseline", None, max_new_tokens=2000)
print((prompt, gen_text))

('<|im_start|>system\nSolve the math question. Think step by step. At the end, output exactly one line: Answer: X where X is the numerical answer (an integer).<|im_end|>\n<|im_start|>user\nQuestion:\n{\'question\': \'Betty is saving money for a new wallet which costs $100. Betty has only half of the money she needs. Her parents decided to give her $15 for that purpose, and her grandparents twice as much as her parents. How much more money does Betty need to buy the wallet?\', \'answer\': "In the beginning, Betty has only 100 / 2 = $<<100/2=50>>50.\\nBetty\'s grandparents gave her 15 * 2 = $<<15*2=30>>30.\\nThis means, Betty needs 100 - 50 - 30 - 15 = $<<100-50-30-15=5>>5 more.\\n#### 5"}\n<|im_end|>\n<|im_start|>assistant\n', "<think>\nOkay, let's see. The problem is about Betty saving money for a wallet that costs $100. She has half of the money she needs. Her parents give her $15, and her grandparents give her twice as much as her parents. The question is how much more money she need