In [1]:
%env CUDA_VISIBLE_DEVICES=0
%env OMP_NUM_THREADS=16

import torch
import transformers

from async_reasoning.solver import AsyncReasoningSolver as Solver
from evals.tts_evaluator import TTSEvaluator
from utils.answer_processing import find_last_valid_expression, check_equality_judge, check_equality_local_model

MODEL_NAME = "Qwen/Qwen3-32B"  # for 48GB gpus, use "Qwen/Qwen3-32B-AWQ" instead
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)
model = transformers.AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, torch_dtype='auto', low_cpu_mem_usage=True, device_map=device)

system_tokens = [key for key in tokenizer.vocab.keys() if key.endswith("SYSTEM") or key.endswith("SYSTEM:")]
writer_forbidden_token_ix = [tokenizer.vocab[x] for x in ["</think>", "<|im_start|>", "<|endoftext|>"] + system_tokens]
thinker_forbidden_token_ix = [tokenizer.vocab[x] for x in ["</think>", "<|im_start|>", "<|im_end|>", "<|endoftext|>"] + system_tokens]


env: CUDA_VISIBLE_DEVICES=0
env: OMP_NUM_THREADS=16


  from .autonotebook import tqdm as notebook_tqdm
/usr/bin/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status
/usr/bin/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status
Loading checkpoint shards: 100%|██████████| 17/17 [01:29<00:00,  5.28s/it]


In [2]:
problem = """Calculate x - x^2 + x^3 for x = 5,6,7,8. Return all 4 answers in \\boxed{ }."""
answer = "105, 186, 201, 456"
solver = Solver(model, 
                tokenizer, 
                writer_forbidden_token_ix=writer_forbidden_token_ix, 
                thinker_forbidden_token_ix=thinker_forbidden_token_ix,
                use_fast_kernel=True)  # Note: set to True if you skipped compiling the kernels otherwise set to False!
                                       # Note: also you cannot use local judge to evaluate results while fast kernels enabled!
writer_output_str, thinker_output_str, token_times, eos_generated = solver.solve(problem, budget=1024, display_generation_in_real_time=True)

# State.thinker_and_writer

## Thinker mode


<think>


Okay, let's see. I need to calculate the expression x - x² + x³ for x values 5, 6, 7, and 8. Hmm, let me start by understanding the expression. The expression is a cubic polynomial: $ x^3 - x^2 + x $. Wait, actually, the order is x minus x squared plus x cubed. So rearranged, it's $ x^3 - x^2 + x $. That might make it easier to compute.

First, I'll compute each term for x=5. Let's do that. For x=5:
- $ x^3 = 5^3 = 125 $
- $ x^2 = 5^2 = 25 $
- $ x = 5 $
So, $ 125 - 25 + 5 = 105 $. That's the result for x=5.

Next, x=6:
- $ 6^3 = 216 $
- $ 6^2 = 36 $
- $ 6 = 6 $
So, $ 216 - 36 + 6 = 186 $.

Now x=7:
- $ 7^3 = 343 $
- $ 7^2 = 49 $
- $ 7 = 7 $
Result: $ 343 - 49 + 7 = 301 $.

Finally, x=8:
- $ 8^3 = 512 $
- $ 8^2 = 64 $
- $ 8 = 8 $
So, $ 512 - 64 + 8 = 456 $.

Let me double-check each calculation to make sure I didn't make any arithmetic errors. For x=5: 125-25=100, 100+5=105. Correct. For x=6: 216-36=180, 180+6=186. Correct. For x=7: 343-49=294, 294+7=301. Correct. For x=8: 512-64=448, 448+8=456. Correct.

All calculations seem accurate. The final

## Writer mode




I will calculate the expression $ x - x^2 + x^3 $ for each value of $ x $ (5, 6, 7, 8) step by step.

---

**For $ x = 5 $:**
- $ x^3 = 5^3 = 125 $
- $ x^2 = 5^2 = 25 $
- $ x = 5 $
- Result: $ 125 - 25 + 5 = 105 $

**For $ x = 6 $:**
- $ x^3 = 6^3 = 216 $
- $ x^2 = 6^2 = 36 $
- $ x = 6 $
- Result: $ 216 - 36 + 6 = 186 $

**For $ x = 7 $:**
- $ x^3 = 7^3 = 343 $
- $ x^2 = 7^2 = 49 $
- $ x = 7 $
- Result: $ 343 - 49 + 7 = 301 $

**For $ x = 8 $:**
- $ x^3 = 8^3 = 512 $
- $ x^2 = 8^2 = 64 $
- $ x = 8 $
- Result: $ 512 - 64 + 8 = 456 $

---

Final answers:  
$$
\boxed{105, 186, 301, 456}
$$<|im_end|>

In [3]:
response = find_last_valid_expression(writer_output_str, extract_result=lambda x: x[7:-1])
use_api_not_local = True # set to True to use the canonical openai judge
                         # do not forget to populate utils/api_config.json before using api judge
                         # reminder: you cannot use local judge with fast kernels
if use_api_not_local:
    is_equal = check_equality_judge(response, answer)
else:
    is_equal = check_equality_local_model(model, tokenizer, response, answer)
print(f"Answer is correct: {is_equal}")

Answer is correct: False


In [4]:
from IPython.display import display, Audio
evaluator = TTSEvaluator()
chunks, audio = evaluator.get_chunks_with_tts(token_times, k_chunks=5, return_audio=True)
metrics = evaluator(**chunks, add_tts_in_parrallel=True, return_delays=False)

indent = 2
for k, v in metrics.items():
    pad = ">" * indent
    if isinstance(v, dict):
        print(f"{pad} {k}:")
        pretty_dict(v, indent + 4)
    else:
        print(f"{pad} {k}: {v}")
display(Audio(data=audio['frame'], rate=audio['frame_rate']))

GPT2InferenceModel has generative capabilities, as `prepare_inputs_for_generation` is explicitly overwritten. However, it doesn't directly inherit from `GenerationMixin`. From 👉v4.50👈 onwards, `PreTrainedModel` will NOT inherit from `GenerationMixin`, and this model will lose the ability to call `generate` and other related functions.
  - If you are the owner of the model architecture code, please modify your model class such that it inherits from `GenerationMixin` (after `PreTrainedModel`, otherwise you'll get an exception).
  - If you are not the owner of the model architecture class, please contact the model code owner to update it.




  WeightNorm.apply(module, name, dim)


------------------------------------------------------
Free memory : 15.430420 (GigaBytes)  
Total memory: 79.138428 (GigaBytes)  
Requested memory: 0.167969 (GigaBytes) 
Setting maximum total tokens (input + output) to 1024 
WorkSpace: 0x7f63fa000000 
------------------------------------------------------
>> delay_to_first: 5.268616685643792
>> total_delay: 5.268616685643792
>> total_delay_mius1: 4.268616685643792
>> duration_no_delay: 122.653625
>> duration_with_delay: 127.9222416856438
>> steps_to_first: 40
>> delay_steps: 163
>> delay_minus10steps: 90
