In [1]:
# Reload imported modules automatically before each cell runs, so edits to
# local source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
%%html
<style>
/* Drop the opaque background drawn behind ipywidget cell output. */
.cell-output-ipywidget-background {
    background-color: transparent !important;
}
/* Map the Jupyter widget colour and font size onto the VS Code editor theme variables. */
:root {
    --jp-widgets-color: var(--vscode-editor-foreground);
    --jp-widgets-font-size: var(--vscode-editor-font-size);
}  
</style>

In [3]:
import art
import asyncio
from dotenv import load_dotenv
import json
import random
import re
from typing import TypedDict

from art.local import LocalBackend

# Read variables from a .env file into the process environment (python-dotenv).
load_dotenv()


class TemporalCluePuzzle(TypedDict):
    """Schema of one entry in the temporal-clue puzzles JSON file."""

    # Presumably the number of clues contained in the puzzle; not read by the
    # code in this notebook -- TODO confirm against the dataset.
    num_clues: int
    # Full puzzle text; sent verbatim as the single user message in `rollout`.
    prompt: str
    # Maps each answer key to its expected answer string. `rollout` looks for
    # "<key>. <answer>" in the completion and compares case-insensitively.
    solution: dict[str, str]


puzzles_path = "../data/temporal-clue/puzzles.json"
# Read through a context manager so the file handle is closed as soon as the
# JSON is parsed; the encoding is pinned because JSON files are UTF-8 and the
# platform default may differ.
with open(puzzles_path, encoding="utf-8") as puzzles_file:
    puzzles: list[TemporalCluePuzzle] = json.load(puzzles_file)

# Fixed positional split: the first 64 puzzles are used for validation, the
# next 64 are held out as a test set, and everything after that is training data.
val_puzzles = puzzles[:64]
test_puzzles = puzzles[64:128]
train_puzzles = puzzles[128:]

# Seed the global RNG before shuffling so the training order is the same on
# every run (the training loop indexes into this list by step number).
random.seed(42)
random.shuffle(train_puzzles)


async def rollout(model: art.Model, puzzle: TemporalCluePuzzle) -> art.Trajectory:
    """Sample one completion for ``puzzle`` and score it against its solution.

    The puzzle prompt is sent as a single user message through the client
    returned by ``model.openai_client()``. For every ``key -> value`` pair in
    ``puzzle["solution"]`` the completion is searched for ``"<key>. <answer>"``;
    the last occurrence is taken as the model's answer and compared with
    ``value`` case-insensitively after stripping surrounding whitespace.

    Args:
        model: Model to sample from; ``model.name`` is passed as the ``model``
            argument of the chat-completion request.
        puzzle: Puzzle providing the prompt and the expected answers.

    Returns:
        A trajectory holding the prompt message followed by the sampled choice.
        ``reward`` and the ``acc`` metric are both the fraction of solution
        entries answered correctly (between 0 and 1).

    Raises:
        AssertionError: If the sampled message has no string content.
        ZeroDivisionError: If ``puzzle["solution"]`` is empty.
    """
    messages: art.Messages = [{"role": "user", "content": puzzle["prompt"]}]
    client = model.openai_client()
    chat_completion = await client.chat.completions.create(
        messages=messages, model=model.name
    )
    choice = chat_completion.choices[0]
    content = choice.message.content
    assert isinstance(content, str)

    solution = puzzle["solution"]
    num_correct = 0
    for key, value in solution.items():
        # Escape the key so any regex metacharacter it contains is matched
        # literally instead of altering the pattern.
        answer_pattern = rf"{re.escape(key)}\. ([A-Za-z \.:-]+)"
        if matches := re.findall(answer_pattern, content):
            # Only the last occurrence counts -- presumably so that a final
            # answer overrides anything stated earlier in the completion.
            match = matches[-1]
            if match.strip().lower() == value.lower():
                num_correct += 1

    # Reward and reported accuracy are the same quantity.
    reward = acc = num_correct / len(solution)
    return art.Trajectory(
        messages_and_choices=[*messages, choice], reward=reward, metrics={"acc": acc}
    )


# Trainable model definition. `name` and `project` also appear in the on-disk
# run directory (.art/<project>/models/<name>/...) seen in the error output.
model = art.TrainableModel(
    name="002",
    project="temporal-clue",
    base_model="willcb/Qwen3-14B",
    _internal_config={
        # Presumably forwarded to the vLLM inference engine -- TODO confirm.
        # The logged output shows two VllmWorker ranks, consistent with
        # tensor_parallel_size=2.
        "engine_args": {
            "tensor_parallel_size": 2,
            "gpu_memory_utilization": 0.7,
            "max_num_seqs": 512,
        },
        # Presumably selects the torchtune model builder used for training and
        # enables asynchronous weight syncing -- TODO confirm against ART docs.
        "torchtune_args": {
            "model": "qwen3_14b_instruct",
            "model_type": "QWEN3",
            "async_weight_syncing": True,
        },
    },
)
# Register the model with a local backend before any rollouts or training.
# Top-level `await` works here because the code runs in an IPython cell.
backend = LocalBackend()
await model.register(backend)

stride = 4
for i in range(await model.get_step(), 1_000):
    val_groups, train_groups = await asyncio.gather(
        art.gather_trajectory_groups(
            (
                art.TrajectoryGroup(rollout(model, puzzle) for _ in range(1))
                for puzzle in val_puzzles
            ),
            pbar_desc="val",
            pbar_total_completion_tokens=False,
        ),
        art.gather_trajectory_groups(
            (
                art.TrajectoryGroup(rollout(model, puzzle) for _ in range(16))
                for puzzle in train_puzzles[i * stride : (i + 1) * stride]
            ),
            pbar_desc="train",
            pbar_total_completion_tokens=False,
        ),
    )
    await model.log(val_groups)
    await model.delete_checkpoints()
    await model.train(
        train_groups,
        config=art.TrainConfig(learning_rate=5e-6),
    )

[34m[1mwandb[0m: Currently logged in as: [33mbradhilton[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


INFO 07-10 21:49:15 [__init__.py:244] Automatically detected platform cuda.
INFO 07-10 21:49:21 [__init__.py:244] Automatically detected platform cuda.
/home/ubuntu/.cache/huggingface/hub/models--willcb--Qwen3-14B/snapshots/ad504088bc654f8e9e4f0af2461743db6877fa32
INFO 07-10 21:49:30 [config.py:823] This model supports multiple tasks: {'embed', 'classify', 'generate', 'score', 'reward'}. Defaulting to 'generate'.
INFO 07-10 21:49:31 [config.py:1946] Defaulting to use mp for distributed inference
INFO 07-10 21:49:31 [config.py:2195] Chunked prefill is enabled with max_num_batched_tokens=2048.
INFO 07-10 21:49:34 [__init__.py:244] Automatically detected platform cuda.
INFO 07-10 21:49:36 [core.py:455] Waiting for init message from front-end.
INFO 07-10 21:49:36 [core.py:70] Initializing a V1 LLM engine (v0.9.1) with config: model='willcb/Qwen3-14B', speculative_config=None, tokenizer='willcb/Qwen3-14B', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config

Loading safetensors checkpoint shards:   0% Completed | 0/6 [00:00<?, ?it/s]
Loading safetensors checkpoint shards:  17% Completed | 1/6 [00:00<00:02,  1.68it/s]
Loading safetensors checkpoint shards:  33% Completed | 2/6 [00:01<00:02,  1.42it/s]
Loading safetensors checkpoint shards:  50% Completed | 3/6 [00:02<00:02,  1.38it/s]
Loading safetensors checkpoint shards:  67% Completed | 4/6 [00:02<00:01,  1.32it/s]
Loading safetensors checkpoint shards:  83% Completed | 5/6 [00:03<00:00,  1.38it/s]
Loading safetensors checkpoint shards: 100% Completed | 6/6 [00:04<00:00,  1.37it/s]
Loading safetensors checkpoint shards: 100% Completed | 6/6 [00:04<00:00,  1.38it/s]
[1;36m(VllmWorker rank=0 pid=71557)[0;0m 


[1;36m(VllmWorker rank=0 pid=71557)[0;0m INFO 07-10 21:49:48 [default_loader.py:272] Loading weights took 4.38 seconds
[1;36m(VllmWorker rank=1 pid=71560)[0;0m INFO 07-10 21:49:48 [default_loader.py:272] Loading weights took 4.34 seconds
[1;36m(VllmWorker rank=0 pid=71557)[0;0m INFO 07-10 21:49:49 [gpu_model_runner.py:1624] Model loading took 13.8818 GiB and 4.778461 seconds
[1;36m(VllmWorker rank=1 pid=71560)[0;0m INFO 07-10 21:49:49 [gpu_model_runner.py:1624] Model loading took 13.8818 GiB and 4.848332 seconds
[1;36m(VllmWorker rank=0 pid=71557)[0;0m INFO 07-10 21:49:57 [backends.py:462] Using cache directory: /home/ubuntu/.cache/vllm/torch_compile_cache/b081e4db96/rank_0_0 for vLLM's torch.compile
[1;36m(VllmWorker rank=0 pid=71557)[0;0m INFO 07-10 21:49:57 [backends.py:472] Dynamo bytecode transform time: 8.39 s
[1;36m(VllmWorker rank=1 pid=71560)[0;0m INFO 07-10 21:49:57 [backends.py:462] Using cache directory: /home/ubuntu/.cache/vllm/torch_compile_cache/b081e4db96/

val:   0%|          | 0/64 [00:00<?, ?it/s]

train:   0%|          | 0/64 [00:00<?, ?it/s]

Packed 64 trajectories into 64 sequences of length 26624


train:   0%|          | 0/32 [00:00<?, ?it/s]

RuntimeError: Train process exited early. See /home/ubuntu/sky_workdir/.art/temporal-clue/models/002/logs/train.log for details.