# 09. Trajectory to Training Dataset

Convert trajectory logs (JSONL event traces) into training-ready prompt/completion samples by pairing tool calls with tool results.


In [None]:
from __future__ import annotations

import json
import os
import math
import random
import statistics
from pathlib import Path


def find_project_root(start: Path) -> Path:
    """Return the closest directory at or above *start* that looks like the project root.

    A directory qualifies when it contains both ``README.md`` and
    ``main_langgraph.py``. *start* itself is checked first, then each of its
    parents from nearest to farthest. If none qualifies, *start* is returned
    unchanged.
    """
    marker_names = ('README.md', 'main_langgraph.py')
    lineage = (start, *start.parents)
    matches = (
        directory
        for directory in lineage
        if all((directory / name).exists() for name in marker_names)
    )
    # Fall back to the starting directory when no ancestor holds both markers.
    return next(matches, start)


# Locate the project root starting from the current working directory, make
# it the working directory, and report which directory was chosen.
_launch_dir = Path.cwd().resolve()
PROJECT_ROOT = find_project_root(_launch_dir)
os.chdir(PROJECT_ROOT)
print('PROJECT_ROOT =', PROJECT_ROOT)


In [None]:
# Choose the trace to convert: every run_*.jsonl anywhere below test_outputs/
# is a candidate, and the path that sorts last is the one used.
trace_candidates = sorted((PROJECT_ROOT / 'test_outputs').glob('**/run_*.jsonl'))

if trace_candidates:
    jsonl_path = trace_candidates[-1]
    # Decode explicitly as UTF-8 instead of the platform's locale encoding, and
    # iterate the file line by line rather than using str.splitlines(): the
    # latter also splits on characters such as U+2028/U+2029 that may appear
    # unescaped inside a JSON string, which would cut one record in two.
    with jsonl_path.open(encoding='utf-8') as trace_file:
        # Whitespace-only lines are skipped; every other line must be one JSON value.
        rows = [json.loads(line) for line in trace_file if line.strip()]
    print('Using trace:', jsonl_path)
else:
    # No trace on disk: fall back to a minimal call/result pair so the rest of
    # the notebook can still run.
    rows = [
        {'run_id': 'mock', 'idx': 1, 'event_type': 'tool_call', 'payload': {'tool': 'web_search', 'kwargs': {'query': 'AI planning'}}},
        {'run_id': 'mock', 'idx': 2, 'event_type': 'tool_result', 'payload': {'tool': 'web_search', 'ok': True, 'latency_ms': 100, 'result_preview': 'sample'}},
    ]
    print('Using synthetic rows')


In [None]:
# Split the trace into its tool-call and tool-result events, keeping the
# original order within each group.
tool_calls = []
tool_results = []
for event in rows:
    event_kind = event.get('event_type')
    if event_kind == 'tool_call':
        tool_calls.append(event)
    elif event_kind == 'tool_result':
        tool_results.append(event)

# Only as many pairs as the shorter of the two lists can be formed.
paired = min(len(tool_calls), len(tool_results))

# Pair events purely by position (i-th call with i-th result); zip stops at
# the shorter list, so exactly `paired` samples are produced.
sft_samples = []
for call_event, result_event in zip(tool_calls, tool_results):
    call_payload = call_event['payload']
    prompt_text = f"Tool={call_payload.get('tool')} kwargs={call_payload.get('kwargs')}"
    result_payload = result_event['payload']
    completion_text = f"ok={result_payload.get('ok')} preview={result_payload.get('result_preview')}"
    sft_samples.append({'prompt': prompt_text, 'completion': completion_text})

print('sft_samples=', len(sft_samples))
# Sanity check: stop here if the trace produced no samples.
assert len(sft_samples) >= 1


In [None]:
# Persist the samples as JSON Lines (one object per line) under
# test_outputs/series_datasets/, creating the directory when it is missing.
out_dir = PROJECT_ROOT / 'test_outputs' / 'series_datasets'
out_dir.mkdir(parents=True, exist_ok=True)
out_path = out_dir / 'sft_samples.jsonl'

# Mode 'w' replaces any previous sft_samples.jsonl.
with out_path.open('w', encoding='utf-8') as sink:
    sink.writelines(
        json.dumps(sample, ensure_ascii=True) + '\n' for sample in sft_samples
    )

print('saved:', out_path)
