# 10. End-to-End Research Agent Lab

This lab links inference, tracing, and dataset extraction.

The default mode is synthetic: it writes fixed mock artifacts, so it is reproducible and does not require live API inference.
Set `RUN_REAL_INFERENCE=True` when you want a real run.


In [None]:
from __future__ import annotations

import json
import os
import math
import random
import statistics
from pathlib import Path


def find_project_root(start: Path) -> Path:
    """Locate the project root by walking upward from ``start``.

    A directory qualifies as the root when it contains both ``README.md``
    and ``main_langgraph.py``. ``start`` itself is examined first, then each
    ancestor from nearest to farthest.

    Args:
        start: Directory where the upward search begins.

    Returns:
        The nearest qualifying directory, or ``start`` unchanged when no
        directory on the path qualifies.
    """
    markers = ('README.md', 'main_langgraph.py')
    lineage = (start, *start.parents)
    # Lazy generator: stops probing the filesystem at the first match.
    hits = (
        folder
        for folder in lineage
        if all((folder / marker).exists() for marker in markers)
    )
    return next(hits, start)


# Resolve the project root from wherever the kernel was launched, then switch
# the working directory to it so the rest of the notebook runs from there.
launch_dir = Path.cwd().resolve()
PROJECT_ROOT = find_project_root(launch_dir)
os.chdir(PROJECT_ROOT)
print('PROJECT_ROOT =', PROJECT_ROOT)


In [None]:
import subprocess
import sys

# Lab configuration: research topic and the switch between synthetic and
# real inference (False keeps the run synthetic).
TOPIC = 'AI agent planning strategy market snapshot 2026'
RUN_REAL_INFERENCE = False

# Every artifact of this lab is written below test_outputs/series_e2e_lab.
LAB_DIR = PROJECT_ROOT.joinpath('test_outputs', 'series_e2e_lab')
TRACE_DIR = LAB_DIR.joinpath('trajectories')
REPORT_PATH = LAB_DIR.joinpath('report.md')

# Create the output directories up front; existing ones are left untouched.
for out_dir in (LAB_DIR, TRACE_DIR):
    out_dir.mkdir(parents=True, exist_ok=True)


In [None]:
# Produce the artifacts the analysis step consumes: a report file plus a
# trajectory (.jsonl event log and .summary.json) inside TRACE_DIR.
if RUN_REAL_INFERENCE:
    # Real mode: run the LangGraph entry point as a child process and let it
    # write the report and trajectory files into this lab's directories.
    cmd = [
        sys.executable,
        'main_langgraph.py',
        TOPIC,
        '--provider', 'openai',
        '--model', os.getenv('OPENAI_MODEL', 'gpt-4.1-mini'),
        '--output', str(REPORT_PATH),
        '--trace-dir', str(TRACE_DIR),
    ]
    env = os.environ.copy()
    # Blank the Serper key for the child process only (os.environ itself is
    # not modified). Per this lab's notes, an empty key selects the
    # DuckDuckGo/Wikipedia search mode.
    env['SERPER_API_KEY'] = ''
    print('Running:', ' '.join(cmd))
    # check=True raises CalledProcessError when the run exits non-zero, so a
    # failed inference stops the notebook here instead of in a later cell.
    subprocess.run(cmd, cwd=str(PROJECT_ROOT), env=env, text=True, check=True)
else:
    # Reproducible synthetic mode: write a fixed five-event trajectory, its
    # summary, and a placeholder report without calling any API.
    synthetic_jsonl = TRACE_DIR / 'run_mock.jsonl'
    synthetic_summary = TRACE_DIR / 'run_mock.summary.json'
    rows = [
        {'ts_utc': '2026-01-01T00:00:00Z', 'run_id': 'run_mock', 'idx': 1, 'event_type': 'run_started', 'payload': {}},
        {'ts_utc': '2026-01-01T00:00:01Z', 'run_id': 'run_mock', 'idx': 2, 'event_type': 'tool_call', 'payload': {'tool': 'web_search', 'kwargs': {'query': TOPIC}}},
        {'ts_utc': '2026-01-01T00:00:02Z', 'run_id': 'run_mock', 'idx': 3, 'event_type': 'tool_result', 'payload': {'tool': 'web_search', 'ok': True, 'latency_ms': 100, 'result_preview': 'sample'}},
        {'ts_utc': '2026-01-01T00:00:03Z', 'run_id': 'run_mock', 'idx': 4, 'event_type': 'run_completed', 'payload': {'status': 'success', 'error': None}},
        {'ts_utc': '2026-01-01T00:00:04Z', 'run_id': 'run_mock', 'idx': 5, 'event_type': 'final_report', 'payload': {'report_len': 100, 'report_sha256': 'mock'}},
    ]
    # One JSON object per line (JSON Lines).
    with synthetic_jsonl.open('w', encoding='utf-8') as f:
        f.writelines(json.dumps(r, ensure_ascii=True) + '\n' for r in rows)

    # Derive the summary counts from the rows so the summary cannot drift
    # from the event log when the synthetic events are edited.
    tool_call_count = sum(r['event_type'] == 'tool_call' for r in rows)
    tool_error_count = sum(
        r['event_type'] == 'tool_result' and not r['payload'].get('ok', True)
        for r in rows
    )
    # Explicit UTF-8 keeps these files consistent with the JSONL writer above
    # instead of depending on the platform's default encoding.
    synthetic_summary.write_text(json.dumps({
        'run_id': 'run_mock',
        'status': 'success',
        'event_count': len(rows),
        'tool_call_count': tool_call_count,
        'tool_error_count': tool_error_count,
    }, indent=2), encoding='utf-8')
    REPORT_PATH.write_text('# Synthetic report\n\nThis run used synthetic mode.', encoding='utf-8')
    print('Synthetic mode artifacts created.')


In [None]:
# Load the most recent run from TRACE_DIR and pair each tool call with its
# result to build a small list of samples.


def _mtime_key(path):
    """Sort key: modification time first, file name as a stable tie-breaker."""
    return (path.stat().st_mtime_ns, path.name)


# Order by modification time, not by file name: name order is unrelated to
# recency (e.g. a leftover 'run_mock' may sort after a newer real run id).
summary_files = sorted(TRACE_DIR.glob('run_*.summary.json'), key=_mtime_key)
jsonl_files = sorted(TRACE_DIR.glob('run_*.jsonl'), key=_mtime_key)
assert summary_files and jsonl_files, f'no run_* artifacts found in {TRACE_DIR}'

summary_path = summary_files[-1]
# Prefer the trace that shares the summary's run id so both files describe
# the same run; fall back to the newest trace when no such sibling exists.
run_stem = summary_path.name[:-len('.summary.json')]
paired_trace = summary_path.with_name(run_stem + '.jsonl')
trace_path = paired_trace if paired_trace.exists() else jsonl_files[-1]

summary = json.loads(summary_path.read_text(encoding='utf-8'))
rows = [
    json.loads(line)
    for line in trace_path.read_text(encoding='utf-8').splitlines()
    if line.strip()  # tolerate blank lines in the event log
]

tool_calls = [r for r in rows if r.get('event_type') == 'tool_call']
tool_results = [r for r in rows if r.get('event_type') == 'tool_result']

# Calls and results are paired by position (first call with first result,
# and so on); zip stops at the shorter list, so unmatched events are dropped.
samples = [
    {
        'tool': call['payload'].get('tool'),
        'ok': result['payload'].get('ok'),
    }
    for call, result in zip(tool_calls, tool_results)
]

print('summary_status=', summary.get('status'))
print('samples=', samples)
assert summary.get('status') == 'success'
assert len(samples) >= 1


## Production switch

When you are ready to run real inference:
1. Set `RUN_REAL_INFERENCE=True`.
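2. Ensure `OPENAI_API_KEY` is available in the environment.
3. Note that the inference cell sets `SERPER_API_KEY` to an empty string for the subprocess, so the run uses DuckDuckGo/Wikipedia mode; remove that override if you want to use your own Serper key instead.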
