# Council of Elders — Backend Testing Notebook

This notebook imports and runs the test cases from `test_cases.py`.

**Test groups:**
- `adapters` — Adapter factory, model registry (offline)
- `adapters_online` — Key validation, generation, streaming (requires API keys)
- `consensus` — Agreement markers, stagnation detection, composite scoring (offline)
- `session` — Session create/retrieve/purge (offline)
- `cost` — Cost estimation utilities (offline)
- `orchestrator` — Mini debate end-to-end (requires API keys)

## Setup

```bash
cd backend && pip install -r requirements.txt
```

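**Paste your API keys below.** They are held in this notebook's kernel memory while it runs. Note that cell source is written to the `.ipynb` file when the notebook is saved, so clear the keys before saving, sharing, or committing it.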

In [None]:
import sys
# Put the parent directory first on the import path
# (presumably so the project modules imported by later cells resolve).
sys.path.insert(0, '..')

# ------------------------------------------------------------
# API keys, one entry per provider. Fill in the ones you have;
# an empty string means "no key for this provider".
# ------------------------------------------------------------
API_KEYS = dict(
    anthropic="",   # e.g., "sk-ant-..."
    openai="",      # e.g., "sk-..."
    google="",      # e.g., "AIza..."
    xai="",         # e.g., "xai-..."
)

# Subset of API_KEYS whose value is non-empty; later cells iterate over this.
available = {name: API_KEYS[name] for name in API_KEYS if API_KEYS[name]}

_shown = list(available) if available else 'None — paste keys above!'
print(f"Available providers: {_shown}")

In [None]:
from test_cases import run_all_tests, run_test_group

---
## 1. Run ALL Tests

Runs every offline test, plus online tests for providers where you've supplied a key.

In [None]:
suite = await run_all_tests(api_keys=API_KEYS)
print(suite.summary())

---
## 2. Run Tests by Group

Use these cells to run specific test groups individually.

### 2.1 Adapter Tests (offline)

In [None]:
suite = await run_test_group("adapters")
print(suite.summary())

### 2.2 Adapter Tests (online — requires API keys)

In [None]:
suite = await run_test_group("adapters_online", api_keys=API_KEYS)
print(suite.summary())

### 2.3 Consensus Detection Tests (offline)

In [None]:
suite = await run_test_group("consensus")
print(suite.summary())

### 2.4 Session Management Tests (offline)

In [None]:
suite = await run_test_group("session")
print(suite.summary())

### 2.5 Cost Estimation Tests (offline)

In [None]:
suite = await run_test_group("cost")
print(suite.summary())

### 2.6 Orchestrator / Mini Debate Test (online — requires API keys)

In [None]:
suite = await run_test_group("orchestrator", api_keys=API_KEYS)
print(suite.summary())

---
## 3. Interactive Exploration

The cells below let you interactively test individual components in more detail.

### 3.1 Run a Full Debate with Streaming Output

Watch a live debate between LLMs with streaming text.

In [None]:
from app.models.debate import DebateConfig, DebateSession, Participant, Provider
from app.orchestrator.engine import DebateOrchestrator
from test_cases import TEST_MODELS, PROVIDER_TO_ENUM

# Build participants from available keys
participants = []
keys_for_debate = {}

for provider, key in available.items():
    keys_for_debate[provider] = key
    model = TEST_MODELS[provider]
    participants.append(Participant(
        provider=PROVIDER_TO_ENUM[provider],
        model=model,
        display_name=f"{provider.title()} ({model})",
    ))

# Need at least 2 participants
if len(participants) < 2 and participants:
    dup = participants[0].model_copy()
    dup.display_name = f"{dup.display_name} (2nd)"
    participants.append(dup)

if len(participants) >= 2:
    config = DebateConfig(
        topic="Is direct democracy or representative democracy better for modern society?",
        participants=participants,
        max_rounds=2,
        max_tokens_per_turn=300,
        temperature=0.7,
        consensus_threshold=0.85,
    )
    session = DebateSession(config=config, api_keys=keys_for_debate)
    orchestrator = DebateOrchestrator(session)

    print(f"Debate: {config.topic}")
    print(f"Participants: {[p.display_name for p in participants]}")
    print("=" * 60)

    async for event in orchestrator.run():
        if event.event_type == "debate:turn_start":
            print(f"\n--- {event.data['speaker']} (Round {event.data['round']}) ---")
        elif event.event_type == "debate:token_stream":
            print(event.data["token"], end="", flush=True)
        elif event.event_type == "debate:turn_end":
            print(f"\n  [~{event.data['token_count']} tokens]")
        elif event.event_type == "debate:consensus_check":
            print(f"\n*** Consensus: {event.data['consensus_score']:.2f} ***")
            if event.data.get('summary'):
                print(f"    {event.data['summary']}")
        elif event.event_type == "debate:concluded":
            print(f"\n{'=' * 60}")
            print(f"Concluded: {event.data['total_rounds']} rounds, consensus: {event.data['final_consensus']:.2f}")
        elif event.event_type == "debate:error":
            print(f"\n*** ERROR: {event.data['error']} ***")
else:
    print("Need at least 1 API key to run a debate.")

### 3.2 Generate Conspectus from the Debate Above

In [None]:
from app.services.conspectus import generate_conspectus
from IPython.display import Markdown, display

if len(participants) >= 2 and session.transcript:
    session.api_keys = keys_for_debate  # Re-attach (normally purged)
    print("Generating conspectus...")
    conspectus = await generate_conspectus(session)
    display(Markdown(conspectus))
else:
    print("No debate transcript available. Run the debate cell first.")

### 3.3 Cost Estimation Preview

In [None]:
from app.services.cost import estimate_debate_cost

# Four-participant line-up used purely for the cost preview; each entry
# carries the model id and the label to show for it.
sample_participants = [
    {"model": model_id, "display_name": label}
    for model_id, label in (
        ("claude-sonnet-4-20250514", "Claude Sonnet"),
        ("gpt-4o", "GPT-4o"),
        ("gemini-2.0-flash", "Gemini Flash"),
        ("grok-3", "Grok 3"),
    )
]

estimates = estimate_debate_cost(sample_participants, max_rounds=10)

print("Cost Estimation (10 rounds, 4 participants)")
print("=" * 40)

# Print one line per entry and accumulate the overall figure in the same pass.
total = 0
for entry_name, entry_cost in estimates.items():
    print(f"  {entry_name}: ${entry_cost:.4f}")
    total += entry_cost

print(f"  {'—' * 30}")
print(f"  Total estimated: ${total:.4f}")

### 3.4 Test FastAPI Endpoints (requires running server)

Start the server first: `uvicorn app.main:socket_app --reload`

In [None]:
import httpx

BASE_URL = "http://localhost:8000"

async def test_endpoints():
    async with httpx.AsyncClient() as client:
        r = await client.get(f"{BASE_URL}/health")
        print(f"Health: {r.json()}")

        r = await client.get(f"{BASE_URL}/api/keys/providers")
        print(f"Providers: {r.json()}")

        for provider, key in available.items():
            r = await client.post(f"{BASE_URL}/api/keys/validate", json={
                "provider": provider,
                "api_key": key,
            })
            print(f"Validate {provider}: {r.json()}")
            break

try:
    await test_endpoints()
except httpx.ConnectError:
    print("Server not running. Start with: uvicorn app.main:socket_app --reload")