# Combat Dataset vs Phantom Simulator

Loads `resources/datasets/combat.pkl.xz`, runs each sample through `phantom/micro/simulator.py`
(via `NumpyLanchesterSimulator`), and compares prediction quality against simple baselines.

In [1]:
from __future__ import annotations

import lzma
import pickle
import sys
from dataclasses import dataclass
from pathlib import Path

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

# Locate the repository root: use the current working directory if it contains
# the `phantom` package, otherwise fall back to its parent directory.
ROOT = Path.cwd()
if not (ROOT / "phantom").exists():
    ROOT = ROOT.parent
# Make the project importable; must run before the `phantom` import below.
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))

from phantom.micro.simulator import ModelCombatSetup, NumpyLanchesterSimulator, SimulationUnit

# Compressed pickle with the recorded combat samples, relative to the repo root.
DATASET_PATH = ROOT / "resources/datasets/combat.pkl.xz"
MAX_SAMPLES: int | None = None  # set e.g. 2000 for a faster pass
SEED = 7

# Seeded generator so the optional subsampling in the next cell is reproducible.
rng = np.random.default_rng(SEED)
print(f"Dataset: {DATASET_PATH}")

Dataset: C:\Users\volke\PycharmProjects\phantom-sc2\resources\datasets\combat.pkl.xz


In [2]:
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# dataset files that come from a trusted source.
with lzma.open(DATASET_PATH, "rb") as f:
    raw_data = pickle.load(f)

# Optionally evaluate only a random subset (drawn without replacement) so the
# notebook runs faster; MAX_SAMPLES=None keeps every sample in original order.
indices = np.arange(len(raw_data))
if MAX_SAMPLES is not None and MAX_SAMPLES < len(indices):
    indices = rng.choice(indices, size=MAX_SAMPLES, replace=False)

data = [raw_data[i] for i in indices]
print(f"Loaded {len(data):,} samples (from {len(raw_data):,} total)")

# Schema preview. Guarded so that an empty dataset, or a first sample without
# any units, does not abort the cell with an IndexError.
if data:
    print("Example keys:", sorted(data[0].keys()))
    if data[0]["units"]:
        print("Serialized unit keys:", sorted(data[0]["units"][0].keys()))

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\volke\\PycharmProjects\\phantom-sc2\\resources\\datasets\\combat.pkl.xz'

In [None]:
@dataclass
class NotebookParameters:
    """Parameter bundle passed to `NumpyLanchesterSimulator` in this notebook.

    NOTE(review): the attribute names (including the `lancester` spelling) are
    presumably the ones the simulator reads from its parameters object --
    confirm against `phantom/micro/simulator.py` before renaming anything.
    """

    # Simulator parameter; its meaning is not visible here -- TODO confirm in simulator.py.
    time_distribution_lambda: float = 1.0
    # Also read by the evaluation loop as the `lanchester_power` given to `sum_force`.
    lancester_dimension: float = 1.5
    # Simulator parameter; its meaning is not visible here -- TODO confirm in simulator.py.
    enemy_range_bonus: float = 1.0


def to_simulation_unit(u: dict) -> SimulationUnit:
    """Convert one serialized unit dict into a `SimulationUnit`.

    The tag is coerced with `int`, the two flags with `bool`, every scalar
    stat with `float`, and the position is reduced to a tuple of its first
    two components.
    """
    float_fields = (
        "health",
        "shield",
        "ground_dps",
        "air_dps",
        "ground_range",
        "air_range",
        "radius",
        "real_speed",
    )
    return SimulationUnit(
        tag=int(u["tag"]),
        is_enemy=bool(u["is_enemy"]),
        is_flying=bool(u["is_flying"]),
        **{field: float(u[field]) for field in float_fields},
        position=(float(u["position"][0]), float(u["position"][1])),
    )


def to_model_setup(sample: dict) -> ModelCombatSetup:
    """Build the simulator input for one dataset sample.

    Every entry of `sample["units"]` is converted with `to_simulation_unit`;
    units flagged `is_enemy` go to `units2`, all others to `units1`, keeping
    their original order. The tags of all units on both sides are passed as
    the `attacking` set.
    """
    converted = [to_simulation_unit(unit_dict) for unit_dict in sample["units"]]
    own_side: list[SimulationUnit] = [unit for unit in converted if not unit.is_enemy]
    enemy_side: list[SimulationUnit] = [unit for unit in converted if unit.is_enemy]
    all_tags: set[int] = {unit.tag for unit in own_side} | {unit.tag for unit in enemy_side}
    return ModelCombatSetup(units1=own_side, units2=enemy_side, attacking=all_tags)


def sum_hp(units: list[SimulationUnit]) -> float:
    """Return the combined health plus shield of all `units` (0.0 when empty)."""
    hit_points = (unit.health + unit.shield for unit in units)
    return float(sum(hit_points))


def sum_dps(units: list[SimulationUnit]) -> float:
    """Return the total DPS of `units`, taking each unit's larger of ground and air DPS."""
    best_per_unit = [max(unit.ground_dps, unit.air_dps) for unit in units]
    return float(sum(best_per_unit))


def sum_force(units: list[SimulationUnit], lanchester_power: float) -> float:
    """Return the summed per-unit force of `units`.

    A unit's force is `(health + shield) ** lanchester_power` multiplied by
    the larger of its ground and air DPS.
    """

    def unit_force(unit: SimulationUnit) -> float:
        hit_points = unit.health + unit.shield
        return (hit_points**lanchester_power) * max(unit.ground_dps, unit.air_dps)

    return float(sum(map(unit_force, units)))


# Simulator under evaluation, built from the default NotebookParameters; the
# evaluation cell calls `sim.simulate(...)` and reads `sim.parameters`.
# NOTE(review): the exact meaning of `num_steps` is defined in simulator.py -- confirm there.
sim = NumpyLanchesterSimulator(NotebookParameters(), num_steps=10)

In [None]:
def _normalized_difference(side1: float, side2: float) -> float:
    """Return (side1 - side2) / (side1 + side2), with the denominator floored at 1e-9."""
    return (side1 - side2) / max(1e-9, side1 + side2)


# One result row per sample that has at least one unit on each side.
rows = []
for sample in data:
    setup = to_model_setup(sample)
    if not (setup.units1 and setup.units2):
        # One-sided samples are skipped.
        continue

    pred = sim.simulate(setup).outcome_global

    # Baseline aggregates for both sides.
    hp1, hp2 = sum_hp(setup.units1), sum_hp(setup.units2)
    dps1, dps2 = sum_dps(setup.units1), sum_dps(setup.units2)
    force1 = sum_force(setup.units1, sim.parameters.lancester_dimension)
    force2 = sum_force(setup.units2, sim.parameters.lancester_dimension)

    row = {
        "true_raw": float(sample["advantage_log"]),
        "pred_sim": float(pred),
        "hp_ratio": _normalized_difference(hp1, hp2),
        "dps_ratio": _normalized_difference(dps1, dps2),
        "force": _normalized_difference(force1, force2),
        "n1": len(setup.units1),
        "n2": len(setup.units2),
        "hp1": hp1,
        "hp2": hp2,
    }
    rows.append(row)

print(f"Evaluated {len(rows):,} valid samples")
rows[:2]

In [None]:
# Pull each per-sample column out of `rows` as a float array (same order as `rows`).
true_raw, pred_sim, hp_ratio, dps_ratio, force = (
    np.array([row[column] for row in rows], dtype=float)
    for column in ("true_raw", "pred_sim", "hp_ratio", "dps_ratio", "force")
)

def sign_no_zero(x: np.ndarray) -> np.ndarray:
    """Return the elementwise sign of `x`, with zeros mapped to +1.

    The input array is left unmodified; a new array is returned.
    """
    signs = np.sign(x)
    # Treat exact zeros as positive so every entry has a direction.
    np.putmask(signs, signs == 0, 1)
    return signs

# Direction of the true outcome for every sample (zeros count as positive).
y_sign = sign_no_zero(true_raw)


def metrics_for(name: str, pred: np.ndarray) -> dict:
    """Score one prediction array against the module-level `true_raw` / `y_sign`.

    Returns a dict with the model name, the fraction of samples whose
    predicted sign matches `y_sign`, the correlation coefficient between
    `pred` and `true_raw`, and their mean absolute difference.
    """
    same_direction = sign_no_zero(pred) == y_sign
    correlation = np.corrcoef(pred, true_raw)[0, 1]
    absolute_error = np.abs(pred - true_raw)
    return {
        "model": name,
        "sign_accuracy": float(same_direction.mean()),
        "corr_to_true_raw": float(correlation),
        "mae_to_true_raw": float(np.mean(absolute_error)),
    }

# Score the simulator and the three baselines, in display order.
metrics = [
    metrics_for(label, predictions)
    for label, predictions in (
        ("simulator", pred_sim),
        ("hp_ratio", hp_ratio),
        ("dps_ratio", dps_ratio),
        ("force", force),
    )
]
metrics

In [None]:
# Axis window shared by the histogram x-axis and both scatter axes.
pred_range = (-2, 2)
fig = make_subplots(
    rows=1,
    cols=3,
    subplot_titles=(
        "Raw Outcome Distribution",
        "Predicted vs True (raw outcomes)",
        "Sign Accuracy Comparison",
    ),
)

# Panel 1: overlaid histograms of the true outcomes and the simulator predictions.
for hist_name, hist_values, hist_opacity in (
    ("true_raw", true_raw, 0.8),
    ("simulator", pred_sim, 0.6),
):
    fig.add_trace(
        go.Histogram(x=hist_values, nbinsx=40, name=hist_name, opacity=hist_opacity),
        row=1,
        col=1,
    )

# Panel 2: every predictor plotted against the true outcome.
for scatter_name, scatter_values, scatter_opacity in (
    ("simulator", pred_sim, 0.35),
    ("hp_ratio", hp_ratio, 0.25),
    ("dps_ratio", dps_ratio, 0.25),
    ("force", force, 0.25),
):
    fig.add_trace(
        go.Scatter(
            x=true_raw,
            y=scatter_values,
            mode="markers",
            name=scatter_name,
            marker={"size": 5, "opacity": scatter_opacity},
        ),
        row=1,
        col=2,
    )

# Panel 3: sign accuracy per model, labelled with three decimals.
models = [m["model"] for m in metrics]
acc = [m["sign_accuracy"] for m in metrics]
accuracy_labels = [f"{v:.3f}" for v in acc]
fig.add_trace(
    go.Bar(x=models, y=acc, name="accuracy", text=accuracy_labels, textposition="outside"),
    row=1,
    col=3,
)

# Axis titles and ranges, panel by panel.
fig.update_xaxes(title_text="value", range=list(pred_range), row=1, col=1)
fig.update_yaxes(title_text="count", row=1, col=1)

fig.update_xaxes(title_text="true_raw", range=list(pred_range), row=1, col=2)
fig.update_yaxes(title_text="prediction", range=list(pred_range), row=1, col=2)

fig.update_xaxes(title_text="model", row=1, col=3)
fig.update_yaxes(title_text="accuracy", range=[0.0, 1.0], row=1, col=3)

fig.update_layout(height=450, width=1400, barmode="overlay", template="plotly_white")
fig.show()

In [None]:
# Calibration table: split the simulator predictions into 10 equal-width bins
# spanning [min, max] and compare the mean prediction with the mean true
# outcome inside each bin.
bins = np.linspace(pred_sim.min(), pred_sim.max(), 11)
# np.digitize uses half-open intervals, so a value equal to bins[-1] (the
# maximum prediction) gets index len(bins), which the loop below never visits.
# Clip the indices so that the maximum lands in the last bin instead of being
# dropped silently.
digitized = np.clip(np.digitize(pred_sim, bins), 1, len(bins) - 1)
calibration = []
for b in range(1, len(bins)):
    mask = digitized == b
    count = int(mask.sum())
    if count == 0:
        # Empty bins are omitted from the table.
        continue
    calibration.append({
        "bin_center": float((bins[b - 1] + bins[b]) / 2),
        "mean_pred": float(pred_sim[mask].mean()),
        "mean_true": float(true_raw[mask].mean()),
        "count": count,
    })

# Column arrays for plotting.
cal_pred = np.array([c["mean_pred"] for c in calibration])
cal_true = np.array([c["mean_true"] for c in calibration])
cal_count = np.array([c["count"] for c in calibration])

# Calibration curve: mean true outcome against mean prediction per bin, with
# each point labelled by the number of samples in its bin.
fig = go.Figure(
    data=[
        go.Scatter(
            x=cal_pred,
            y=cal_true,
            mode="lines+markers+text",
            text=[f"{int(n)}" for n in cal_count],
            textposition="top center",
            name="simulator",
        )
    ]
)
layout_options = {
    "title": "Simulator Calibration on Raw Outcomes",
    "xaxis_title": "mean predicted",
    "yaxis_title": "mean true_raw",
    "width": 600,
    "height": 500,
    "template": "plotly_white",
}
fig.update_layout(**layout_options)
fig.show()

calibration

### Notes
- Comparisons are on the raw outcome values (the `advantage_log` field of each sample) without normalization.
- `sign_accuracy` compares winner direction only (`advantage_log > 0` means `units1` wins; exact zeros are counted as positive).
- Absolute-scale metrics (`mae_to_true_raw`) are included as-is; known scale mismatch is a TODO.
- You can tune `NotebookParameters` to test alternative simulator parameter sets.