# Hierarchical Multi-Agent Reinforcement Learning for Congestion-Aware Vessel Scheduling

This notebook is **analysis-first** and **module-driven**: all core logic lives in `hmarl_mvp/`, and the notebook only orchestrates experiments and plots.


## Notebook workflow

1. Import module APIs from `hmarl_mvp`
2. Run reproducible baseline sweeps
3. Visualize policy/ablation behavior
4. Export artifacts for reporting


In [None]:
from pathlib import Path
import sys

import pandas as pd

# Ensure imports work whether the notebook is opened from the repo root or from
# any directory nested below it: pick the nearest ancestor of the working
# directory (the working directory itself included) that contains `hmarl_mvp`,
# and fall back to the working directory when no ancestor does.
_start_dir = Path.cwd()
repo_root = next(
    (
        candidate
        for candidate in (_start_dir, *_start_dir.parents)
        if (candidate / "hmarl_mvp").exists()
    ),
    _start_dir,
)
# Put the chosen directory first on sys.path, once, so `import hmarl_mvp` can resolve from it.
if str(repo_root) not in sys.path:
    sys.path.insert(0, str(repo_root))

from hmarl_mvp import (
    MaritimeEnv,
    get_default_config,
    run_horizon_sweep,
    run_noise_sweep,
    run_policy_sweep,
    run_sharing_sweep,
    summarize_policy_results,
)
from hmarl_mvp.plotting import (
    plot_horizon_sweep,
    plot_noise_sweep,
    plot_policy_comparison,
    plot_sharing_sweep,
)

# Fixed seed passed to the environment and to every sweep in this notebook.
SEED = 42
# Configuration object returned by the package's get_default_config().
CONFIG = get_default_config()
# Trailing bare expression so the notebook renders the config as the cell output.
CONFIG


## Environment sanity check

Run a single `reset()`/`step()` cycle to verify the observation and reward wiring before running the larger sweeps.


In [None]:
# Smoke test: build the environment, reset it, and advance one step with stub actions.
env = MaritimeEnv(config=CONFIG, seed=SEED)
obs = env.reset()
actions = env.sample_stub_actions()
next_obs, rewards, done, info = env.step(actions)

# Gather the observation/state shapes and step outputs worth eyeballing;
# the trailing bare name makes the notebook render the dict as the cell output.
sanity_summary = dict(
    coordinator_obs_shape=obs["coordinator"].shape,
    vessel_obs_shape=obs["vessels"][0].shape,
    port_obs_shape=obs["ports"][0].shape,
    global_state_shape=env.get_global_state().shape,
    done=done,
    port_metrics=info["port_metrics"],
)
sanity_summary


## Baseline policy sweep (RQ2)

Compare `independent`, `reactive`, `forecast`, and `oracle` baselines.


In [None]:
# Step count handed to every sweep in this notebook, read from CONFIG["rollout_steps"].
STEPS = CONFIG["rollout_steps"]
# Run the baseline policy sweep; the result is a mapping keyed by policy
# (its .values()/.items() are used when exporting).
policy_results = run_policy_sweep(steps=STEPS, seed=SEED, config=CONFIG)
# Condense the per-policy results (presumably into a DataFrame, since it is later
# written with .to_csv — confirm against summarize_policy_results).
summary = summarize_policy_results(policy_results)
# Trailing bare expression so the notebook renders the summary as the cell output.
summary


In [None]:
# Plot the results of the baseline policy sweep (the output of run_policy_sweep).
plot_policy_comparison(policy_results)


## Forecast horizon ablation (RQ3)


In [None]:
# Sweep the forecast horizon over three values with the shared step count, seed, and config.
horizon_results = run_horizon_sweep(
    horizons=[6, 12, 24],
    steps=STEPS,
    seed=SEED,
    config=CONFIG,
)
plot_horizon_sweep(horizon_results)


## Forecast noise ablation (RQ3)


In [None]:
# Sweep the forecast noise level, starting from the noise-free case (0.0).
noise_results = run_noise_sweep(
    noise_levels=[0.0, 0.3, 0.5, 1.0, 2.0],
    steps=STEPS,
    seed=SEED,
    config=CONFIG,
)
plot_noise_sweep(noise_results)


## Forecast sharing ablation (RQ3)


In [None]:
# Run the sharing sweep over two labelled modes and plot the outcome.
# NOTE(review): the boolean presumably toggles whether forecasts are shared beyond
# the coordinator — confirm against run_sharing_sweep.
sharing_results = run_sharing_sweep(
    sharing_modes={"shared": True, "coordinator_only": False},
    steps=STEPS,
    seed=SEED,
    config=CONFIG,
)
plot_sharing_sweep(sharing_results)


## Export artifacts (optional)

Set `SAVE_RESULTS = True` to write CSV files under `runs/notebook_exports/`.


In [None]:
# Flip to True to write the sweep results to disk as CSV files.
SAVE_RESULTS = False

if not SAVE_RESULTS:
    print("Set SAVE_RESULTS=True to export CSV artifacts.")
else:
    out_dir = repo_root / "runs" / "notebook_exports"
    out_dir.mkdir(parents=True, exist_ok=True)

    # Combined and summarized views of the policy sweep.
    all_results = pd.concat(policy_results.values(), ignore_index=True)
    all_results.to_csv(out_dir / "policy_all_results.csv", index=False)
    summary.to_csv(out_dir / "policy_summary.csv")

    def _write_per_key(prefix, frames):
        """Write each frame in *frames* to `<prefix>_<key>.csv` inside out_dir."""
        for key, frame in frames.items():
            frame.to_csv(out_dir / f"{prefix}_{key}.csv", index=False)

    # One CSV per sweep entry, in the same order as the sweeps were run above.
    _write_per_key("policy", policy_results)
    _write_per_key("horizon", horizon_results)
    _write_per_key("noise", noise_results)
    _write_per_key("sharing", sharing_results)

    print(f"Saved notebook exports to: {out_dir}")


## Notes

- Keep core logic changes inside `hmarl_mvp/`.
- Keep notebook edits focused on analysis and presentation.
- For reproducible batch runs, prefer `scripts/run_baselines.py`.
