# Tutorial 02 - CLI and YAML grids

This notebook demonstrates how to drive the pymts CLI, expand YAML grids, and validate deterministic outputs on disk.

In [None]:
import json
import subprocess
import sys
from pathlib import Path

# Paths are relative to the notebook's working directory.
# CONFIG_PATH: YAML grid consumed by the CLI; OUTDIR: root for generated files.
CONFIG_PATH = Path('configs') / 'tutorial_grid.yaml'
OUTDIR = Path('data') / 'tutorial_cli'


## 1. Compose a YAML grid
We create a small configuration file mixing Kuramoto and GBM sweeps.

In [None]:
# YAML grid with two entries: one for the `kuramoto` model, one for `gbm`.
grid_yaml = '''
configs:
  - model: kuramoto
    M: [3]
    T: [64]
    K: [0.5, 0.9]
    n_realizations: 1
    seed: 123
  - model: gbm
    M: 2
    T: 64
    mu: [0.0, 0.05]
    sigma: 0.2
    dt: 0.02
    n_realizations: 1
    seed: 321
'''

# Make sure the target directory exists (safe to re-run), then write the file.
config_dir = CONFIG_PATH.parent
config_dir.mkdir(parents=True, exist_ok=True)
CONFIG_PATH.write_text(grid_yaml)

# Read the file back so the cell output shows exactly what is on disk.
CONFIG_PATH.read_text()


## 2. Dry run the CLI
`--dry-run` reports the expanded configs without generating data.

In [None]:
# Run `pymts generate` as a module of the current interpreter, with --dry-run.
dry_run_cmd = [
    sys.executable, '-m', 'pymts',
    'generate',
    '--config', str(CONFIG_PATH),
    '--dry-run',
]
dry_run = subprocess.run(
    dry_run_cmd,
    check=True,           # raise CalledProcessError on a non-zero exit code
    capture_output=True,  # collect stdout/stderr on the result object
    text=True,            # decode captured output to str
)
dry_run.stdout


## 3. Generate and persist
We request Parquet + metadata (and CSV) under a dedicated output directory.

In [None]:
# Create the output root up front; exist_ok keeps the cell re-runnable.
OUTDIR.mkdir(parents=True, exist_ok=True)

# Run `pymts generate` as a module of the current interpreter, passing the
# config, the output directory, and the --save / --csv flags.
run_cmd = [
    sys.executable, '-m', 'pymts',
    'generate',
    '--config', str(CONFIG_PATH),
    '--outdir', str(OUTDIR),
    '--save',
    '--csv',
]
run = subprocess.run(
    run_cmd,
    check=True,           # raise CalledProcessError on a non-zero exit code
    capture_output=True,  # collect stdout/stderr on the result object
    text=True,            # decode captured output to str
)
run.stdout


## 4. Inspect outputs
Each configuration writes to `data/tutorial_cli/<model>/<config_id>/`.

In [None]:
# Recursively collect generated files under OUTDIR; sorting gives a stable order.
parquet_files = sorted(OUTDIR.rglob('*.parquet'))
metadata_files = sorted(OUTDIR.rglob('*.metadata.json'))

# First Parquet path, or None when nothing was produced.
first_parquet = next(iter(parquet_files), None)

# Cell output: (number of Parquet files, number of metadata files, first Parquet path).
(len(parquet_files), len(metadata_files), first_parquet)


## 5. Validate metadata
We load one metadata file to confirm the stored config fingerprint.

In [None]:
# Fail with an actionable message when no metadata was produced, instead of the
# bare IndexError that `metadata_files[0]` would raise on an empty list.
if not metadata_files:
    raise FileNotFoundError(
        f'No *.metadata.json files found under {OUTDIR}; run the generate step first.'
    )

# Parse the first metadata file (in sorted order) and show its identifying
# fields: the config id, the short hash, and the model name stored under params.
sample_meta = json.loads(metadata_files[0].read_text())
sample_meta['config_id'], sample_meta['hash8'], sample_meta['params']['model']


### Scaling up tips
- Use `--limit` during exploratory runs to keep grids manageable.
- Store YAML configs and base seeds in version control.
- The CLI reuses pymts' deterministic SeedSequence sub-seeding, so reruns are reproducible.