# JWST Photometry: Example Usage from PLAN.md
This notebook parses the project PLAN.md for example usage blocks, substitutes paths to the provided sample images, and executes them to demonstrate the pipeline end to end.

In [None]:
# Section 1: Load Dependencies and Configure Paths
import os, sys, re, json, shutil, time, io, textwrap, subprocess, runpy, random
from pathlib import Path
from typing import List, Dict, Any, Tuple
try:
    from PIL import Image  # optional for image previews
except Exception:
    Image = None
import numpy as np
# Seed both the stdlib and NumPy global RNGs so any stochastic example is repeatable.
random.seed(42)
np.random.seed(42)

# Every path is derived from the directory the kernel was started in.
repo_root = Path.cwd()
plan_path = repo_root / 'PLAN.md'
images_dir = repo_root / 'sample_images'
outputs_dir = repo_root / 'output' / 'notebook_examples'
outputs_dir.mkdir(parents=True, exist_ok=True)

# Make modules under src/ importable. The membership check keeps this cell
# idempotent: re-running it must not pile up duplicate sys.path entries.
src_dir = str(repo_root / 'src')
if src_dir not in sys.path:
    sys.path.append(src_dir)

print('Repo root:', repo_root)
print('Images dir:', images_dir)
print('Outputs dir:', outputs_dir)

In [None]:
# Section 2: Validate sample_images and PLAN.md Presence
assert plan_path.exists(), f"PLAN.md not found at {plan_path}"
# is_dir() rather than exists(): a regular file named sample_images would
# otherwise pass here and then fail inside iterdir() with a less helpful error.
assert images_dir.is_dir(), f"sample_images directory not found at {images_dir}"
# Recognised file-name endings. They are matched case-insensitively against the
# whole file name (not Path.suffix, which only yields the last component), so
# the multi-part '.fits.gz' entry is actually honoured.
img_exts = ['.fits', '.fits.gz', '.fit', '.fz']
image_files = sorted(
    p for p in images_dir.iterdir()
    if p.is_file() and p.name.lower().endswith(tuple(img_exts))
)
assert len(image_files) > 0, f"No FITS images found in {images_dir}"
print(f"Found {len(image_files)} images:")
for p in image_files:
    print(' -', p.name)

In [None]:
# Section 3: Parse PLAN.md for Example Blocks
import itertools
md = plan_path.read_text(encoding='utf-8', errors='ignore')
heading_re = re.compile(r'^(#+)\s+(.*)$', re.M)
code_re = re.compile(r'```(python|bash|sh)\n(.*?)\n```', re.S)
# Any fenced region (whatever its language tag). Lines such as "# install deps"
# inside a fence look exactly like Markdown headings to heading_re, so headings
# that start inside one of these spans are discarded below.
fence_re = re.compile(r'^```.*?^```', re.M | re.S)
fenced_spans = [(f.start(), f.end()) for f in fence_re.finditer(md)]
# Each heading is (character offset, level, title).
headings = [
    (m.start(), len(m.group(1)), m.group(2).strip())
    for m in heading_re.finditer(md)
    if not any(lo <= m.start() < hi for lo, hi in fenced_spans)
]
blocks = []
for m in code_re.finditer(md):
    lang = m.group(1)
    code = m.group(2)
    start = m.start()
    # Find nearest preceding heading; blocks before the first heading get 'Top'.
    h_candidates = [h for h in headings if h[0] < start]
    h = h_candidates[-1][2] if h_candidates else 'Top'
    blocks.append({'lang': lang, 'code': code, 'heading': h})
print(f"Parsed {len(blocks)} code blocks from PLAN.md")
# Filter to plausible usage/example contexts.
# NOTE(review): the language clause is always true because code_re only captures
# python/bash/sh blocks, so this currently retains every parsed block. Left as-is
# to keep the set of executed examples unchanged; tighten if that is not intended.
usage_blocks = [b for b in blocks if any(k in b['heading'].lower() for k in ['usage','example','run','how to']) or b['lang'] in ('python','bash','sh')]
print(f"Retained {len(usage_blocks)} usage/example blocks for execution")

In [None]:
# Section 4: Build Input File Lists and Placeholder Substitutions
images = image_files
image = images[0]  # the first image serves as the primary example input
inputs = list(map(str, images))
# Placeholder -> replacement text. Insertion order is kept exactly as before
# because the substitution routine iterates over this dict.
subs = dict([
    ('{INPUT}', str(image)),
    ('{INPUTS}', ' '.join(inputs)),
    ('<image>', str(image)),
    ('$IMAGE', str(image)),
    ('sample_images', str(images_dir)),
])
def apply_subs(text: str, mapping=None) -> str:
    """Replace every placeholder found in ``text`` with its mapped value.

    The replacement is done in a single pass over ``text``. Chained
    ``str.replace`` calls would rescan text that an earlier replacement had
    just inserted, so a path substituted for ``{INPUT}`` that contains the
    literal ``sample_images`` would be rewritten a second time and corrupted.

    Args:
        text: Source text containing zero or more placeholders.
        mapping: Optional placeholder -> replacement dict. When omitted, the
            notebook-level ``subs`` table is used.

    Returns:
        ``text`` with each placeholder occurrence replaced exactly once.
    """
    table = subs if mapping is None else mapping
    # Longest keys first so that a key which is a prefix of another key can
    # never shadow the longer one in the alternation.
    keys = sorted((k for k in table if k), key=len, reverse=True)
    if not keys:
        return text
    pattern = re.compile('|'.join(re.escape(k) for k in keys))
    return pattern.sub(lambda match: table[match.group(0)], text)
# Store the placeholder-expanded source next to the raw code of every block.
for b in usage_blocks:
    b.update(code_subbed=apply_subs(b['code']))

In [None]:
# Section 5: Execute Python Usage Examples
# NOTE(security): this exec()s code taken verbatim from PLAN.md inside the kernel
# process. Only run this notebook against a PLAN.md you trust.
import contextlib, traceback
exec_results: List[Dict[str, Any]] = []
py_blocks = [b for b in usage_blocks if b['lang'] == 'python']
for idx, b in enumerate(py_blocks, 1):
    code = b['code_subbed']
    # Fresh globals per example so one snippet cannot leak names into the next.
    g = {
        '__name__': '__main__',
        'images': images,
        'image': image,
        'inputs': inputs,
        'output_dir': outputs_dir,
        'repo_root': repo_root,
    }
    out_buf, err_buf = io.StringIO(), io.StringIO()
    t0 = time.time()
    try:
        # The redirectors restore sys.stdout/sys.stderr however the block exits.
        with contextlib.redirect_stdout(out_buf), contextlib.redirect_stderr(err_buf):
            exec(code, g)
        success = True
    except SystemExit as e:
        # Snippets run as '__main__' may call sys.exit(); that must not abort
        # the remaining examples. Exit code None/0 counts as success.
        success = e.code in (None, 0)
        if not success:
            err_buf.write(f"ERROR: SystemExit({e.code!r})\n")
    except Exception as e:
        success = False
        err_buf.write(f"ERROR: {e}\n")
        # Keep the traceback: the bare message often omits the exception type.
        err_buf.write(traceback.format_exc())
    t1 = time.time()
    exec_results.append({
        'kind': 'python',
        'index': idx,
        'heading': b['heading'],
        'success': success,
        'stdout': out_buf.getvalue(),
        'stderr': err_buf.getvalue(),
        'duration_s': t1 - t0,
    })
print(f"Executed {len(py_blocks)} python examples")

In [None]:
# Section 6: Execute CLI Usage Examples
# Result records for the shell examples are collected here.
shell_results: List[Dict[str, Any]] = []
# Only blocks fenced as bash or sh are run through the shell.
cli_blocks = [blk for blk in usage_blocks if blk['lang'] in ('bash', 'sh')]
def run_shell(code: str, timeout: int = 180, cwd=None):
    """Run a multi-line shell snippet with bash and capture its output.

    The snippet is handed to bash unchanged, prefixed with ``set -e`` so that
    execution stops at the first failing command. Passing the text verbatim
    (instead of gluing the lines together with ``&&``) keeps backslash line
    continuations, heredocs and multi-line constructs such as ``for``/``if``
    intact; comment and blank lines are simply ignored by bash itself.

    Args:
        code: Shell source, possibly spanning several lines.
        timeout: Seconds before ``subprocess.TimeoutExpired`` is raised.
        cwd: Working directory for the command. Defaults to the notebook-level
            ``repo_root`` when omitted.

    Returns:
        The ``subprocess.CompletedProcess`` with text ``stdout``/``stderr``.

    Raises:
        subprocess.TimeoutExpired: If the snippet runs longer than ``timeout``.
    """
    script = 'set -e\n' + code
    workdir = str(repo_root) if cwd is None else str(cwd)
    # -l: login shell, so the user's profile (PATH, env activation) is loaded;
    # -c: execute the script text.
    return subprocess.run(
        ['/bin/bash', '-lc', script],
        capture_output=True,
        text=True,
        timeout=timeout,
        cwd=workdir,
    )
# Run each shell example in turn and record how it went.
for idx, b in enumerate(cli_blocks, 1):
    code = b['code_subbed']
    # Pessimistic defaults; they are overwritten only when the command ran.
    success, out, err = False, '', ''
    t0 = time.time()
    try:
        res = run_shell(code)
    except Exception as e:
        # Covers timeouts and launch failures alike; the example is marked failed.
        err = f"ERROR: {e}"
    else:
        success = (res.returncode == 0)
        out = res.stdout
        err = res.stderr
    t1 = time.time()
    record = {
        'kind': 'shell',
        'index': idx,
        'heading': b['heading'],
        'success': success,
        'stdout': out,
        'stderr': err,
        'duration_s': t1 - t0,
        'code': code,
    }
    shell_results.append(record)
print(f"Executed {len(cli_blocks)} shell examples")

In [None]:
# Section 7: Collect, Save, and Display Outputs
from IPython.display import display, Markdown
def list_new_files(since: float, root=None) -> List[Path]:
    """List files modified at or after a given timestamp.

    Args:
        since: Threshold compared against each file's ``st_mtime``.
        root: Directory to walk recursively. Defaults to the parent of the
            notebook-level ``outputs_dir`` when omitted.

    Returns:
        Sorted list of paths whose modification time is ``>= since``. Files
        that cannot be stat'ed are skipped (best-effort).
    """
    base = outputs_dir.parent if root is None else Path(root)
    found: List[Path] = []
    for dirpath, _, filenames in os.walk(base):
        for fname in filenames:
            candidate = Path(dirpath) / fname
            try:
                mtime = candidate.stat().st_mtime
            except OSError:
                # The file vanished or is unreadable between walk and stat.
                continue
            if mtime >= since:
                found.append(candidate)
    # os.walk order depends on the filesystem; sort for a stable result.
    return sorted(found)
summary: List[Dict[str, Any]] = []
def preview_image(path: Path):
    """Show ``path`` inline when it is a raster image and PIL is importable.

    Does nothing when ``Image`` is None or the suffix is not one of the
    previewable types. Any error raised while opening or displaying the file
    is swallowed, so a bad image never interrupts the notebook.
    """
    previewable = ('.png', '.jpg', '.jpeg', '.tif', '.tiff')
    if Image is not None:
        try:
            if path.suffix.lower() in previewable:
                display(Image.open(path))
        except Exception:
            # Best-effort preview only.
            pass
# Python results first, then shell results; the 1-based position in this
# combined list is what numbers each example directory.
all_results = exec_results + shell_results
for i, res in enumerate(all_results, 1):
    example_id = f"example_{i:02d}_{res['kind']}"
    ex_dir = outputs_dir / example_id
    ex_dir.mkdir(parents=True, exist_ok=True)
    # Save logs. The encoding is explicit because captured output may contain
    # characters the platform's default locale encoding cannot represent.
    (ex_dir / 'stdout.txt').write_text(res.get('stdout', ''), encoding='utf-8')
    (ex_dir / 'stderr.txt').write_text(res.get('stderr', ''), encoding='utf-8')
    # NOTE: detection of files created by each example is not implemented;
    # the results carry no per-example start time to compare mtimes against.
    # Show quick summary
    summary.append({'id': example_id, 'success': res['success'], 'duration_s': res['duration_s']})
display(Markdown('## Execution Summary'))
display(summary)

In [None]:
# Section 8: Basic Assertions and Smoke Tests
ok_count = sum(1 for r in all_results if r['success'])
print('Examples succeeded:', ok_count, '/', len(all_results))
# Structural check: every record must carry the fields the reporting code reads.
# (The earlier `>= 0` assertions could never fail and therefore checked nothing.)
required_keys = {'kind', 'heading', 'success', 'duration_s'}
assert all(required_keys <= set(r) for r in all_results), "Malformed result record in all_results"
# Example failures are reported, not asserted: in an environment without the
# full dependency set some examples are expected to fail.
failed_labels = [f"[{r['kind']}] {r['heading']}" for r in all_results if not r['success']]
if failed_labels:
    print('Failed examples:')
    print('\n'.join(f' - {label}' for label in failed_labels))
print('Smoke tests complete')

In [None]:
# Section 9: Optional Parameter Sweeps from PLAN.md
yaml_code_re = re.compile(r'```yaml\n(.*?)\n```', re.S)
# `md` is only defined once the PLAN.md parsing cell has run; without it there
# is nothing to scan.
if 'md' in globals():
    yaml_blocks = yaml_code_re.findall(md)
else:
    yaml_blocks = []
print('YAML parameter blocks found:', len(yaml_blocks))
# Placeholder: sweep expansion is not implemented, so the run list stays empty
# and the message below is printed unconditionally.
param_sweep_runs = []
print('Parameter sweeps complete (none detected)')

In [None]:
# Section 10: Persist Logs and Reproducibility Artifacts
import platform, pkgutil, hashlib
def env_snapshot() -> Dict[str, Any]:
    """Describe the running interpreter for the reproducibility manifest.

    Returns:
        A dict with the Python version, the platform string, and the first 200
        names (alphabetically) reported by ``pkgutil.iter_modules()``.
    """
    module_names = [mod.name for mod in pkgutil.iter_modules()]
    module_names.sort()
    snapshot: Dict[str, Any] = {}
    snapshot['python'] = platform.python_version()
    snapshot['platform'] = platform.platform()
    # Capped at 200 entries to keep the manifest small.
    snapshot['packages'] = module_names[:200]
    return snapshot
def file_hash(path: Path) -> str:
    """Return the SHA-256 hex digest of the file at ``path``.

    The file is read in 8 KiB pieces so large inputs are never held in memory
    at once. Any failure (for example a missing or unreadable file) yields an
    empty string instead of raising.
    """
    try:
        digest = hashlib.sha256()
        with open(path, 'rb') as stream:
            while True:
                piece = stream.read(8192)
                if not piece:
                    break
                digest.update(piece)
        return digest.hexdigest()
    except Exception:
        return ''
# Everything needed to tell later which inputs, environment and outcomes this
# run had.
manifest = {
    'plan_path': str(plan_path),
    'plan_sha256': file_hash(plan_path),
    'images': [str(p) for p in image_files],
    'outputs_dir': str(outputs_dir),
    # IDs use the `example_NN_kind` pattern of the per-example log directories
    # under outputs_dir, so a manifest entry can be matched to its logs.
    'results': [
        {
            'id': f"example_{i:02d}_{r['kind']}",
            'success': r['success'],
            'duration_s': r['duration_s'],
        }
        for i, r in enumerate(all_results, 1)
    ],
    'env': env_snapshot(),
    'timestamp': time.strftime('%Y-%m-%dT%H:%M:%S'),
}
manifest_path = outputs_dir / 'run_manifest.json'
# Explicit encoding so the file is identical regardless of the platform locale.
manifest_path.write_text(json.dumps(manifest, indent=2), encoding='utf-8')
print('Manifest written to', manifest_path)

## Generate minimal config.yaml for sample images
This creates a minimal configuration referencing the three provided sample mosaic images and sets an output directory under `output/notebook_examples/run1`.

In [None]:
# Create a minimal config.yaml under output/notebook_examples/run1
cfg_out = outputs_dir / 'run1'
cfg_out.mkdir(parents=True, exist_ok=True)
cfg_path = cfg_out / 'config.yaml'
def pick(name_part: str) -> str:
    """Return the path of the first sample image whose name contains ``name_part``.

    The match is case-insensitive. When nothing matches, the first sample image
    is returned so a config can still be written, and a warning is printed
    because that band then points at the wrong image.
    """
    for p in image_files:
        if name_part.lower() in p.name.lower():
            return str(p)
    print(f"WARNING: no sample image matches {name_part!r}; falling back to {image_files[0].name}")
    return str(image_files[0])
def _yaml_quote(value) -> str:
    """Render ``value`` as a double-quoted YAML scalar.

    A JSON string literal is also a valid YAML double-quoted scalar, so paths
    containing ': ', ' #' or other YAML-significant characters stay intact.
    """
    return json.dumps(str(value))
cfg = (
    "\nimages:\n"
    f"  F150W: {_yaml_quote(pick('F150W'))}\n"
    f"  F277W: {_yaml_quote(pick('F277W'))}\n"
    f"  F444W: {_yaml_quote(pick('F444W'))}\n"
    "output:\n"
    f"  output_directory: {_yaml_quote(cfg_out)}\n"
    "  catalog_format: fits\n"
)
cfg_path.write_text(cfg, encoding='utf-8')
print('Wrote config:', cfg_path)

## Orchestrator Dry-Run with Checkpoints
Runs the high-level orchestrator in dry-run mode to validate the workflow and create checkpoints. This does not require the full set of scientific dependencies.

In [None]:
# Dry-run the pipeline orchestrator
# `Pipeline` is imported from the project's `pipeline` module, which is resolved
# through sys.path (presumably the src/ directory added earlier — confirm).
from pipeline import Pipeline
# Build the orchestrator from the config file written by the previous cell.
p = Pipeline(config_path=str(cfg_path))
# NOTE(review): presumably clears previously saved state/checkpoints so the run
# starts clean — confirm against Pipeline.reset().
p.reset()
# NOTE(review): no explicit dry-run flag is passed here; whether this is a dry
# run must be decided by the config or by Pipeline defaults — confirm.
p.run(resume=True)
# Report which steps the orchestrator recorded as completed.
print('Steps completed:', p.state.steps_completed)

## Optional: Full Pipeline Execution
If all dependencies are installed, you can run the full pipeline on the sample images. This may take time and memory.

In [None]:
# This cell may take a long time; uncomment to run if environment is ready.
# import subprocess, shlex
# cmd = f"python {repo_root/'src'/'main.py'} --config {cfg_path} --log-level INFO"
# print(cmd)
# proc = subprocess.run(['/bin/bash','-lc', cmd], text=True)
# print('Return code:', proc.returncode)