SPDX-License-Identifier: GPL-3.0-or-later — (c) 2025 Ulrich Warring and contributors.

This notebook demonstrates the **Fast Triad** A–D–M (analog, digital, memory) screening on simulated sandbox data. It loads three CSV tables, computes the screening statistics and flags, and produces diagnostic plots and reports. See [docs/DATASETS.md](../docs/DATASETS.md) for dataset details. The expected CSV schema is: `heating.csv` with columns `trap_id, run_id, mode, frequency_hz, heating_rate_quanta_per_s, heating_rate_err`; `sb_trials.csv` with columns `trap_id, run_id, sequence, outcome, t_rel_s`; and `events.csv` with columns `trap_id, run_id, t_s`.

In [None]:
import sys
import numpy as np
import pandas as pd
import scipy
import matplotlib
import matplotlib.pyplot as plt

print('Python:', sys.version)
print('numpy:', np.__version__)
print('pandas:', pd.__version__)
print('scipy:', scipy.__version__)
print('matplotlib:', matplotlib.__version__)

%matplotlib inline
np.random.seed(2025)


In [None]:
# Parameters (edit as needed)
from pathlib import Path

DATA_ROOT = 'data-sandbox/clean'  # or 'data' after unzipping other archives
OUT_DIR = 'out'
# Derived from OUT_DIR so that changing the output directory also moves the
# plot directory; the default value is still 'out/plots'.
PLOT_DIR = f'{OUT_DIR}/plots'

# Create both output directories up front; exist_ok keeps the cell safe to re-run.
for out_path in (OUT_DIR, PLOT_DIR):
    Path(out_path).mkdir(parents=True, exist_ok=True)


In [None]:
# Import FastTriadAnalyzer
import sys
from pathlib import Path

# Make the local `src` directory importable. The path is resolved against the
# current working directory, so the notebook must be run from the directory
# that contains `src`. The membership check keeps the cell idempotent:
# re-running it does not pile duplicate entries onto sys.path.
SRC_DIR = str(Path('src').resolve())
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)
from flyby.triad import FastTriadAnalyzer


In [None]:
# Load sandbox data
analyzer = FastTriadAnalyzer()
analyzer.load_data(DATA_ROOT)

# Preview the first rows of each loaded table; a table that is missing or
# empty is reported as such instead of being displayed.
dfs = {
    'heating': analyzer.heating,
    'sb_trials': analyzer.trials,
    'events': analyzer.events,
}
for table_name, table in dfs.items():
    print(f'{table_name}:')
    if table is None or table.empty:
        print('  no data')
        continue
    display(table.head())


In [None]:
# Evaluate A–D–M statistics
# Columns shown in the summary: run identifiers first, then one group of
# three columns for each of the A, D and M tests.
adm_columns = [
    'trap_id', 'run_id',
    'A_stat', 'A_p', 'b_A',
    'D_stat', 'D_p', 'b_D',
    'M_stat', 'M_p', 'b_M',
]
results = analyzer.evaluate_data()
results = results.loc[:, adm_columns]
results


In [None]:
# Generate CSV/JSON summaries and default plots
analyzer.generate_output(OUT_DIR)

from pathlib import Path

# Files this notebook expects to find in OUT_DIR after generate_output.
# Check that they exist rather than reporting them unconditionally, so a
# failed or renamed output shows up here instead of several cells later.
summary_path = Path(OUT_DIR) / 'triad_summary.csv'
report_path = Path(OUT_DIR) / 'triad_report.json'
expected_paths = [summary_path, report_path]
created_paths = [p for p in expected_paths if p.exists()]
missing_paths = [p for p in expected_paths if not p.exists()]
print('Created:', *created_paths)
if missing_paths:
    print('Missing (expected but not found):', *missing_paths)

# Lists every PNG currently in PLOT_DIR, which can include files left over
# from earlier runs.
png_paths = sorted(Path(PLOT_DIR).glob('*.png'))
print('Plots:')
for p in png_paths:
    print(' -', p)
if not png_paths:
    print(' - (none found)')


In [None]:
# Inline diagnostics per run
from pathlib import Path


def _select_run(table, trap_id, run_id):
    """Return the rows of `table` for one (trap_id, run_id) pair.

    Returns None when the table itself is missing or empty, or when no row
    matches, so callers need a single `is not None` check. The table is
    tested for None *before* it is filtered: the analyzer attributes may be
    None when a CSV was not loaded.
    """
    if table is None or table.empty:
        return None
    run_rows = table[(table['trap_id'] == trap_id) & (table['run_id'] == run_id)]
    return None if run_rows.empty else run_rows


def _event_histogram(times, target_bins=30, min_bin_s=1.0):
    """Count events in equal-width time bins and return (centers, counts).

    The bin width is span / target_bins, but never below min_bin_s. Edges are
    built from an explicit bin count so that there is always at least one bin
    (also when every timestamp is identical), and the last edge is clamped to
    the maximum so that rounding can never drop the latest event.
    """
    t_min, t_max = float(times.min()), float(times.max())
    span = t_max - t_min
    bin_s = max(span / target_bins, min_bin_s)
    n_bins = max(int(np.ceil(span / bin_s)), 1)
    edges = t_min + bin_s * np.arange(n_bins + 1)
    edges[-1] = max(edges[-1], t_max)
    counts, edges = np.histogram(times, bins=edges)
    centers = 0.5 * (edges[:-1] + edges[1:])
    return centers, counts


def plot_run_diagnostics(analyzer, trap_id, run_id):
    """Build the three-panel diagnostic figure for one run and return it.

    Panels, top to bottom: heating rate vs. mode frequency (log-log), binary
    trial outcomes vs. relative time, and event counts per time bin. A panel
    whose data is unavailable is left empty but still labelled.
    """
    fig, axes = plt.subplots(3, 1, figsize=(8, 10))
    fig.suptitle(f'{trap_id} / {run_id} — Fast Triad diagnostics')

    # Heating vs frequency
    heating = _select_run(analyzer.heating, trap_id, run_id)
    if heating is not None:
        heating = heating.dropna(subset=['frequency_hz', 'heating_rate_quanta_per_s'])
        axes[0].loglog(heating['frequency_hz'], heating['heating_rate_quanta_per_s'], 'o')
    axes[0].set_xlabel('Mode frequency (Hz)')
    axes[0].set_ylabel('Heating rate (quanta/s)')
    axes[0].grid(True, which='both', ls=':')

    # Binary outcomes
    trials = _select_run(analyzer.trials, trap_id, run_id)
    if trials is not None and 't_rel_s' in trials:
        trials = trials.sort_values('t_rel_s')
        axes[1].step(trials['t_rel_s'], trials['outcome'], where='post')
    axes[1].set_xlabel('t_rel (s)')
    axes[1].set_ylabel('Outcome (0/1)')
    axes[1].grid(True, ls=':')

    # Event counts
    events = _select_run(analyzer.events, trap_id, run_id)
    if events is not None and 't_s' in events:
        # Drop missing timestamps first: a NaN min/max would break the binning.
        event_times = events['t_s'].dropna().sort_values()
        if len(event_times) > 1:
            centers, counts = _event_histogram(event_times)
            axes[2].plot(centers, counts, '-o')
    axes[2].set_xlabel('t (s)')
    axes[2].set_ylabel('Event count per bin')
    axes[2].grid(True, ls=':')

    # Leave room at the top for the suptitle.
    fig.tight_layout(rect=[0, 0.03, 1, 0.95])
    return fig


if analyzer.results is not None and not analyzer.results.empty:
    for _, row in analyzer.results.iterrows():
        trap_id, run_id = row['trap_id'], row['run_id']
        fig = plot_run_diagnostics(analyzer, trap_id, run_id)
        fig.savefig(Path(PLOT_DIR) / f'{trap_id}__{run_id}.png', dpi=150)
        display(fig)
        # Close after displaying so figures do not accumulate in memory and
        # are not rendered a second time at the end of the cell.
        plt.close(fig)
else:
    print('No runs to plot.')


The A/D/M flags (b_A, b_D, b_M) highlight runs where the corresponding analog, digital, or memory test yielded p-values below 5e-3. These results are a screening step only; flagged runs merit deeper investigation but do not constitute a discovery claim. The datasets used here are fully simulated, yet the same schema applies to real measurements.

### Optional: run with alternative dataset

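Unzip a larger sandbox dataset, point `DATA_ROOT` at it, and reload the data; then rerun the evaluation, output, and plotting cells above. Do not rerun the parameters cell afterwards, because it resets `DATA_ROOT` to the default.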

In [None]:
# Optional: uncomment the lines below to switch to the larger sandbox dataset.
# `!unzip` is an IPython shell escape, so the `unzip` tool must be on PATH.
# !unzip datasets/sandbox_flyby_dataset.zip -d data/
# DATA_ROOT = 'data/flyby'
# analyzer.load_data(DATA_ROOT)
# (re-run evaluation and plotting cells)


---
Analysis environment specified in `environment.yml`. This notebook is licensed under GPL-3.0-or-later; please keep analysis steps transparent and reproducible in any contributions.