# Master Offline Bootstrap/Build Batch Results

This notebook aggregates `batch_<batch_number>_results.csv` logs generated by `run_batch_tests.py`.

It computes per-scenario and per-batch timing/exit-code statistics, visualizes timing behavior, and flags anomalies (unexpected exit codes and timing outliers).

In [None]:
# Check that the notebook's third-party libraries can be imported. Anything
# that cannot be found is reported as a warning instead of stopping this cell.
import importlib


def _can_import(module_name):
    """Return True when `module_name` imports without ModuleNotFoundError."""
    try:
        importlib.import_module(module_name)
    except ModuleNotFoundError:
        return False
    return True


required = ('pandas', 'matplotlib')
missing = [pkg for pkg in required if not _can_import(pkg)]

if not missing:
    print('Python environment check passed for:', ', '.join(required))
else:
    print('Dependency warning: missing optional libraries:', ', '.join(missing))
    print('Notebook summary cells can run, but plotting/analytics cells need these packages installed.')


In [None]:
# Notebook configuration and imports.
# Plotting rationale: combine distribution plots (histograms/boxplots) with trend plots
# to reveal stability, drift, and exit-code anomalies across batches.
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt

# Apply matplotlib's 'ggplot' style sheet so the figures drawn later share one look.
plt.style.use('ggplot')

# Directory the batch CSV logs are read from; update it if the logs live elsewhere.
DATA_DIR = Path('.')

# Pinned duration threshold in seconds; it must be a number within [0, 10].
PINNED_DURATION_SECONDS = 1.0
_pinned_in_range = 0 <= PINNED_DURATION_SECONDS <= 10
if not _pinned_in_range:
    raise ValueError('PINNED_DURATION_SECONDS must be a numeric value between 0 and 10')

# Exit codes each scenario is allowed to finish with.
ALLOWED_EXIT_CODES = dict(
    fail_fast_bootstrap={42},
    toolchain_presence_check={0},
    compile_only_build_attempt={0},
    missing_sdk={42},
    missing_staged_archive={42},
    invalid_architecture={42, 43},
    recovery_success={0},
    recovery_failure={44},
)


In [None]:
# Read every `batch_*_results.csv` under DATA_DIR into a single frame. Each row
# carries batch/test identifiers, the scenario key, exit-code fields, the
# execution time in seconds, and the combined stdout/stderr text.
csv_files = sorted(DATA_DIR.glob('batch_*_results.csv'))
if not csv_files:
    raise FileNotFoundError(f'No batch CSVs found in {DATA_DIR.resolve()}')

df = pd.concat([pd.read_csv(csv_path) for csv_path in csv_files], ignore_index=True)

# Columns the rest of the notebook relies on; fail early if any are absent.
required_columns = [
    'batch_number',
    'test_id',
    'scenario_key',
    'exit_code',
    'execution_time_seconds',
    'unexpected_exit_code',
    'stdout_stderr',
]
missing_columns = [name for name in required_columns if name not in df.columns]
if missing_columns:
    raise KeyError(f'Missing required CSV columns from run_batch_tests.py: {missing_columns}')

# A row is invalid when any non-log required field is null, or when its log
# payload is null or an empty string. Such rows abort the load rather than
# being filled in with placeholder values.
_non_log_columns = [name for name in required_columns if name != 'stdout_stderr']
invalid_required_mask = (
    df[_non_log_columns].isna().any(axis=1)
    | df['stdout_stderr'].isna()
    | df['stdout_stderr'].astype(str).str.len().eq(0)
)
if invalid_required_mask.any():
    bad_rows = df.loc[invalid_required_mask, ['batch_number', 'test_id', 'scenario_key']].head(10)
    raise ValueError(
        'Detected null/empty required fields in batch CSV rows; '
        f'count={int(invalid_required_mask.sum())}; sample={bad_rows.to_dict(orient="records")}'
    )

# Turn literal backslash-n sequences in the log text into real newlines, but
# only on rows that actually hold a non-empty payload.
log_mask = df['stdout_stderr'].notna() & df['stdout_stderr'].astype(str).str.len().gt(0)
_normalized_logs = df.loc[log_mask, 'stdout_stderr'].astype(str).str.replace('\\n', '\n', regex=False)
df.loc[log_mask, 'stdout_stderr'] = _normalized_logs

# Coerce the numeric columns, raising on anything unparseable; the integer-like
# ones are stored with the nullable Int64 dtype.
df['execution_time_seconds'] = pd.to_numeric(df['execution_time_seconds'], errors='raise')
for _int_column in ('exit_code', 'batch_number', 'test_id', 'unexpected_exit_code'):
    df[_int_column] = pd.to_numeric(df[_int_column], errors='raise').astype('Int64')

# Range checks on the coerced values.
if df['execution_time_seconds'].le(0).any():
    raise ValueError('execution_time_seconds contains non-positive values')
if df['unexpected_exit_code'].lt(0).any():
    raise ValueError('unexpected_exit_code contains negative values')

df.head()


In [None]:
# Per-scenario timing statistics, plus a count of rows for every
# (scenario, exit code) pair.
_timing_by_scenario = df.groupby('scenario_key')['execution_time_seconds']
scenario_stats = _timing_by_scenario.agg(['count', 'mean', 'min', 'max', 'std']).reset_index()
exit_counts = (
    df.groupby(['scenario_key', 'exit_code'])
      .size()
      .reset_index(name='count')
)
scenario_stats


In [None]:
# Display the table of row counts per (scenario_key, exit_code) pair.
exit_counts


In [None]:
# Roll up each (batch, scenario) pair: test count, timing statistics, and the
# sum of the `unexpected_exit_code` column.
_batch_groups = df.groupby(['batch_number', 'scenario_key'])
batch_summary = _batch_groups.agg(
    tests=pd.NamedAgg(column='test_id', aggfunc='count'),
    mean_time_s=pd.NamedAgg(column='execution_time_seconds', aggfunc='mean'),
    min_time_s=pd.NamedAgg(column='execution_time_seconds', aggfunc='min'),
    max_time_s=pd.NamedAgg(column='execution_time_seconds', aggfunc='max'),
    std_time_s=pd.NamedAgg(column='execution_time_seconds', aggfunc='std'),
    unexpected_exit_count=pd.NamedAgg(column='unexpected_exit_code', aggfunc='sum'),
).reset_index()
batch_summary


In [None]:
# Anomaly detection:
# 1) Unexpected exit codes by scenario contract.
# 2) Timing outliers using IQR fences per scenario.
def is_unexpected_exit(row):
    """Report whether a row's exit code falls outside its scenario's allowed set.

    `row` must support lookup by 'scenario_key' and 'exit_code'. A scenario
    with no entry in ALLOWED_EXIT_CODES has no allowed codes, so every exit
    code for it counts as unexpected.
    """
    permitted_codes = ALLOWED_EXIT_CODES.get(row['scenario_key'], set())
    if row['exit_code'] in permitted_codes:
        return False
    return True

# Mark each row whose exit code is not permitted for its scenario.
df['unexpected_exit'] = df.apply(is_unexpected_exit, axis=1)

_outlier_columns = ['batch_number', 'test_id', 'scenario_key', 'execution_time_seconds', 'exit_code']


def _iqr_outliers(scenario_rows):
    """Return the rows whose execution time lies outside the 1.5 * IQR fences."""
    durations = scenario_rows['execution_time_seconds']
    q1 = durations.quantile(0.25)
    q3 = durations.quantile(0.75)
    iqr = q3 - q1
    lower = q1 - 1.5 * iqr
    upper = q3 + 1.5 * iqr
    beyond_fences = (durations < lower) | (durations > upper)
    return scenario_rows.loc[beyond_fences, _outlier_columns]


# The fences are computed separately for every scenario.
outlier_flags = [_iqr_outliers(scenario_rows) for _, scenario_rows in df.groupby('scenario_key')]

outliers = pd.concat(outlier_flags, ignore_index=True) if outlier_flags else pd.DataFrame()
unexpected_rows = df.loc[df['unexpected_exit']]

print('Unexpected exit rows:', len(unexpected_rows))
print('Timing outliers:', len(outliers))
unexpected_rows.head(), outliers.head()


In [None]:
# List the tests whose execution time exceeds the pinned duration threshold.
_over_threshold = df['execution_time_seconds'] > PINNED_DURATION_SECONDS
breaches = df[_over_threshold]
print(f'Tests above pinned duration ({PINNED_DURATION_SECONDS}s):', len(breaches))
_breach_columns = ['batch_number', 'test_id', 'scenario_key', 'execution_time_seconds', 'exit_code']
display(breaches[_breach_columns].head(20))


In [None]:
# Draw one execution-duration histogram per scenario.
for scenario_name, scenario_rows in df.groupby('scenario_key'):
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.hist(scenario_rows['execution_time_seconds'].dropna(), bins=15, edgecolor='black')
    ax.set_title(f'Execution Duration Histogram - {scenario_name}')
    ax.set_xlabel('Execution time (seconds)')
    ax.set_ylabel('Frequency')
    fig.tight_layout()
    plt.show()


In [None]:
# Boxplot of exit codes per scenario: one box per scenario key, means marked.
plt.figure(figsize=(11, 5))
ordered = sorted(df['scenario_key'].dropna().unique())
box_data = [df.loc[df['scenario_key'] == key, 'exit_code'].dropna().astype(int) for key in ordered]
# Name the boxes through xticks rather than boxplot's `labels=` keyword, which
# Matplotlib 3.9 deprecated in favour of `tick_labels`. Boxes sit at positions
# 1..N by default, so this labelling works on both older and newer releases.
plt.boxplot(box_data, showmeans=True)
plt.xticks(range(1, len(ordered) + 1), ordered, rotation=25, ha='right')
plt.title('Exit Code Distribution by Scenario')
plt.xlabel('Scenario')
plt.ylabel('Exit code')
plt.tight_layout()
plt.show()


In [None]:
# Plot the mean execution time per batch, one line per scenario.
time_by_batch = df.groupby(['batch_number', 'scenario_key'], as_index=False)['execution_time_seconds'].mean()

fig, ax = plt.subplots(figsize=(11, 5))
for scenario_name, scenario_means in time_by_batch.groupby('scenario_key'):
    ax.plot(
        scenario_means['batch_number'],
        scenario_means['execution_time_seconds'],
        marker='o',
        label=scenario_name,
    )

ax.set_title('Mean Execution Time Variation Across Batches')
ax.set_xlabel('Batch number')
ax.set_ylabel('Mean execution time (seconds)')
ax.legend(loc='best')
fig.tight_layout()
plt.show()
