In [1]:
# Setup
# If running in Colab, install the package first:
# !git clone https://github.com/victorlavrenko/rofa
# %cd rofa
# %pip install -e .

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Add the notebook's parent directory to the Python import path
# (presumably so `rofa` can be imported from a source checkout — confirm).
import os
import sys

_parent_dir = os.path.abspath('..')
# Re-running this cell in the same kernel must not pile up duplicate entries.
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

from rofa import analysis
from rofa.io import download, unpack_zip


In [2]:
# Get run artifacts
from pathlib import Path
from urllib.parse import urlparse
import os

# -----------------------------
# Input: two explicit runs
# -----------------------------
# For each run, fill in EITHER a local folder (*_RUN_DIR) OR a release asset
# URL (*_ASSET_URL). When both are set, the local folder takes precedence and
# nothing is downloaded. Leave both empty to skip that run.

# Greedy run (local folder OR release asset URL)
GREEDY_RUN_DIR = ""  # e.g. r"C:\Users\...\rofa_runs\greedy_YYYYMMDD_HHMMSS"
GREEDY_ASSET_URL = "https://github.com/victorlavrenko/rofa/releases/download/N-10/greedy_e056184e9edc_20260108_132422.zip"  # zipped run archive published as a GitHub release asset

# Branches run (local folder OR release asset URL)
BRANCHES_RUN_DIR = ""  # e.g. r"C:\Users\...\rofa_runs\branches_YYYYMMDD_HHMMSS"
BRANCHES_ASSET_URL = "https://github.com/victorlavrenko/rofa/releases/download/N-10/branches_e056184e9edc_20260108_132826.zip"

def _download_and_unpack(asset_url: str) -> str:
    """Download a run archive into ``runs/`` and unpack it next to itself.

    The archive keeps the file name taken from the URL path (``run.zip`` when
    the URL path has no final component). It is passed to ``unpack_zip`` with a
    destination folder named after the archive minus its last extension.

    Args:
        asset_url: URL of the zipped run archive.

    Returns:
        The destination folder handed to ``unpack_zip``, as a string.
    """
    target_root = Path('runs')
    target_root.mkdir(exist_ok=True)

    archive_name = Path(urlparse(asset_url).path).name
    if not archive_name:
        archive_name = 'run.zip'
    archive_path = target_root / archive_name
    download(asset_url, str(archive_path))

    extract_dir = target_root / archive_path.stem
    unpack_zip(str(archive_path), str(extract_dir))
    return str(extract_dir)

def _normalize_run_dir(run_dir: str) -> str:
    if not run_dir:
        return ""
    summary_path = Path(run_dir) / 'summary.jsonl'
    if summary_path.exists():
        return run_dir
    candidates = [p for p in Path(run_dir).iterdir() if p.is_dir()]
    if len(candidates) == 1 and (candidates[0] / 'summary.jsonl').exists():
        return str(candidates[0])
    return run_dir

def resolve_run_dir(run_dir: str, asset_url: str) -> str:
    """Turn a (local folder, asset URL) pair into one normalized run folder.

    Args:
        run_dir: Local run folder; wins over ``asset_url`` when non-empty.
        asset_url: Archive URL, downloaded and unpacked only when ``run_dir``
            is empty.

    Returns:
        The normalized run folder path, or ``""`` when both inputs are empty.
    """
    if not run_dir and not asset_url:
        return ""
    # A local folder takes precedence; the download only happens without one.
    source_dir = run_dir if run_dir else _download_and_unpack(asset_url)
    return _normalize_run_dir(source_dir)

# Replace each configured input with the concrete folder it resolves to
# (empty string when neither a folder nor a URL was given for that run).
GREEDY_RUN_DIR = resolve_run_dir(run_dir=GREEDY_RUN_DIR, asset_url=GREEDY_ASSET_URL)
BRANCHES_RUN_DIR = resolve_run_dir(run_dir=BRANCHES_RUN_DIR, asset_url=BRANCHES_ASSET_URL)

print("Greedy run:", GREEDY_RUN_DIR)
print("Branches run:", BRANCHES_RUN_DIR)

# The rest of the notebook needs at least one run to work with.
if not (GREEDY_RUN_DIR or BRANCHES_RUN_DIR):
    raise ValueError("Provide at least one run: GREEDY_* or BRANCHES_*.")



Greedy run: runs\greedy_e056184e9edc_20260108_132422\greedy_e056184e9edc_20260108_132422
Branches run: runs\branches_e056184e9edc_20260108_132826\branches_e056184e9edc_20260108_132826


In [3]:
# Load + parse
import json
from pathlib import Path

from rofa import analysis

# Load the summary of each run that was provided; a missing run stays None.
df_greedy = analysis.load_summary(GREEDY_RUN_DIR) if GREEDY_RUN_DIR else None
df_branches = analysis.load_summary(BRANCHES_RUN_DIR) if BRANCHES_RUN_DIR else None

# Every loaded run must carry these columns; fail early with a clear message.
required_cols = {'gold'}
for run_label, run_df in (('Greedy', df_greedy), ('Branches', df_branches)):
    if run_df is None:
        continue
    missing = required_cols - set(run_df.columns)
    if missing:
        raise ValueError(f'{run_label} run missing required columns: {missing}')

print('df_greedy:', None if df_greedy is None else df_greedy.shape)
print('df_branches:', None if df_branches is None else df_branches.shape)

# default: use branches if present, else greedy
df = df_branches if df_branches is not None else df_greedy

# NOTE(review): run metadata below is read from the GREEDY folder when both
# runs are provided, while `df` above defaults to the BRANCHES frame — confirm
# that this asymmetry is intended.
run_dir = Path(GREEDY_RUN_DIR or BRANCHES_RUN_DIR)
summary_path = run_dir / 'summary.jsonl'
manifest_path = run_dir / 'manifest.json'
question_set_path = run_dir / 'question_set.json'

# Read JSON explicitly as UTF-8: the platform default (e.g. cp1252 on Windows)
# can mis-decode or reject non-ASCII content.
manifest = None
if manifest_path.exists():
    manifest = json.loads(manifest_path.read_text(encoding='utf-8'))
    print('Loaded manifest:', manifest.get('run_id'), manifest.get('method'))

if question_set_path.exists():
    qs = json.loads(question_set_path.read_text(encoding='utf-8'))
    print('Question set:', qs.get('qs_id'), 'n=', len(qs.get('examples', [])))



df_greedy: (10, 14)
df_branches: (10, 16)
Loaded manifest: greedy_e056184e9edc_20260108_132422 greedy
Question set: e056184e9edc n= 10


In [4]:
# R1: greedy accuracy
# df_greedy is None when only a BRANCHES run was provided, so check for that
# before touching its columns.
greedy_accuracy = None
if df_greedy is not None and 'prediction' in df_greedy.columns:
    greedy_accuracy = analysis.accuracy_greedy(df_greedy)
    print('Greedy accuracy:', greedy_accuracy)
else:
    print('Not a greedy run; skipping.')


Greedy accuracy: 0.5


In [5]:
# R2: leader accuracy (needs the leader_correct column of a branches run)
leader_accuracy = None
if 'leader_correct' not in df.columns:
    print('Not a branch run; skipping.')
else:
    leader_accuracy = analysis.accuracy_leader(df)
    print('Leader accuracy:', leader_accuracy)


Leader accuracy: 0.5


In [6]:
# R3: distribution of max_frac across the run
max_frac_hist = None
if 'max_frac' not in df.columns:
    print('No max_frac column; skipping.')
else:
    max_frac_hist = analysis.max_frac_distribution(df)
    display(max_frac_hist)


max_frac
(-0.001, 0.5]    0
(0.5, 0.8]       4
(0.8, 0.9]       1
(0.9, 1.0]       5
Name: count, dtype: int64

In [7]:
# R4: stats for unanimous examples
unanimous = None
if 'max_frac' not in df.columns:
    print('No max_frac column; skipping.')
else:
    unanimous = analysis.unanimous_stats(df)
    print(unanimous)


{'count': 5, 'accuracy': np.float64(0.6)}


In [8]:
# R5: stats for near-unanimous examples (threshold passed explicitly below)
near_unanimous = None
if 'max_frac' not in df.columns:
    print('No max_frac column; skipping.')
else:
    near_unanimous = analysis.near_unanimous_stats(df, threshold=0.9)
    print(near_unanimous)


{'count': 6, 'accuracy': np.float64(0.5)}


In [9]:
# R6: top-2 coverage (requires per-branch predictions)
top2_rate = None
if 'branch_preds' not in df.columns:
    print('No branch predictions; skipping.')
else:
    top2_rate = analysis.top2_coverage(df)
    print('Top-2 coverage rate:', top2_rate)


Top-2 coverage rate: 0.6


In [10]:
# R7: R / W / Other counts per max_frac bin
rw_other = None
if 'max_frac' not in df.columns:
    print('No max_frac column; skipping.')
else:
    rw_other = analysis.rw_other_breakdown(df)
    display(rw_other)


  .groupby(["bin", "label"])


label,Other,R,W
bin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"(-0.001, 0.5]",0,0,0
"(0.5, 0.8]",4,0,0
"(0.8, 0.9]",0,0,1
"(0.9, 1.0]",0,3,2


In [11]:
# R8: error modes — rows reported by analysis.unanimous_wrong
unanimous_wrong = None
if 'max_frac' not in df.columns:
    print('No max_frac column; skipping.')
else:
    unanimous_wrong = analysis.unanimous_wrong(df)
    print('Unanimous wrong count:', len(unanimous_wrong))
    # Show only the first rows to keep the output short.
    display(unanimous_wrong.head())


Unanimous wrong count: 2


Unnamed: 0,index,picked_index,id,gold,branch_preds,leader,max_frac,valid_n,none_n,variation_ratio,entropy_bits,correct_fraction,leader_correct,class,subject_name,timestamp
4,4,5,d8a284c5-175a-4439-8ef2-5ff24b41f3cc,B,"[A, A, A, A, A, A, A, A, A, A]",A,1.0,10,0,0.0,-1.442823e-12,0.0,False,unanimous,Microbiology,2026-01-08 13:41:52.700453+00:00
7,7,8,e2dd93d8-8047-4e9a-9389-e2b1781f8a35,B,"[A, A, A, A, A, A, A, A, A, A]",A,1.0,10,0,0.0,-1.442823e-12,0.0,False,unanimous,Surgery,2026-01-08 13:49:47.576100+00:00


In [None]:
# R9: majority vote does not help (greedy vs leader)
# Compares greedy and leader correctness on the examples present in BOTH runs.
# NOTE: this cell rebinds greedy_accuracy / leader_accuracy (first set in R1/R2)
# to values computed on the merged rows, so the gap printed and exported below
# is always the difference of the two accuracies reported alongside it.
majority_vote_gap = None

if df_greedy is None or df_branches is None:
    print("R9 skipped: provide both GREEDY and BRANCHES runs.")
else:
    # Join on every key column the two runs have in common.
    key_candidates = ["id", "index"]
    key_cols = [c for c in key_candidates if c in df_greedy.columns and c in df_branches.columns]
    if not key_cols:
        raise ValueError("No shared key columns to compare GREEDY and BRANCHES runs.")

    # Non-key columns that exist in both frames get the suffixes below;
    # columns unique to one frame keep their plain name.
    merged = df_greedy.merge(
        df_branches,
        on=key_cols,
        how="inner",
        suffixes=("_greedy", "_branches"),
    )

    # Without overlapping rows the means below would silently be NaN and end
    # up in the exported report.
    if merged.empty:
        raise ValueError("GREEDY and BRANCHES runs share no examples; nothing to compare.")

    # Greedy correctness: prefer a precomputed flag (plain or suffixed name),
    # otherwise derive it from prediction vs. gold.
    if "is_correct" in merged.columns:
        greedy_correct = merged["is_correct"].fillna(False).astype(bool)
    elif "is_correct_greedy" in merged.columns:
        greedy_correct = merged["is_correct_greedy"].fillna(False).astype(bool)
    elif "prediction" in merged.columns and "gold_greedy" in merged.columns:
        greedy_correct = merged["prediction"] == merged["gold_greedy"]
    elif "prediction_greedy" in merged.columns and "gold_greedy" in merged.columns:
        greedy_correct = merged["prediction_greedy"] == merged["gold_greedy"]
    else:
        raise ValueError(f"Greedy run missing correctness fields. Available: {sorted(merged.columns)}")

    # Leader correctness: same strategy for the branches run.
    if "leader_correct" in merged.columns:
        leader_correct = merged["leader_correct"].fillna(False).astype(bool)
    elif "leader_correct_branches" in merged.columns:
        leader_correct = merged["leader_correct_branches"].fillna(False).astype(bool)
    elif "leader" in merged.columns and ("gold_branches" in merged.columns or "gold" in merged.columns):
        gold_col = "gold_branches" if "gold_branches" in merged.columns else "gold"
        leader_correct = merged["leader"] == merged[gold_col]
    elif "leader_branches" in merged.columns and "gold_branches" in merged.columns:
        leader_correct = merged["leader_branches"] == merged["gold_branches"]
    else:
        raise ValueError(f"Branches run missing leader fields. Available: {sorted(merged.columns)}")

    greedy_accuracy = greedy_correct.mean()
    leader_accuracy = leader_correct.mean()
    majority_vote_gap = leader_accuracy - greedy_accuracy

    print("Greedy accuracy:", greedy_accuracy)
    print("Branches accuracy:", leader_accuracy)
    print("Branches - Greedy:", majority_vote_gap)


ValueError: Greedy run missing prediction fields for comparison.

In [None]:
# Debug helper: list the columns of the R9 merge. `merged` only exists when
# R9 ran with both runs provided, so guard against a NameError otherwise.
if 'merged' in globals():
    print("Merged columns:")
    print(sorted(merged.columns))
else:
    print("No merged frame available; R9 needs both GREEDY and BRANCHES runs.")

Merged cols: ['prediction', 'is_correct', 'correct_fraction', 'leader_correct']
Merged shape: (10, 28)


In [None]:
# R10: per-subject accuracy (optional; needs a subject_name column)
subject_breakdown = None
if 'subject_name' not in df.columns:
    print('No subject_name column; skipping.')
else:
    # Use leader_correct when the frame has it, otherwise is_correct.
    if 'leader_correct' in df.columns:
        accuracy_field = 'leader_correct'
    else:
        accuracy_field = 'is_correct'
    subject_breakdown = analysis.subject_accuracy(df, accuracy_field=accuracy_field)
    display(subject_breakdown.head(10))


In [None]:
# R11: export paper tables
from pathlib import Path
import json

# Reports are written under reports/<run folder name>; the greedy folder names
# the report when both runs are present.
run_id = Path(GREEDY_RUN_DIR or BRANCHES_RUN_DIR).name if (GREEDY_RUN_DIR or BRANCHES_RUN_DIR) else 'run'
report_dir = Path('reports') / run_id
report_dir.mkdir(parents=True, exist_ok=True)

# Scalar metrics collected by the cells above (None when a cell was skipped).
paper_report = {
    'greedy_accuracy': greedy_accuracy,
    'leader_accuracy': leader_accuracy,
    'unanimous': unanimous,
    'near_unanimous': near_unanimous,
    'top2_coverage': top2_rate,
    'majority_vote_gap': majority_vote_gap,
}


def _json_default(value):
    """Convert values json cannot encode natively (e.g. NumPy scalars).

    Anything exposing ``.item()`` is unwrapped to its Python equivalent;
    every other unsupported type raises ``TypeError`` as json itself would.
    """
    if hasattr(value, 'item'):
        return value.item()
    raise TypeError(f'Object of type {type(value).__name__} is not JSON serializable')


with open(report_dir / 'paper_report.json', 'w', encoding='utf-8') as f:
    json.dump(paper_report, f, indent=2, default=_json_default)

# Tabular results are exported only if the corresponding cell produced them.
if max_frac_hist is not None:
    max_frac_hist.to_csv(report_dir / 'max_frac_distribution.csv')
if rw_other is not None:
    rw_other.to_csv(report_dir / 'rw_other_breakdown.csv')
if subject_breakdown is not None:
    subject_breakdown.to_csv(report_dir / 'subject_accuracy.csv')

print('Saved reports to', report_dir)



## Add your own analysis below
