In [None]:
# Cell 0 — Bootstrap
!git clone https://github.com/victorlavrenko/rofa
%cd rofa
%pip install -e .
# If editable install fails, fallback to PYTHONPATH:
# import sys
# sys.path.append('/content/rofa')

from google.colab import drive
drive.mount('/content/drive')

OUT_BASE = '/content/drive/MyDrive/rofa_runs'


In [None]:
# Cell 1 — Prepare the fixed question set (IDs)
# Builds one question set from the constants below and saves it as JSON under
# OUT_BASE/question_sets so the generation cells can point at the same file.
from pathlib import Path

from rofa.question_set import create_question_set, save_question_set

# Sampling configuration; the generation cells reuse these constants.
DATASET_NAME = 'openlifescienceai/medmcqa'
DATASET_SPLIT = 'validation'
SEED = 42
N = 200
SUBJECTS = 20

dataset_spec = {'dataset_name': DATASET_NAME, 'dataset_split': DATASET_SPLIT}
selection_spec = {
    'seed': SEED,
    'n': N,
    'subjects': SUBJECTS,
    # Even share per subject (N / SUBJECTS) scaled by 1.1, plus one.
    # NOTE: this expression evaluates to a float, not an int.
    'max_per_subject': N / SUBJECTS * 1.1 + 1,
}
qs = create_question_set(dataset_spec, selection_spec)
qs_id = qs.qs_id

# The file is named after the question-set id.
qs_dir = Path(OUT_BASE) / 'question_sets'
qs_dir.mkdir(parents=True, exist_ok=True)
qs_path = qs_dir / f'{qs_id}.json'
save_question_set(qs, str(qs_path))

print(f'Saved question set: {qs_id} -> {qs_path}')


In [None]:
# Cell 2 — Run Greedy generation (native Python call)
# Runs the greedy method over the question set saved in Cell 1, writing into
# OUT_BASE/runs under a timestamped run id.
from datetime import datetime, timezone
from pathlib import Path

from rofa.methods import GreedyDecode
from rofa.model import MODEL_ID, load_model_with_fallback, load_tokenizer
from rofa.runner import run_generation
from rofa.schemas import GenerationConfig

# datetime.utcnow() is deprecated since Python 3.12; an aware UTC "now"
# formats to the same YYYYmmdd_HHMMSS string.
RUN_ID_GREEDY = f"greedy_{qs_id}_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}"
RUNS_DIR = Path(OUT_BASE) / 'runs'
RUNS_DIR.mkdir(parents=True, exist_ok=True)

# Load only when the kernel does not already hold a tokenizer/model, so
# re-running this cell does not load a second copy while the first is still
# referenced. Both loaders take no arguments here, so an existing pair is
# presumably interchangeable; run `del tokenizer, model` to force a reload.
if 'tokenizer' not in globals():
    tokenizer = load_tokenizer()
if 'model' not in globals():
    model = load_model_with_fallback()

config = GenerationConfig(
    method='greedy',
    model_id=MODEL_ID,
    out_dir=str(RUNS_DIR),
    run_id=RUN_ID_GREEDY,
    seed=SEED,
    max_new_tokens=1024,
    n=N,
    subjects=SUBJECTS,
    dataset_name=DATASET_NAME,
    dataset_split=DATASET_SPLIT,
    # Reuse the exact question set file written by Cell 1.
    question_set_path=str(qs_path),
    progress=True,
    heartbeat_every=10,
    write_full_records=False,
    tokenizer=tokenizer,
    model=model,
    method_impl=GreedyDecode(),
)

run_generation(config)


In [None]:
# Cell 3 — Run Branches generation (native Python call)
# Runs the branch-sampling ensemble over the question set saved in Cell 1,
# writing into OUT_BASE/runs under a timestamped run id.
from datetime import datetime, timezone
from pathlib import Path

from rofa.methods import BranchSamplingEnsemble
from rofa.model import MODEL_ID, load_model_with_fallback, load_tokenizer
from rofa.runner import run_generation
from rofa.schemas import GenerationConfig

# Sampling hyperparameters, defined once so the values recorded in the config
# and the values handed to the method implementation cannot drift apart.
N_BRANCHES = 10
TEMPERATURE = 0.8
TOP_P = 0.8
TOP_K = 50

# datetime.utcnow() is deprecated since Python 3.12; an aware UTC "now"
# formats to the same YYYYmmdd_HHMMSS string.
RUN_ID_BRANCHES = f"branches_{qs_id}_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}"
RUNS_DIR = Path(OUT_BASE) / 'runs'
RUNS_DIR.mkdir(parents=True, exist_ok=True)

# Reuse a tokenizer/model already present in the kernel (e.g. from the greedy
# cell) instead of loading a second copy while the first is still referenced.
# Both loaders take no arguments here, so an existing pair is presumably
# interchangeable; run `del tokenizer, model` to force a reload.
if 'tokenizer' not in globals():
    tokenizer = load_tokenizer()
if 'model' not in globals():
    model = load_model_with_fallback()

config = GenerationConfig(
    method='branches',
    model_id=MODEL_ID,
    out_dir=str(RUNS_DIR),
    run_id=RUN_ID_BRANCHES,
    seed=SEED,
    max_new_tokens=1024,
    n=N,
    subjects=SUBJECTS,
    dataset_name=DATASET_NAME,
    dataset_split=DATASET_SPLIT,
    # Reuse the exact question set file written by Cell 1.
    question_set_path=str(qs_path),
    n_branches=N_BRANCHES,
    temperature=TEMPERATURE,
    top_p=TOP_P,
    top_k=TOP_K,
    progress=True,
    heartbeat_every=10,
    write_full_records=True,
    tokenizer=tokenizer,
    model=model,
    method_impl=BranchSamplingEnsemble(
        n_branches=N_BRANCHES,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        top_k=TOP_K,
    ),
)

run_generation(config)


## Publish your run artifacts to GitHub Releases (manual)

1. Open Google Drive and locate your run folder at `<OUT_BASE>/runs/<run_id>/` (with the default `OUT_BASE` from the bootstrap cell, that is `MyDrive/rofa_runs/runs/<run_id>/`).
2. Download the run folder as a `.zip`.
3. Create a new GitHub Release in your repository.
4. Upload the `.zip` as a release asset.
5. Paste the asset URL into the analysis notebook so it can download the artifacts.
