# CSIRO OOF Prediction EDA (tiled)
This notebook builds out-of-fold (OOF) predictions from a saved checkpoint and a recreated cross-validation (CV) split, then writes them to a CSV file.


In [1]:
# -------------------------
# 0) CONFIG (edit these)
# -------------------------
import os, sys

# Filesystem locations: project code, the DINO repo checkout, its weights
# file, and the checkpoint whose predictions are generated below.
CSIRO_CODE_DIR = "/notebooks/CSIRO"
DINO_REPO = "/notebooks/dinov3"
DINO_WEIGHTS = "/notebooks/kaggle/csiro/weights/dinov3/dinov3_vitb16_pretrain.pth"
PT_PATH = "/notebooks/kaggle/csiro/output/v7_tile_swapTruetiled_inpTruen_models2_e15.pt"

DATA_ROOT = "/notebooks/kaggle/csiro"
TRAIN_CSV = DATA_ROOT + "/train.csv"

IMG_SIZE = 512
BATCH_SIZE = 64
# Keep two CPU cores free for the main process; clamp at zero when the core
# count is small or cannot be determined.
NUM_WORKERS = max((os.cpu_count() or 0) - 2, 0)
DEVICE = "cuda"

# Handed to make_oof_predictions as `cv_params` to recreate the CV split.
CV_PARAMS = {"mode": "gkf", "cv_seed": 126015, "n_splits": 5}

OUT_PATH = "/notebooks/kaggle/csiro/oof/oof_preds.csv"

# Guard rails: stop at the first required setting that was left as None.
required_settings = {
    "CSIRO_CODE_DIR": CSIRO_CODE_DIR,
    "DINO_REPO": DINO_REPO,
    "DINO_WEIGHTS": DINO_WEIGHTS,
    "PT_PATH": PT_PATH,
    "TRAIN_CSV": TRAIN_CSV,
}
unset_name = next(
    (key for key, value in required_settings.items() if value is None),
    None,
)
if unset_name is not None:
    raise ValueError(f"{unset_name} is None; set it before running.")

# Put both code directories at the front of the import path; DINO_REPO ends
# up first, followed by CSIRO_CODE_DIR.
sys.path[:0] = [DINO_REPO, CSIRO_CODE_DIR]
# Expose the weights location through the environment as well.
os.environ.update(DINO_WEIGHTS_PATH=DINO_WEIGHTS)


In [2]:
# -------------------------
# 1) Imports
# -------------------------
import torch
import pandas as pd

from csiro.config import TARGETS, DEFAULT_MODEL_SIZE, DEFAULT_PLUS, dino_hub_name
from csiro.data import BiomassTiledCached, load_train_wide
from csiro.utils_v2 import make_oof_predictions


ModuleNotFoundError: No module named 'pandas'

In [None]:
# -------------------------
# 2) Load data + backbone
# -------------------------
# Training table read from TRAIN_CSV, and the tiled dataset built from it.
wide_df = load_train_wide(TRAIN_CSV, root=DATA_ROOT)
dataset = BiomassTiledCached(wide_df, img_size=IMG_SIZE)

# Hub entry-point name derived from the project's default model settings
# (both are passed through str() before the lookup).
hub_entry = dino_hub_name(
    model_size=str(DEFAULT_MODEL_SIZE),
    plus=str(DEFAULT_PLUS),
)

# Load the backbone from the local DINO checkout with the configured weights,
# move it to the target device and switch it to eval mode.
backbone = torch.hub.load(
    DINO_REPO,
    hub_entry,
    source="local",
    weights=DINO_WEIGHTS,
)
backbone = backbone.to(DEVICE)
backbone.eval()

print("rows", len(wide_df), "targets", TARGETS)


In [None]:
# -------------------------
# 3) Build OOF predictions
# -------------------------
# Run the checkpoint listed in `pt_paths` over the dataset using the CV split
# described by CV_PARAMS. Both aggregation settings are "mean". The result is
# indexed below by the keys "preds" and "fold_id".
oof = make_oof_predictions(
    dataset=dataset,
    wide_df=wide_df,
    backbone=backbone,
    pt_paths=[PT_PATH],
    cv_params=CV_PARAMS,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    device=DEVICE,
    inner_agg="mean",
    outer_agg="mean",
)

# Tensor.numpy() raises for tensors that live on a CUDA device or that require
# grad. Inference runs on DEVICE (default "cuda"), so detach and copy to host
# memory first; both calls are no-ops for a plain CPU tensor.
preds = oof["preds"].detach().cpu().numpy()
# NOTE(review): fold_id is used as-is; presumably one fold label per row of
# wide_df — confirm against make_oof_predictions.
fold_id = oof["fold_id"]
print("preds shape", preds.shape, "folds", pd.Series(fold_id).nunique())


In [None]:
# -------------------------
# 4) Assemble OOF table
# -------------------------
# Work on a copy so wide_df itself is not modified.
df_out = wide_df.copy()
df_out["fold_id"] = fold_id
# Column i of `preds` is stored as "<target>_pred" for the i-th entry of TARGETS.
for i, t in enumerate(TARGETS):
    df_out[f"{t}_pred"] = preds[:, i]

# os.path.dirname() returns "" when OUT_PATH is a bare filename, and
# os.makedirs("") raises FileNotFoundError, so only create the directory when
# OUT_PATH actually contains one.
out_dir = os.path.dirname(OUT_PATH)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)
df_out.to_csv(OUT_PATH, index=False)
print("Wrote", OUT_PATH)
# Last expression of the cell: the notebook displays the first rows.
df_out.head()
