In [None]:
import os
import pandas as pd

# Choose the data and working directories for the environment this runs in:
# a Kaggle kernel, a local checkout under the user's home directory, or the
# current directory as a fallback.
# os.path.exists() does not expand "~", so the home-relative root is expanded
# explicitly; otherwise the local-checkout branch can never be selected.
home_kaggle_root = os.path.expanduser("~/kaggle")

if os.path.exists("/kaggle"):
    DATA_DIR = "/kaggle/input/csiro-biomass"
    WORK_DIR = "/kaggle/working"
elif os.path.exists(home_kaggle_root):
    DATA_DIR = os.path.join(home_kaggle_root, "csiro", "CSIRO-Image2Biomass", "data")
    WORK_DIR = os.path.join(home_kaggle_root, "csiro", "CSIRO-Image2Biomass", "working")
else:
    DATA_DIR = "./data"
    WORK_DIR = "./working"

# Long-format training table: one row per (image, target) pair.
train_long = pd.read_csv(os.path.join(DATA_DIR, "train.csv"))

# Weight applied to each target's squared errors in the weighted R^2.
weights = {
    "Dry_Green_g": 0.1,
    "Dry_Dead_g": 0.1,
    "Dry_Clover_g": 0.1,
    "GDM_g": 0.2,
    "Dry_Total_g": 0.5,
}

train_long["weight"] = train_long["target_name"].map(weights)

# Series.map leaves NaN for any target_name missing from `weights`, and
# .sum() skips NaN, so such rows would silently drop out of both sums below.
# Fail loudly instead of reporting a score over a subset of the rows.
missing_weight = train_long["weight"].isna()
if missing_weight.any():
    unknown_targets = sorted(
        train_long.loc[missing_weight, "target_name"].astype(str).unique()
    )
    raise ValueError(f"No weight defined for target_name value(s): {unknown_targets}")

train_long["prediction"] = 0.0  # trivial baseline assigns zero everywhere

# Weighted residual sum of squares (SS_res)
residual = train_long["target"] - train_long["prediction"]
ss_res = (residual ** 2 * train_long["weight"]).sum()

# Weighted total sum of squares (SS_tot), measured around each target's own mean.
# NOTE(review): the reference here is the unweighted per-target mean; confirm
# this matches the intended metric definition (vs. a single global weighted mean).
mean_per_target = train_long.groupby("target_name")["target"].transform("mean")
ss_tot = ((train_long["target"] - mean_per_target) ** 2 * train_long["weight"]).sum()

r2_weighted = 1.0 - ss_res / ss_tot

print(f"Weighted R^2 for the zero baseline on the training set: {r2_weighted:.6f}")
