In [30]:
# ----------  # <CELL: imports & device>
import os, glob, pickle
import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import LabelEncoder
import holidays

from transformers import (
    PatchTSTConfig, PatchTSTForPrediction,
    TrainingArguments, Trainer, EarlyStoppingCallback, set_seed
)
from tsfm_public.toolkit.dataset import ForecastDFDataset
from torch.utils.data import Subset
from transformers import TrainerCallback

print("CUDA available:", torch.cuda.is_available())
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
set_seed(42)
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True
try:
    torch.set_float32_matmul_precision("high")
except Exception:
    pass
torch.backends.cuda.matmul.allow_tf32 = True
print("DEVICE:", DEVICE)

CUDA available: True
DEVICE: cuda


In [None]:
# ----------  # <CELL: global configs & paths>
CONTEXT_LEN = 28
# CONTEXT_LEN = 21
PRED_LEN    = 7
PATCH_LEN   = 7
PATCH_STRIDE= 7         # 7 / 1
DATA_STRIDE = 1

K_FOLDS = 5
PURGE_GAP_WEEKS = 1
ANCHOR_STEP = 7

SAVE_DIR = "./patchtst_sales_forecast"
os.makedirs(SAVE_DIR, exist_ok=True)
LE_PATH = os.path.join(SAVE_DIR, "label_encoder.pkl")

STORE_LE_PATH = os.path.join(SAVE_DIR, "store_label_encoder.pkl")

ROOT_CANDIDATES = ["./dataset", ".", "/mnt/data"]

def find_train_csv():
    for root in ROOT_CANDIDATES:
        for rel in ["train/train.csv", "train.csv"]:
            p = os.path.join(root, rel)
            if os.path.exists(p):
                return p
    raise FileNotFoundError("train.csv not found (tried ./dataset/train/train.csv, ./dataset/train.csv, /mnt/data/...).")

def find_test_files():
    # Ïö∞ÏÑ† ./dataset/test/TEST_*.csv Ï∞æÍ≥†, ÏóÜÏúºÎ©¥ /mnt/data/TEST_*.csv
    for root in ROOT_CANDIDATES:
        pats = sorted(glob.glob(os.path.join(root, "test", "TEST_*.csv")))
        if pats:
            return pats
    pats = sorted(glob.glob("/mnt/data/TEST_*.csv"))
    return pats

CAP_MULT = 1.4                 # ÏÉÅÌïú Ïó¨Ïú† Î∞∞Ïàò
ENSEMBLE_NAIVE_W = 0.50  # Î™®Îç∏:(1-Œ±)=0.50, ÎÇòÏù¥Î∏å:Œ±=0.50  (Í∂åÏû• ÌÉêÏÉâ 0.2~0.5)
SUBMISSION_ROUND_INT = True    # Í∑úÏ†ïÏù¥ Ï†ïÏàò ÌïÑÏàòÎ©¥ True Ïú†ÏßÄ
SMALL_VALUE_CUTOFF  = 0.0      # 0.9 Îì±ÏúºÎ°ú ÎëêÎ©¥ Í∑∏ ÎØ∏ÎßåÏùÄ 0 Í∞ïÏ†ú
FOLD_ENSEMBLE = True           # Ìè¥Îìú ÏïôÏÉÅÎ∏î Ï∂îÎ°† ÌôúÏÑ±Ìôî

# Loss Í∞ÄÏ§ëÏπò(Ïõê-Ïä§ÏºÄÏùº sMAPE Ï§ëÏã¨ + log-MAE Î≥¥Í∞ï + 0-overshoot Ìå®ÎÑêÌã∞)
SPLIT_OBJECTIVE = "SMAPE"   # Í∏∞Ï°¥ LEADERBOARD_OBJECTIVEÏôÄ ÏùòÎØ∏ ÎèôÏùº
SMAPE_WEIGHT    = 0.7
MAE_WEIGHT      = 0.0       # zero-heavy Îç∞Ïù¥ÌÑ∞Î©¥ Ïõê-MAE ÎπÑÏ§ëÏùÄ ÎÇÆÏ∂îÎäî Í≤å sMAPEÏóê Ïú†Î¶¨
LOG_MAE_WEIGHT  = 0.3       # log-space ÏïàÏ†ïÌôî(Ï†ÄÏàòÎüâ/Ï†úÎ°ú Í∑ºÏ≤ò ÏßÑÎèô ÏñµÏ†ú)
SMAPE_EPS       = 1e-6      # sMAPE Î∂ÑÎ™® ÏïàÏ†ïÌôîÏö©(ÏõêÌïúÎã§Î©¥ 1e-5~1e-4Î°ú ÏÉÅÌñ• ÌÖåÏä§Ìä∏)

# y_true==0Ïùº Îïå ÏñëÏàò ÏòàÏ∏°(overshoot)Ïóê ÎåÄÌïú Î≥ÑÎèÑ Ìå®ÎÑêÌã∞(ÏûëÍ≤åÎùºÎèÑ ÏñëÏàò Ï∞çÎäî ÏäµÏÑ± ÏñµÏ†ú)
ZERO_OVERSHOOT_PENALTY = 0.25   # Œª_zero (0.15~0.5 Í∂åÏû• Î≤îÏúÑ)

# EarlyStopping Í≥µÌÜµ ÏÑ§Ï†ï(Ïù¥ÎØ∏ Ïì∞ÏÖ®Îã§Î©¥ Í∑∏ÎåÄÎ°ú ÎëêÏÖîÎèÑ Îê©ÎãàÎã§)
EARLY_STOP_PATIENCE = 6  # CV/Final Î™®Îëê ÎèôÏùºÌïòÍ≤å ÏÇ¨Ïö©

# Ï∂îÎ°† Îã®Í≥Ñ(Î¶¨ÎçîÎ≥¥Îìú ÏßÅÍ≤∞) ÏïàÏ†ÑÏû•Ïπò
USE_INT_ROUND      = False   # Ï†úÏ∂úÏù¥ Ï†ïÏàò ÌïÑÏàò ÏïÑÎãàÎùºÍ≥† ÌïòÏÖ®ÏúºÎØÄÎ°ú Í∏∞Î≥∏ False Í∂åÏû•
CUT_THRESHOLD      = None    # Ïù¥ÌïòÎ©¥ 0ÏúºÎ°ú Ïª∑(0.7~1.0 ÏÇ¨Ïù¥ ÌÉêÏÉâ)
ZERO_RUN_GUARD_DAYS= 0      # ÏßÅÏ†Ñ KÏùº Ìï©Ïù¥ 0Ïù¥Î©¥ ÎØ∏Îûò 7Ïùº Ï†ÑÎ∂Ä 0 Í∞ïÏ†ú

In [32]:
# ----------  # <CELL: io & features>

def load_train_df():
    p = find_train_csv()
    print("Using train.csv:", p)
    df = pd.read_csv(p)
    # Í∏∞ÎåÄ Ïª¨Îüº: ÏòÅÏóÖÏùºÏûê, ÏòÅÏóÖÏû•Î™Ö_Î©îÎâ¥Î™Ö, Îß§Ï∂úÏàòÎüâ
    df["date"] = pd.to_datetime(df["ÏòÅÏóÖÏùºÏûê"])
    df["sales"] = pd.to_numeric(df["Îß§Ï∂úÏàòÎüâ"], errors="coerce").fillna(0)
    df.loc[df["sales"] < 0, "sales"] = 0
    s = df["ÏòÅÏóÖÏû•Î™Ö_Î©îÎâ¥Î™Ö"].astype(str).str.split("_", n=1, expand=True)
    df["store_name"] = s[0]; df["menu_name"] = s[1]
    df["store_menu"] = df["store_name"] + "_" + df["menu_name"]
    return df

def fit_or_load_label_encoder(series: pd.Series) -> LabelEncoder:
    if os.path.exists(LE_PATH):
        with open(LE_PATH, "rb") as f:
            le = pickle.load(f)
        new = sorted(set(series.astype(str)) - set(le.classes_))
        if new:
            le.classes_ = np.array(list(le.classes_) + list(new))
            # ‚òÖ ÌôïÏû• Ïãú Ï¶âÏãú Ï†ÄÏû•
            with open(LE_PATH, "wb") as f:
                pickle.dump(le, f)
    else:
        le = LabelEncoder().fit(series.astype(str))
        with open(LE_PATH, "wb") as f:
            pickle.dump(le, f)
    return le

from holidays import country_holidays

def add_time_features(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()  # ÏõêÎ≥∏ Î≥¥Ï°¥
    df["weekday"] = df["date"].dt.dayofweek
    df["is_weekend"] = df["weekday"].isin([5, 6]).astype(int)
    df["month"] = df["date"].dt.month
    df["is_ski_season"] = df["month"].isin([12, 1, 2]).astype(int)

    years = sorted(df["date"].dt.year.unique().tolist())
    kr = set(country_holidays("KR", years=years))  # membership Í≤ÄÏÇ¨ Îπ†Î•¥Í≤å
    df["is_holiday"] = df["date"].dt.date.map(lambda d: int(d in kr)).astype(int)

    df["weekday_sin"] = np.sin(2 * np.pi * df["weekday"] / 7.0)
    df["weekday_cos"] = np.cos(2 * np.pi * df["weekday"] / 7.0)
    df["month_sin"]   = np.sin(2 * np.pi * (df["month"] - 1) / 12.0)
    df["month_cos"]   = np.cos(2 * np.pi * (df["month"] - 1) / 12.0)
    return df


def finalize_columns(df: pd.DataFrame, le: LabelEncoder) -> pd.DataFrame:
    out = df.copy()
    # --- Í∏∞Ï°¥ sales/sales_log/Ï†ïÎ†¨/Ï£ºÏ∞® Î°úÏßÅ Í∑∏ÎåÄÎ°ú ---
    if "sales" not in out.columns:
        if "Îß§Ï∂úÏàòÎüâ" in out.columns:
            out["sales"] = pd.to_numeric(out["Îß§Ï∂úÏàòÎüâ"], errors="coerce").fillna(0)
        else:
            out["sales"] = 0
    out.loc[out["sales"] < 0, "sales"] = 0
    out["sales_log"] = np.log1p(out["sales"])

    out["store_menu_id"] = le.transform(out["store_menu"].astype(str))

    # ADD ‚Üì Ï†ïÏ†Å Ïπ¥ÌÖåÍ≥†Î¶¨Î°ú Ïì∏ store_id ÏÉùÏÑ±
    store_le = fit_or_load_store_le(out["store_name"])
    out["store_id"] = store_le.transform(out["store_name"].astype(str))

    out = out.sort_values(["store_menu_id", "date"]).reset_index(drop=True)
    out["week_idx"] = ((out["date"] - out["date"].min()).dt.days // 7)
    return out

def build_item_caps_from_original():
    # Î∂àÏó∞ÏÜç Î≥¥Í∞ï Ï†ÑÏùò ÏõêÎ≥∏ Î∂ÑÌè¨ Í∏∞Î∞ò(ÏñëÏàòÎßå)ÏúºÎ°ú Í≤¨Í≥†Ìïú ÏÉÅÌïú Í≥ÑÏÇ∞
    orig = load_train_df()  # ÏõêÎ≥∏ Î°úÎìú
    orig["date"] = pd.to_datetime(orig["ÏòÅÏóÖÏùºÏûê"])
    orig["Îß§Ï∂úÏàòÎüâ"] = pd.to_numeric(orig["Îß§Ï∂úÏàòÎüâ"], errors="coerce").fillna(0)
    pos = orig[orig["Îß§Ï∂úÏàòÎüâ"] > 0].copy()
    if pos.empty:
        return {}
    def robust_cap(g):
        a = g["Îß§Ï∂úÏàòÎüâ"].to_numpy()
        q95 = np.quantile(a, 0.95)
        r = g.sort_values("date").tail(90)["Îß§Ï∂úÏàòÎüâ"].to_numpy()
        r_q99 = np.quantile(r, 0.99) if r.size else q95
        return max(q95, r_q99)
    return pos.groupby("ÏòÅÏóÖÏû•Î™Ö_Î©îÎâ¥Î™Ö").apply(robust_cap).to_dict()

def enforce_regular_daily(df: pd.DataFrame) -> pd.DataFrame:
    """
    ÏïÑÏù¥ÌÖú(store_menu)Î≥Ñ Í¥ÄÏ∏° Íµ¨Í∞Ñ[min(date)..max(date)]ÏùÑ D(Ïùº) Í∑∏Î¶¨ÎìúÎ°ú Í∞ïÏ†ú.
    ÎàÑÎùΩÎêú ÎÇ†ÏùÄ sales=0 ÏúºÎ°ú Î≥¥Í∞ï ‚Üí Î∂àÏó∞ÏÜç ÏãúÌÄÄÏä§ Ï†úÍ±∞.
    """
    outs = []
    for key, g in df.groupby("store_menu", sort=False):
        g = g.sort_values("date")
        full_idx = pd.date_range(g["date"].min(), g["date"].max(), freq="D")
        g2 = g.set_index("date").reindex(full_idx)
        g2.index.name = "date"

        # ÏãùÎ≥ÑÏûê/Î¨∏ÏûêÏó¥ Ïª¨Îüº Ïú†ÏßÄ
        for c in ["store_name", "menu_name", "store_menu", "ÏòÅÏóÖÏû•Î™Ö_Î©îÎâ¥Î™Ö"]:
            if c in g2.columns:
                g2[c] = g[c].iloc[0]
        # ÏàòÏπò Î≥¥Í∞ï
        if "sales" in g2.columns:
            g2["sales"] = pd.to_numeric(g2["sales"], errors="coerce").fillna(0)
        if "Îß§Ï∂úÏàòÎüâ" in g2.columns:
            g2["Îß§Ï∂úÏàòÎüâ"] = pd.to_numeric(g2["Îß§Ï∂úÏàòÎüâ"], errors="coerce").fillna(0)

        outs.append(g2.reset_index())
    return pd.concat(outs, ignore_index=True)

def fit_or_load_store_le(series: pd.Series) -> LabelEncoder:
    if os.path.exists(STORE_LE_PATH):
        with open(STORE_LE_PATH, "rb") as f:
            le = pickle.load(f)
        new = sorted(set(series.astype(str)) - set(le.classes_))
        if new:
            le.classes_ = np.array(list(le.classes_) + list(new))
            with open(STORE_LE_PATH, "wb") as f:
                pickle.dump(le, f)
    else:
        le = LabelEncoder().fit(series.astype(str))
        with open(STORE_LE_PATH, "wb") as f:
            pickle.dump(le, f)
    return le

In [None]:
# ----------  # <CELL: helpers for inference>  (NEW)

def leftpad_to_context(g: pd.DataFrame, context_len: int, store_menu: str) -> pd.DataFrame:
    """
    Îã®Ïùº ÏïÑÏù¥ÌÖú g(date Ï†ïÎ†¨Îêú DF)Ïóê ÎåÄÌï¥ Í∏∏Ïù¥Í∞Ä context_lenÎ≥¥Îã§ ÏßßÏúºÎ©¥
    ÏôºÏ™Ω(Í≥ºÍ±∞)ÏúºÎ°ú Ï†úÎ°úÌå®Îî©ÏùÑ Î∂ôÏó¨ Ï†ïÌôïÌûà context_lenÏùÑ ÎßûÏ∂òÎã§.
    Í≥µÎ≥ÄÎüâÎèÑ Ï†ïÏÉÅ ÏÉùÏÑ±ÎêòÎèÑÎ°ù add_time_features Ìò∏Ï∂ú.
    """
    g = g.sort_values("date").copy()
    n = len(g)
    if n >= context_len:
        return g

    need = context_len - n
    pad_end = g["date"].min() - pd.Timedelta(days=1)
    pad_dates = pd.date_range(end=pad_end, periods=need, freq="D")

    store, menu = store_menu.split("_", 1)
    pad = pd.DataFrame({
        "date": pad_dates,
        "ÏòÅÏóÖÏùºÏûê": pad_dates,
        "store_name": store,
        "menu_name": menu,
        "store_menu": store_menu,
        "ÏòÅÏóÖÏû•Î™Ö_Î©îÎâ¥Î™Ö": store_menu,
        "Îß§Ï∂úÏàòÎüâ": 0,
        "sales": 0,
    })
    pad = add_time_features(pad)
    g2 = pd.concat([pad, g], ignore_index=True)
    return g2

def _naive_last7(g: pd.DataFrame) -> np.ndarray:
    """ÏµúÍ∑º 7Ïùº ÌèâÍ∑†ÏùÑ 7ÏùºÎ°ú Î≥µÏ†úÌïòÎäî Î≥¥ÏàòÏ†Å naive."""
    v = pd.to_numeric(g["Îß§Ï∂úÏàòÎüâ"], errors="coerce").fillna(0).to_numpy()
    if len(v) == 0:
        return np.zeros(PRED_LEN, dtype=float)
    tail = v[-7:] if len(v) >= 7 else v
    m = float(tail.mean())
    return np.full(PRED_LEN, m, dtype=float)

def _naive_same_dow(g: pd.DataFrame) -> np.ndarray:
    """ÏµúÍ∑º ÏµúÎåÄ 4Ï£º(28Ïùº)ÏóêÏÑú ÏöîÏùºÎ≥Ñ ÌèâÍ∑†ÏùÑ Ïç®ÏÑú 7Ïùº ÏòàÏ∏°."""
    v = pd.to_numeric(g["Îß§Ï∂úÏàòÎüâ"], errors="coerce").fillna(0).to_numpy()
    if len(v) < 7:
        return np.zeros(PRED_LEN, dtype=float)
    n = min(28, len(v))
    #n = min(21, len(v))    #21Ïùº 
    tail = v[-n:]
    k = n // 7
    tail = tail[-(k*7):]  # 7Ïùò Î∞∞ÏàòÎ°ú ÎßûÏ∂§
    if k == 0:
        return np.zeros(PRED_LEN, dtype=float)
    arr = tail.reshape(k, 7)
    mean_dow = arr.mean(axis=0)  # (7,)
    return mean_dow.astype(float)

def _blend_with_naive(yhat: np.ndarray, g: pd.DataFrame, alpha: float | None = None) -> np.ndarray:
    """Î™®Îç∏ ÏòàÏ∏° yhat(7,)Í≥º 'ÏµúÍ∑º 7Ïùº Í∑∏ÎåÄÎ°ú' ÎÇòÏù¥Î∏åÎ•º ÏÑûÎäîÎã§."""
    a = ENSEMBLE_NAIVE_W if alpha is None else float(alpha)
    naive = g["sales"].astype(float).tail(PRED_LEN).to_numpy()
    if naive.shape[0] < PRED_LEN:
        naive = np.pad(naive, (PRED_LEN - naive.shape[0], 0), constant_values=0.0)
    return (1.0 - a) * yhat + a * naive


def _zero_run_guard(g: pd.DataFrame, yhat: np.ndarray) -> np.ndarray:
    """ÏµúÍ∑º ZERO_RUN_GUARD_DAYSÍ∞Ä Ï†ÑÎ∂Ä 0Ïù¥Î©¥ ÎØ∏Îûò 7Ïùº 0ÏúºÎ°ú Í∞ÄÎìú."""
    v = pd.to_numeric(g["Îß§Ï∂úÏàòÎüâ"], errors="coerce").fillna(0).to_numpy()
    if ZERO_RUN_GUARD_DAYS > 0 and len(v) >= ZERO_RUN_GUARD_DAYS:
        if v[-ZERO_RUN_GUARD_DAYS:].sum() == 0:
            return np.zeros_like(yhat, dtype=float)
    return yhat


In [34]:
# ----------  # <CELL: dataset builders>
import inspect

DEBUG_DATASET_SIG = False

ID_COLS = ["store_menu_id"]
TIME_COL = "date"
TARGET_COLS = ["sales_log"]
KNOWN_REAL_COLS = [
    "is_holiday", "is_weekend", "is_ski_season",
    "weekday_sin", "weekday_cos", "month_sin", "month_cos",
]

def build_dataset(
    df_split: pd.DataFrame,
    context_len: int | None = None,
    prediction_len: int | None = None,
    known_real_cols: list | None = None,
) -> ForecastDFDataset:
    """
    ForecastDFDataset ÏÉùÏÑ±Í∏∞ (Î≤ÑÏ†Ñ Ìò∏Ìôò + Îü∞ÌÉÄÏûÑ Ïò§Î≤ÑÎùºÏù¥Îìú ÏßÄÏõê)
    - context_len / prediction_len / known_real_cols Î•º Ìò∏Ï∂úÎ∂ÄÏóêÏÑú ÎçÆÏñ¥Ïì∏ Ïàò ÏûàÏùå
    """
    # Í∏∞Î≥∏Í∞í: Í∏ÄÎ°úÎ≤å ÏÑ§Ï†ï ÏÇ¨Ïö©
    context_len    = CONTEXT_LEN if context_len is None else int(context_len)
    prediction_len = PRED_LEN    if prediction_len is None else int(prediction_len)
    known_real_cols = KNOWN_REAL_COLS if known_real_cols is None else list(known_real_cols)

    sig = inspect.signature(ForecastDFDataset.__init__)
    params = set(sig.parameters.keys())
    kwargs = {}

    CTX = int(context_len) if context_len is not None else int(CONTEXT_LEN)
    PRED = int(prediction_len) if prediction_len is not None else int(PRED_LEN)

    # Í∏∏Ïù¥Îì§
    if "context_length" in params:
        kwargs["context_length"] = context_len
    elif "context_len" in params:
        kwargs["context_len"] = context_len

    if "prediction_length" in params:
        kwargs["prediction_length"] = prediction_len
    elif "prediction_len" in params:
        kwargs["prediction_len"] = prediction_len

    if "stride" in params:
        kwargs["stride"] = DATA_STRIDE
    if "enable_padding" in params:
        kwargs["enable_padding"] = False

    # id / time / target
    for k in ["id_columns", "id_cols", "group_ids", "ids"]:
        if k in params:
            kwargs[k] = ID_COLS
            break

    for k in ["timestamp_column", "time_column", "time_col", "timestamp_col"]:
        if k in params:
            kwargs[k] = TIME_COL
            break

    for k in ["target_columns", "target_col", "target", "targets"]:
        if k in params:
            kwargs[k] = TARGET_COLS
            break

    # ÎèôÏ†Å Ïã§Ïàò ÌîºÏ≤ò (Ï±ÑÎÑê ÏàòÎ•º Î∞îÍøîÏïº Ìï† Îïå Ïó¨Í∏∞Î°ú Ï†úÏñ¥)
    if "observable_columns" in params:
        kwargs["observable_columns"] = known_real_cols
    else:
        for alt in ["control_columns", "conditional_columns", "categorical_columns"]:
            if alt in params:
                kwargs[alt] = known_real_cols
                break

    # Ï†ïÏ†Å Î≤îÏ£º ÌîºÏ≤òÎ°ú store_id Ï£ºÏûÖ (ÏßÄÏõêÎêòÎäî ÌååÎùºÎØ∏ÌÑ∞ Î™ÖÏóêÎßå ÎÑ£Í∏∞)
    if "static_categorical_columns" in params:
        kwargs["static_categorical_columns"] = ["store_id"]
    elif "static_features" in params:  # ÌòπÏãú Îã§Î•∏ Ïù¥Î¶ÑÏùÑ Ïì∞Îäî Î≤ÑÏ†Ñ ÎåÄÎπÑ
        kwargs["static_features"] = ["store_id"]

    if "num_workers" in params:
        kwargs["num_workers"] = 0

    if DEBUG_DATASET_SIG:
        print("[ForecastDFDataset accepted params]", sorted(params))
        print("[ForecastDFDataset kwargs]", kwargs)

    return ForecastDFDataset(df_split, **kwargs)

In [35]:
# ----------  # <CELL: collator>  (Insert)
import numpy as np
import pandas as pd
from transformers.data.data_collator import default_data_collator

# Î∞∞ÏπòÏóêÏÑú ÎÇ†Ïßú¬∑ÌÉÄÏûÑÏä§ÌÉ¨ÌîÑÎ•º ÏïàÏ†ÑÌïòÍ≤å Ï≤òÎ¶¨(Ï†úÍ±∞/Ï†ïÏàòÌôî)
_DROP_KEYS_EXACT = {"date", "time", "start", "end", "target_start"}
_DROP_KEYS_SUBSTR = {"timestamp"}  # ÌÇ§ Ïù¥Î¶ÑÏóê 'timestamp'Í∞Ä Îì§Ïñ¥Í∞ÄÎ©¥ Ï†úÍ±∞

def _to_int_ts(x):
    # pandas.Timestamp -> int64 (Ï¥à Îã®ÏúÑ)
    return np.int64(x.value // 1_000_000_000)

def _sanitize_feature_dict(feat: dict):
    out = {}
    for k, v in feat.items():
        kl = k.lower()
        if kl in _DROP_KEYS_EXACT or any(sub in kl for sub in _DROP_KEYS_SUBSTR):
            # Î™®Îç∏ ÏûÖÎ†•Ïù¥ ÏïÑÎãå ÎÇ†Ïßú Î©îÌÉÄÎäî Ï†úÍ±∞
            continue

        # Í∞úÎ≥Ñ Timestamp
        if isinstance(v, pd.Timestamp):
            out[k] = _to_int_ts(v)
            continue

        # Î¶¨Ïä§Ìä∏Ïóê Timestamp Ìè¨Ìï®
        if isinstance(v, list) and v and isinstance(v[0], pd.Timestamp):
            out[k] = np.array([_to_int_ts(t) for t in v], dtype=np.int64)
            continue

        # pandas Series -> numpy
        if isinstance(v, pd.Series):
            if np.issubdtype(v.dtype, np.datetime64):
                out[k] = v.view("i8") // 1_000_000_000
            else:
                out[k] = v.to_numpy()
            continue

        # numpy datetime64 Î∞∞Ïó¥
        if isinstance(v, np.ndarray) and np.issubdtype(v.dtype, np.datetime64):
            out[k] = v.view("i8") // 1_000_000_000
            continue

        # Í∑∏ Ïô∏(torch.Tensor/np.ndarray/ÏàòÏπòÌòï/Î¶¨Ïä§Ìä∏ Îì±)Îäî Í∑∏ÎåÄÎ°ú
        out[k] = v
    return out

def ts_data_collator(features):
    cleaned = [_sanitize_feature_dict(f) for f in features]
    return default_data_collator(cleaned)

In [36]:
# ----------  # <CELL: model>
import inspect
import torch
from torch import nn
from transformers import PatchTSTConfig, PatchTSTForPrediction

LEADERBOARD_OBJECTIVE = "SMAPE"

def _smape_torch(y_true, y_pred, eps=SMAPE_EPS):
    num = torch.abs(y_pred - y_true)
    den = (torch.abs(y_true) + torch.abs(y_pred)).clamp_min(eps)
    return 2.0 * (num / den)

def _log_mae_torch(y_true_log, y_pred_log):
    # Î°úÍ∑∏ Í≥µÍ∞Ñ MAE: Ï†ÄÏàòÎüâ/Ï†úÎ°ú Í∑ºÏ≤òÏóêÏÑú ÏïàÏ†ïÌôî
    return torch.abs(y_pred_log - y_true_log)

def _zero_overshoot_penalty_torch(y_true, y_pred):
    # y_true==0ÏóêÏÑú ÏñëÏàò ÏòàÏ∏° ÏûêÏ≤¥Ïóê ÏÑ†Ìòï Ìå®ÎÑêÌã∞ (ÎÑàÎ¨¥ Í∞ïÌïòÏßÄ ÏïäÍ≤å ÌèâÍ∑†)
    mask = (y_true <= 1e-9).float()
    return (mask * y_pred.clamp(min=0.0)).mean()

def _mae_torch(y_true, y_pred):
    return torch.abs(y_pred - y_true)

def _reduce_mean(x):
    return torch.mean(x)

def _choose_loss_weights(obj: str):
    # -> Í∏∞Ï°¥ Ìï®Ïàò ÌôïÏû•: w_logmae, w_zero Ï∂îÍ∞Ä
    obj = (obj or "").upper()
    if obj == "SMAPE":
        return dict(
            w_smape=SMAPE_WEIGHT,
            w_mae=MAE_WEIGHT,
            w_logmae=LOG_MAE_WEIGHT,
            w_zero=ZERO_OVERSHOOT_PENALTY
        )
    elif obj == "MAE":
        return dict(w_smape=0.0, w_mae=1.0, w_logmae=0.0, w_zero=0.0)
    else:
        return dict(w_smape=0.4, w_mae=0.4, w_logmae=0.2, w_zero=0.0)

class PatchTSTSalesOnly(nn.Module):
    """
    - Ïõê Ïä§ÏºÄÏùº ÏÜêÏã§(SMAPE Ï§ëÏã¨) + 0-ÏπúÌôî Î≥¥Ï†ï:
      ¬∑ y_true=0 ÌÉÄÏûÑÏä§ÌÖù Îã§Ïö¥Ïõ®Ïù¥Ìä∏
      ¬∑ Ï†ÑÎ∂Ä0 ÏúàÎèÑÏö∞ Îã§Ïö¥Ïõ®Ïù¥Ìä∏
      ¬∑ log-MAE Î≥¥Ï°∞Ìï≠
      ¬∑ zero-overshoot penalty(0Ïóê ÏñëÏàò ÏòàÏ∏° ÏñµÏ†ú)
      ¬∑ (ÏòµÏÖò) ÌäπÏ†ï Îß§Ïû• Í∞ÄÏ§ë(Ïòà: ÎØ∏ÎùºÏãúÏïÑ/Îã¥Ìïò)
    """
    def __init__(self, base_model: PatchTSTForPrediction, target_ch: int = 0,
                 objective: str = LEADERBOARD_OBJECTIVE,
                 special_store_ids: set[int] | None = None):
        super().__init__()
        self.base = base_model
        self.target_ch = target_ch
        self._allowed = set(inspect.signature(self.base.forward).parameters.keys())
        self.loss_w = _choose_loss_weights(objective)

        # Í∞ÄÏ§ë ÌååÎùºÎØ∏ÌÑ∞
        self.w_zero    = 1.10   # y_true==0 Íµ¨Í∞Ñ Í∞ÄÏ§ë(Í≥ºÎåÄÏòàÏ∏° ÏñµÏ†ú)
        self.w_all0w   = 1.00   # Ï†ÑÏ≤¥ ÏúàÎèÑÏö∞Í∞Ä 0Ïùº Îïå Í∞ÄÏ§ë
        self.w_logmae  = 0.03   # Î°úÍ∑∏-MAE Ìï≠ Í∞ÄÏ§ë
        self.w_ovr     = 0.02   # overshoot(0ÏóêÏÑú ÏñëÏàò ÏòàÏ∏°) Ìå®ÎÑêÌã∞ Í∞ÄÏ§ë
        self.special_store_ids = set(special_store_ids or [])

    def _filter_and_bridge(self, batch: dict):
        cleaned = {}
        for k, v in batch.items():
            if k in self._allowed:
                cleaned[k] = v
        if "labels" in self._allowed and "labels" not in cleaned and "future_values" in batch and "future_values" not in self._allowed:
            cleaned["labels"] = batch["future_values"]
        if "observed_mask" in self._allowed and "observed_mask" not in cleaned:
            po = batch.get("past_observed_mask", None)
            fo = batch.get("future_observed_mask", None)
            if po is not None and fo is not None:
                try: cleaned["observed_mask"] = torch.cat([po, fo], dim=-1)
                except Exception: cleaned["observed_mask"] = po
            elif po is not None:
                cleaned["observed_mask"] = po
        return cleaned

    def forward(self, **batch):
        cleaned = self._filter_and_bridge(batch)
        out = self.base(**cleaned)

        pred = getattr(out, "prediction", None)
        if pred is not None and pred.dim() == 3:
            try: pred = pred[:, self.target_ch, :]
            except Exception: pass

        labels = cleaned.get("labels", None)
        if pred is not None and labels is not None:
            eps   = 1e-6
            yhat  = torch.expm1(pred).clamp_min(0)
            ytrue = torch.expm1(labels).clamp_min(0)

            w_pos  = (ytrue > 0).float()
            w_zero = 1.0 - w_pos
            w_t = self.w_zero * w_zero + 1.0 * w_pos

            all0 = (ytrue.sum(dim=-1, keepdim=True) == 0).float()
            w_w  = self.w_all0w * all0 + 1.0 * (1.0 - all0)

            w_s = torch.ones_like(ytrue)
            scf = cleaned.get("static_categorical_features", None)
            if scf is None:
                scf = batch.get("static_categorical_features", None)
            if scf is None:
                scf = batch.get("static_features", None)  # ÏùºÎ∂Ä Íµ¨ÌòÑÏ≤¥ Ìò∏Ìôò

            if scf is not None:
                sid = scf.squeeze(-1) if scf.dim()==2 else scf  # (B,)
                if self.special_store_ids:
                    m = torch.zeros_like(sid, dtype=torch.float32)
                    for s in self.special_store_ids:
                        m = m + (sid == s).float()
                    m = (m > 0).float().unsqueeze(-1).expand_as(ytrue)
                    w_s = torch.where(m>0, torch.tensor(2.0, device=ytrue.device), torch.tensor(1.0, device=ytrue.device))

            W = w_t * w_w * w_s

            smape    = _smape_torch(ytrue, yhat)
            mae      = _mae_torch(ytrue, yhat)
            log_mae  = torch.abs(torch.log1p(ytrue+eps) - torch.log1p(yhat+eps))
            overshot = torch.relu(yhat) * (ytrue == 0).float()

            w = self.loss_w
            loss = 0.0
            if w["w_smape"]>0: loss += w["w_smape"] * torch.sum(W*smape) / (W.sum()+eps)
            if w["w_mae"]  >0: loss += w["w_mae"]   * torch.sum(W*mae)   / (W.sum()+eps)
            loss += self.w_logmae * torch.sum(W*log_mae) / (W.sum()+eps)
            loss += self.w_ovr    * torch.sum(W*overshot) / (W.sum()+eps)

            out.loss = loss

        if pred is not None:
            out.prediction = pred
        return out

def make_model():
    config = PatchTSTConfig(
        num_input_channels=1 + len(KNOWN_REAL_COLS),
        context_length=CONTEXT_LEN,
        prediction_length=PRED_LEN,
        patch_length=PATCH_LEN,
        patch_stride=PATCH_STRIDE,
        d_model=256,
        num_attention_heads=16,
        num_hidden_layers=4,
        ffn_dim=512,
        dropout=0.10,
        head_dropout=0.10,
        scaling="std",
        loss="mse",
    )
    base = PatchTSTForPrediction(config)
    return PatchTSTSalesOnly(base, target_ch=0, objective=SPLIT_OBJECTIVE)

In [37]:
# ----------  # <CELL: metrics>  (NEW)

def _flatten_any(x):
    if isinstance(x, (list, tuple)):
        out = []
        for e in x:
            out.extend(_flatten_any(e))
        return out
    return [np.asarray(x)]

def _extract_matrix_from_any(raw, pred_len: int, target_ch: int = 0):
    arrs = _flatten_any(raw)
    nlc, ncl = [], []
    for a in arrs:
        a = np.asarray(a)
        if a.ndim == 3:
            if a.shape[1] == pred_len:   # (N, L, C)
                nlc.append(a)
            elif a.shape[2] == pred_len: # (N, C, L)
                ncl.append(a)
    if nlc:
        a = nlc[0]; ch = target_ch if a.shape[2] > target_ch else 0
        return a[:, :, ch]
    if ncl:
        a = ncl[0]; ch = target_ch if a.shape[1] > target_ch else 0
        return a[:, ch, :]
    for a in arrs:
        a = np.asarray(a)
        if a.ndim == 2 and a.shape[1] == pred_len:
            return a
    raise ValueError(f"cannot shape to (N,{pred_len})")

def _smape_np(y, yhat, eps=1e-6):
    num = np.abs(yhat - y)
    den = (np.abs(y) + np.abs(yhat) + eps)
    return 2.0 * (num / den)

def compute_metrics(eval_pred):
    # Î°úÍ∑∏ ‚Üí Ïõê Ïä§ÏºÄÏùº
    Yhat_log = _extract_matrix_from_any(eval_pred.predictions, pred_len=PRED_LEN, target_ch=0)
    Ylbl_log = _extract_matrix_from_any(eval_pred.label_ids,   pred_len=PRED_LEN, target_ch=0)

    yhat = np.clip(np.expm1(Yhat_log), 0, None)
    ytrue = np.clip(np.expm1(Ylbl_log), 0, None)

    mae   = float(np.mean(np.abs(yhat - ytrue)))
    rmse  = float(np.sqrt(np.mean((yhat - ytrue) ** 2)))
    smape = float(np.mean(_smape_np(ytrue, yhat)))

    return {"mae": mae, "rmse": rmse, "smape": smape}

In [38]:
# ----------  # <CELL: training args>
import inspect

USE_BF16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
OPTIM_NAME = "adamw_torch_fused" if torch.cuda.is_available() else "adamw_torch"

# 1) ÌòÑÏû¨ TrainingArgumentsÍ∞Ä Ïñ¥Îñ§ ÌååÎùºÎØ∏ÌÑ∞Î•º Î∞õÎäîÏßÄ ÌôïÏù∏
sig = inspect.signature(TrainingArguments.__init__)
PARAMS = set(sig.parameters.keys())

# 2) Í≥µÌÜµ(Î™®Îì† Î≤ÑÏ†ÑÏóêÏÑú Î¨∏Ï†úÏóÜÎäî) Í∏∞Î≥∏ kwargs
kw = dict(
    output_dir=SAVE_DIR,
    overwrite_output_dir=True,

    num_train_epochs=30,
    do_eval=True,
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    save_strategy="epoch",
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,

    label_names=["future_values"],
    remove_unused_columns=False,

    dataloader_pin_memory=True,
    report_to="none",
)

# 3) Î≤ÑÏ†ÑÎ≥Ñ ÏòµÏÖòÏùÑ "ÏûàÏùÑ ÎïåÎßå" Ï∂îÍ∞Ä
if "evaluation_strategy" in PARAMS:
    kw["evaluation_strategy"] = "epoch"
elif "eval_strategy" in PARAMS:
    kw["eval_strategy"] = "epoch"

if "dataloader_num_workers" in PARAMS:
    kw["dataloader_num_workers"] = (4 if os.name != "nt" else 0)
    # dataloader_num_workers : Î¶¨ÎàÖÏä§/WSL : 4 / 8, Windows : 0 Ïú†ÏßÄ

if "dataloader_persistent_workers" in PARAMS:
    kw["dataloader_persistent_workers"] = (os.name != "nt")

if "tf32" in PARAMS:
    kw["tf32"] = True

if "bf16" in PARAMS:
    kw["bf16"] = USE_BF16

if "fp16" in PARAMS:
    kw["fp16"] = not USE_BF16

if "optim" in PARAMS:
    kw["optim"] = OPTIM_NAME

if "eval_accumulation_steps" in PARAMS:
    kw["eval_accumulation_steps"] = 32


if "learning_rate" in PARAMS:
    kw["learning_rate"] = 4e-4
    # 3e-4~1e-3 Í∂åÏó≠; 5e-4 Î¨¥ÎÇú / ÏßÄÍ∏à ÏÑ∏ÌåÖ(Î∞∞Ïπò 64, AdamW-fused, warmup_ratio=0.05, stride=1) Í∏∞Ï§Ä learning_rate=4e-4Í∞Ä 1Ï∞® Í∂åÏû•Í∞íÏûÖÎãàÎã§.
if "weight_decay" in PARAMS:
    kw["weight_decay"]  = 0.01
if "warmup_ratio" in PARAMS:
    kw["warmup_ratio"]  = 0.05

# ÏÑ†ÌÉù ÏßÄÌëú: SMAPEÎ°ú Í≥†Ï†ï
select_metric = "eval_smape"
kw["load_best_model_at_end"] = True
kw["metric_for_best_model"]  = select_metric
kw["greater_is_better"]      = False

# 4) ÏµúÏ¢Ö ÏÉùÏÑ±
training_args = TrainingArguments(**kw)
print("[TrainingArguments OK]")
print("Accepted keys:", sorted(kw.keys()))

EARLY_STOP_PATIENCE = 6

[TrainingArguments OK]
Accepted keys: ['bf16', 'dataloader_num_workers', 'dataloader_persistent_workers', 'dataloader_pin_memory', 'do_eval', 'eval_accumulation_steps', 'evaluation_strategy', 'fp16', 'greater_is_better', 'label_names', 'learning_rate', 'load_best_model_at_end', 'metric_for_best_model', 'num_train_epochs', 'optim', 'output_dir', 'overwrite_output_dir', 'per_device_eval_batch_size', 'per_device_train_batch_size', 'remove_unused_columns', 'report_to', 'save_strategy', 'save_total_limit', 'tf32', 'warmup_ratio', 'weight_decay']




In [39]:
# ----------  # <CELL: callbacks (anchor eval)>
class RotateEvalAnchors(TrainerCallback):
    """Îß§ epoch ÌèâÍ∞Ä ÏÖãÏùÑ 7Ïùº Í∞ÑÍ≤© ÏÑúÎ∏åÏÉòÌîåÎ°ú ÍµêÏ≤¥(ÏÜçÎèÑ‚Üë, ÏÑ±Îä• ÏòÅÌñ• ÏóÜÏùå)."""
    def __init__(self, trainer_ref, full_eval_ds, step: int = ANCHOR_STEP):
        self.trainer = trainer_ref
        self.full_eval_ds = full_eval_ds
        self.step = step
    def on_epoch_begin(self, args, state, control, **kwargs):
        ep = int(state.epoch) if state.epoch is not None else 0
        off = ep % self.step
        idx = list(range(off, len(self.full_eval_ds), self.step))
        if not idx:  # ‚òÖ ÏµúÏÜå 1Í∞ú Î≥¥Ïû•
            idx = [0]
        self.trainer.eval_dataset = Subset(self.full_eval_ds, idx)
        print(f"[RotateEvalAnchors] epoch={ep} offset={off} eval_size={len(idx)}")


In [40]:
# ----------  # <CELL: build dataframes>
raw = load_train_df()
raw = enforce_regular_daily(raw)

le  = fit_or_load_label_encoder(raw["store_menu"])
feat = add_time_features(raw)
df   = finalize_columns(feat, le)

N_WEEKS = int(df["week_idx"].max()) + 1
print(f"Rows={len(df)}, Items={df['store_menu_id'].nunique()}, Weeks={N_WEEKS}")

# REPLACE ‚Üì (ÏÉÅÌïú Í≥ÑÏÇ∞ÏùÑ ÏõêÎ≥∏ Í∏∞Î∞òÏúºÎ°ú ÍµêÏ≤¥)
ITEM_CAP = build_item_caps_from_original()

# ADD ‚Üì ÌäπÏ†ï Îß§Ïû• id ÏûêÎèô ÏàòÏßë(ÏòµÏÖò)
def collect_special_store_ids(df_: pd.DataFrame) -> set[int]:
    s = set()
    for nm, sid in df_[["store_name","store_id"]].drop_duplicates().itertuples(index=False, name=None):
        if ("ÎØ∏ÎùºÏãúÏïÑ" in nm) or ("Miracia" in nm) or ("Îã¥Ìïò" in nm) or ("Damha" in nm):
            s.add(int(sid))
    return s

SPECIAL_STORE_IDS = collect_special_store_ids(df)
print("Special store ids:", SPECIAL_STORE_IDS)

Using train.csv: ./dataset\train/train.csv
Rows=102676, Items=193, Weeks=76
Using train.csv: ./dataset\train/train.csv
Special store ids: {1, 3}


  return pos.groupby("ÏòÅÏóÖÏû•Î™Ö_Î©îÎâ¥Î™Ö").apply(robust_cap).to_dict()


In [41]:
# ----------  # <CELL: cv split & run>
# Ïó∞ÏÜç Ï£º(week_idx) Í∏∞Ï§Ä K-Fold (Î∂àÍ∑†Îì± Î∂ÑÌï†ÎèÑ Ïª§Î≤Ñ)
def contiguous_week_folds(weeks_sorted, k):
    # np.array_splitÏúºÎ°ú Ïó∞ÏÜç Î∏îÎ°ù KÍ∞úÎ°ú ÎÇòÎàî
    return [list(chunk) for chunk in np.array_split(weeks_sorted, k)]

def make_masks_by_weeks(valid_weeks, all_weeks, purge_gap=1):
    valid_weeks = set(valid_weeks)
    if len(valid_weeks) == 0:
        raise ValueError("valid_weeks is empty.")
    min_w, max_w = min(valid_weeks), max(valid_weeks)
    # purge Î≤îÏúÑ: [min_w - gap, max_w + gap]
    purge_range = set([w for w in all_weeks if (min_w - purge_gap) <= w <= (max_w + purge_gap)])
    w_arr = df["week_idx"].values
    valid_mask = np.isin(w_arr, list(valid_weeks))
    purge_mask = np.isin(w_arr, list(purge_range))
    train_mask = (~valid_mask) & (~purge_mask)
    return train_mask, valid_mask

all_weeks_sorted = sorted(df["week_idx"].unique().tolist())
fold_weeks = contiguous_week_folds(all_weeks_sorted, K_FOLDS)

cv_metrics = []
for fold, v_weeks in enumerate(fold_weeks):
    tr_m, va_m = make_masks_by_weeks(v_weeks, all_weeks_sorted, purge_gap=PURGE_GAP_WEEKS)
    train_df = df.loc[tr_m].copy()
    valid_df = df.loc[va_m].copy()

    train_ds = build_dataset(train_df)
    valid_ds = build_dataset(valid_df)

    model = make_model()
    # ADD ‚Üì ÌäπÏàò Îß§Ïû• Í∞ÄÏ§ë ÏÇ¨Ïö©
    model.special_store_ids = SPECIAL_STORE_IDS

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_ds,
        eval_dataset=valid_ds,
        data_collator=ts_data_collator,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=EARLY_STOP_PATIENCE)],
    )
    trainer.add_callback(RotateEvalAnchors(trainer, valid_ds, step=ANCHOR_STEP))

    print(f"\n[CV] fold={fold} train_rows={len(train_df)} valid_rows={len(valid_df)} weeks={min(v_weeks)}..{max(v_weeks)}")
    trainer.train()
    trainer.eval_dataset = valid_ds
    m = trainer.evaluate()
    fold_dir = os.path.join(SAVE_DIR, f"fold_{fold}")
    trainer.save_model(fold_dir)
    m["fold"] = fold
    cv_metrics.append(m)
    print(f"[CV] fold={fold} metrics={m}")

cv_eval_loss = float(np.mean([m["eval_loss"] for m in cv_metrics]))
cv_smape = float(np.mean([m.get("eval_smape", np.nan) for m in cv_metrics]))
cv_mae   = float(np.mean([m.get("eval_mae",   np.nan) for m in cv_metrics]))
cv_rmse  = float(np.mean([m.get("eval_rmse",  np.nan) for m in cv_metrics]))
print(f"CV avg ‚Üí loss={cv_eval_loss:.6f}, smape={cv_smape:.6f}, mae={cv_mae:.3f}, rmse={cv_rmse:.3f}")

# --- Ìè¥ÎìúÎ≥Ñ sMAPE Í∏∞Î∞ò Í∞ÄÏ§ëÏπò (ÏûëÏùÑÏàòÎ°ù Í∞ÄÏ§ë‚Üë) ---
fold_smapes = [m.get("eval_smape", np.inf) for m in cv_metrics]
if all(np.isfinite(s) for s in fold_smapes) and len(fold_smapes) > 0:
    w = 1.0 / (np.asarray(fold_smapes) + 1e-6)
    FOLD_WEIGHTS = (w / w.sum()).astype(float).tolist()
else:
    FOLD_WEIGHTS = [1.0 / max(1, len(cv_metrics))] * max(1, len(cv_metrics))
print("[FOLD ENSEMBLE] weights:", FOLD_WEIGHTS)


[CV] fold=0 train_rows=79709 valid_rows=21616 weeks=0..15
[RotateEvalAnchors] epoch=0 offset=0 eval_size=2344


Epoch,Training Loss,Validation Loss,Mae,Rmse,Smape
1,0.1064,0.098058,5.888744,25.739819,1.346286
2,0.1053,0.099168,6.116653,27.212065,1.157398
3,0.1036,0.097592,6.088609,27.136923,0.93014
4,0.1017,0.097951,5.907887,25.507057,1.131714
5,0.1006,0.097094,5.918335,25.177462,1.198462
6,0.1003,0.099055,6.000439,26.277948,1.043145
7,0.0993,0.09784,6.028482,27.018171,0.817371
8,0.0983,0.097123,6.148438,31.0044,0.814851
9,0.097,0.09432,6.152834,27.597965,1.131624
10,0.0957,0.095055,6.228302,28.831549,1.014039


[RotateEvalAnchors] epoch=1 offset=1 eval_size=2344
[RotateEvalAnchors] epoch=2 offset=2 eval_size=2344
[RotateEvalAnchors] epoch=3 offset=3 eval_size=2344
[RotateEvalAnchors] epoch=4 offset=4 eval_size=2343
[RotateEvalAnchors] epoch=5 offset=5 eval_size=2343
[RotateEvalAnchors] epoch=6 offset=6 eval_size=2343
[RotateEvalAnchors] epoch=7 offset=0 eval_size=2344
[RotateEvalAnchors] epoch=8 offset=1 eval_size=2344
[RotateEvalAnchors] epoch=9 offset=2 eval_size=2344
[RotateEvalAnchors] epoch=10 offset=3 eval_size=2344
[RotateEvalAnchors] epoch=11 offset=4 eval_size=2343
[RotateEvalAnchors] epoch=12 offset=5 eval_size=2343
[RotateEvalAnchors] epoch=13 offset=6 eval_size=2343


[CV] fold=0 metrics={'eval_loss': 0.09875380247831345, 'eval_mae': 6.018514633178711, 'eval_rmse': 27.374135971069336, 'eval_smape': 0.810080885887146, 'eval_runtime': 10.5336, 'eval_samples_per_second': 1557.393, 'eval_steps_per_second': 24.398, 'epoch': 14.0, 'fold': 0}

[CV] fold=1 train_rows=79709 valid_rows=20265 weeks=16..30
[RotateEvalAnchors] epoch=0 offset=0 eval_size=2151


Epoch,Training Loss,Validation Loss,Mae,Rmse,Smape
1,0.1144,0.091348,4.801842,18.753269,1.293071
2,0.1129,0.088246,4.689861,18.349953,1.233217
3,0.1099,0.092641,4.766861,18.809587,1.184164
4,0.1084,0.088392,4.721426,18.653755,1.132227
5,0.1091,0.08967,4.71143,18.50761,1.114233
6,0.1076,0.085168,4.630296,17.846323,1.178326
7,0.1064,0.086894,4.650637,18.239067,1.015316
8,0.1053,0.089282,4.819985,19.253281,1.189508
9,0.1035,0.08651,4.666726,18.250715,1.108358
10,0.1026,0.090129,4.795278,19.088409,1.121903


[RotateEvalAnchors] epoch=1 offset=1 eval_size=2151
[RotateEvalAnchors] epoch=2 offset=2 eval_size=2151
[RotateEvalAnchors] epoch=3 offset=3 eval_size=2151
[RotateEvalAnchors] epoch=4 offset=4 eval_size=2150
[RotateEvalAnchors] epoch=5 offset=5 eval_size=2150
[RotateEvalAnchors] epoch=6 offset=6 eval_size=2150
[RotateEvalAnchors] epoch=7 offset=0 eval_size=2151
[RotateEvalAnchors] epoch=8 offset=1 eval_size=2151
[RotateEvalAnchors] epoch=9 offset=2 eval_size=2151
[RotateEvalAnchors] epoch=10 offset=3 eval_size=2151
[RotateEvalAnchors] epoch=11 offset=4 eval_size=2150
[RotateEvalAnchors] epoch=12 offset=5 eval_size=2150


[CV] fold=1 metrics={'eval_loss': 0.08819328248500824, 'eval_mae': 4.715989112854004, 'eval_rmse': 18.607099533081055, 'eval_smape': 1.0139180421829224, 'eval_runtime': 9.5683, 'eval_samples_per_second': 1573.326, 'eval_steps_per_second': 24.665, 'epoch': 13.0, 'fold': 1}

[CV] fold=2 train_rows=79709 valid_rows=20265 weeks=31..45
[RotateEvalAnchors] epoch=0 offset=0 eval_size=2151


Epoch,Training Loss,Validation Loss,Mae,Rmse,Smape
1,0.1096,0.11491,8.240653,32.662434,1.119715
2,0.1083,0.113977,8.081577,30.321146,1.111975
3,0.1051,0.117079,8.255239,30.921469,1.112301
4,0.1044,0.114973,7.945055,29.929886,1.098897
5,0.1036,0.111271,8.146021,31.38747,1.096442
6,0.1029,0.111614,8.237008,31.931374,1.094745
7,0.1017,0.113545,8.239085,31.084999,1.120637
8,0.1003,0.115415,8.427156,32.501236,1.111809
9,0.099,0.114557,8.076365,30.121805,1.113337
10,0.0973,0.116055,8.126109,30.461369,1.097831


[RotateEvalAnchors] epoch=1 offset=1 eval_size=2151
[RotateEvalAnchors] epoch=2 offset=2 eval_size=2151
[RotateEvalAnchors] epoch=3 offset=3 eval_size=2151
[RotateEvalAnchors] epoch=4 offset=4 eval_size=2150
[RotateEvalAnchors] epoch=5 offset=5 eval_size=2150
[RotateEvalAnchors] epoch=6 offset=6 eval_size=2150
[RotateEvalAnchors] epoch=7 offset=0 eval_size=2151
[RotateEvalAnchors] epoch=8 offset=1 eval_size=2151
[RotateEvalAnchors] epoch=9 offset=2 eval_size=2151
[RotateEvalAnchors] epoch=10 offset=3 eval_size=2151
[RotateEvalAnchors] epoch=11 offset=4 eval_size=2150


[CV] fold=2 metrics={'eval_loss': 0.11396749317646027, 'eval_mae': 8.144454956054688, 'eval_rmse': 30.920839309692383, 'eval_smape': 1.0998998880386353, 'eval_runtime': 8.866, 'eval_samples_per_second': 1697.954, 'eval_steps_per_second': 26.619, 'epoch': 12.0, 'fold': 2}

[CV] fold=3 train_rows=79709 valid_rows=20265 weeks=46..60
[RotateEvalAnchors] epoch=0 offset=0 eval_size=2151


Epoch,Training Loss,Validation Loss,Mae,Rmse,Smape
1,0.1091,0.092516,8.79741,32.945969,1.118749
2,0.1074,0.094115,8.893288,34.204792,1.058098
3,0.1037,0.094629,8.859655,34.462711,1.091313
4,0.1035,0.094158,9.02042,34.663277,1.088898
5,0.1026,0.090855,8.907953,34.698437,1.056575
6,0.1013,0.090366,8.937726,34.824444,1.035854
7,0.1005,0.092592,9.132565,35.816978,1.014174
8,0.0994,0.091967,9.051322,34.551258,1.066989
9,0.0981,0.091984,8.87588,34.297142,1.087774
10,0.0958,0.093654,8.93606,34.123398,1.120895


[RotateEvalAnchors] epoch=1 offset=1 eval_size=2151
[RotateEvalAnchors] epoch=2 offset=2 eval_size=2151
[RotateEvalAnchors] epoch=3 offset=3 eval_size=2151
[RotateEvalAnchors] epoch=4 offset=4 eval_size=2150
[RotateEvalAnchors] epoch=5 offset=5 eval_size=2150
[RotateEvalAnchors] epoch=6 offset=6 eval_size=2150
[RotateEvalAnchors] epoch=7 offset=0 eval_size=2151
[RotateEvalAnchors] epoch=8 offset=1 eval_size=2151
[RotateEvalAnchors] epoch=9 offset=2 eval_size=2151
[RotateEvalAnchors] epoch=10 offset=3 eval_size=2151
[RotateEvalAnchors] epoch=11 offset=4 eval_size=2150
[RotateEvalAnchors] epoch=12 offset=5 eval_size=2150
[RotateEvalAnchors] epoch=13 offset=6 eval_size=2150
[RotateEvalAnchors] epoch=14 offset=0 eval_size=2151
[RotateEvalAnchors] epoch=15 offset=1 eval_size=2151
[RotateEvalAnchors] epoch=16 offset=2 eval_size=2151
[RotateEvalAnchors] epoch=17 offset=3 eval_size=2151
[RotateEvalAnchors] epoch=18 offset=4 eval_size=2150
[RotateEvalAnchors] epoch=19 offset=5 eval_size=2150
[R

[CV] fold=3 metrics={'eval_loss': 0.09621411561965942, 'eval_mae': 9.233500480651855, 'eval_rmse': 35.97841262817383, 'eval_smape': 1.0035589933395386, 'eval_runtime': 9.5003, 'eval_samples_per_second': 1584.588, 'eval_steps_per_second': 24.841, 'epoch': 23.0, 'fold': 3}

[CV] fold=4 train_rows=81060 valid_rows=20265 weeks=61..75
[RotateEvalAnchors] epoch=0 offset=0 eval_size=2151


Epoch,Training Loss,Validation Loss,Mae,Rmse,Smape
1,0.0983,0.121603,6.763876,26.1194,1.299581
2,0.0982,0.116264,6.546128,25.148539,1.308147
3,0.0971,0.118495,6.56095,25.263678,1.227485
4,0.0939,0.11688,6.464201,24.445448,1.245273
5,0.0945,0.115725,6.523122,24.531322,1.285559
6,0.0918,0.116935,6.51052,24.545595,1.280205
7,0.0914,0.116871,6.573556,24.536333,1.2588
8,0.0906,0.115533,6.492199,24.502323,1.365151
9,0.0898,0.11611,6.412037,24.266222,1.309249


[RotateEvalAnchors] epoch=1 offset=1 eval_size=2151
[RotateEvalAnchors] epoch=2 offset=2 eval_size=2151
[RotateEvalAnchors] epoch=3 offset=3 eval_size=2151
[RotateEvalAnchors] epoch=4 offset=4 eval_size=2150
[RotateEvalAnchors] epoch=5 offset=5 eval_size=2150
[RotateEvalAnchors] epoch=6 offset=6 eval_size=2150
[RotateEvalAnchors] epoch=7 offset=0 eval_size=2151
[RotateEvalAnchors] epoch=8 offset=1 eval_size=2151


[CV] fold=4 metrics={'eval_loss': 0.11993442475795746, 'eval_mae': 6.595094203948975, 'eval_rmse': 24.66041374206543, 'eval_smape': 1.2341434955596924, 'eval_runtime': 9.0691, 'eval_samples_per_second': 1659.917, 'eval_steps_per_second': 26.022, 'epoch': 9.0, 'fold': 4}
CV avg ‚Üí loss=0.103413, smape=1.032320, mae=6.942, rmse=27.508
[FOLD ENSEMBLE] weights: [0.25005843718616927, 0.199786967154345, 0.18416914885757327, 0.20184922846882444, 0.164136218333088]


In [42]:
# ----------  # <CELL: final fit (all data)>
train_all = build_dataset(df)
valid_all = build_dataset(df)

final_model = make_model()
final_model.special_store_ids = SPECIAL_STORE_IDS

final_trainer = Trainer(
    model=final_model,
    args=training_args,
    train_dataset=train_all,
    eval_dataset=valid_all,
    data_collator=ts_data_collator,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=EARLY_STOP_PATIENCE)],
)
final_trainer.add_callback(RotateEvalAnchors(final_trainer, valid_all, step=ANCHOR_STEP))

final_trainer.train()
final_trainer.eval_dataset = valid_all
final_metrics = final_trainer.evaluate()
print("FINAL full-data eval:", final_metrics)

final_trainer.save_model(os.path.join(SAVE_DIR, "best"))
print("Saved:", os.path.join(SAVE_DIR, "best"))
print("LabelEncoder:", LE_PATH)

[RotateEvalAnchors] epoch=0 offset=0 eval_size=13924


Epoch,Training Loss,Validation Loss,Mae,Rmse,Smape
1,0.1031,0.101607,6.247122,25.935478,1.091347
2,0.1007,0.100064,6.133543,25.096621,1.150663
3,0.0993,0.098284,6.085174,24.678205,1.151478
4,0.0991,0.098624,6.048084,24.596313,1.087342
5,0.0989,0.095783,5.973545,24.261599,1.201196
6,0.0982,0.096731,6.125981,25.267939,1.160105
7,0.0966,0.093943,5.899421,23.747303,1.182195
8,0.0947,0.092614,5.927751,24.534285,1.103262
9,0.0932,0.091803,5.902679,25.95805,1.13514
10,0.0943,0.090206,5.862161,24.410206,1.102621


[RotateEvalAnchors] epoch=1 offset=1 eval_size=13924
[RotateEvalAnchors] epoch=2 offset=2 eval_size=13924
[RotateEvalAnchors] epoch=3 offset=3 eval_size=13924
[RotateEvalAnchors] epoch=4 offset=4 eval_size=13923
[RotateEvalAnchors] epoch=5 offset=5 eval_size=13923
[RotateEvalAnchors] epoch=6 offset=6 eval_size=13923
[RotateEvalAnchors] epoch=7 offset=0 eval_size=13924
[RotateEvalAnchors] epoch=8 offset=1 eval_size=13924
[RotateEvalAnchors] epoch=9 offset=2 eval_size=13924


FINAL full-data eval: {'eval_loss': 0.09849303215742111, 'eval_mae': 6.07979679107666, 'eval_rmse': 24.999366760253906, 'eval_smape': 1.0862210988998413, 'eval_runtime': 57.2811, 'eval_samples_per_second': 1701.522, 'eval_steps_per_second': 26.588, 'epoch': 10.0}
Saved: ./patchtst_sales_forecast\best
LabelEncoder: ./patchtst_sales_forecast\label_encoder.pkl


In [47]:
# ----------  # <CELL: load fold models for inference ensemble>
INFER_TRAINERS = []
if FOLD_ENSEMBLE:
    INFER_TRAINERS = []
    for fold in range(K_FOLDS):
        fold_dir = os.path.join(SAVE_DIR, f"fold_{fold}")
        bin_path = os.path.join(fold_dir, "pytorch_model.bin")
        if os.path.exists(bin_path):
            m = make_model()
            sd = torch.load(bin_path, map_location="cpu")
            missing, unexpected = m.load_state_dict(sd, strict=False)
            if missing:
                print(f"[fold {fold}] missing keys:", len(missing))
            if unexpected:
                print(f"[fold {fold}] unexpected keys:", len(unexpected))
            t = Trainer(
                model=m,
                args=training_args,
                data_collator=ts_data_collator,
                compute_metrics=compute_metrics,
            )
            INFER_TRAINERS.append(t)
    print(f"[FOLD ENSEMBLE] loaded {len(INFER_TRAINERS)} fold models.")
else:
    INFER_TRAINERS = []


[FOLD ENSEMBLE] loaded 0 fold models.


In [49]:
# ----------  # <CELL: inference> (Replace: helper Ìè¨Ìï®, sample_submission Ï†ÄÏû•)

import os, re, gc, glob
import numpy as np
import pandas as pd
import torch
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# (NEW) ÏµúÍ∑º KÏùºÏù¥ Ï†ÑÎ∂Ä 0Ïù¥Î©¥ ÎØ∏Îûò 7Ïùº 0ÏúºÎ°ú Í≥†Ï†ïÌïòÎäî Í∞ÄÎìú
ZERO_RUN_GUARD_DAYS = globals().get("ZERO_RUN_GUARD_DAYS", 14)

def _zero_run_guard(g: pd.DataFrame, yhat: np.ndarray) -> np.ndarray:
    v = pd.to_numeric(g["Îß§Ï∂úÏàòÎüâ"], errors="coerce").fillna(0).to_numpy()
    if ZERO_RUN_GUARD_DAYS > 0 and len(v) >= ZERO_RUN_GUARD_DAYS:
        if v[-ZERO_RUN_GUARD_DAYS:].sum() == 0:
            return np.zeros_like(yhat, dtype=float)
    return yhat

# (Ï°¥Ïû¨ÌïòÏßÄ ÏïäÏúºÎ©¥ Í∏∞Î≥∏Í∞í ÏÑ∏ÌåÖ - ÎÑ§ ÏΩîÎìúÎûë Î≥ÄÏàòÎ™Ö Ìò∏Ìôò)
USE_INT_ROUND    = globals().get("USE_INT_ROUND", False)   # Ï†ïÏàò Ï†úÏ∂ú ÏïÑÎãò: False Í∂åÏû•
CUT_THRESHOLD    = globals().get("CUT_THRESHOLD", None)    # Ïòà: 0.9 Îì±, NoneÏù¥Î©¥ ÎØ∏ÏÇ¨Ïö©
ENSEMBLE_NAIVE_W = globals().get("ENSEMBLE_NAIVE_W", 0.20) # Î≥¥ÏàòÏ†Å ÏïôÏÉÅÎ∏î Í∞ÄÏ§ë(0~1)
CAP_MULT         = globals().get("CAP_MULT", 1.0)          # ÏÉÅÌïú Ïó¨Ïú† Î∞∞Ïàò
ITEM_CAP         = globals().get("ITEM_CAP", {}) or {}

# (ÌïÑÏöî Ïãú) Í∞ÑÎã®Ìïú leftpad Íµ¨ÌòÑ ‚Äî ÎÑ§Í∞Ä Ïù¥ÎØ∏ Ï†ïÏùòÌï¥Îëî Ìï®ÏàòÍ∞Ä ÏûàÏúºÎ©¥ Í∑∏Í±∏ ÏÇ¨Ïö©
if "leftpad_to_context" not in globals():
    def leftpad_to_context(g: pd.DataFrame, context_len: int, store_menu: str) -> pd.DataFrame:
        g = g.sort_values("date").copy()
        n = len(g)
        if n >= context_len:
            return g
        need = context_len - n
        pad_end = g["date"].min() - pd.Timedelta(days=1)
        pad_dates = pd.date_range(end=pad_end, periods=need, freq="D")
        store, menu = store_menu.split("_", 1)
        pad = pd.DataFrame({
            "date": pad_dates,
            "ÏòÅÏóÖÏùºÏûê": pad_dates,
            "store_name": store,
            "menu_name": menu,
            "store_menu": store_menu,
            "ÏòÅÏóÖÏû•Î™Ö_Î©îÎâ¥Î™Ö": store_menu,
            "Îß§Ï∂úÏàòÎüâ": 0.0,
            "sales": 0.0,
        })
        pad = add_time_features(pad)
        return pd.concat([pad, g], ignore_index=True)

# --- helper: predictions -> (N, pred_len)Î°ú Ï†ïÍ∑úÌôî ---
def _extract_pred_matrix(pred_output, pred_len: int, target_ch: int = 0):
    raw = pred_output.predictions
    def _flatten(x):
        if isinstance(x, (list, tuple)):
            out = []
            for e in x: out.extend(_flatten(e))
            return out
        return [np.asarray(x)]

    arrs = _flatten(raw)
    nlc_candidates, ncl_candidates = [], []
    for a in arrs:
        a = np.asarray(a)
        if a.ndim == 3:
            if a.shape[1] == pred_len:   # (N, L, C)
                nlc_candidates.append(a)
            elif a.shape[2] == pred_len: # (N, C, L)
                ncl_candidates.append(a)
    if nlc_candidates:
        a = nlc_candidates[0]
        ch = target_ch if a.shape[2] > target_ch else 0
        return a[:, :, ch]
    if ncl_candidates:
        a = ncl_candidates[0]
        ch = target_ch if a.shape[1] > target_ch else 0
        return a[:, ch, :]
    for a in arrs:
        a = np.asarray(a)
        if a.ndim == 2 and a.shape[1] == pred_len:
            return a
    shapes = [np.asarray(a).shape for a in arrs]
    raise ValueError(f"[extract] Cannot find (N,{pred_len}) from predictions; seen shapes={shapes}")

def _find_sample_submission():
    for p in ["./dataset/sample_submission.csv", "./sample_submission.csv", "/mnt/data/sample_submission.csv"]:
        if os.path.exists(p): return p
    raise FileNotFoundError("sample_submission.csv Í≤ΩÎ°úÎ•º Ï∞æÏßÄ Î™ªÌñàÏäµÎãàÎã§.")

def _make_future_rows(store_menu, last_date, horizon=PRED_LEN):
    # ÎØ∏Îûò 1~horizonÏùº ÏÉùÏÑ±(Î™®Îç∏ ÏûÖÎ†•Ïö© Í≥µÎ≥ÄÎüâÎßå ÌïÑÏöî)
    future_dates = [last_date + pd.Timedelta(days=i) for i in range(1, horizon + 1)]
    store, menu = str(store_menu).split("_", 1)
    fut = pd.DataFrame({
        "ÏòÅÏóÖÏùºÏûê": future_dates,
        "ÏòÅÏóÖÏû•Î™Ö_Î©îÎâ¥Î™Ö": store_menu,
        "Îß§Ï∂úÏàòÎüâ": 0.0,
        "store_name": store,
        "menu_name": menu,
        "store_menu": store_menu,
        "date": future_dates,
    })
    fut = add_time_features(fut)
    return fut

def _predict_last_window_for_file(file_path, sample_df):
    """
    Ìïú Í∞ú TEST_xx.csvÏóê ÎåÄÌï¥:
      - Í∞Å 'ÏòÅÏóÖÏû•Î™Ö_Î©îÎâ¥Î™Ö'Î≥Ñ ÎßàÏßÄÎßâ CTXÏùº + ÎØ∏Îûò 7Ïùº Íµ¨ÏÑ±
      - ÎßàÏßÄÎßâ ÏúàÎèÑÏö∞ 7ÏùºÎßå ÏòàÏ∏°
      - baseline Ìè¨Îß∑(ÏòÅÏóÖÏùºÏûê='TEST_xx+{d}Ïùº')ÏúºÎ°ú Î†àÏΩîÎìú Î∞òÌôò
    """
    test_prefix = re.search(r"(TEST_\d+)", os.path.basename(file_path)).group(1)

    # ÌòÑÏû¨ Î°úÎçîÏùò Î™®Îç∏ Ïª®ÌÖçÏä§Ìä∏ Í∏∏Ïù¥Î•º ÏßÅÏ†ë ÏùΩÏñ¥ ÎèôÍ∏∞Ìôî
    CTX = int(getattr(final_trainer.model.base.config, "context_length", CONTEXT_LEN))

    df_t = pd.read_csv(file_path)
    df_t["date"] = pd.to_datetime(df_t["ÏòÅÏóÖÏùºÏûê"])
    s = df_t["ÏòÅÏóÖÏû•Î™Ö_Î©îÎâ¥Î™Ö"].astype(str).str.split("_", n=1, expand=True)
    df_t["store_name"] = s[0]; df_t["menu_name"] = s[1]
    df_t["store_menu"] = df_t["store_name"] + "_" + df_t["menu_name"]
    df_t["Îß§Ï∂úÏàòÎüâ"] = pd.to_numeric(df_t["Îß§Ï∂úÏàòÎüâ"], errors="coerce").fillna(0.0)
    df_t.loc[df_t["Îß§Ï∂úÏàòÎüâ"] < 0, "Îß§Ï∂úÏàòÎüâ"] = 0.0
    df_t["sales"] = df_t["Îß§Ï∂úÏàòÎüâ"].astype(float)  # ÎÇòÏù¥Î∏å ÏÑûÍ∏∞ Ïö©
    # (ÏòµÏÖò) ÌõàÎ†®Í≥º ÎèôÏùº Ï†ïÍ∑úÌôîÎ•º ÏõêÌïòÎ©¥ Ï£ºÏÑù Ìï¥Ï†ú
    df_t = enforce_regular_daily(df_t)
    df_t = add_time_features(df_t)

    records = []
    for store_menu, g in df_t.groupby("ÏòÅÏóÖÏû•Î™Ö_Î©îÎâ¥Î™Ö"):
        g = g.sort_values("date")

        # Ïä§ÌÇµ Í∏àÏßÄ: Î∂ÄÏ°±ÌïòÎ©¥ ÏôºÏ™Ω Ï†úÎ°úÌå®Îî©ÏúºÎ°ú CTX ÌôïÎ≥¥
        g = leftpad_to_context(g, CTX, store_menu)

        last_date = g["date"].max()
        fut = _make_future_rows(store_menu, last_date, horizon=PRED_LEN)
        combo = pd.concat([g, fut], ignore_index=True)
        combo["sales"] = pd.to_numeric(combo["Îß§Ï∂úÏàòÎüâ"], errors="coerce").fillna(0.0)

        # ÎßàÏßÄÎßâ ÏúàÎèÑÏö∞Îßå ÎÇ®ÍπÄ
        combo_tail = combo.iloc[-(CTX + PRED_LEN):].copy()

        # ÎùºÎ≤®Ïù∏ÏΩîÎçî(ÌÖåÏä§Ìä∏ Ïã†Í∑ú ID Ìè¨Ìï®) ‚Äî rawÍ∞Ä ÏóÜÏùÑ ÏàòÎèÑ ÏûàÏúºÎãà Í∞ÄÎìú
        base_series = raw["store_menu"] if "raw" in globals() else combo_tail["store_menu"]
        le2 = fit_or_load_label_encoder(pd.concat([base_series, combo_tail["store_menu"]]))
        combo_fin = finalize_columns(combo_tail, le2)

        # Ïù¥ ÏïÑÏù¥ÌÖúÎßå ÏûàÎäî dataset ÏÉùÏÑ± ‚Üí Ï†ïÌôïÌûà 1 ÏÉòÌîå (Î™®Îç∏ CTXÎ°ú ÎπåÎìú)
        ds = build_dataset(combo_fin, context_len=CTX, prediction_len=PRED_LEN)

        # --- Fold ensemble predictors ---
        # Ìè¥Îìú Î™®Îç∏Ïù¥ ÏûàÏúºÎ©¥ Í∑∏Í≤ÉÎßå ÏÇ¨Ïö©, ÏóÜÏúºÎ©¥ ÏµúÏ¢Ö Î™®Îç∏ ÌïòÎÇò ÏÇ¨Ïö©
        predictors = INFER_TRAINERS if(FOLD_ENSEMBLE and
                                       'INFER_TRAINERS' in globals() and
                                       len(INFER_TRAINERS) > 0) else [final_trainer]

        yhat_list = []
        for t in predictors:
            # predict ÏàúÍ∞ÑÎßå ÏõåÏª§/Î∞∞Ïπò/ÌçºÏãúÏä§ÌÑ¥Ìä∏ ÏõåÏª§ ÏÑ§Ï†ïÏùÑ ÏûÑÏãúÎ°ú Ï°∞Ï†ï
            old_workers = getattr(t.args, "dataloader_num_workers", None)
            old_eval_bs = getattr(t.args, "per_device_eval_batch_size", None)
            old_persist = getattr(t.args, "dataloader_persistent_workers", None)

            t.args.dataloader_num_workers = 0
            t.args.per_device_eval_batch_size = min(16, (old_eval_bs or 16))
            if old_persist is not None:
                t.args.dataloader_persistent_workers = False

            try:
                preds_out = t.predict(ds)
            finally:
                # ÏõêÎ≥µ
                if old_workers is not None:
                    t.args.dataloader_num_workers = old_workers
                if old_eval_bs is not None:
                    t.args.per_device_eval_batch_size = old_eval_bs
                if old_persist is not None:
                    t.args.dataloader_persistent_workers = old_persist

            # Î°úÍ∑∏->Ïõê Ïä§ÏºÄÏùº & ÎßàÏßÄÎßâ ÏÉòÌîå 7ÏùºÎßå
            Y = _extract_pred_matrix(preds_out, PRED_LEN, target_ch=0)
            yhat_i = np.clip(np.expm1(Y[-1]), 0, None)
            yhat_list.append(yhat_i)

        S = np.stack(yhat_list, axis=0)  # (n_models, 7)
        if S.shape[0] == 1:
            yhat = S[0]
        else:
            w = np.asarray(globals().get("FOLD_WEIGHTS", []), dtype=float)
            if w.size != S.shape[0] or not np.isfinite(w).all():
                print(f"[WARN] FOLD_WEIGHTS invalid (len={w.size}, models={S.shape[0]}) ‚Üí uniform avg")
                w = np.ones(S.shape[0], dtype=float) / S.shape[0]
            yhat = np.average(S, axis=0, weights=w)

        if ENSEMBLE_NAIVE_W > 0:
            yhat = _blend_with_naive(yhat, g)


        # zero-run Í∞ÄÎìúÏôÄ ÏÉÅÌïú ÌÅ¥Î¶¨Ìïë
        yhat = _zero_run_guard(g, yhat)
        cap = ITEM_CAP.get(store_menu, None)
        if cap is not None and np.isfinite(cap):
            yhat = np.minimum(yhat, float(cap) * float(CAP_MULT))

        # Ï∂úÎ†• Î†àÏΩîÎìú Ï†ÅÏû¨
        pred_dates = [f"{test_prefix}+{i + 1}Ïùº" for i in range(PRED_LEN)]
        for d_str, val in zip(pred_dates, yhat):
            records.append({
                "ÏòÅÏóÖÏùºÏûê": d_str,
                "ÏòÅÏóÖÏû•Î™Ö_Î©îÎâ¥Î™Ö": store_menu,
                "Îß§Ï∂úÏàòÎüâ": float(val),
            })

    return pd.DataFrame(records)

# --- Ïã§Ìñâ: TEST_*Î≥Ñ ÏòàÏ∏° ‚Üí sample_submissionÏúºÎ°ú ÌîºÎ≤ó & Ï†ÄÏû• ---
test_files = find_test_files()
if not test_files:
    print("No TEST_*.csv detected; skipping inference.")
else:
    print("Found test files:", len(test_files))
    ss_path = _find_sample_submission()
    submit_df = pd.read_csv(ss_path)  # ÏµúÏ¢Ö Ï†úÏ∂ú DF (Ïó¨Í∏∞Ïóê Ï±ÑÏõåÎÑ£Ïùå)

    # Ïà´Ïûê Ïª¨Îüº floatÎ°ú Ïó¥Ïñ¥Îë† (ÎßàÏßÄÎßâÏóê Ïã§Ïàò/Ï†ïÏàò ÌÜ†Í∏Ä)
    for c in submit_df.columns[1:]:
        submit_df[c] = 0.0

    for p in sorted(test_files):
        df_pred_one = _predict_last_window_for_file(p, submit_df)

        # Ïù¥ ÌååÏùºÏóê Ìï¥ÎãπÌïòÎäî ÌñâÎßå ÏÑ†ÌÉù (Ïòà: 'TEST_03+1Ïùº' Îì±)
        pred_index = df_pred_one["ÏòÅÏóÖÏùºÏûê"].unique().tolist()
        mask_rows = submit_df["ÏòÅÏóÖÏùºÏûê"].isin(pred_index)

        # ÌååÏùº Îã®ÏúÑ ÌîºÎ≤ó: (Ìñâ=ÏòÅÏóÖÏùºÏûê, Ïó¥=ÏïÑÏù¥ÌÖú, Í∞í=ÏàòÎüâ)
        pivot = df_pred_one.pivot(index="ÏòÅÏóÖÏùºÏûê",
                                  columns="ÏòÅÏóÖÏû•Î™Ö_Î©îÎâ¥Î™Ö",
                                  values="Îß§Ï∂úÏàòÎüâ")

        # Í≥µÌÜµ Ïó¥Îßå Ï£ºÏûÖ (Í∞ÑÌòπ Ïó¥ Î∂àÏùºÏπò ÎåÄÎπÑ)
        common_cols = submit_df.columns[1:].intersection(pivot.columns)
        sub_view = pivot.reindex(index=submit_df.loc[mask_rows, "ÏòÅÏóÖÏùºÏûê"],
                                 columns=common_cols).fillna(0.0)

        # Í∞í ÎåÄÏûÖ(ÎÑòÌååÏù¥Î°ú Î≥µÏÇ¨ ‚Üí Î©îÎ™®Î¶¨ Ï†àÏïΩ)
        submit_df.loc[mask_rows, common_cols] = sub_view.to_numpy()

        # Î©îÎ™®Î¶¨ Ï†ïÎ¶¨
        del df_pred_one, pivot, sub_view
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        print(f"Filled rows for {os.path.basename(p)} ‚Üí {mask_rows.sum()} rows, cols={len(common_cols)}")

    # ÏÇ¨Ï†Ñ Ï†êÍ≤Ä: 0 ÎπÑÏú®
    zero_ratio_before = (submit_df.iloc[:, 1:] == 0).mean().mean()
    print(f"[Sanity] zero ratio BEFORE postprocess: {zero_ratio_before:.3f}")

    # ÌõÑÏ≤òÎ¶¨: Ïª∑Ïò§ÌîÑ/Ï†ïÏàòÌôî ÌÜ†Í∏Ä
    vals = submit_df.iloc[:, 1:].to_numpy(dtype=float, copy=False)
    if CUT_THRESHOLD is not None:
        vals = np.where(vals < float(CUT_THRESHOLD), 0.0, vals)
    if USE_INT_ROUND:
        vals = np.rint(np.clip(vals, 0, None)).astype(int, copy=False)
    else:
        vals = np.clip(vals, 0, None)  # Ïã§Ïàò Ïú†ÏßÄ(Ï†ïÏàò Ï†úÏ∂ú ÏïÑÎãò)
    submit_df.iloc[:, 1:] = vals

    zero_ratio_after = (submit_df.iloc[:, 1:] == 0).mean().mean()
    print(f"[Sanity] zero ratio AFTER postprocess: {zero_ratio_after:.3f}")

    out_dir = os.path.join(SAVE_DIR, "results")
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, "submission_patchtst.csv")
    submit_df.to_csv(out_path, index=False, encoding="utf-8-sig")
    print("Ï†úÏ∂úÏö© CSV Ï†ÄÏû• ÏôÑÎ£å ‚Üí", out_path)

Found test files: 10


Filled rows for TEST_00.csv ‚Üí 7 rows, cols=193


Filled rows for TEST_01.csv ‚Üí 7 rows, cols=193


Filled rows for TEST_02.csv ‚Üí 7 rows, cols=193


Filled rows for TEST_03.csv ‚Üí 7 rows, cols=193


Filled rows for TEST_04.csv ‚Üí 7 rows, cols=193


Filled rows for TEST_05.csv ‚Üí 7 rows, cols=193


Filled rows for TEST_06.csv ‚Üí 7 rows, cols=193


Filled rows for TEST_07.csv ‚Üí 7 rows, cols=193


Filled rows for TEST_08.csv ‚Üí 7 rows, cols=193


Filled rows for TEST_09.csv ‚Üí 7 rows, cols=193
[Sanity] zero ratio BEFORE postprocess: 0.138
[Sanity] zero ratio AFTER postprocess: 0.138
Ï†úÏ∂úÏö© CSV Ï†ÄÏû• ÏôÑÎ£å ‚Üí ./patchtst_sales_forecast\results\submission_patchtst.csv


In [None]:
# ----------  # <CELL: evaluate (28‚Üí7 backtest via pseudo-TEST; uses real inference pipeline)>

import os, gc, re, glob, shutil
import numpy as np
import pandas as pd
from datetime import timedelta

# ÌïÑÏàò Ï†ÑÏó≠ Ï≤¥ÌÅ¨
assert 'CONTEXT_LEN' in globals() and 'PRED_LEN' in globals(), "ÏÉÅÎã® ÏÑ§Ï†ï ÏÖÄÏùÑ Î®ºÏ†Ä Ïã§ÌñâÌïòÏÑ∏Ïöî."
assert 'SAVE_DIR' in globals(), "ÏÉÅÎã® ÏÑ§Ï†ï ÏÖÄÏùÑ Î®ºÏ†Ä Ïã§ÌñâÌïòÏÑ∏Ïöî."
assert '_predict_last_window_for_file' in globals(), "inference ÏÖÄ(ÏÖÄ 14)ÏùÑ Î®ºÏ†Ä Ïã§ÌñâÌï¥ Ï£ºÏÑ∏Ïöî."

# 0) train.csv Î°úÎìú
train_candidates = ["./dataset/train/train.csv", "./dataset/train.csv", "/mnt/data/train.csv"]
train_df = None
for _p in train_candidates:
    try:
        train_df = pd.read_csv(_p)
        print("Loaded train:", _p)
        break
    except Exception:
        pass
if train_df is None:
    raise FileNotFoundError("train.csvÎ•º Ï∞æÏßÄ Î™ªÌñàÏäµÎãàÎã§.")

train_df["ÏòÅÏóÖÏùºÏûê"] = pd.to_datetime(train_df["ÏòÅÏóÖÏùºÏûê"])
train_df = train_df.sort_values(["ÏòÅÏóÖÏû•Î™Ö_Î©îÎâ¥Î™Ö","ÏòÅÏóÖÏùºÏûê"]).reset_index(drop=True)

# 1) ÌèâÍ∞ÄÏö© ÏûÑÏãú ÎîîÎ†âÌÜ†Î¶¨
EVAL_DIR = os.path.join(SAVE_DIR, "eval_tmp")
if os.path.exists(EVAL_DIR):
    shutil.rmtree(EVAL_DIR)
os.makedirs(EVAL_DIR, exist_ok=True)

# 2) Í∞Å ÏïÑÏù¥ÌÖúÎ≥ÑÎ°ú 'fold' Ï†ïÏùò: fold=0ÏùÄ Í∞ÄÏû• ÏµúÍ∑º 7ÏùºÏùÑ Í≤ÄÏ¶ù, fold=1ÏùÄ Í∑∏ ÏßÅÏ†Ñ 7Ïùº...
def get_fold_periods_per_item(g: pd.DataFrame, k: int):
    """
    g: ÌäπÏ†ï 'ÏòÅÏóÖÏû•Î™Ö_Î©îÎâ¥Î™Ö' Îç∞Ïù¥ÌÑ∞ÌîÑÎ†àÏûÑ(ÎÇ†Ïßú Ïò§Î¶ÑÏ∞®Ïàú)
    k: 0..K_FOLDS-1, k=0Ïù¥Î©¥ Í∞ÄÏû• ÏµúÍ∑º 7Ïùº, k=1Ïù¥Î©¥ Í∑∏ Ïù¥Ï†Ñ 7Ïùº ...
    return: (ctx_start, ctx_end, val_start, val_end) ÎòêÎäî None(Í∏∏Ïù¥Î∂ÄÏ°±)
    """
    if len(g) < CONTEXT_LEN + PRED_LEN + k*PRED_LEN:
        return None
    last_date = g["ÏòÅÏóÖÏùºÏûê"].max()
    val_end   = last_date - timedelta(days=k*PRED_LEN)
    val_start = val_end - timedelta(days=PRED_LEN-1)
    ctx_end   = val_start - timedelta(days=1)
    ctx_start = ctx_end - timedelta(days=CONTEXT_LEN-1)
    if (g["ÏòÅÏóÖÏùºÏûê"].min() <= ctx_start) and (ctx_end < val_start):
        return (ctx_start, ctx_end, val_start, val_end)
    return None

# 3) ÏßÄÌëú
def smape(a, p):
    a = np.asarray(a, float)
    p = np.asarray(p, float)
    denom = (np.abs(a) + np.abs(p))
    mask = denom > 0
    if not np.any(mask):
        return 0.0
    return float(np.mean(2.0*np.abs(a[mask]-p[mask]) / denom[mask]) * 100.0)

def mae(a, p):
    a = np.asarray(a, float)
    p = np.asarray(p, float)
    return float(np.mean(np.abs(a-p)))

# 4) Í≥µÌÜµ Ïª¨Îüº Ï§ÄÎπÑ
all_items = sorted(train_df["ÏòÅÏóÖÏû•Î™Ö_Î©îÎâ¥Î™Ö"].unique())

# 5) Ìè¥Îìú Î∞òÎ≥µ: Ïã§Ï†ú TEST ÌååÏùºÏùÑ ÌùâÎÇ¥ ÎÇ¥ÏÑú(Ïª®ÌÖçÏä§Ìä∏ 28Ïùº) ÏòàÏ∏° ‚Üí GTÏôÄ Îß§Ïπ≠Ìï¥ ÌèâÍ∞Ä
fold_results = []
all_eval_rows = []  # ÏÉÅÏÑ∏ Ï†ÄÏû•Ïö©

for fold in range(K_FOLDS):
    # Ïù¥ Ìè¥ÎìúÏóêÏÑú ÏÇ¨Ïö©Ìï† Ïª®ÌÖçÏä§Ìä∏/ÎùºÎ≤® Ìñâ ÏàòÏßë
    test_rows = []
    gt_rows   = []
    for sid, g in train_df.groupby("ÏòÅÏóÖÏû•Î™Ö_Î©îÎâ¥Î™Ö"):
        g = g.sort_values("ÏòÅÏóÖÏùºÏûê")
        periods = get_fold_periods_per_item(g, fold)
        if periods is None:
            continue
        ctx_start, ctx_end, val_start, val_end = periods

        g_ctx = g[(g["ÏòÅÏóÖÏùºÏûê"]>=ctx_start)&(g["ÏòÅÏóÖÏùºÏûê"]<=ctx_end)][["ÏòÅÏóÖÏùºÏûê","ÏòÅÏóÖÏû•Î™Ö_Î©îÎâ¥Î™Ö","Îß§Ï∂úÏàòÎüâ"]].copy()
        g_val = g[(g["ÏòÅÏóÖÏùºÏûê"]>=val_start)&(g["ÏòÅÏóÖÏùºÏûê"]<=val_end)][["ÏòÅÏóÖÏùºÏûê","ÏòÅÏóÖÏû•Î™Ö_Î©îÎâ¥Î™Ö","Îß§Ï∂úÏàòÎüâ"]].copy()

        test_rows.append(g_ctx)
        gt_rows.append(g_val)

    if not test_rows or not gt_rows:
        print(f"[Fold {fold}] Ïú†Ìö®Ìïú Ìï≠Î™©Ïù¥ ÏóÜÏñ¥ Ïä§ÌÇµ")
        continue

    df_test = pd.concat(test_rows, ignore_index=True)
    df_gt   = pd.concat(gt_rows,   ignore_index=True)

    # 5-1) ÏÉòÌîåÏÑúÎ∏å ÏÉùÏÑ± (Ìñâ: TEST_{fold}+1..+7Ïùº, Ïó¥: Î™®Îì† ÏïÑÏù¥ÌÖú)
    ss = pd.DataFrame({"ÏòÅÏóÖÏùºÏûê":[f"TEST_{fold:02d}+{d}Ïùº" for d in range(1, PRED_LEN+1)]})
    for c in all_items:
        ss[c] = 0.0

    # 5-2) inference ÌååÏù¥ÌîÑÎùºÏù∏ Í∑∏ÎåÄÎ°ú Ïã§Ìñâ (ÌïµÏã¨!)
    #     _predict_last_window_for_file(file_path, sample_df) ÏÇ¨Ïö©
    test_path = os.path.join(EVAL_DIR, f"TEST_{fold:02d}.csv")
    df_test.to_csv(test_path, index=False, encoding="utf-8-sig")
    submit_df = ss.copy()
    df_pred_one = _predict_last_window_for_file(test_path, submit_df)  # ‚Üê inference ÏÖÄÏùò Ìï®Ïàò

    # 5-3) ÏòàÏ∏° ÌîºÎ≤ó (Ìñâ: TEST_xx+{d}Ïùº, Ïó¥: ÏïÑÏù¥ÌÖú)
    pivot = df_pred_one.pivot(index="ÏòÅÏóÖÏùºÏûê", columns="ÏòÅÏóÖÏû•Î™Ö_Î©îÎâ¥Î™Ö", values="Îß§Ï∂úÏàòÎüâ").sort_index()
    reindexed = pd.DataFrame(index=[f"TEST_{fold:02d}+{d}Ïùº" for d in range(1,PRED_LEN+1)],
                             columns=all_items, data=0.0)
    common_cols = reindexed.columns.intersection(pivot.columns)
    reindexed.loc[:, common_cols] = pivot[common_cols].reindex(reindexed.index).to_numpy()

    # 5-4) GTÏóê day_offset(1..7) Î∂ÄÏó¨: ÏïÑÏù¥ÌÖúÎ≥Ñ ÎÇ†Ïßú Ïò§Î¶ÑÏ∞®ÏàúÏúºÎ°ú 1~7 Î≤àÌò∏
    gt_sorted = df_gt.sort_values(["ÏòÅÏóÖÏû•Î™Ö_Î©îÎâ¥Î™Ö","ÏòÅÏóÖÏùºÏûê"]).copy()
    gt_sorted["day_offset"] = gt_sorted.groupby("ÏòÅÏóÖÏû•Î™Ö_Î©îÎâ¥Î™Ö").cumcount() + 1
    gt_sorted = gt_sorted[gt_sorted["day_offset"]<=PRED_LEN]

    # 5-5) ÏòàÏ∏° long Ìè¨Îß∑ + day_offset
    pred_long = reindexed.reset_index().melt(id_vars="index", var_name="ÏòÅÏóÖÏû•Î™Ö_Î©îÎâ¥Î™Ö", value_name="y_pred")
    pred_long = pred_long.rename(columns={"index":"ÏòÅÏóÖÏùºÏûê"})
    pred_long["day_offset"] = pred_long["ÏòÅÏóÖÏùºÏûê"].str.extract(r"\+(\d+)Ïùº").astype(int)

    gt_long = gt_sorted.rename(columns={"Îß§Ï∂úÏàòÎüâ":"y_true"})[["ÏòÅÏóÖÏû•Î™Ö_Î©îÎâ¥Î™Ö","ÏòÅÏóÖÏùºÏûê","y_true","day_offset"]]

    merged = pd.merge(pred_long, gt_long, on=["ÏòÅÏóÖÏû•Î™Ö_Î©îÎâ¥Î™Ö","day_offset"], how="inner")
    merged = merged.dropna(subset=["y_true","y_pred"])

    # 5-6) ÏßÄÌëú
    s_overall = smape(merged["y_true"], merged["y_pred"])
    m_overall = mae(merged["y_true"], merged["y_pred"])
    print(f"[Fold {fold}] SMAPE={s_overall:.3f}  MAE={m_overall:.3f}  (items={merged['ÏòÅÏóÖÏû•Î™Ö_Î©îÎâ¥Î™Ö'].nunique()}, N={len(merged):,})")
    fold_results.append((fold, s_overall, m_overall))

    # ÏÉÅÏÑ∏ Ï†ÄÏû•Ïö© Ïª¨Îüº Ï∂îÍ∞Ä
    merged["fold"] = fold
    all_eval_rows.append(merged)

# 6) ÏöîÏïΩ + Ï†ÄÏû•
if fold_results:
    sm = float(np.mean([x[1] for x in fold_results]))
    mm = float(np.mean([x[2] for x in fold_results]))
    print("\n[EVAL SUMMARY]")
    for f, s, m in fold_results:
        print(f"  - Fold {f}: SMAPE={s:.3f}, MAE={m:.3f}")
    print(f"  => Mean SMAPE={sm:.3f}, Mean MAE={mm:.3f}")

    # ÏÉÅÏÑ∏ CSV Ï†ÄÏû•
    out_dir = os.path.join(SAVE_DIR, "results")
    os.makedirs(out_dir, exist_ok=True)
    if all_eval_rows:
        full = pd.concat(all_eval_rows, ignore_index=True)
        # Ï†ïÎ¶¨: Ïó¥ ÏàúÏÑú
        cols = ["fold","ÏòÅÏóÖÏû•Î™Ö_Î©îÎâ¥Î™Ö","day_offset","ÏòÅÏóÖÏùºÏûê_x","ÏòÅÏóÖÏùºÏûê_y","y_true","y_pred"]
        for c in cols:
            if c not in full.columns:
                cols = [x for x in cols if x in full.columns]
                break
        full.to_csv(os.path.join(out_dir, "eval_detailed_per_item_day.csv"), index=False, encoding="utf-8-sig")
        print("Saved detailed results ‚Üí", os.path.join(out_dir, "eval_detailed_per_item_day.csv"))
else:
    print("ÌèâÍ∞Ä Í≤∞Í≥ºÍ∞Ä ÏóÜÏäµÎãàÎã§. (Îç∞Ïù¥ÌÑ∞ Í∏∏Ïù¥/ÏÑ∏ÌåÖ ÌôïÏù∏)")
