<a href="https://colab.research.google.com/github/yyduyuxuan/Machine-Learning-for-Data-Driven-Inventory-Replenishment-Evidence-from-the-M5-Retail-Dataset/blob/main/Fine_Tuning_Results.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os, glob
import pandas as pd
import numpy as np

from google.colab import drive
# Mount Google Drive at /content/drive so the files under MyDrive are reachable.
drive.mount('/content/drive')

# Folder that holds the per-run error tables and the per-run forecast directories.
base_dir = "/content/drive/MyDrive/Colab Notebooks/Supervised Project/Fine Tuning/"
# Run labels; each one is substituted into "<label>_error_df.pkl" and
# "train_store_dept_<label>/" when the runs are loaded.
# NOTE(review): labels presumably encode train/validation window lengths - confirm.
horizons = ["1y1y", "2y1y", "3y1y"]  # one entry per compared run
# Columns whose values together identify one series.
group_key = ["store_id", "dept_id"]


def attach_seriesname(df, group_key, series_col="SeriesName"):
    """Ensure ``df`` carries a series-identifier column.

    Args:
        df: Frame to tag.
        group_key: Column names whose row values are combined into the identifier.
        series_col: Name of the identifier column.

    Returns:
        ``df`` itself (not a copy) when ``series_col`` already exists; otherwise
        a copy of ``df`` with ``series_col`` added, holding one tuple of the
        ``group_key`` values per row.
    """
    if series_col not in df.columns:
        tagged = df.copy()
        key_rows = tagged[group_key].to_numpy()
        tagged[series_col] = [tuple(values) for values in key_rows]
        return tagged
    return df

def make_weights(all_results, group_key, series_col="SeriesName"):
    """Compute one weight per series from the summed ``true`` column.

    Args:
        all_results: Frame with a ``true`` column and either ``series_col`` or
            the ``group_key`` columns.
        group_key: Column names identifying a series.
        series_col: Name of the series-identifier column.

    Returns:
        DataFrame with columns ``[series_col, "weight"]``. Each weight is the
        series' share of the grand total of ``true``; when that total is not
        positive every series gets the same weight ``1 / n_series``.
    """
    ar = attach_seriesname(all_results, group_key, series_col)
    w = (ar.groupby(series_col, as_index=False)["true"]
           .sum()
           .rename(columns={"true": "val_sum_true"}))
    tot = w["val_sum_true"].sum()
    if tot <= 0:
        # max(..., 1) keeps an input with no series from raising
        # ZeroDivisionError; the result is then simply an empty weight table.
        w["weight"] = 1.0 / max(len(w), 1)
    else:
        w["weight"] = w["val_sum_true"] / tot
    return w[[series_col, "weight"]]

def compare_runs(runs_dict, group_key, series_col="SeriesName"):
    """Summarize several runs and stack the one-row summaries into one table.

    Args:
        runs_dict: Mapping of run name -> ``(error_df, all_results)`` pair.
        group_key: Column names identifying a series.
        series_col: Name of the series-identifier column.

    Returns:
        DataFrame with one row per run, ordered by ascending ``WRMSSE``.
        The index still shows each run's position in ``runs_dict``.
    """
    per_run = [
        summarize_run(errors, results, group_key, model_name=label, series_col=series_col)
        for label, (errors, results) in runs_dict.items()
    ]
    stacked = pd.concat(per_run, ignore_index=True)
    return stacked.sort_values("WRMSSE")


def load_all_results_dir(dir_path):
    """Read every ``forecast_*.pkl`` in ``dir_path`` into a single frame.

    Files are read in sorted path order and concatenated with a fresh
    0..n-1 index.

    Args:
        dir_path: Directory to search (not recursive).

    Returns:
        The concatenated DataFrame, or an empty DataFrame (after printing a
        warning) when no file matches.
    """
    pattern = os.path.join(dir_path, "forecast_*.pkl")
    paths = sorted(glob.glob(pattern))
    if len(paths) == 0:
        print(f"[WARN] No forecast_*.pkl found in {dir_path}")
        return pd.DataFrame()
    frames = list(map(pd.read_pickle, paths))
    return pd.concat(frames, ignore_index=True)

# Collect, per run label, the error table and the concatenated forecast frames.
runs = {}
for hz in horizons:
    err_path = os.path.join(base_dir, f"{hz}_error_df.pkl")
    res_dir = os.path.join(base_dir, f"train_store_dept_{hz}/")

    error_df, all_results = pd.read_pickle(err_path), load_all_results_dir(res_dir)
    runs[hz] = (error_df, all_results)
    print(f"[LOAD] {hz}: error_df={len(error_df)} rows, all_results={len(all_results)} rows")

summary = compare_runs(runs, group_key)

# Timing columns are only present when the forecast frames carried train_secs.
_report_cols = ["model", "WRMSSE", "RMSSE_median", "RMSSE_p90", "ME_mean"]
if "train_secs_sum" in summary.columns:
    _report_cols += ["train_secs_sum", "train_secs_avg"]
print(summary[_report_cols])


Mounted at /content/drive
[LOAD] 1y1y: error_df=70 rows, all_results=10377296 rows
[LOAD] 2y1y: error_df=70 rows, all_results=10377296 rows
[LOAD] 3y1y: error_df=70 rows, all_results=10377296 rows
  model  WRMSSE  RMSSE_median  RMSSE_p90   ME_mean  train_secs_sum  \
0  1y1y     0.0      0.727222   1.228258 -0.219949     2868.333455   
1  2y1y     0.0      0.717693   1.059117 -0.177910     3284.359483   
2  3y1y     0.0      0.695149   1.047871 -0.151094     3662.511812   

   train_secs_avg  
0       40.976192  
1       46.919421  
2       52.321597  


In [None]:
from ast import literal_eval

def coerce_seriesname(df, group_key, series_col="SeriesName"):
    """Return a copy of ``df`` whose ``series_col`` holds tuple identifiers.

    When ``series_col`` exists, each value is normalized in this order:

    1. a tuple is kept as is;
    2. a string that ``ast.literal_eval`` parses to a tuple (for example
       ``"('CA_1', 'FOODS_1')"``) is replaced by that tuple;
    3. anything else is replaced by the row's ``group_key`` values as a tuple,
       provided all ``group_key`` columns exist; otherwise it is left as is.

    When ``series_col`` is missing it is built from the ``group_key`` columns.

    Args:
        df: Input frame; never modified.
        group_key: Column names identifying a series. May be ``None`` as long
            as ``series_col`` is already present.
        series_col: Name of the series-identifier column.

    Returns:
        A copy of ``df`` with ``series_col`` added or normalized.
    """
    out = df.copy()
    if series_col not in out.columns:
        out[series_col] = list(map(tuple, out[group_key].to_numpy()))
        return out

    # Identifiers repeat across many rows, so each distinct string is parsed once.
    parsed_strings = {}

    def _as_tuple(value):
        """Return ``value`` as a tuple, or None when it is not tuple-like."""
        if isinstance(value, tuple):
            return value
        if not isinstance(value, str):
            return None
        if value not in parsed_strings:
            try:
                candidate = literal_eval(value)
            except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
                # Exactly the errors literal_eval documents for malformed input.
                candidate = None
            parsed_strings[value] = candidate if isinstance(candidate, tuple) else None
        return parsed_strings[value]

    raw_values = list(out[series_col])
    coerced = [_as_tuple(value) for value in raw_values]
    unresolved = [pos for pos, value in enumerate(coerced) if value is None]

    if unresolved:
        if group_key is not None and set(group_key).issubset(out.columns):
            # Build the fallback only for the rows that need it, using the same
            # to_numpy route as the missing-column branch above (no iterrows).
            key_rows = out[list(group_key)].to_numpy()
            for pos in unresolved:
                coerced[pos] = tuple(key_rows[pos])
        else:
            for pos in unresolved:
                coerced[pos] = raw_values[pos]

    out[series_col] = coerced
    return out

def attach_seriesname(df, group_key, series_col="SeriesName"):
    """Forward to ``coerce_seriesname`` with the same three arguments.

    This rebinds the ``attach_seriesname`` name defined earlier in the file, so
    once this definition has run, callers such as ``make_weights`` get the
    tuple-coercing behaviour (and always receive a copy of ``df``).
    """
    return coerce_seriesname(df, group_key, series_col)

def summarize_run(error_df, all_results=None, group_key=None, model_name="run", series_col="SeriesName"):
    """Condense one run's per-series errors into a one-row summary.

    Args:
        error_df: Per-series error table; must contain ``RMSSE`` and may
            contain ``ME``.
        all_results: Optional frame with a ``true`` column (and optionally
            ``train_secs``). When given, series weights are each series' share
            of the summed ``true`` values; when omitted, every row of
            ``error_df`` is weighted equally.
        group_key: Column names identifying a series; used to build or repair
            ``series_col``.
        model_name: Label stored in the ``model`` column.
        series_col: Name of the series-identifier column.

    Returns:
        One-row DataFrame with ``model``, ``n_series``, ``WRMSSE`` (sum of
        ``RMSSE * weight``), ``RMSSE_median``, ``RMSSE_p90``, ``ME_mean`` and,
        when ``all_results`` has ``train_secs``, ``train_secs_sum`` and
        ``train_secs_avg`` (taken from the first ``train_secs`` per series).
    """
    edf = coerce_seriesname(error_df, group_key, series_col)

    # Coerce all_results once; it is reused for the timing stats below, which
    # avoids a second copy and parse of what can be a very large frame.
    ar = None
    if all_results is not None:
        ar = coerce_seriesname(all_results, group_key, series_col)
        w = (ar.groupby(series_col, as_index=False)["true"]
               .sum()
               .rename(columns={"true": "val_sum_true"}))
        tot = w["val_sum_true"].sum()
        if tot <= 0:
            # Uniform weights; max(..., 1) mirrors the guard in the else branch
            # so a frame with no series cannot raise ZeroDivisionError.
            w["weight"] = 1.0 / max(len(w), 1)
        else:
            w["weight"] = w["val_sum_true"] / tot
        edf = edf.merge(w[[series_col, "weight"]], on=series_col, how="left")
        # Series present in error_df but absent from all_results contribute nothing.
        edf["weight"] = edf["weight"].fillna(0.0)
    else:
        edf["weight"] = 1.0 / max(len(edf), 1)

    out = {
        "model":        model_name,
        "n_series":     edf[series_col].nunique(),
        "WRMSSE":       float((edf["RMSSE"] * edf["weight"]).sum()),
        "RMSSE_median": float(edf["RMSSE"].median()),
        "RMSSE_p90":    float(edf["RMSSE"].quantile(0.90)),
        "ME_mean":      float(edf["ME"].mean()) if "ME" in edf.columns else np.nan,
    }

    # Training time: one value per series (the first one seen), then aggregated.
    if ar is not None and "train_secs" in all_results.columns:
        tr = ar.groupby(series_col)["train_secs"].first()
        out["train_secs_sum"] = float(tr.sum())
        out["train_secs_avg"] = float(tr.mean())

    return pd.DataFrame([out])

summary = compare_runs(runs, group_key)

# Timing columns are only present when the forecast frames carried train_secs.
_report_cols = ["model", "WRMSSE", "RMSSE_median", "RMSSE_p90", "ME_mean"]
if "train_secs_sum" in summary.columns:
    _report_cols += ["train_secs_sum", "train_secs_avg"]
print(summary[_report_cols])


  model    WRMSSE  RMSSE_median  RMSSE_p90   ME_mean  train_secs_sum  \
2  3y1y  0.576873      0.695149   1.047871 -0.151094     3662.511812   
1  2y1y  0.589383      0.717693   1.059117 -0.177910     3284.359483   
0  1y1y  0.619323      0.727222   1.228258 -0.219949     2868.333455   

   train_secs_avg  
2       52.321597  
1       46.919421  
0       40.976192  


In [None]:
# Display the first rows of the comparison table; compare_runs returned it
# sorted by ascending WRMSSE, so the index labels are out of order.
summary.head()

Unnamed: 0,model,n_series,WRMSSE,RMSSE_median,RMSSE_p90,ME_mean,train_secs_sum,train_secs_avg
2,3y1y,70,0.576873,0.695149,1.047871,-0.151094,3662.511812,52.321597
1,2y1y,70,0.589383,0.717693,1.059117,-0.17791,3284.359483,46.919421
0,1y1y,70,0.619323,0.727222,1.228258,-0.219949,2868.333455,40.976192


In [None]:
# Display the table with a fresh 0..n-1 index. The result is not assigned,
# so `summary` itself keeps its original index labels.
summary.reset_index(drop=True)

Unnamed: 0,model,n_series,WRMSSE,RMSSE_median,RMSSE_p90,ME_mean,train_secs_sum,train_secs_avg
0,3y1y,70,0.576873,0.695149,1.047871,-0.151094,3662.511812,52.321597
1,2y1y,70,0.589383,0.717693,1.059117,-0.17791,3284.359483,46.919421
2,1y1y,70,0.619323,0.727222,1.228258,-0.219949,2868.333455,40.976192
