
# MGMT 298D — Minimal Lab (v10): Embeddings Ablation

**Goal:** Compare three models of monthly demand:  
A) Temporal features only · B) Temporal + embeddings · C) Temporal + embeddings × (seasonality & lifecycle)

> Preprocessing lives in `hm_helper_v10.py` (separate file). Students do **not** need to open it.


#### (If needed) One-time installs

In [None]:

#@markdown _(Colab usually has these. Uncomment the line below and run it only if you see import errors.)_
# !pip install -q pandas numpy scikit-learn transformers torch matplotlib


### 1) Settings

In [None]:

#@title Run settings
# Notebook-wide settings consumed by the "Run the three models" cell.
# Keep each trailing `#@param` annotation on the same line as its assignment.

# Passed as the first argument to load_and_prepare (presumably the sales CSV to read).
CSV_PATH = "HandMSales.csv" #@param {type:"string"}
# Passed as the second argument to load_and_prepare; None presumably means
# "no product-type filter" — confirm in hm_helper_v10.
PRODUCT_TYPE_FILTER = None  #@param {type:"string"}
# Passed as the third argument to load_and_prepare (presumably the number of products to keep).
N_PRODUCTS = 5000           #@param {type:"integer"}
# Passed as the fourth argument to load_and_prepare; the form offers "random" or "top".
SAMPLE = "random"           #@param ["random", "top"]
# Forwarded as `limit_embed_dims` to design_matrices for models B and C.
LIMIT_EMBED_DIMS = None     #@param {type:"raw"}
# When True, models B and C are built with include_embeddings=False and
# dev_skip_embed=True is forwarded to design_matrices.
DEV_SKIP_EMBED = False      #@param {type:"boolean"}
# Passed as the fifth argument to load_and_prepare (presumably the sampling seed).
RANDOM_STATE = 0            #@param {type:"integer"}


### 2) Run the three models (A/B/C)

In [None]:

# hm_helper_v10.py must sit next to this notebook (in Colab, upload it once).
from hm_helper_v10 import load_and_prepare, design_matrices, fit_lasso

print("[INFO] Preparing data…")
train, test = load_and_prepare(
    CSV_PATH,
    PRODUCT_TYPE_FILTER,
    N_PRODUCTS,
    SAMPLE,
    RANDOM_STATE,
)
print(f"[INFO] Train months: {train['month_ts'].min().date()} → {train['month_ts'].max().date()} | rows: {len(train)}")
print(f"[INFO] Test  months: {test['month_ts'].min().date()}  → {test['month_ts'].max().date()}  | rows: {len(test)}")

# Feature switches that are identical for all three models.
temporal_kwargs = dict(
    include_numeric=True,
    include_month_ohe=True,
    include_channel_ohe=True,
    include_meta_ohe=False,
)

# Embedding options shared by models B and C; only the month/age
# interaction flags differ between those two calls.
embedding_kwargs = dict(
    include_embeddings=(not DEV_SKIP_EMBED),
    limit_embed_dims=LIMIT_EMBED_DIMS,
    inter_channel=False,
    model_name="openai/clip-vit-base-patch32",
    dev_skip_embed=DEV_SKIP_EMBED,
)

# Model A — temporal features only. The targets returned by this call
# (y_tr, y_te, y_tr_log) are reused when fitting models B and C.
XA_tr, XA_te, y_tr, y_te, y_tr_log = design_matrices(
    train, test,
    include_embeddings=False,
    dev_skip_embed=False,
    **temporal_kwargs,
)
metrics_A = fit_lasso(XA_tr, y_tr_log, XA_te, y_te, "Model A (Temporal)")

# Model B — temporal + embeddings, no interaction terms.
XB_tr, XB_te, *_ = design_matrices(
    train, test,
    inter_month=False,
    inter_age=False,
    **temporal_kwargs,
    **embedding_kwargs,
)
metrics_B = fit_lasso(XB_tr, y_tr_log, XB_te, y_te, "Model B (Temporal + Embeddings)")

# Model C — temporal + embeddings × (sin(month), is_new_3m).
XC_tr, XC_te, *_ = design_matrices(
    train, test,
    inter_month=True,
    inter_age=True,
    **temporal_kwargs,
    **embedding_kwargs,
)
metrics_C = fit_lasso(XC_tr, y_tr_log, XC_te, y_te, "Model C (Temporal + Embeddings × Seasonality/Lifecycle)")

# Side-by-side summary of the three fits.
print("\n=== RESULTS (R² / RMSE / MAE) ===")
print(f"A: R²={metrics_A['r2']:.3f}  RMSE={metrics_A['rmse']:.3f}  MAE={metrics_A['mae']:.3f}")
print(f"B: R²={metrics_B['r2']:.3f}  RMSE={metrics_B['rmse']:.3f}  MAE={metrics_B['mae']:.3f}")
print(f"C: R²={metrics_C['r2']:.3f}  RMSE={metrics_C['rmse']:.3f}  MAE={metrics_C['mae']:.3f}")

# Bar chart of test R², one bar per model. The import stays here so the
# text results above are printed before matplotlib is loaded.
import matplotlib.pyplot as plt

labels = ["A: Temporal", "B: +Emb", "C: +Emb×(sin,new3m)"]
r2s = [m["r2"] for m in (metrics_A, metrics_B, metrics_C)]

fig, ax = plt.subplots(figsize=(5.5, 3.5))
ax.bar(labels, r2s)
ax.set_ylabel("R²")
ax.set_title("Embeddings Ablation — Test Performance")
plt.show()
