# 04 — Forecast Generation & Business KPIs
Generate final forecasts and translate them to **business decisions**:

- Safety stock & service level approximation

- Promo ROI and post‑promo dip

- Scenario analysis (holiday/promo toggles)


In [1]:

import pandas as pd, numpy as np
from pathlib import Path
from lightgbm import LGBMRegressor

RAW = Path("../data/raw")
train = pd.read_csv(RAW/"train.csv")
stores = pd.read_csv(RAW/"stores.csv")
features = pd.read_csv(RAW/"features.csv")
for df in [train, features]:
    df['Date'] = pd.to_datetime(df['Date'])

df = (train.merge(stores, on='Store', how='left')
           .merge(features, on=['Store','Date'], how='left')
      ).sort_values(['Store','Dept','Date'])
df = df.drop(columns=['IsHoliday_y'])  # remove duplicate IsHoliday
df = df.rename(columns={'IsHoliday_x': 'IsHoliday'})
df['IsHoliday'] = df['IsHoliday'].astype(bool)

# Simple engineered features (use notebook 02 for full feature set)
df['Year'] = df['Date'].dt.year
df['Week'] = df['Date'].dt.isocalendar().week.astype(int)
for k in [1,2,3,4,13,52]:
    df[f'lag_{k}'] = df.groupby(['Store','Dept'])['Weekly_Sales'].shift(k)

y = 'Weekly_Sales'
X_cols = ['IsHoliday','Year','Week'] + [c for c in df.columns if c.startswith('lag_')]

# Train final global model on entire history
train_df = df.dropna(subset=X_cols+[y]).copy()
model = LGBMRegressor(n_estimators=1500, learning_rate=0.02, num_leaves=64, subsample=0.8, colsample_bytree=0.8, random_state=42)
model.fit(train_df[X_cols], train_df[y])

# Next‑week forecast (example)
last_date = df['Date'].max()
future = df[df['Date'] == last_date].copy()
future['Date'] = future['Date'] + np.timedelta64(7, 'D')
# TODO: update lags with latest actuals & planned features (IsHoliday/Markdowns)
pred = model.predict(future[X_cols])
future['Forecast'] = pred
future[['Store','Dept','Date','Forecast']].head()


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003107 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1588
[LightGBM] [Info] Number of data points in the train set: 261083, number of used features: 9
[LightGBM] [Info] Start training from score 16395.584286


Unnamed: 0,Store,Dept,Date,Forecast
142,1,1,2012-11-02,27182.061195
285,1,2,2012-11-02,43039.18133
428,1,3,2012-11-02,9433.466536
571,1,4,2012-11-02,35290.35122
714,1,5,2012-11-02,23100.65482


In [2]:

# Business KPIs — Safety Stock & Service Level (approximation)
# Assuming normal demand with std from rolling residuals
hist = train_df[['Store','Dept','Weekly_Sales']].copy()
hist['naive'] = hist.groupby(['Store','Dept'])['Weekly_Sales'].shift(52)
hist['resid'] = hist['Weekly_Sales'] - hist['naive']
sigma = hist.groupby(['Store','Dept'])['resid'].std().rename('sigma')
f = future.merge(sigma, on=['Store','Dept'], how='left')
z = 1.28  # ~90% service level (adjust per policy)
f['safety_stock'] = z * f['sigma'].fillna(f['Forecast']*0.2)
f[['Store','Dept','Forecast','safety_stock']].head()


Unnamed: 0,Store,Dept,Forecast,safety_stock
0,1,1,27182.061195,11617.960966
1,1,2,43039.18133,2671.327724
2,1,3,9433.466536,7154.416699
3,1,4,35290.35122,2294.78222
4,1,5,23100.65482,6061.890288


In [3]:

# Promo ROI (template): margin during promo minus incremental cost and post‑promo dip
# Placeholders: margin_rate, promo_cost
margin_rate = 0.3
promo_cost = 0.05  # per dollar sold
uplift = f['Forecast'] * 0.08  # assume +8% from planned promo; replace with model delta
post_dip = f['Forecast'] * 0.03  # assume 3% cannibalization
f['promo_roi'] = (uplift * margin_rate) - (uplift * promo_cost) - (post_dip * margin_rate)
f[['Store','Dept','promo_roi']].head()


Unnamed: 0,Store,Dept,promo_roi
0,1,1,299.002673
1,1,2,473.430995
2,1,3,103.768132
3,1,4,388.193863
4,1,5,254.107203
