## PCS forecast (three-phase linear model)

Forecast weekly quantities per SKU with additional regressors.

In [None]:
from pathlib import Path

import numpy as np
import pandas as pd

from three_phase_linear import ForecastConfig, run_three_phase_forecast

# Input dataset and the CSV that receives the combined history + forecast.
DATA_PATH = Path('dataset_pcs.csv')
OUTPUT_PATH = Path('pcs_three_phase_forecast.csv')

# One series is forecast per distinct combination of these columns.
GROUP_COLS = ['sku_id']

# Column holding the quantity to be forecast.
TARGET_COLUMN = 'qty_total'

# Extra explanatory columns passed to the model as additional regressors.
REGRESSORS = [
    'orders_qty',
    'total_abc_numeric',
    'avg_discount_perc_by_goods',
    'max_discount_perc_by_goods',
    'avg_goods_price_by_goods',
    'oos__by_goods',
    'war',
    'covid',
    # sin/cos column pairs for quarter, month and week
    'sin_quarter',
    'cos_quarter',
    'sin_month',
    'cos_month',
    'sin_week',
    'cos_week',
]

In [None]:
# Load the raw dataset and parse the period column into datetimes.
df = pd.read_csv(DATA_PATH)
df['period'] = pd.to_datetime(df['period'])

# For these columns ',' is replaced with '.' before numeric parsing;
# anything that still fails to parse becomes NaN (errors='coerce').
comma_cols = ['avg_discount_perc_by_goods', 'max_discount_perc_by_goods', 'avg_goods_price_by_goods', 'oos__by_goods', 'sin_month', 'cos_month', 'sin_week', 'cos_week']
for col in comma_cols:
    df[col] = pd.to_numeric(
        df[col].astype(str).str.replace(',', '.', regex=False),
        errors='coerce',
    )

# Remaining numeric columns are parsed directly; bad values become NaN.
numeric_cols = ['qty_total', 'orders_qty', 'total_abc_numeric', 'war', 'covid', 'sin_quarter', 'cos_quarter']
for col in numeric_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Mark placeholder rows (future horizon) where target should be forecasted:
# rows where both the sell status and the out-of-stock value are missing.
placeholder_mask = df['last_goods_sell_status'].isna() & df['oos__by_goods'].isna()
# Blank the target on those rows with Series.mask rather than a .loc
# assignment of NaN: if the target was parsed as an integer column, the .loc
# form relies on implicit upcasting, which pandas has deprecated (PDEP-6).
df[TARGET_COLUMN] = df[TARGET_COLUMN].mask(placeholder_mask)

# Order rows by series and then chronologically, with a fresh 0..n-1 index.
df = df.sort_values(GROUP_COLS + ['period']).reset_index(drop=True)

forecast_horizon = 4
print(f'Forecast horizon: {forecast_horizon} periods (weeks)')

In [None]:
# Columns handed to the forecaster. dict.fromkeys removes repeated names
# while keeping the first-seen order.
input_cols = list(dict.fromkeys(
    ('period', *GROUP_COLS, 'category_id', TARGET_COLUMN, *REGRESSORS)
))

# Settings for the three-phase forecast run.
config = ForecastConfig(
    time_col='period',
    target_col=TARGET_COLUMN,
    group_cols=GROUP_COLS,
    freq='W-MON',
    forecast_horizon=forecast_horizon,
    seasonal_periods=52,
    min_history=20,
    lags=(1, 2, 3, 4, 8, 12, 16),
    rolling_windows=(3, 4, 8, 12),
    additional_regressors=REGRESSORS,
    random_search_iterations=25,
    n_splits=4,
    random_state=46,
)

# Run on a copy so the forecaster cannot modify df.
preds, summaries = run_three_phase_forecast(df[input_cols].copy(), config)

# Give the prediction and Holt-Winters columns notebook-friendly names.
preds = preds.rename(
    columns={
        'prediction': 'qty_total_forecast',
        f'{TARGET_COLUMN}_holtwinters': 'qty_total_baseline',
    }
)

# One report row per summary object returned by the forecaster; only the
# first element of each group key is kept.
summary_report = pd.DataFrame(
    dict(
        group_key=[item.group_key[0] for item in summaries],
        train_rows=[item.train_rows for item in summaries],
        cv_mae=[item.best_score for item in summaries],
        skipped_reason=[item.skipped_reason for item in summaries],
    )
)
summary_report.head()

In [None]:
# Columns that identify one forecast row: the grouping columns plus the period.
merge_cols = [*GROUP_COLS, 'period']

# Rows taken from the prepared input frame, tagged as history.
# NOTE(review): rows whose target was blanked as future placeholders are still
# part of df, so they appear here as 'history' with a NaN quantity - confirm
# that this is intended.
result_df = df[['period', *GROUP_COLS, 'category_id', TARGET_COLUMN]].copy()
result_df = result_df.rename(columns={TARGET_COLUMN: 'qty_total_actual'})
result_df['source'] = 'history'

# Exactly one category per SKU. A plain drop_duplicates() on the
# (sku_id, category_id) pairs can leave several rows for one SKU (a missing
# category on some rows, or a category that changed), and the left merge below
# would then duplicate forecast rows. df is sorted by period within each SKU
# earlier in the notebook, so keep='last' selects the most recent non-null
# category.
static_map = (
    df[['sku_id', 'category_id']]
    .dropna(subset=['category_id'])
    .drop_duplicates(subset=['sku_id'], keep='last')
)

# Forecast rows, enriched with the category and tagged as forecast.
forecast_df = preds[merge_cols + ['qty_total_forecast']]
forecast_df = forecast_df.merge(static_map, on='sku_id', how='left')
forecast_df = forecast_df.rename(columns={'qty_total_forecast': 'qty_total'})
forecast_df['source'] = 'forecast'

# Stack history and forecast under a shared 'qty_total' column, order by
# series and period, and write the result to disk.
combined = pd.concat([
    result_df.rename(columns={'qty_total_actual': 'qty_total'}),
    forecast_df[['period', *GROUP_COLS, 'category_id', 'qty_total', 'source']]
], ignore_index=True, sort=False)
combined = combined.sort_values(GROUP_COLS + ['period']).reset_index(drop=True)
combined.to_csv(OUTPUT_PATH, index=False)

combined.tail()

In [None]:
# Display the complete summary_report frame as this cell's output.
summary_report