## Прогноз PCS (three-phase linear)

Прогноз щотижневих продажів по SKU з додатковими регресорами.

In [3]:
from pathlib import Path

import numpy as np
import pandas as pd

from three_phase_linear import ForecastConfig, run_three_phase_forecast

DATA_PATH = Path('dataset_pcs.csv')
OUTPUT_PATH = Path('pcs_three_phase_forecast.csv')
GROUP_COLS = ['sku_id']
TARGET_COLUMN = 'qty_total'
REGRESSORS = [
    'orders_qty', 'total_abc_numeric', 'avg_discount_perc_by_goods',
    'max_discount_perc_by_goods', 'avg_goods_price_by_goods', 'oos__by_goods',
    'war', 'covid', 'sin_quarter', 'cos_quarter', 'sin_month', 'cos_month',
    'sin_week', 'cos_week'
]

In [2]:
df = pd.read_csv(DATA_PATH)
df['period'] = pd.to_datetime(df['period'])
comma_cols = ['avg_discount_perc_by_goods', 'max_discount_perc_by_goods', 'avg_goods_price_by_goods', 'oos__by_goods', 'sin_month', 'cos_month', 'sin_week', 'cos_week']
for col in comma_cols:
    df[col] = df[col].astype(str).str.replace(',', '.', regex=False)
    df[col] = pd.to_numeric(df[col], errors='coerce')
numeric_cols = ['qty_total', 'orders_qty', 'total_abc_numeric', 'war', 'covid', 'sin_quarter', 'cos_quarter']
for col in numeric_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Mark placeholder rows (future horizon) where target should be forecasted
placeholder_mask = df['last_goods_sell_status'].isna() & df['oos__by_goods'].isna()
df.loc[placeholder_mask, TARGET_COLUMN] = np.nan

df = df.sort_values(GROUP_COLS + ['period']).reset_index(drop=True)

forecast_horizon = int(df.loc[placeholder_mask, 'period'].nunique()) if placeholder_mask.any() else 0
if forecast_horizon <= 0:
    forecast_horizon = 4
print(f'Горизонт прогнозу: {forecast_horizon} тижнів')

Горизонт прогнозу: 134 тижнів


In [3]:
input_cols = ['period', *GROUP_COLS, 'category_id', TARGET_COLUMN, *REGRESSORS]
input_cols = list(dict.fromkeys(input_cols))
config = ForecastConfig(
    time_col='period',
    target_col=TARGET_COLUMN,
    group_cols=GROUP_COLS,
    freq='W-MON',
    forecast_horizon=forecast_horizon,
    seasonal_periods=52,
    min_history=20,
    lags=(1, 2, 3, 4, 8, 12, 16),
    rolling_windows=(3, 4, 8, 12),
    additional_regressors=REGRESSORS,
    random_search_iterations=0,
    n_splits=3,
    n_estimators=300,
    target_transform=np.log1p,
    target_inverse_transform=np.expm1,
    random_state=46,
)

preds, summaries = run_three_phase_forecast(df[input_cols].copy(), config)
preds = preds.rename(columns={
    'prediction': 'qty_total_forecast',
    f'{TARGET_COLUMN}_holtwinters': 'qty_total_baseline',
})
preds['qty_total_forecast'] = preds['qty_total_baseline']
summary_report = pd.DataFrame({
    'group_key': [s.group_key[0] for s in summaries],
    'train_rows': [s.train_rows for s in summaries],
    'cv_mae': [s.best_score for s in summaries],
    'skipped_reason': [s.skipped_reason for s in summaries],
})
summary_report.head()

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  aic = self.nobs * np.log(sse / self.nobs) + k * 2
  bic = self.nobs * np.log(sse / self.nobs) + k * np.log(self.nobs)
  aic = self.nobs * np.log(sse / self.nobs) + k * 2
  bic = self.nobs * np.log(sse / self.nobs) + k * np.log(self.nobs)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates

Unnamed: 0,group_key,train_rows,cv_mae,skipped_reason
0,598294111,119,,
1,598302302,119,,
2,599653937,119,,
3,599703217,119,,
4,746436995,119,,


In [4]:
merge_cols = [*GROUP_COLS, 'period']
forecast_df = preds[merge_cols + ['qty_total_forecast']].copy()
static_map = df[['sku_id', 'category_id']].drop_duplicates()
forecast_df = forecast_df.merge(static_map, on='sku_id', how='left')
forecast_df = forecast_df.rename(columns={'qty_total_forecast': 'qty_total'})
forecast_df = forecast_df.sort_values(GROUP_COLS + ['period']).reset_index(drop=True)
forecast_df.to_csv(OUTPUT_PATH, index=False)

forecast_df.tail()

Unnamed: 0,sku_id,period,qty_total,category_id
6940,48789690935,2025-05-19,0.0,672241
6941,48789690935,2025-05-26,0.0,672241
6942,48789690935,2025-06-02,0.0,672241
6943,48789690935,2025-06-09,0.0,672241
6944,48789690935,2025-06-16,0.0,672241


In [5]:
summary_report

Unnamed: 0,group_key,train_rows,cv_mae,skipped_reason
0,598294111,119,,
1,598302302,119,,
2,599653937,119,,
3,599703217,119,,
4,746436995,119,,
...,...,...,...,...
458,48609967412,119,,
459,48609984414,119,,
460,48788920432,119,,
461,48789681494,119,,
