## Market forecast (three-phase linear model)

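This notebook mirrors the main stages of `3p_linear_model`: a Holt-Winters baseline, engineered seasonal and lag features, and a final XGBoost model per product group. It runs that pipeline once for each target column (`market_revenue` and `revenue_amazon`), fills the missing values of each target with the resulting forecasts, and writes the combined table to `market_three_phase_forecast.csv`.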

In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

from three_phase_linear import ForecastConfig, run_three_phase_forecast

# Input CSV read by the loading cell; relative to the notebook's working directory.
DATA_PATH = Path('forecast_of_market_dataset.csv')
# CSV written by the export cell with the targets' missing values filled by forecasts.
OUTPUT_PATH = Path('market_three_phase_forecast.csv')
# Columns that identify one series; used for sorting, merging, and as the
# `group_cols` setting handed to ForecastConfig.
GROUP_COLS = ['product_group_id']
# Columns to forecast; the forecasting cell runs the pipeline once per entry.
TARGET_COLUMNS = ['market_revenue', 'revenue_amazon']

In [2]:
# Load the raw table in one chain: parse `month` into datetimes, then order the
# rows by group and month and renumber the index.
df = (
    pd.read_csv(DATA_PATH)
    .assign(month=lambda frame: pd.to_datetime(frame['month']))
    .sort_values(GROUP_COLS + ['month'])
    .reset_index(drop=True)
)

# Horizon used when the data gives no usable count of missing rows.
DEFAULT_FORECAST_HORIZON = 12

# Count, per group, the rows whose market_revenue is missing; the largest count
# becomes the forecast horizon.
# NOTE(review): only market_revenue is inspected here, yet the same horizon is
# later used for every entry of TARGET_COLUMNS — confirm the targets share the
# same missing periods.
future_counts = df.loc[df['market_revenue'].isna()].groupby(GROUP_COLS).size()
largest_gap = 0 if future_counts.empty else int(future_counts.max())
forecast_horizon = largest_gap if largest_gap > 0 else DEFAULT_FORECAST_HORIZON

print(f'Forecast horizon: {forecast_horizon} periods')

Forecast horizon: 12 periods


In [3]:
# Settings that are identical for every target; only `target_col` changes per run.
shared_settings = dict(
    group_cols=GROUP_COLS,
    freq='MS',
    forecast_horizon=forecast_horizon,
    seasonal_periods=12,
    min_history=24,
    lags=(1, 2, 3, 6, 12, 18, 24),
    rolling_windows=(3, 6, 12, 24),
    random_search_iterations=30,
    n_splits=4,
    random_state=46,
)

prediction_frames = {}  # target name -> prediction frame with renamed columns
summary_frames = []     # one per-group summary frame per target

# NOTE(review): the saved output of this cell repeats the same warning source
# line (`self._init_dates(dates, freq)`) many times — looks like it is emitted
# from inside the forecasting pipeline; confirm and address it there.
for target in TARGET_COLUMNS:
    # Hand the pipeline only the columns it needs for this target.
    target_df = df.loc[:, ['month'] + GROUP_COLS + [target]].copy()
    config = ForecastConfig(time_col='month', target_col=target, **shared_settings)

    preds, summaries = run_three_phase_forecast(target_df, config)

    # Prefix the generic output names with the target so frames for different
    # targets can be merged side by side; rename() leaves absent columns alone.
    column_renames = {
        'prediction': f'{target}_forecast',
        f'{target}_holtwinters': f'{target}_baseline',
    }
    preds = preds.rename(columns=column_renames)
    prediction_frames[target] = preds

    # One row per summary object; only the first element of each group key is
    # kept, which matches the single entry in GROUP_COLS.
    summary_df = pd.DataFrame({
        'group_key': [summary.group_key[0] for summary in summaries],
        'train_rows': [summary.train_rows for summary in summaries],
        'cv_mae': [summary.best_score for summary in summaries],
        'skipped_reason': [summary.skipped_reason for summary in summaries],
    }).assign(target=target)
    summary_frames.append(summary_df)

summary_report = pd.concat(summary_frames, ignore_index=True)
summary_report.head()

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._

Unnamed: 0,group_key,train_rows,cv_mae,skipped_reason,target
0,1,72,1616506000.0,,market_revenue
1,2,72,1055578000.0,,market_revenue
2,3,72,225211500.0,,market_revenue
3,4,72,945632100.0,,market_revenue
4,5,72,257202500.0,,market_revenue


In [4]:
# Start from a copy so re-running this cell never stacks forecast columns onto `df`.
result_df = df.copy()
merge_cols = [*GROUP_COLS, 'month']

for target, preds in prediction_frames.items():
    forecast_col = f'{target}_forecast'
    # Each (group, month) key must appear at most once in the prediction frame;
    # `validate` raises a MergeError instead of silently duplicating rows.
    result_df = result_df.merge(
        preds[merge_cols + [forecast_col]],
        on=merge_cols,
        how='left',
        validate='many_to_one',
    )
    # Keep existing values and fill only the missing ones from the forecast.
    # Rows with no matching forecast stay NaN.
    result_df[target] = result_df[target].astype(float).fillna(result_df[forecast_col])

# Build the column lists from the config constants so this cell follows any
# change to GROUP_COLS or TARGET_COLUMNS instead of drifting out of sync.
output_columns = ['month', *GROUP_COLS, 'product_group_name', *TARGET_COLUMNS]
final_output = (
    result_df[output_columns]
    .sort_values([*GROUP_COLS, 'month'])
    .reset_index(drop=True)
)
final_output.to_csv(OUTPUT_PATH, index=False)

final_output.tail()

Unnamed: 0,month,product_group_id,product_group_name,market_revenue,revenue_amazon
2431,2025-08-01,43,Communication Gadgets,1574394000.0,399830720.0
2432,2025-09-01,43,Communication Gadgets,1367154000.0,372070944.0
2433,2025-10-01,43,Communication Gadgets,1595098000.0,382852096.0
2434,2025-11-01,43,Communication Gadgets,1647955000.0,369745504.0
2435,2025-12-01,43,Communication Gadgets,1513647000.0,401998912.0


In [5]:
# Display the combined per-group summary for all targets (the forecasting cell
# showed only its first rows).
# NOTE(review): the saved output lists cv_mae = 0.0 for group 7 — confirm
# whether that score is genuine before relying on that group's forecast.
summary_report

Unnamed: 0,group_key,train_rows,cv_mae,skipped_reason,target
0,1,72,1616506000.0,,market_revenue
1,2,72,1055578000.0,,market_revenue
2,3,72,225211500.0,,market_revenue
3,4,72,945632100.0,,market_revenue
4,5,72,257202500.0,,market_revenue
5,6,72,208611300.0,,market_revenue
6,7,72,0.0,,market_revenue
7,8,72,228789300.0,,market_revenue
8,9,72,229924300.0,,market_revenue
9,10,72,210856000.0,,market_revenue
