## Прогноз ринку (three-phase linear)

Зошит повторює ключові кроки `3p_linear_model`: базовий Holt-Winters, сезонні та лагові ознаки, фінальна модель XGBoost для кожної товарної групи.

In [None]:
from pathlib import Path

import numpy as np
import pandas as pd

from three_phase_linear import ForecastConfig, run_three_phase_forecast

# Source dataset read by the notebook and the CSV the forecast is written to.
DATA_PATH = Path('forecast_of_market_dataset.csv')
OUTPUT_PATH = Path('market_three_phase_forecast.csv')

# Columns identifying a single series, and the columns forecast per series.
GROUP_COLS = [
    'product_group_id',
]
TARGET_COLUMNS = [
    'market_revenue',
    'revenue_amazon',
]

In [None]:
# Load the dataset, parse the month column as datetimes, and order the rows
# by group and then by month.
df = (
    pd.read_csv(DATA_PATH)
    .assign(month=lambda frame: pd.to_datetime(frame['month']))
    .sort_values([*GROUP_COLS, 'month'])
    .reset_index(drop=True)
)

# Rows with a missing `market_revenue` are counted per group; the largest
# count becomes the forecast horizon.
future_counts = df.loc[df['market_revenue'].isna()].groupby(GROUP_COLS).size()

# Fall back to 12 periods when no rows are missing or the count is not positive.
forecast_horizon = 12 if future_counts.empty else int(future_counts.max())
forecast_horizon = forecast_horizon if forecast_horizon > 0 else 12

print(f'Горизонт прогнозу: {forecast_horizon} періодів')

In [None]:
# Results collected per target: prediction frames keyed by target column and
# one summary table per target.
prediction_frames = {}
summary_frames = []

# Settings shared by every target; only `target_col` differs between runs.
shared_config_kwargs = dict(
    time_col='month',
    group_cols=GROUP_COLS,
    freq='MS',
    forecast_horizon=forecast_horizon,
    seasonal_periods=12,
    min_history=24,
    lags=(1, 2, 3, 6, 12, 18, 24),
    rolling_windows=(3, 6, 12, 24),
    random_search_iterations=10,
    n_splits=4,
    random_state=46,
)

for target in TARGET_COLUMNS:
    # Pass only the time, group and current target columns to the forecaster.
    target_df = df[['month', *GROUP_COLS, target]].copy()
    config = ForecastConfig(target_col=target, **shared_config_kwargs)

    preds, summaries = run_three_phase_forecast(target_df, config)

    # Give the output columns target-specific names so that frames for
    # different targets do not clash.
    column_renames = {
        'prediction': f'{target}_forecast',
        f'{target}_holtwinters': f'{target}_baseline',
    }
    preds = preds.rename(columns=column_renames)
    prediction_frames[target] = preds

    # Flatten the per-group summary objects into plain columns.
    summary_columns = {
        'group_key': [],
        'train_rows': [],
        'cv_mae': [],
        'skipped_reason': [],
    }
    for summary in summaries:
        # Only the first element of the group key is kept.
        summary_columns['group_key'].append(summary.group_key[0])
        summary_columns['train_rows'].append(summary.train_rows)
        summary_columns['cv_mae'].append(summary.best_score)
        summary_columns['skipped_reason'].append(summary.skipped_reason)

    summary_df = pd.DataFrame(summary_columns).assign(target=target)
    summary_frames.append(summary_df)

# One combined report across all targets; preview its first rows.
summary_report = pd.concat(summary_frames, ignore_index=True)
summary_report.head()

In [None]:
# Work on a copy so the loaded dataset `df` is left untouched.
result_df = df.copy()

# Record which rows lacked each target BEFORE any forecast is filled in;
# these masks later select the rows written to the output file.
original_masks = {target: result_df[target].isna() for target in TARGET_COLUMNS}

merge_cols = [*GROUP_COLS, 'month']
for target, preds in prediction_frames.items():
    forecast_col = f'{target}_forecast'
    # `validate='many_to_one'` makes pandas raise if `preds` holds duplicate
    # (group, month) keys. Without it a left merge would silently add rows,
    # and the masks captured above would no longer line up with `result_df`.
    result_df = result_df.merge(
        preds[merge_cols + [forecast_col]],
        on=merge_cols,
        how='left',
        validate='many_to_one',
    )
    # Keep observed values and fill only the missing ones with the forecast.
    result_df[target] = result_df[target].astype(float).fillna(result_df[forecast_col])

# Derive the exported and sort columns from the shared constants so they
# cannot drift from the configured group and target columns.
output_columns = ['month', *GROUP_COLS, 'product_group_name', *TARGET_COLUMNS]

# A row is exported when at least one target was originally missing.
# The masks are combined positionally; the left merge preserves row order and
# the cardinality check above guarantees the row count is unchanged.
forecast_mask = np.zeros(len(result_df), dtype=bool)
for mask in original_masks.values():
    forecast_mask |= mask.to_numpy()

final_output = (
    result_df.loc[forecast_mask, output_columns]
    .sort_values([*GROUP_COLS, 'month'])
    .reset_index(drop=True)
)
final_output.to_csv(OUTPUT_PATH, index=False)

final_output.tail()

In [None]:
# Display the full summary table (one row per summary entry and target).
summary_report