# DLinear 预测可视化

这个 notebook 用于对比预测值与真实值（`y`）。预测结果文件按 deep、improved、baseline 的顺序自动查找，使用第一个存在的文件。

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Apply the 'seaborn-v0_8-whitegrid' style sheet to the figures drawn below.
# NOTE(review): the 'seaborn-v0_8-*' style names presumably need matplotlib >= 3.6 — confirm.
plt.style.use('seaborn-v0_8-whitegrid')

In [None]:
from pathlib import Path

# Load the inputs (edit the paths below to switch files)
history_path = 'data/data_cleaned.csv'
forecast_candidates = ['forecast_results_deep.csv', 'forecast_results_improved.csv', 'forecast_results.csv']

# Walk the candidates in order and keep the first one that exists on disk
forecast_path = None
for candidate in forecast_candidates:
    if Path(candidate).exists():
        forecast_path = candidate
        break
if forecast_path is None:
    raise FileNotFoundError('未找到预测结果文件，请先运行 train.py 或 train_improved.py。')

hist = pd.read_csv(history_path, parse_dates=['ds'])
cv = pd.read_csv(forecast_path, parse_dates=['ds', 'cutoff'])

# Detect the prediction column automatically (priority: hybrid > DLinear > NBEATS)
priority_cols = ['yhat_hybrid', 'DLinear', 'NBEATS']
base_cols = {'unique_id', 'ds', 'cutoff', 'y', 'use_fallback'}
pred_col = next((name for name in priority_cols if name in cv.columns), None)
if pred_col is None:
    # None of the known columns is present: fall back to the first numeric
    # column that is not one of the bookkeeping columns in base_cols.
    numeric_cols = [
        name for name in cv.columns
        if name not in base_cols and pd.api.types.is_numeric_dtype(cv[name])
    ]
    if not numeric_cols:
        raise ValueError('预测文件中未找到可用预测列。')
    pred_col = numeric_cols[0]

# Floor the predictions at zero
cv[pred_col] = cv[pred_col].clip(lower=0)

print('history file:', history_path)
print('forecast file:', forecast_path)
print('prediction column:', pred_col)
print('history rows:', len(hist), 'series:', hist['unique_id'].nunique())
print('cv rows:', len(cv), 'cutoff count:', cv['cutoff'].nunique())

In [None]:
# Pick one cutoff to inspect (the most recent one by default)
selected_cutoff = cv['cutoff'].max()
is_selected = cv['cutoff'] == selected_cutoff
cv_cut = cv.loc[is_selected].copy()
print('selected cutoff:', selected_cutoff.date(), 'rows:', len(cv_cut))

# SKUs to chart: rank by actual `y` summed over the selected cutoff's rows, keep the top N
top_n = 9
actual_by_sku = cv_cut.groupby('unique_id')['y'].sum()
ranked_skus = actual_by_sku.sort_values(ascending=False)
top_ids = ranked_skus.head(top_n).index
top_ids[:5]

In [None]:
# Single-SKU chart: history + actual future + forecast future
def plot_one(uid, history_months=24):
    """Plot one series' recent history next to its actual and forecast values.

    Reads the notebook-level ``hist``, ``cv_cut`` and ``pred_col``. Returns
    ``None`` without drawing anything when ``cv_cut`` has no rows for ``uid``.

    Args:
        uid: Value matched against the ``unique_id`` column.
        history_months: Number of trailing history rows (dated at or before
            the cutoff) to include in the chart.
    """
    past = hist.loc[hist['unique_id'] == uid].sort_values('ds')
    future = cv_cut.loc[cv_cut['unique_id'] == uid].sort_values('ds')
    if future.empty:
        return

    cutoff = future['cutoff'].iloc[0]
    recent = past.loc[past['ds'] <= cutoff].tail(history_months)

    fig, ax = plt.subplots(figsize=(10, 4))
    ax.plot(recent['ds'], recent['y'], label='History', marker='o', linewidth=1.5)
    ax.plot(future['ds'], future['y'], label='Actual', marker='o', linewidth=2)
    ax.plot(future['ds'], future[pred_col], label='Forecast', marker='o', linewidth=2)
    # Vertical marker at the cutoff date
    ax.axvline(cutoff, color='gray', linestyle='--', alpha=0.8, label='Cutoff')
    ax.set_title(f'{uid} | cutoff={cutoff.date()}')
    ax.set_xlabel('Month')
    ax.set_ylabel('Demand')
    ax.legend()
    fig.tight_layout()
    plt.show()

In [None]:
# Draw one chart per top SKU, passing history_months=24 to plot_one
for uid in top_ids:
    plot_one(uid, history_months=24)

In [None]:
# Aggregate level (all series): actual vs forecast, summed per date
agg = cv_cut.groupby('ds', as_index=False)[['y', pred_col]].sum()

fig, ax = plt.subplots(figsize=(10, 4))
for column, label in (('y', 'Actual (Total)'), (pred_col, 'Forecast (Total)')):
    ax.plot(agg['ds'], agg[column], marker='o', linewidth=2, label=label)
ax.set_title(f'Aggregate Forecast vs Actual | cutoff={selected_cutoff.date()}')
ax.set_xlabel('Month')
ax.set_ylabel('Total Demand')
ax.legend()
fig.tight_layout()
plt.show()

display(agg)

In [None]:
# Basic error metrics for the selected cutoff
eps = 1e-9  # added to the WAPE / sMAPE denominators so they are never zero
actual = cv_cut['y']
predicted = cv_cut[pred_col]
residual = actual - predicted

err = residual.abs()
mae = err.mean()
rmse = np.sqrt((residual ** 2).mean())
wape = err.sum() / (actual.abs().sum() + eps)
smape = (2 * err / (actual.abs() + predicted.abs() + eps)).mean()

metrics = {'MAE': mae, 'RMSE': rmse, 'WAPE': wape, 'sMAPE': smape}
pd.DataFrame({
    'metric': list(metrics),
    'value': list(metrics.values()),
})

In [None]:
# Accuracy of each SKU's first forecast month in the validation set
def acc(f, a):
    """Return the forecast accuracy ``1 - |f - a| / |a|``, floored at 0.

    Args:
        f: Forecast value.
        a: Actual value.

    Returns:
        ``1.0`` when forecast and actual are both zero, ``0.0`` when only the
        actual is zero, otherwise one minus the relative absolute error,
        never below ``0.0``.
    """
    if a == 0:
        # Relative error is undefined for a zero actual: a perfect zero
        # forecast scores 1, anything else scores 0.
        return 1.0 if f == 0 else 0.0
    # Divide by |a| so a negative actual cannot flip the sign of the error
    # ratio and push the score above 1.
    return max(0.0, 1 - abs(f - a) / abs(a))

# For the current selected_cutoff, keep only each SKU's first forecast month (smallest ds)
first_idx = cv_cut.groupby('unique_id')['ds'].idxmin()
keep_cols = ['unique_id', 'ds', 'y', pred_col]
first_month_df = (
    cv_cut.loc[first_idx, keep_cols]
    .copy()
    .rename(columns={pred_col: 'forecast'})
)
# Score every row with acc(forecast, actual)
first_month_df['accuracy'] = first_month_df.apply(
    lambda row: acc(row['forecast'], row['y']),
    axis=1,
)

overall_acc = first_month_df['accuracy'].mean()
summary = f'cutoff={selected_cutoff.date()} | sku数={len(first_month_df)} | first-month mean accuracy={overall_acc:.5f}'
print(summary)
for view in (first_month_df.head(10), first_month_df['accuracy'].describe()):
    display(view)