In [2]:
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import random
import torch
from datetime import datetime, timedelta
from typing import List, Tuple, Union, Dict
import time

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

from darts import TimeSeries
from darts.dataprocessing.transformers import StaticCovariatesTransformer
from darts.dataprocessing.transformers.scaler import Scaler
from darts.models import TiDEModel, NaiveMovingAverage, TFTModel, NHiTSModel, TSMixerModel
from darts.metrics import mae, mse, smape
from darts.utils.losses import MAELoss, MapeLoss, SmapeLoss
import darts

from pytorch_lightning.callbacks.early_stopping import EarlyStopping

import torch
import torch.nn.functional as F
import torchmetrics
from torch import nn

from tqdm.auto import tqdm
from torch.optim.lr_scheduler import ReduceLROnPlateau

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def set_global_seed(seed: int = 42):
    os.environ["PYTHONHASHSEED"] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # CuDNN 연산을 deterministic하게 만들어 주지만, 약간 느려질 수 있음
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# 시드 값 고정
set_global_seed()

In [4]:
def to_darts_time_series_group(
    dataset: pd.DataFrame,
    target: Union[List[str],str],
    time_col: str,
    group_cols: Union[List[str],str],
    static_cols: Union[List[str],str]=None,
    past_cols: Union[List[str],str]=None,
    future_cols: Union[List[str],str]=None,
    freq: str=None,
    encode_static_cov: bool=True,
)-> Tuple[List[TimeSeries], List[TimeSeries], List[TimeSeries], List[TimeSeries]]:

    series_raw = TimeSeries.from_group_dataframe(
    dataset,
    time_col    =   time_col,
    group_cols  =   group_cols,  # individual time series are extracted by grouping `df` by `group_cols`
    static_cols =   static_cols,  # also extract these additional columns as static covariates (without grouping)
    value_cols  =   target,  # optionally, specify the time varying columns
    n_jobs      =   -1,
    verbose     =   False,
    freq        =   freq,
    )

    if encode_static_cov:
        static_cov_transformer = StaticCovariatesTransformer()
        series_encoded = static_cov_transformer.fit_transform(series_raw)
    else: series_encoded = []

    if past_cols:
        past_cov = TimeSeries.from_group_dataframe(
            dataset,
            time_col    =   time_col,
            group_cols  =   group_cols,
            value_cols  =   past_cols,
            n_jobs      =   -1,
            verbose     =   False,
            freq        =   freq,
            )
    else: past_cov = []

    if future_cols:
        future_cov = TimeSeries.from_group_dataframe(
            dataset,
            time_col    =   time_col,
            group_cols  =   group_cols,
            value_cols  =   future_cols,
            n_jobs      =   -1,
            verbose     =   False,
            freq        =   freq,
            )
    else: future_cov = []

    return series_raw, series_encoded, past_cov, future_cov

def split_grouped_darts_time_series(
    series: List[TimeSeries],
    split_date: Union[str, pd.Timestamp],
    min_date: Union[str, pd.Timestamp]=None,
    max_date: Union[str, pd.Timestamp]=None,
) -> Tuple[List[TimeSeries], List[TimeSeries]]:

    if min_date:
       raw_series = series.copy()
       series = []
       for s in raw_series:
        try: series.append(s.split_before(pd.Timestamp(min_date)-timedelta(1))[1])
        except: series.append(s)

    if max_date:
       raw_series = series.copy()
       series = []
       for s in raw_series:
        try: series.append(s.split_before(pd.Timestamp(max_date))[0])
        except: series.append(s)

    split_0 = [s.split_before(pd.Timestamp(split_date))[0] for s in series]
    split_1 = [s.split_before(pd.Timestamp(split_date))[1] for s in series]
    return split_0, split_1

def eval_forecasts(
    pred_series: Union[List[TimeSeries], TimeSeries],
    test_series: Union[List[TimeSeries], TimeSeries],
    error_metric: darts.metrics,
    plot: bool=False
) -> List[float]:

    errors = error_metric(test_series, pred_series)
    print(errors)
    if plot:
        plt.figure()
        plt.hist(errors, bins=50)
        plt.ylabel("Count")
        plt.xlabel("Error")
        plt.title(f"Mean error: {np.mean(errors):.3f}")
        plt.show()
        plt.close()
    return errors

def fit_mixed_covariates_model(
    model_cls,
    common_model_args: dict,
    specific_model_args: dict,
    model_name: str,
    past_cov: Union[List[TimeSeries], TimeSeries],
    future_cov: Union[List[TimeSeries], TimeSeries],
    train_series: Union[List[TimeSeries], TimeSeries],
    val_series: Union[List[TimeSeries], TimeSeries]=None,
    max_samples_per_ts: int=None,
    save:bool=False,
    path:str="",
):

    # Declarare model
    model = model_cls(model_name=model_name,
                    **common_model_args,
                    **specific_model_args)

    # Train model
    model.fit(
                    # TRAIN ARGS ===================================
                    series                = train_series,
                    past_covariates       = past_cov,
                    future_covariates     = future_cov,
                    max_samples_per_ts    = max_samples_per_ts,
                    # VAL ARGS ======================================
                    val_series            = val_series,
                    val_past_covariates   = past_cov,
                    val_future_covariates = future_cov,
                )

    if save: model.save(path)

def backtesting(model, series, past_cov, future_cov, start_date, horizon, stride):
  historical_backtest = model.historical_forecasts(
    series, past_cov, future_cov,
    start=start_date,
    forecast_horizon=horizon,
    stride=stride,  # Predict every N months
    retrain=False,  # Keep the model fixed (no retraining)
    overlap_end=False,
    last_points_only=False
  )
  maes = model.backtest(series, historical_forecasts=historical_backtest, metric=mae)

  return np.mean(maes)

def process_predictions(
    preds: List[TimeSeries],
    series_raw: List[TimeSeries],
    group_cols: List[str]
) -> pd.DataFrame:

    list_df = [serie.pd_dataframe() for serie in preds]
    for i in range(len(list_df)):
      list_df[i]['Date'] = preds[i].time_index
      for j in range(len(group_cols)):
        list_df[i][group_cols[j]] = series_raw[i].static_covariates[group_cols[j]].values[0]
    processed_preds =  pd.concat(list_df, ignore_index=True)
    return processed_preds

def price_weighted_mae(predictions, targets, prices):
    """
    Compute the price-weighted Mean Absolute Error (MAE).

    :param predictions: A list or 1D NumPy array of predicted values.
    :param targets: A list or 1D NumPy array of actual (ground truth) values.
    :param prices: A list or 1D NumPy array of prices corresponding to the targets.
    :return: The price-weighted MAE as a float.
    """
    # Ensure inputs are NumPy arrays
    predictions = np.array(predictions, dtype=np.float32)
    targets = np.array(targets, dtype=np.float32)
    prices = np.array(prices, dtype=np.float32)

    # Compute absolute error
    error = np.abs(targets - predictions)

    # Compute price-weighted error
    weighted_error = error * prices

    # Compute and return the mean of the weighted error
    return np.mean(weighted_error)

def local_iqr_clip(series, window=30, q1=0.25, q3=0.75, m=2.5):
    roll_q1 = series.rolling(window, center=True).quantile(q1)
    roll_q3 = series.rolling(window, center=True).quantile(q3)
    iqr = roll_q3 - roll_q1
    upper = roll_q3 + m * iqr
    return series.clip(0, upper)

class MultiTaskLossModule(nn.Module):
    def __init__(self):
        super(MultiTaskLossModule, self).__init__()
        self.alpha = 1.0
        self.beta = 1.0
        self.gamma = 1.0

    def forward(self, y_pred, y_true):
        # 내부에 바로 구현
        reg_pred   = y_pred[..., :2]
        reg_true   = y_true[..., :2]
        # cls_pred   = y_pred[..., 2]
        # cls_true   = y_true[..., 2]

        loss_demand   = F.mse_loss(reg_pred[..., 0], reg_true[..., 0])
        loss_discount = F.mse_loss(reg_pred[..., 1], reg_true[..., 1])
        # loss_cls      = F.binary_cross_entropy_with_logits(cls_pred, cls_true)

        return self.alpha*loss_demand + self.beta*loss_discount #+ self.gamma*loss_cls


In [5]:
class EventWeightedMultiTaskLoss(nn.Module):
    """
    Darts 프레임워크에 최적화된 이벤트 가중 멀티태스크 손실 함수.

    이 손실 함수는 is_event 정보를 타겟 텐서의 일부로 받아,
    이벤트 기간 동안의 수요(demand) 및 할인율(discount_pct) 예측 오차에
    더 높은 페널티를 부여하도록 설계되었습니다.
    """
    def __init__(self, demand_weight: float = 1.0, discount_weight: float = 0.1, event_penalty: float = 500.0):
        """
        손실 함수의 가중치를 초기화합니다.

        Args:
            demand_weight (float): 수요 예측 손실에 대한 기본 가중치.
            discount_weight (float): 할인율 예측 손실에 대한 기본 가중치.
            event_penalty (float): 이벤트 기간의 오차에 적용할 추가 페널티 배수.
        """
        super().__init__()
        self.demand_weight = demand_weight
        self.discount_weight = discount_weight
        self.event_penalty = event_penalty
        
        # 이상치에 강건한 Huber Loss를 사용 (reduction='none'으로 설정)
        # self.loss_fn = nn.HuberLoss(reduction='none')
        self.loss_fn = nn.MSELoss(reduction='none')

    def forward(self, y_pred: torch.Tensor, y_true: torch.Tensor) -> torch.Tensor:
        """
        가중 손실을 계산합니다.

        Darts 모델 학습 시 이 함수가 호출되며, y_pred와 y_true는 
        모델의 출력 차원과 타겟 시계열의 컴포넌트 수에 맞춰 전달됩니다.

        Args:
            y_pred (torch.Tensor): 모델의 예측값 텐서.
                                   - y_pred[..., 0]: 예측된 demand
                                   - y_pred[..., 1]: 예측된 discount_pct
                                   - y_pred[..., 2]: (무시할) 예측된 is_event
            y_true (torch.Tensor): 실제값 텐서.
                                   - y_true[..., 0]: 실제 demand
                                   - y_true[..., 1]: 실제 discount_pct
                                   - y_true[..., 2]: 실제 is_event 플래그 (0 또는 1)
        
        Returns:
            torch.Tensor: 최종 계산된 스칼라 손실 값.
        """
        # --- 1. 텐서에서 각 변수 분리 ---
        pred_demand = y_pred[..., 0]
        pred_discount = y_pred[..., 1]
        # 모델의 is_event 예측값(y_pred[..., 2])은 사용하지 않으므로 무시합니다.

        true_demand = y_true[..., 0]
        true_discount = y_true[..., 1]
        true_is_event_flag = y_true[..., 2]

        # --- 2. 이벤트 가중치 텐서 생성 ---
        # is_event가 1인 위치는 event_penalty 값을, 아닌 곳은 1.0 값을 가짐
        weights = torch.ones_like(true_is_event_flag)
        weights[true_is_event_flag == 1] = self.event_penalty

        # --- 3. 각 태스크의 가중 손실 계산 ---
        # 3.1. 수요(demand) 손실 계산
        loss_demand_samples = self.loss_fn(pred_demand, true_demand)
        weighted_demand_loss = (loss_demand_samples * weights).mean()

        # 3.2. 할인율(discount) 손실 계산
        loss_discount_samples = self.loss_fn(pred_discount, true_discount)
        weighted_discount_loss = (loss_discount_samples * weights).mean()
        
        # --- 4. 최종 손실 조합 ---
        total_loss = (self.demand_weight * weighted_demand_loss + 
                      self.discount_weight * weighted_discount_loss)
        
        return total_loss

In [6]:
TEST_DATE = pd.Timestamp('2023-01-01')
VAL_DATE_OUT = pd.Timestamp('2022-01-01')
VAL_DATE_IN = pd.Timestamp('2021-01-01')
# MIN_TRAIN_DATE = pd.Timestamp('2015-06-01')

dataset = pd.read_csv("data/all_master.csv", parse_dates=["date"])
dataset = dataset.sort_values(by=["city", "sku", "date"])

In [7]:
# 1. 이벤트 기간 정의 (기간 및 미래 예측 시나리오)
event_periods = {
    2018: {"country": "KOR", "start_date": "2018-02-05", "end_date": "2018-03-26"},
    2019: {"country": "JPN", "start_date": "2019-01-09", "end_date": "2019-03-08"},
    2020: {"country": "USA", "start_date": "2020-01-07", "end_date": "2020-04-28"},
    2021: {"country": "USA", "start_date": "2021-02-19", "end_date": "2021-06-10"},
    2022: {"country": "KOR", "start_date": "2022-01-17", "end_date": "2022-04-18"},
    # --- 미래 예측 시나리오 ---
    2023: {"country": "AUS", "start_date": "2023-07-25", "end_date": "2023-08-13"},
    2024: {"country": "DEU", "start_date": "2024-02-13", "end_date": "2024-04-15"}
}

# 3. 동적 피쳐(Dynamic Features) 생성
# 컬럼 초기화
dataset['is_event'] = 0
dataset['days_since_event_start'] = -1
dataset['event_countdown'] = -1
dataset['event_progress_normalized'] = 0.0
dataset['event_peak_proximity'] = 0.0

# 이벤트 기간을 순회하며 피쳐 생성 (모든 sku, city에 일괄 적용)
for year, event_info in tqdm(event_periods.items()):
    country = event_info['country']
    start_date = pd.to_datetime(event_info['start_date'])
    end_date = pd.to_datetime(event_info['end_date'])

    # 카운트다운 피쳐
    countdown_start_date = start_date - pd.Timedelta(days=14)
    countdown_mask = (
        (dataset['country'] == country) &
        (dataset['date'] >= countdown_start_date) &
        (dataset['date'] < start_date)
    )
    dataset.loc[countdown_mask, 'event_countdown'] = (start_date - dataset.loc[countdown_mask, 'date']).dt.days

    # 이벤트 기간 내 피쳐
    event_mask = (
        (dataset['country'] == country) &
        (dataset['date'] >= start_date) &
        (dataset['date'] <= end_date)
    )
    dataset.loc[event_mask, 'is_event'] = 1
    dataset.loc[event_mask, 'days_since_event_start'] = (dataset.loc[event_mask, 'date'] - start_date).dt.days
    
    event_duration = (end_date - start_date).days
    if event_duration > 0:
        days_from_start = dataset.loc[event_mask, 'days_since_event_start']
        dataset.loc[event_mask, 'event_progress_normalized'] = days_from_start / event_duration
        
        midpoint = event_duration / 2.0
        dataset.loc[event_mask, 'event_peak_proximity'] = 1 - np.abs(days_from_start - midpoint) / midpoint


# 4. 정적 피쳐(Static Features) 생성
# 4.1. is_key_market 피쳐
key_markets = ['KOR', 'USA', 'JPN']
dataset['is_key_market'] = dataset['country'].apply(lambda x: 1 if x in key_markets else 0)

# 4.2. event_responsiveness_score 피쳐 (국가별 집계 데이터로 계산 후 매핑)
df_agg = pd.read_csv('aggregated_df.csv') # 국가별 집계 데이터 로드
df_agg['date'] = pd.to_datetime(df_agg['date'])
train_agg = df_agg[df_agg['date'] < '2023-01-01'].copy()
train_agg['is_event'] = 0

# 집계 데이터에 이벤트 기간 표시
for year, event_info in event_periods.items():
    if year < 2023:
        event_mask_agg = (
            (train_agg['country'] == event_info['country']) &
            (train_agg['date'] >= event_info['start_date']) &
            (train_agg['date'] <= event_info['end_date'])
        )
        train_agg.loc[event_mask_agg, 'is_event'] = 1

# 국가별 반응성 점수 계산
responsiveness = {}
for country in dataset['country'].unique():
    if country in key_markets:
        event_demand = train_agg[(train_agg['country'] == country) & (train_agg['is_event'] == 1)]['demand'].mean()
        non_event_demand = train_agg[(train_agg['country'] == country) & (train_agg['is_event'] == 0)]['demand'].mean()
        responsiveness[country] = event_demand / non_event_demand if non_event_demand > 0 else 1.0
    else:
        responsiveness[country] = 1.0 # 이벤트가 없었던 국가는 1

dataset['event_responsiveness_score'] = dataset['country'].map(responsiveness)

100%|██████████| 7/7 [00:01<00:00,  6.50it/s]


In [8]:
preprocess = 'none'
# preprocess = 'iqr'
dataset = pd.get_dummies(dataset, columns=['season'], prefix='season')
# dataset = dataset[dataset['days_since_launch'] > 0]
condition = (dataset['days_since_launch'] < 0) & (dataset['date'] < '2023-01-01')
dataset.loc[condition, 'discount_pct'] = 0.0

dataset['month'] = dataset['date'].dt.month
dataset['day']   = dataset['date'].dt.day 
dataset['year']  = dataset['date'].dt.year
dataset['day_of_week'] = dataset['date'].dt.dayofweek

dataset['time_index'] = (dataset['date'] - pd.Timestamp('2018-01-01')).dt.days
dataset['time_index'] = dataset['time_index'].astype(np.float32)


target_col = ['demand', 'discount_pct', 'is_event' ]
time_col = 'date'
group_cols = ['sku','city']

drop_cols = [
    # static
    # 'country', 
    'category', 'family', 'storage_gb', 'colour', 
    # numeric
    # 'season_Fall', 'season_Spring', 'season_Summer', 'season_Winter', 'is_holiday',
    'avg_temp', 'humidity', 'precip_mm',
    'rain_mm', 'snow_mm', 'snow_depth_cm', 'pressure_msl', 'cloud_cover',
    'wind_speed_avg', 'wind_speed_max', 'wind_gust_max', 'wind_dir_mode',
    'shortwave_rad_MJ', 'vpd', 'cdd18', 'delta_temp',
    'delta_humidity',
]
drop_cols = []
# past_cols = ['EMA_30', 'MA_30']
# future_cols <=> past_cols N-HiTS
past_cols= []
future_cols = ['season_Fall', 'season_Spring', 'season_Summer', 'season_Winter', 'is_holiday','avg_temp', 'min_temp', 'max_temp', 'dewpoint', 'humidity', 'precip_mm',
       'rain_mm', 'snow_mm', 'snow_depth_cm', 'pressure_msl', 'cloud_cover',
       'wind_speed_avg', 'wind_speed_max', 'wind_gust_max', 'wind_dir_mode',
       'shortwave_rad_MJ', 'vpd', 'hdd18', 'cdd18', 'delta_temp',
       'delta_humidity', 'brent_usd', 'local_fx', 'spend_usd', 'days_since_launch',
       'month', 'day', "year", "day_of_week", "time_index",
       'is_event', 'event_countdown', 'days_since_event_start', 'event_progress_normalized', 'event_peak_proximity',
]
static_cols = ['country', 'category', 'family', 'storage_gb', 'colour', 'unit_price', 'life_days', 'is_key_market', 'event_responsiveness_score']

dataset = dataset.drop(columns=drop_cols)
future_cols = [col for col in future_cols if col not in drop_cols]
static_cols = [col for col in static_cols if col not in drop_cols]


if preprocess == 'clip':
    print('clip')
    low, high = dataset['demand'].quantile([0.00,0.95])
    dataset['demand'] = dataset['demand'].clip(low, high)
elif preprocess == 'iqr':
    print('iqr')
    dataset['demand'] = local_iqr_clip(dataset['demand'])
dataset['demand'] = np.log1p(dataset['demand'])
dataset['discount_pct'] = dataset['discount_pct']/100

series_raw, series, past_cov, future_cov = to_darts_time_series_group(
    dataset=dataset,
    target=target_col,
    time_col=time_col,
    group_cols=group_cols,
    past_cols=past_cols,
    future_cols=future_cols,
    static_cols=static_cols,
    freq='D', # daily
    encode_static_cov=True, # so that the models can use the categorical variables (Agency & Product)
)

In [9]:
train_val, test = split_grouped_darts_time_series(
    series=series,
    split_date=TEST_DATE
)

train, _ = split_grouped_darts_time_series(
    series=train_val,
    split_date=VAL_DATE_OUT
)

_, val = split_grouped_darts_time_series(
    series=train_val,
    split_date=VAL_DATE_IN
)

In [None]:

early_stopping_args = {
    "monitor": "val_loss",
    "patience": 10,
    "min_delta": 1e-3,
    "mode": "min",
}

pl_trainer_kwargs = {
    "max_epochs": 100,
    "accelerator": "gpu", 
    "callbacks": [EarlyStopping(**early_stopping_args)],
    "enable_progress_bar":True
}

common_model_args = {
    # "output_chunk_length": 28,
    # "input_chunk_length": 365,
    "output_chunk_length": 7,
    "input_chunk_length": 30,
    "pl_trainer_kwargs": pl_trainer_kwargs,
    "save_checkpoints": True,  # checkpoint to retrieve the best performing model state,
    "force_reset": True,
    "batch_size": 512,
    "random_state": 42,
}

encoders = {
    "position": {"past": ["relative"], "future": ["relative"]},
    "transformer": Scaler(),
}

best_hp = {
 'optimizer_kwargs': {'lr':0.001},
 'lr_scheduler_cls':ReduceLROnPlateau,   # <<<--- 사용할 스케줄러 클래스 직접 지정
 'lr_scheduler_kwargs':{                 # <<<--- 스케줄러에 전달할 파라미터 딕셔너리
     'monitor': 'val_loss',
     'patience': 5,
     'factor': 0.2, 
     'threshold': 1e-4,
 },   
 'loss_fn': EventWeightedMultiTaskLoss(),
 'use_layer_norm': True,
 'use_reversible_instance_norm': True,
 'add_encoders':encoders,
 }

In [12]:
past_cov = None if not past_cov else past_cov

start = time.time()
## COMMENT TO LOAD PRE-TRAINED MODEL
fit_mixed_covariates_model(
    model_cls = TiDEModel,
    common_model_args = common_model_args,
    specific_model_args = best_hp,
    model_name = 'TiDE_model',
    past_cov = past_cov,
    future_cov = future_cov,
    train_series = train,
    # train_series=train_val,
    val_series = val,
    # val_series=None,
)
time_tide = time.time() - start

IndexError: list index out of range

In [None]:
best_tide = TiDEModel.load_from_checkpoint(model_name='TiDE_model', best=True)
preds_tide = best_tide.predict(
    series            = train_val,
    past_covariates   = past_cov,
    future_covariates = future_cov,
    n                 = test[0].n_timesteps 
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 2/2 [00:00<00:00,  4.00it/s]


In [13]:
from tqdm import tqdm

group_cols = ["sku","city"]  # 예시
groups_df = (
    dataset
    .loc[:, group_cols]
    .drop_duplicates()
    .sort_values(by=group_cols)   # from_group_dataframe 도 내부적으로 정렬하므로
    .reset_index(drop=True)
)

result = []
for i in tqdm(range(len(groups_df))):
    group_id = groups_df.iloc[i]
    
    pred = preds_tide[i].to_dataframe()
    pred = pred.reset_index()
    pred = pred.rename(columns={"index": "date"})
    # pred = pred.drop(columns=['discount_pct'])
    
    for j in range(len(pred)):
        result.append({
            "sku":  group_id["sku"],
            "city": group_id["city"],
            "date": pred['date'][j],
            "mean": pred['demand'][j],
            "discount_pct": pred['discount_pct'][j],
            "is_event": pred['is_event'][j],
        })

result_df = pd.DataFrame(result)
result_df['mean'] = np.expm1(result_df['mean'])
result_df['mean'] = result_df['mean'].round().astype(int)
result_df['date'] = pd.to_datetime(result_df['date'])
sub = pd.read_csv("extracted_contents/data/forecast_submission_template.csv", parse_dates=["date"])
sub.drop(columns=['mean'], inplace=True)
sub = sub.merge(result_df, on=['sku', 'city', 'date'], how='left')
sub.to_csv("result.csv", index=False)
sub.drop(columns=['discount_pct', 'is_event'], inplace=True)
sub.to_csv("forecast_submission_template.csv", index=False)

100%|██████████| 1000/1000 [00:11<00:00, 88.44it/s]


# TSMixer

In [10]:

early_stopping_args = {
    "monitor": "val_loss",
    "patience": 10,
    "min_delta": 1e-3,
    "mode": "min",
}

pl_trainer_kwargs = {
    "max_epochs": 100,
    "accelerator": "gpu", 
    "callbacks": [EarlyStopping(**early_stopping_args)],
    "enable_progress_bar":True
}

common_model_args = {
    # "output_chunk_length": 28,
    # "input_chunk_length": 365,
    "output_chunk_length": 7,
    "input_chunk_length": 28,
    "pl_trainer_kwargs": pl_trainer_kwargs,
    "save_checkpoints": True,  # checkpoint to retrieve the best performing model state,
    "force_reset": True,
    "batch_size": 512,
    "random_state": 42,
}

encoders = {
    "position": {"past": ["relative"], "future": ["relative"]},
    "transformer": Scaler(),
}

best_hp = {
 'optimizer_kwargs': {'lr':0.0001},
#  'lr_scheduler_cls':ReduceLROnPlateau,   # <<<--- 사용할 스케줄러 클래스 직접 지정
#  'lr_scheduler_kwargs':{                 # <<<--- 스케줄러에 전달할 파라미터 딕셔너리
#      'monitor': 'val_loss',
#      'patience': 5,
#      'factor': 0.2, 
#      'threshold': 1e-4,
#  },   
 'loss_fn': EventWeightedMultiTaskLoss(),
 'use_reversible_instance_norm': True,
 'add_encoders':encoders,
 }

In [11]:
from darts.models import TSMixerModel

past_cov = None if not past_cov else past_cov

start = time.time()
## COMMENT TO LOAD PRE-TRAINED MODEL
fit_mixed_covariates_model(
    model_cls = TSMixerModel,
    common_model_args = common_model_args,
    specific_model_args = best_hp,
    model_name = 'TSMixer_model',
    past_cov = past_cov,
    future_cov = future_cov,
    train_series = train,
    # train_series=train_val,
    val_series = val,
    # val_series=None,
)
time_tide = time.time() - start

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA RTX PRO 6000 Blackwell Workstation Edition') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name                  | Type                       | Params | Mode 
------------------------------------------------------------------------------
0  | criterion             | EventWeightedMultiTaskLoss | 0      | train
1  | train_criterion       | EventWeightedMultiTaskLoss | 0      | train
2  | val_criterion         | EventWeightedMultiTaskLoss | 0      | train
3  | train_metrics         | MetricCollection           | 0      | train
4  | va

Epoch 21: 100%|██████████| 2788/2788 [02:07<00:00, 21.94it/s, train_loss=1.560, val_loss=4.730]


# N-HiTS

In [10]:
early_stopping_args = {
    "monitor": "val_loss",
    "patience": 10,
    "min_delta": 1e-3,
    "mode": "min",
}

pl_trainer_kwargs = {
    "max_epochs": 100,
    "accelerator": "gpu", 
    "callbacks": [EarlyStopping(**early_stopping_args)],
    "enable_progress_bar":True
}

common_model_args = {
    "output_chunk_length": 28,
    "input_chunk_length": 365,
    "pl_trainer_kwargs": pl_trainer_kwargs,
    "save_checkpoints": True,  # checkpoint to retrieve the best performing model state,
    "force_reset": True,
    "batch_size": 512,
    "random_state": 42,
    # --------------------------------------------------------------------------
    # N-HiTS 아키텍처 파라미터 
    # --------------------------------------------------------------------------
    "num_stacks": 3,              # 3개의 스택으로 잔차(residual)를 반복 학습 (표준값)
    "num_blocks": 3,              # 각 스택 당 3개의 블록 사용 (표준값)
    "num_layers": 4,              # 각 블록 내 4개의 MLP 레이어로 충분한 복잡성 부여
    "layer_widths": 512,          # MLP 레이어의 너비. 모델의 표현력(capacity)을 결정
    "dropout": 0.1,               # 과적합 방지를 위한 드롭아웃 (표준값)
    "activation": "ReLU",         # 활성화 함수
}

encoders = {
    "position": {"past": ["relative"], "future": ["relative"]},
    "transformer": Scaler(),
}

best_hp = {
 'optimizer_kwargs': {'lr':0.0001},
 'loss_fn': EventWeightedMultiTaskLoss(),
 'use_reversible_instance_norm': True,
 'add_encoders':encoders,
 }

# --- 모델 생성 예시 ---
# from darts.models import NHiTSModel
#
# model_nhits = NHiTSModel(
#     output_dim=3, # 타겟 변수 개수 (demand, discount_pct, is_event)
#     **n_hits_params
# )

In [12]:
past_cov = None if not past_cov else past_cov

start = time.time()
## COMMENT TO LOAD PRE-TRAINED MODEL
fit_mixed_covariates_model(
    model_cls = NHiTSModel,
    common_model_args = common_model_args,
    specific_model_args = best_hp,
    model_name = 'NHiTS_model',
    past_cov = past_cov,
    future_cov = None,
    train_series = train,
    # train_series=train_val,
    val_series = val,
    # val_series=None,
)
nhits_tide = time.time() - start

Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA RTX PRO 6000 Blackwell Workstation Edition') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type                       | Params | Mode 
-----------------------------------------------------------------------
0 | criterion       | EventWeightedMultiTaskLoss | 0      | train
1 | train_criterion | EventWeightedMultiTaskLoss | 0      | train
2 | val_criterion   | EventWeightedMultiT

Epoch 24:   1%|          | 13/2088 [00:02<06:09,  5.62it/s, train_loss=5.680, val_loss=4.340]    


Detected KeyboardInterrupt, attempting graceful shutdown ...


NameError: name 'exit' is not defined

# TFT

In [9]:
# PyTorch Lightning Trainer arguments
early_stopping_args = {
    "monitor": "val_loss",
    "patience": 50,
    "min_delta": 1e-3,
    "mode": "min",
}

pl_trainer_kwargs = {
    "max_epochs": 200,
    "accelerator": "gpu", # uncomment for gpu use
    "callbacks": [EarlyStopping(**early_stopping_args)],
    "enable_progress_bar":True
}

common_model_args = {
    "output_chunk_length": 7,
    "input_chunk_length": 84,
    "pl_trainer_kwargs": pl_trainer_kwargs,
    "save_checkpoints": True,  # checkpoint to retrieve the best performing model state,
    "force_reset": True,
    "batch_size": 128,
    "random_state": 42,
}

In [10]:
encoders = {
    "position": {"past": ["relative"], "future": ["relative"]},
    "transformer": Scaler(),
}

best_hp = {
 'optimizer_kwargs': {'lr':0.0001},
 'loss_fn': MultiTaskLossModule(),
 'use_reversible_instance_norm': True,
 'add_encoders':encoders,
 }

In [11]:
past_cov = None if not past_cov else past_cov

start = time.time()
## COMMENT TO LOAD PRE-TRAINED MODEL
fit_mixed_covariates_model(
    model_cls = TFTModel,
    common_model_args = common_model_args,
    specific_model_args = best_hp,
    model_name = 'TFT_model',
    past_cov = past_cov,
    future_cov = future_cov,
    train_series = train,
    val_series = val,
)
time_tft = time.time() - start

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA RTX PRO 6000 Blackwell Workstation Edition') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name                              | Type                             | Params | Mode 
------------------------------------------------------------------------------------------------
0  | criterion                         | MultiTaskLossModule              | 0      | train
1  | train_criterion                   | MultiTaskLossModule              | 0      | train
2  | val_criterion                     | MultiTaskLossModule              | 0    

Epoch 4:  29%|██▉       | 3101/10711 [33:24<1:21:57,  1.55it/s, train_loss=0.248, val_loss=0.261] 


Detected KeyboardInterrupt, attempting graceful shutdown ...


NameError: name 'exit' is not defined

In [12]:
best_tft = TFTModel.load_from_checkpoint(model_name='TFT_model', best=True)
preds_tft = best_tft.predict(
                    series            = train_val,
                    past_covariates   = past_cov,
                    future_covariates = future_cov,
                    n                 = test[0].n_timesteps
                )

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 8/8 [07:37<00:00,  0.02it/s]


In [13]:
from tqdm import tqdm

group_cols = ["sku","city"]  # 예시
groups_df = (
    dataset
    .loc[:, group_cols]
    .drop_duplicates()
    .sort_values(by=group_cols)   # from_group_dataframe 도 내부적으로 정렬하므로
    .reset_index(drop=True)
)

result = []
for i in tqdm(range(len(groups_df))):
    group_id = groups_df.iloc[i]
    
    pred = preds_tft[i].to_dataframe()
    pred = pred.reset_index()
    pred = pred.rename(columns={"index": "date"})
    pred = pred.drop(columns=['discount_pct'])
    # pred['demand'] = np.exp(pred['demand'])
    
    for j in range(len(pred)):
        result.append({
            "sku":  group_id["sku"],
            "city": group_id["city"],
            "date": pred['date'][j],
            "mean": pred['demand'][j],
        })

result_df = pd.DataFrame(result)
result_df['mean'] = np.expm1(result_df['mean'])
result_df['mean'] = result_df['mean'].round().astype(int)
result_df['date'] = pd.to_datetime(result_df['date'])
sub = pd.read_csv("extracted_contents/data/forecast_submission_template.csv", parse_dates=["date"])
sub.drop(columns=['mean'], inplace=True)
sub = sub.merge(result_df, on=['sku', 'city', 'date'], how='left')
sub.to_csv("forecast_submission_template.csv", index=False)

100%|██████████| 1000/1000 [00:08<00:00, 121.48it/s]
