In [48]:
import warnings
import numpy as np
import pandas as pd

from tqdm import tqdm

from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler, StaticCovariatesTransformer, InvertibleMapper, MissingValuesFiller
from darts.dataprocessing import Pipeline
from darts.models import ExponentialSmoothing
from darts.metrics import rmsle

from sklearn.preprocessing import OneHotEncoder

warnings.filterwarnings('ignore')

In [2]:
# read the training and test tables from their CSV files
train, test = (pd.read_csv(csv_path) for csv_path in ("data/train.csv", "data/test.csv"))

In [3]:
# Plan: fit one exponential smoothing model per store/family combination.
# Sales vary hugely between them, and not every store sells every family.

ALL_STORES = set(train["store_nbr"])
ALL_FAMILIES = set(train["family"])

In [4]:
# Build the TimeSeries objects for every product family.
# Each family maps to a list holding one series per store (the original note
# puts this at 54 stores); store_nbr is the grouping column of each series and
# is read back below through its static covariates.

family_series = {}

for family in ALL_FAMILIES:
    # boolean mask rather than a query string, so a family name containing
    # a quote character cannot break the filter expression
    train_fam = train[train["family"] == family]

    # creates one time series per store_nbr for the family
    family_ts_list = TimeSeries.from_group_dataframe(
        train_fam,
        time_col="date",
        group_cols=["store_nbr"],
        value_cols="sales",
        freq="D"
    )

    # order by store
    family_ts_list = sorted(family_ts_list, key=lambda ts: int(ts.static_covariates_values()[0, 0]))

    # astype returns a new series, so the converted objects must be collected;
    # rebinding a loop variable would silently throw the conversion away
    family_series[family] = [ts.astype(np.float32) for ts in family_ts_list]

In [39]:
# Build one preprocessing pipeline per family:
# fill missing values, encode static covariates, log-transform the target, scale.

family_pipelines = {}
family_ts_transformed = {}

for key, store_series in family_series.items():
    train_filler = MissingValuesFiller(verbose=False, n_jobs=-1, name="Fill NAs")
    # pass an encoder *instance*: the transformer needs an object it can fit,
    # which the bare OneHotEncoder class is not
    # NOTE(review): the encoder only applies to covariates treated as categorical;
    # confirm store_nbr is actually picked up as such
    cov_trans = StaticCovariatesTransformer(transformer_cat=OneHotEncoder(), name="encoder", verbose=False)
    # log1p/expm1 pair so the transform can be undone on the forecasts
    log_trans = InvertibleMapper(np.log1p, np.expm1, name="log-transform", verbose=False)
    scaler = Scaler(name="Scaling", verbose=False)

    pipeline = Pipeline([train_filler, cov_trans, log_trans, scaler])
    transformed = pipeline.fit_transform(store_series)

    # keep the fitted pipeline so forecasts can be inverse-transformed later
    family_pipelines[key] = pipeline
    family_ts_transformed[key] = transformed

In [40]:
def es_model_builder(training_list):
    """Fit a separate ExponentialSmoothing model on each training series.

    Args:
        training_list: iterable of series, one model is trained per element.

    Returns:
        A list of fitted models, in the same order as the input.
    """
    def _fit_one(series):
        # a fresh model per series, fitted with default settings
        fitted = ExponentialSmoothing()
        fitted.fit(series)
        return fitted

    return [_fit_one(series) for series in training_list]


def es_forecaster(models, n=16):
    """Produce an n-step forecast from each model.

    Args:
        models: iterable of objects exposing ``predict(n=...)``.
        n: number of steps to forecast (default 16).

    Returns:
        A list of forecasts, in the same order as ``models``.
    """
    return [fitted.predict(n=n) for fitted in models]


# Number of trailing time steps held out of training and then forecast.
# A single constant keeps the holdout slice and the forecast length in sync.
FORECAST_HORIZON = 16

es_family_models = {}
es_family_forecasts = {}

# iterate the transformed dict directly so every forecast is stored under the
# same key, and in the same order, as its input series
for family, transformed in family_ts_transformed.items():

    # hold out the final FORECAST_HORIZON steps of every series in the family
    # NOTE(review): the pipelines were fitted on the full series, so the
    # scaler has already seen the held-out window
    training_data = [series[:-FORECAST_HORIZON] for series in transformed]

    es_models = es_model_builder(training_data)
    forecasts = es_forecaster(es_models, n=FORECAST_HORIZON)

    es_family_models[family] = es_models

    # transform back to the original scale; partial=True is needed because
    # the pipeline contains a step that has no inverse (presumably the
    # missing-values filler — confirm against the darts Pipeline docs)
    pipeline = family_pipelines[family]
    es_family_forecasts[family] = pipeline.inverse_transform(forecasts, partial=True)

    # TODO: zero forecasting
    # ...

In [55]:
def flatten(lst):
    """Concatenate an iterable of iterables into one flat list."""
    return [item for sub in lst for item in sub]


# Pair actuals with forecasts through the family key instead of trusting two
# separate dicts to share the same insertion order.
true = flatten(family_series[family] for family in es_family_forecasts)
forecasted = flatten(es_family_forecasts.values())

# NOTE(review): the actual series are longer than the forecasts; this relies
# on rmsle scoring only the overlapping time span — confirm for the installed
# darts version
score = rmsle(actual_series=true, pred_series=forecasted, inter_reduction=np.mean)

In [56]:
# display the RMSLE averaged (np.mean) over all series, as computed in the previous cell
score

0.3747182766566481