In [None]:
import pandas as pd
import numpy as np
from darts import TimeSeries
from darts.models import XGBModel
from darts import concatenate

In [None]:
# Load the training table; the "date" column is parsed into datetimes on read.
train_csv_path = "../../Data/Kaggle/StoreSales/train_darts.csv"
train = pd.read_csv(train_csv_path, parse_dates=["date"])

In [None]:
# Preview the first five rows of the training table.
train.head(n=5)

In [None]:
# Every distinct (store_nbr, family) pair, in order of first appearance in `train`.
combination_keys = ['store_nbr', 'family']
unique_combinations = train.loc[:, combination_keys].drop_duplicates(keep='first')

In [None]:
# Forecasts keyed by (store_nbr, family); filled in by the training loop.
predictions = dict()

In [None]:
# Train one XGBModel per (store_nbr, family) pair and store its forecast in
# `predictions[(store_nbr, family)]` as a NumPy array clipped at zero.

# Steps forecast per series. Used both as the model's output_chunk_length and
# as the predict() horizon.
# NOTE(review): the two are presumably kept equal so predict() needs no
# covariate values past the end of training — confirm against the darts docs.
FORECAST_HORIZON = 16

# Columns of `train` supplied to the model as past covariates, in component order.
covariate_cols = ["dcoilwtico", "onpromotion", "typeholiday",
                  "day_of_week", "day", "month", "year"]

# Covariate lags -16 .. -1. Identical for every series, so built once here
# instead of on every loop iteration.
past_covariates_lags = list(range(-16, 0))

# Plain column iteration replaces iterrows(): only these two values are needed.
for store_nbr, family in zip(unique_combinations['store_nbr'],
                             unique_combinations['family']):
    print(f"\nTraining model for store {store_nbr} and family {family}...")

    # Rows belonging to this store/family pair only.
    df_subset = train[(train['store_nbr'] == store_nbr) & (train['family'] == family)]

    # No time_col is passed, so darts takes the time axis from the frame's index.
    # NOTE(review): the parsed "date" column is never used as the time index —
    # confirm that is intentional.
    ts = TimeSeries.from_dataframe(df_subset, value_cols=["sales"])

    # One multi-component series built directly from the covariate columns;
    # replaces seven single-column series joined with concatenate(axis=1).
    past_cov = TimeSeries.from_dataframe(df_subset, value_cols=covariate_cols)

    model = XGBModel(lags=2,
                     lags_past_covariates=past_covariates_lags,
                     output_chunk_length=FORECAST_HORIZON)
    model.fit(ts, past_covariates=past_cov)

    # Store the final value once: forecast -> list of floats -> negatives
    # replaced by 0 (np.maximum returns an ndarray).
    forecast = model.predict(n=FORECAST_HORIZON)
    predictions[(store_nbr, family)] = np.maximum(forecast.pd_series().tolist(), 0)

In [None]:
# Load the test table; the "date" column is parsed into datetimes on read.
test_csv_path = "../../Data/Kaggle/StoreSales/test_darts_univariate.csv"
test = pd.read_csv(test_csv_path, parse_dates=["date"])

In [None]:
# Preview the first five rows of the test table.
test.head(n=5)

In [None]:
# Preview the last five rows of the test table.
test.tail(n=5)

In [None]:
# Flat list that will receive one predicted sales value per test row.
test_sales = list()

In [None]:
# Flatten the per-series forecasts into one list, step-major: all series for
# step 0, then all series for step 1, ... up to step 15.
#
# The list is rebuilt from scratch here so that re-running this cell cannot
# append a second copy of the forecasts, which would break the length-matched
# column assignment onto `test`.
#
# NOTE(review): this ordering presumably mirrors the row order of `test`
# (one block of rows per step, series in the insertion order of
# `predictions`) — confirm against the test file.
test_sales = []
for i in range(16):
    for (store_nbr, family), prediction in predictions.items():
        step_value = prediction[i]
        print(f"Store {store_nbr}, Family {family} predictions: {step_value}")
        test_sales.append(step_value)

In [None]:
# Attach the flattened forecasts as the `sales` column (lengths must match).
test = test.assign(sales=test_sales)

In [None]:
# Remove the key columns so they are not written to the submission file.
columns_to_remove = ['date', 'store_nbr', 'family']
test = test.drop(columns=columns_to_remove)

In [None]:
# Write the submission file without the DataFrame index.
submission_path = 'submission.csv'
test.to_csv(submission_path, index=False)