In [1]:
import pandas as pd
import numpy as np
from darts import TimeSeries
from darts.models import XGBModel
from darts import concatenate

In [2]:
train = pd.read_csv("../../Data/Kaggle/StoreSales/train_darts.csv", parse_dates=["date"])

In [3]:
train.head()

Unnamed: 0,date,store_nbr,family,sales,onpromotion,typeholiday,dcoilwtico,city,state,typestores,cluster,day_of_week,day,month,year
0,2013-01-01,1,0,0.0,0,3,93.14,18,12,3,13,2,1,1,2013
1,2013-01-01,1,1,0.0,0,3,93.14,18,12,3,13,2,1,1,2013
2,2013-01-01,1,2,0.0,0,3,93.14,18,12,3,13,2,1,1,2013
3,2013-01-01,1,3,0.0,0,3,93.14,18,12,3,13,2,1,1,2013
4,2013-01-01,1,4,0.0,0,3,93.14,18,12,3,13,2,1,1,2013


In [4]:
unique_combinations = train[['store_nbr', 'family']].drop_duplicates()

In [5]:
predictions = {}

In [6]:
for index, row in unique_combinations.iterrows():
    store_nbr = row['store_nbr']
    family = row['family']
    
    print(f"\nTraining model for store {store_nbr} and family {family}...")
    
    df_subset = train[(train['store_nbr'] == store_nbr) & (train['family'] == family)]
    
    ts = TimeSeries.from_dataframe(df_subset, value_cols=["sales"])

    dcoilwtico_ts = TimeSeries.from_dataframe(df_subset, value_cols=["dcoilwtico"])
    onpromotion_ts = TimeSeries.from_dataframe(df_subset, value_cols=["onpromotion"])
    typeholiday_ts = TimeSeries.from_dataframe(df_subset, value_cols=["typeholiday"])
    day_of_week_ts = TimeSeries.from_dataframe(df_subset, value_cols=["day_of_week"])
    day_ts = TimeSeries.from_dataframe(df_subset, value_cols=["day"])
    month_ts = TimeSeries.from_dataframe(df_subset, value_cols=["month"])
    year_ts = TimeSeries.from_dataframe(df_subset, value_cols=["year"])
    
    past_cov = concatenate([dcoilwtico_ts,
                          onpromotion_ts,
                          typeholiday_ts,
                          day_of_week_ts,
                          day_ts,
                          month_ts,
                          year_ts], axis=1)

    past_covariates_lags = list(range(-16, 0))

    model = XGBModel(lags=2, lags_past_covariates=past_covariates_lags, output_chunk_length=16)
    model.fit(ts, past_covariates=past_cov)

    predictions[(store_nbr, family)] = model.predict(n=16)
    predictions[(store_nbr, family)] = predictions[(store_nbr, family)].pd_series().tolist()
    predictions[(store_nbr, family)] = np.maximum(predictions[(store_nbr, family)], 0)


Training model for store 1 and family 0...

Training model for store 1 and family 1...

Training model for store 1 and family 2...

Training model for store 1 and family 3...

Training model for store 1 and family 4...

Training model for store 1 and family 5...

Training model for store 1 and family 6...

Training model for store 1 and family 7...

Training model for store 1 and family 8...

Training model for store 1 and family 9...

Training model for store 1 and family 10...

Training model for store 1 and family 11...

Training model for store 1 and family 12...

Training model for store 1 and family 13...

Training model for store 1 and family 14...

Training model for store 1 and family 15...

Training model for store 1 and family 16...

Training model for store 1 and family 17...

Training model for store 1 and family 18...

Training model for store 1 and family 19...

Training model for store 1 and family 20...

Training model for store 1 and family 21...

Training model for 

In [7]:
test = pd.read_csv("../../Data/Kaggle/StoreSales/test_darts_univariate.csv", parse_dates=["date"])

In [8]:
test.head()

Unnamed: 0,id,date,store_nbr,family
0,3000888,2017-08-16,1,0
1,3000889,2017-08-16,1,1
2,3000890,2017-08-16,1,2
3,3000891,2017-08-16,1,3
4,3000892,2017-08-16,1,4


In [9]:
test.tail()

Unnamed: 0,id,date,store_nbr,family
28507,3029395,2017-08-31,9,28
28508,3029396,2017-08-31,9,29
28509,3029397,2017-08-31,9,30
28510,3029398,2017-08-31,9,31
28511,3029399,2017-08-31,9,32


In [10]:
test_sales = []

In [11]:
for i in range(16):
    for (store_nbr, family), prediction in predictions.items():
        print(f"Store {store_nbr}, Family {family} predictions: {prediction[i]}")
        test_sales.append(prediction[i])

Store 1, Family 0 predictions: 2.554008722305298
Store 1, Family 1 predictions: 1.6596815184570984e-16
Store 1, Family 2 predictions: 6.235189437866211
Store 1, Family 3 predictions: 2420.32861328125
Store 1, Family 4 predictions: 0.903712272644043
Store 1, Family 5 predictions: 452.0231018066406
Store 1, Family 6 predictions: 17.269485473632812
Store 1, Family 7 predictions: 903.3443603515625
Store 1, Family 8 predictions: 845.3331298828125
Store 1, Family 9 predictions: 131.3484649658203
Store 1, Family 10 predictions: 156.06529235839844
Store 1, Family 11 predictions: 151.03016662597656
Store 1, Family 12 predictions: 2887.842529296875
Store 1, Family 13 predictions: 37.444969177246094
Store 1, Family 14 predictions: 1.96244215965271
Store 1, Family 15 predictions: 35.19350051879883
Store 1, Family 16 predictions: 28.11985206604004
Store 1, Family 17 predictions: 1.2319570779800415
Store 1, Family 18 predictions: 191.60806274414062
Store 1, Family 19 predictions: 14.810450553894043


In [12]:
test['sales'] = test_sales

In [13]:
test.drop(columns=['date', 'store_nbr', 'family'], inplace=True)

In [14]:
test.to_csv('submission.csv', index=False)