## Model training and tuning for the consumer discretionary sector

In [27]:
import pandas as pd
import numpy as np

import time

from darts import TimeSeries, concatenate
from darts.utils.model_selection import train_test_split
from darts.models.forecasting.xgboost import XGBModel
from darts.dataprocessing.transformers.scaler import Scaler
from darts.metrics.metrics import mape, mae
from darts.models.forecasting.prophet_model import Prophet
from darts.models.forecasting.sf_auto_theta import AutoTheta
from darts.models.forecasting.linear_regression_model import LinearRegressionModel
from darts.dataprocessing.transformers import StaticCovariatesTransformer
from darts.dataprocessing.transformers.boxcox import BoxCox
from darts.models.forecasting.sf_auto_arima import AutoARIMA

In [None]:
# Read the cleaned S&P 500 dataset and convert the `date` column to datetimes.
df = (
    pd.read_csv('../../data/processed/sp500_all_companies_cleaned_with_sector.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

# Keep only the Consumer Discretionary rows, ordered by ticker and then date,
# and renumber the index from zero.
is_discretionary = df['GICS Sector'] == 'Consumer Discretionary'
df_discretionary = (
    df.loc[is_discretionary]
    .sort_values(by=['ticker', 'date'])
    .reset_index(drop=True)
)

In [3]:
# Row count per GICS sector across the full (unfiltered) dataset.
df['GICS Sector'].value_counts()

GICS Sector
Industrials               193459
Financials                190485
Information Technology    177149
Health Care               151348
Consumer Discretionary    130185
Consumer Staples           96649
Real Estate                79757
Utilities                  78786
Materials                  63394
Energy                     58523
Communication Services     58017
Name: count, dtype: int64

In [4]:
# Distinct tickers present in the Consumer Discretionary subset.
df_discretionary['ticker'].unique()

array(['ABNB', 'AMZN', 'APTV', 'AZO', 'BBY', 'BKNG', 'CCL', 'CMG', 'CZR',
       'DASH', 'DECK', 'DHI', 'DPZ', 'DRI', 'EBAY', 'EXPE', 'F', 'GM',
       'GPC', 'GRMN', 'HAS', 'HD', 'HLT', 'KMX', 'LEN', 'LKQ', 'LOW',
       'LULU', 'LVS', 'MAR', 'MCD', 'MGM', 'MHK', 'NCLH', 'NKE', 'NVR',
       'ORLY', 'PHM', 'POOL', 'RCL', 'RL', 'ROST', 'SBUX', 'TJX', 'TPR',
       'TSCO', 'TSLA', 'ULTA', 'WSM', 'WYNN', 'YUM'], dtype=object)

In [None]:
# Build one multivariate darts TimeSeries per ticker, keyed by ticker symbol.
data = {}
# NOTE(review): `static_covs` is not referenced by any visible cell; it is kept
# only in case a later cell relies on it.
static_covs = pd.DataFrame({'ticker': df_discretionary['ticker'].unique()})

for ticker, group in df_discretionary.groupby('ticker'):
    series = TimeSeries.from_dataframe(
        group,
        time_col='date',
        value_cols=['close', 'high', 'low', 'open', 'volume'],
        freq='B',
        # Name the static covariate explicitly. A bare `pd.Series(ticker)` would
        # register it under the default integer index label 0, which makes the
        # feature anonymous in everything downstream.
        static_covariates=pd.Series({'ticker': ticker}),
    )
    data[ticker] = series

In [6]:
# Encode each ticker's static covariate with a single transformer fitted on all series.
transformer = StaticCovariatesTransformer()
ts = list(data.values())
transformer.fit(ts)

# Transform the series one by one and write them back under the same keys.
# NOTE(review): this overwrites `data` in place, so re-running the cell would
# re-fit on (and re-transform) already encoded series.
data.update({ticker: transformer.transform(series) for ticker, series in data.items()})

In [7]:
# Per-ticker train / test splits (test_size=0.1).
train = {}
test = {}

for ticker, series in data.items():
    train_part, test_part = train_test_split(series, test_size=0.1)
    train[ticker] = train_part
    # Prepend the last training observation to the test split: the forecasting
    # calls need it as context. As a result every `test[ticker]` is one point
    # longer than the actual hold-out.
    test[ticker] = train_part[-1:].append(test_part)

## Gradient Boosting

### Train a local model first

In [8]:
# Fit one XGBoost forecaster per ticker, each on that ticker's training split only.
models = {}

for ticker in data:
    print(f"Training model for {ticker}")
    start = time.time()

    # Calendar features that darts derives from the time index and feeds in as
    # future covariates.
    calendar_encoders = {
        'datetime_attribute': {
            'future': ['month', 'day', 'dayofweek', 'dayofyear', 'quarter', 'year']
        }
    }
    model = XGBModel(
        lags=15,                     # last 15 values of the target
        lags_past_covariates=1,      # previous step of the past covariates
        lags_future_covariates=[0],  # calendar features at the predicted step
        output_chunk_length=1,
        random_state=42,
        add_encoders=calendar_encoders,
    )

    ticker_train = train[ticker]
    model.fit(
        ticker_train['close'],
        past_covariates=ticker_train[['high', 'low', 'volume', 'open']],
    )

    end = time.time()
    print(f"Model for {ticker} trained in {end - start:.2f} seconds")
    print("*" * 50)

    models[ticker] = model

Training model for ABNB
Model for ABNB trained in 0.29 seconds
**************************************************
Training model for AMZN
Model for AMZN trained in 0.34 seconds
**************************************************
Training model for APTV
Model for APTV trained in 0.29 seconds
**************************************************
Training model for AZO
Model for AZO trained in 0.30 seconds
**************************************************
Training model for BBY
Model for BBY trained in 0.28 seconds
**************************************************
Training model for BKNG
Model for BKNG trained in 0.32 seconds
**************************************************
Training model for CCL
Model for CCL trained in 0.32 seconds
**************************************************
Training model for CMG
Model for CMG trained in 0.32 seconds
**************************************************
Training model for CZR
Model for CZR trained in 0.36 seconds
***********************************

In [9]:
# Score every local XGBoost model on its ticker's hold-out (MAPE and MAE of `close`).
mapes = {}
maes = {}

# NOTE(review): the past covariates passed to `predict` are the *observed*
# high/low/volume/open of the hold-out period, so every forecast step sees the
# previous day's real prices. These scores are therefore not directly comparable
# with the covariate-free models (Prophet, Theta, ARIMA) evaluated in this notebook.
for ticker in data.keys():
    holdout = test[ticker]
    # `test[ticker]` starts with one context point (the last training observation).
    # It is needed as a covariate lag but is not part of the hold-out, so drop it
    # from the ground truth and forecast exactly the hold-out length. Previously
    # n=len(test) produced one extra step past the data and the metrics relied on
    # darts' implicit time-axis intersection to ignore the misalignment.
    y_test = holdout['close'][1:]
    y_pred = models[ticker].predict(
        n=len(y_test),
        past_covariates=holdout[['high', 'low', 'volume', 'open']],
        show_warnings=False
    )
    mape_ = mape(y_test, y_pred)
    mae_ = mae(y_test, y_pred)
    mapes[ticker] = mape_
    maes[ticker] = mae_
    print(f"Ticker: {ticker}, MAPE: {mape_:.2f}, MAE: {mae_:.2f}")
    print("*" * 50)

Ticker: ABNB, MAPE: 12.26, MAE: 15.15
**************************************************
Ticker: AMZN, MAPE: 11.26, MAE: 23.57
**************************************************
Ticker: APTV, MAPE: 5.56, MAE: 3.52
**************************************************
Ticker: AZO, MAPE: 6.23, MAE: 209.84
**************************************************
Ticker: BBY, MAPE: 12.55, MAE: 10.84
**************************************************
Ticker: BKNG, MAPE: 18.15, MAE: 845.43
**************************************************
Ticker: CCL, MAPE: 6.10, MAE: 1.29
**************************************************
Ticker: CMG, MAPE: 7.27, MAE: 3.92
**************************************************
Ticker: CZR, MAPE: 6.23, MAE: 2.18
**************************************************
Ticker: DASH, MAPE: 5.74, MAE: 10.31
**************************************************
Ticker: DECK, MAPE: 14.57, MAE: 24.28
**************************************************
Ticker: DHI, MAPE: 12.23, MAE: 19.

In [10]:
# (mean MAPE, mean MAE) over all tickers for the local XGBoost models.
# MAE is averaged as-is across tickers, so tickers with large absolute errors weigh most.
np.mean(list(mapes.values())), np.mean(list(maes.values()))

(8.964565327142548, 53.00893140968817)

### Global model

In [11]:
# Fit a single XGBoost forecaster on the training splits of all tickers at once.
print("Training global model")
start = time.time()

# Calendar features that darts derives from the time index and feeds in as
# future covariates.
calendar_encoders = {
    'datetime_attribute': {
        'future': ['month', 'day', 'dayofweek', 'dayofyear', 'quarter', 'year']
    }
}
# Booster hyper-parameters forwarded to XGBoost.
booster_params = dict(n_estimators=1000, eta=0.01, max_depth=8, subsample=0.8)

model = XGBModel(
    lags=15,
    lags_past_covariates=1,
    lags_future_covariates=[0],
    output_chunk_length=1,
    random_state=42,
    add_encoders=calendar_encoders,
    use_static_covariates=True,  # lets the encoded ticker act as a feature
    **booster_params,
)

# One target / covariate series per ticker, in the same (dict) order.
train_targets = [split['close'] for split in train.values()]
train_covariates = [split[['high', 'low', 'volume', 'open']] for split in train.values()]
model.fit(
    train_targets,
    past_covariates=train_covariates,
)

end = time.time()
print(f"Global model trained in {end - start:.2f} seconds")
print("*" * 50)

Training global model
Global model trained in 16.18 seconds
**************************************************


In [12]:
# Score the global model on every ticker's hold-out (MAPE and MAE of `close`).
# NOTE(review): `mapes` / `maes` are rebound here, so the per-ticker scores of the
# local XGBoost models are no longer accessible after this cell runs.
mapes = {}
maes = {}

# NOTE(review): the past covariates are the *observed* high/low/volume/open of the
# hold-out period, so every forecast step sees the previous day's real prices.
# These scores are not directly comparable with the covariate-free models.
for ticker in data.keys():
    holdout = test[ticker]
    # `test[ticker]` starts with one context point (the last training observation).
    # It is needed as a covariate lag but is not part of the hold-out, so drop it
    # from the ground truth and forecast exactly the hold-out length. Previously
    # n=len(test) produced one extra step past the data and the metrics relied on
    # darts' implicit time-axis intersection to ignore the misalignment.
    y_test = holdout['close'][1:]
    y_pred = model.predict(
        n=len(y_test),
        series=train[ticker]['close'],  # history the global model continues from
        past_covariates=holdout[['high', 'low', 'volume', 'open']],
        show_warnings=False
    )
    mape_ = mape(y_test, y_pred)
    mae_ = mae(y_test, y_pred)

    mapes[ticker] = mape_
    maes[ticker] = mae_

    print(f"Global model for {ticker}, MAPE: {mape_:.2f}, MAE: {mae_:.2f}")
    print("*" * 50)

Global model for ABNB, MAPE: 4.15, MAE: 5.35
**************************************************
Global model for AMZN, MAPE: 2.78, MAE: 5.59
**************************************************
Global model for APTV, MAPE: 5.92, MAE: 3.77
**************************************************
Global model for AZO, MAPE: 5.55, MAE: 182.32
**************************************************
Global model for BBY, MAPE: 4.51, MAE: 3.60
**************************************************
Global model for BKNG, MAPE: 23.82, MAE: 1108.36
**************************************************
Global model for CCL, MAPE: 3.60, MAE: 0.71
**************************************************
Global model for CMG, MAPE: 4.59, MAE: 2.53
**************************************************
Global model for CZR, MAPE: 5.88, MAE: 2.00
**************************************************
Global model for DASH, MAPE: 2.72, MAE: 5.08
**************************************************
Global model for DECK, MAPE: 4.13, MAE:

In [13]:
# (mean MAPE, mean MAE) over all tickers for the global XGBoost model.
np.mean(list(mapes.values())), np.mean(list(maes.values()))

(4.411186524068468, 50.137823727677194)

## Prophet model

In [14]:
# Fit one Prophet model per ticker on the training `close` series.
models_prophet = {}

for ticker in data:
    print(f"Training model for {ticker}")
    start = time.time()

    # Calendar features that darts derives from the time index and feeds in as
    # future covariates.
    calendar_encoders = {
        'datetime_attribute': {
            'future': ['month', 'day', 'dayofweek', 'dayofyear', 'quarter', 'year']
        }
    }
    model = Prophet(
        add_encoders=calendar_encoders,
        seasonality_mode='multiplicative',
    )

    close_train = train[ticker]['close']
    model.fit(close_train)

    end = time.time()
    print(f"Model for {ticker} trained in {end - start:.2f} seconds")
    print("*" * 50)

    models_prophet[ticker] = model

Training model for ABNB


21:35:01 - cmdstanpy - INFO - Chain [1] start processing
21:35:03 - cmdstanpy - INFO - Chain [1] done processing


Model for ABNB trained in 1.83 seconds
**************************************************
Training model for AMZN


21:35:03 - cmdstanpy - INFO - Chain [1] start processing
21:35:06 - cmdstanpy - INFO - Chain [1] done processing


Model for AMZN trained in 2.84 seconds
**************************************************
Training model for APTV


21:35:06 - cmdstanpy - INFO - Chain [1] start processing
21:35:08 - cmdstanpy - INFO - Chain [1] done processing


Model for APTV trained in 2.66 seconds
**************************************************
Training model for AZO


21:35:09 - cmdstanpy - INFO - Chain [1] start processing
21:35:11 - cmdstanpy - INFO - Chain [1] done processing


Model for AZO trained in 2.25 seconds
**************************************************
Training model for BBY


21:35:11 - cmdstanpy - INFO - Chain [1] start processing
21:35:13 - cmdstanpy - INFO - Chain [1] done processing


Model for BBY trained in 2.66 seconds
**************************************************
Training model for BKNG


21:35:14 - cmdstanpy - INFO - Chain [1] start processing
21:35:15 - cmdstanpy - INFO - Chain [1] done processing


Model for BKNG trained in 1.85 seconds
**************************************************
Training model for CCL


21:35:16 - cmdstanpy - INFO - Chain [1] start processing
21:35:18 - cmdstanpy - INFO - Chain [1] done processing


Model for CCL trained in 2.64 seconds
**************************************************
Training model for CMG


21:35:18 - cmdstanpy - INFO - Chain [1] start processing
21:35:21 - cmdstanpy - INFO - Chain [1] done processing


Model for CMG trained in 3.13 seconds
**************************************************
Training model for CZR


21:35:21 - cmdstanpy - INFO - Chain [1] start processing
21:35:23 - cmdstanpy - INFO - Chain [1] done processing


Model for CZR trained in 2.01 seconds
**************************************************
Training model for DASH


21:35:23 - cmdstanpy - INFO - Chain [1] start processing
21:35:24 - cmdstanpy - INFO - Chain [1] done processing


Model for DASH trained in 0.88 seconds
**************************************************
Training model for DECK


21:35:24 - cmdstanpy - INFO - Chain [1] start processing
21:35:26 - cmdstanpy - INFO - Chain [1] done processing


Model for DECK trained in 2.58 seconds
**************************************************
Training model for DHI


21:35:27 - cmdstanpy - INFO - Chain [1] start processing
21:35:30 - cmdstanpy - INFO - Chain [1] done processing


Model for DHI trained in 3.18 seconds
**************************************************
Training model for DPZ


21:35:30 - cmdstanpy - INFO - Chain [1] start processing
21:35:32 - cmdstanpy - INFO - Chain [1] done processing


Model for DPZ trained in 2.56 seconds
**************************************************
Training model for DRI


21:35:33 - cmdstanpy - INFO - Chain [1] start processing
21:35:34 - cmdstanpy - INFO - Chain [1] done processing


Model for DRI trained in 2.15 seconds
**************************************************
Training model for EBAY


21:35:35 - cmdstanpy - INFO - Chain [1] start processing
21:35:37 - cmdstanpy - INFO - Chain [1] done processing


Model for EBAY trained in 2.72 seconds
**************************************************
Training model for EXPE


21:35:37 - cmdstanpy - INFO - Chain [1] start processing
21:35:40 - cmdstanpy - INFO - Chain [1] done processing


Model for EXPE trained in 3.33 seconds
**************************************************
Training model for F


21:35:41 - cmdstanpy - INFO - Chain [1] start processing
21:35:44 - cmdstanpy - INFO - Chain [1] done processing


Model for F trained in 3.14 seconds
**************************************************
Training model for GM


21:35:44 - cmdstanpy - INFO - Chain [1] start processing
21:35:47 - cmdstanpy - INFO - Chain [1] done processing


Model for GM trained in 3.33 seconds
**************************************************
Training model for GPC


21:35:47 - cmdstanpy - INFO - Chain [1] start processing
21:35:50 - cmdstanpy - INFO - Chain [1] done processing


Model for GPC trained in 3.13 seconds
**************************************************
Training model for GRMN


21:35:50 - cmdstanpy - INFO - Chain [1] start processing
21:35:53 - cmdstanpy - INFO - Chain [1] done processing


Model for GRMN trained in 3.46 seconds
**************************************************
Training model for HAS


21:35:54 - cmdstanpy - INFO - Chain [1] start processing
21:35:56 - cmdstanpy - INFO - Chain [1] done processing


Model for HAS trained in 2.51 seconds
**************************************************
Training model for HD


21:35:56 - cmdstanpy - INFO - Chain [1] start processing
21:35:59 - cmdstanpy - INFO - Chain [1] done processing


Model for HD trained in 3.17 seconds
**************************************************
Training model for HLT


21:36:00 - cmdstanpy - INFO - Chain [1] start processing
21:36:02 - cmdstanpy - INFO - Chain [1] done processing


Model for HLT trained in 2.99 seconds
**************************************************
Training model for KMX


21:36:02 - cmdstanpy - INFO - Chain [1] start processing
21:36:05 - cmdstanpy - INFO - Chain [1] done processing


Model for KMX trained in 3.05 seconds
**************************************************
Training model for LEN


21:36:06 - cmdstanpy - INFO - Chain [1] start processing
21:36:07 - cmdstanpy - INFO - Chain [1] done processing


Model for LEN trained in 1.99 seconds
**************************************************
Training model for LKQ


21:36:07 - cmdstanpy - INFO - Chain [1] start processing
21:36:10 - cmdstanpy - INFO - Chain [1] done processing


Model for LKQ trained in 3.20 seconds
**************************************************
Training model for LOW


21:36:11 - cmdstanpy - INFO - Chain [1] start processing
21:36:14 - cmdstanpy - INFO - Chain [1] done processing


Model for LOW trained in 3.15 seconds
**************************************************
Training model for LULU


21:36:14 - cmdstanpy - INFO - Chain [1] start processing
21:36:16 - cmdstanpy - INFO - Chain [1] done processing


Model for LULU trained in 2.38 seconds
**************************************************
Training model for LVS


21:36:16 - cmdstanpy - INFO - Chain [1] start processing
21:36:19 - cmdstanpy - INFO - Chain [1] done processing


Model for LVS trained in 3.10 seconds
**************************************************
Training model for MAR


21:36:19 - cmdstanpy - INFO - Chain [1] start processing
21:36:21 - cmdstanpy - INFO - Chain [1] done processing


Model for MAR trained in 1.98 seconds
**************************************************
Training model for MCD


21:36:21 - cmdstanpy - INFO - Chain [1] start processing
21:36:24 - cmdstanpy - INFO - Chain [1] done processing


Model for MCD trained in 2.99 seconds
**************************************************
Training model for MGM


21:36:24 - cmdstanpy - INFO - Chain [1] start processing
21:36:27 - cmdstanpy - INFO - Chain [1] done processing


Model for MGM trained in 2.90 seconds
**************************************************
Training model for MHK


21:36:27 - cmdstanpy - INFO - Chain [1] start processing
21:36:29 - cmdstanpy - INFO - Chain [1] done processing


Model for MHK trained in 2.46 seconds
**************************************************
Training model for NCLH


21:36:30 - cmdstanpy - INFO - Chain [1] start processing
21:36:33 - cmdstanpy - INFO - Chain [1] done processing


Model for NCLH trained in 3.16 seconds
**************************************************
Training model for NKE


21:36:33 - cmdstanpy - INFO - Chain [1] start processing
21:36:36 - cmdstanpy - INFO - Chain [1] done processing


Model for NKE trained in 3.60 seconds
**************************************************
Training model for NVR


21:36:36 - cmdstanpy - INFO - Chain [1] start processing
21:36:39 - cmdstanpy - INFO - Chain [1] done processing


Model for NVR trained in 3.17 seconds
**************************************************
Training model for ORLY


21:36:40 - cmdstanpy - INFO - Chain [1] start processing
21:36:42 - cmdstanpy - INFO - Chain [1] done processing


Model for ORLY trained in 3.05 seconds
**************************************************
Training model for PHM


21:36:43 - cmdstanpy - INFO - Chain [1] start processing
21:36:44 - cmdstanpy - INFO - Chain [1] done processing


Model for PHM trained in 2.14 seconds
**************************************************
Training model for POOL


21:36:45 - cmdstanpy - INFO - Chain [1] start processing
21:36:47 - cmdstanpy - INFO - Chain [1] done processing


Model for POOL trained in 2.09 seconds
**************************************************
Training model for RCL


21:36:47 - cmdstanpy - INFO - Chain [1] start processing
21:36:50 - cmdstanpy - INFO - Chain [1] done processing


Model for RCL trained in 3.29 seconds
**************************************************
Training model for RL


21:36:50 - cmdstanpy - INFO - Chain [1] start processing
21:36:53 - cmdstanpy - INFO - Chain [1] done processing


Model for RL trained in 3.04 seconds
**************************************************
Training model for ROST


21:36:53 - cmdstanpy - INFO - Chain [1] start processing
21:36:56 - cmdstanpy - INFO - Chain [1] done processing


Model for ROST trained in 2.65 seconds
**************************************************
Training model for SBUX


21:36:56 - cmdstanpy - INFO - Chain [1] start processing
21:36:59 - cmdstanpy - INFO - Chain [1] done processing


Model for SBUX trained in 3.06 seconds
**************************************************
Training model for TJX


21:36:59 - cmdstanpy - INFO - Chain [1] start processing
21:37:02 - cmdstanpy - INFO - Chain [1] done processing


Model for TJX trained in 3.04 seconds
**************************************************
Training model for TPR


21:37:02 - cmdstanpy - INFO - Chain [1] start processing
21:37:05 - cmdstanpy - INFO - Chain [1] done processing


Model for TPR trained in 3.03 seconds
**************************************************
Training model for TSCO


21:37:05 - cmdstanpy - INFO - Chain [1] start processing
21:37:08 - cmdstanpy - INFO - Chain [1] done processing


Model for TSCO trained in 3.11 seconds
**************************************************
Training model for TSLA


21:37:08 - cmdstanpy - INFO - Chain [1] start processing
21:37:10 - cmdstanpy - INFO - Chain [1] done processing


Model for TSLA trained in 2.25 seconds
**************************************************
Training model for ULTA


21:37:10 - cmdstanpy - INFO - Chain [1] start processing
21:37:13 - cmdstanpy - INFO - Chain [1] done processing


Model for ULTA trained in 2.69 seconds
**************************************************
Training model for WSM


21:37:13 - cmdstanpy - INFO - Chain [1] start processing
21:37:14 - cmdstanpy - INFO - Chain [1] done processing


Model for WSM trained in 1.70 seconds
**************************************************
Training model for WYNN


21:37:15 - cmdstanpy - INFO - Chain [1] start processing
21:37:18 - cmdstanpy - INFO - Chain [1] done processing


Model for WYNN trained in 3.19 seconds
**************************************************
Training model for YUM


21:37:18 - cmdstanpy - INFO - Chain [1] start processing
21:37:21 - cmdstanpy - INFO - Chain [1] done processing


Model for YUM trained in 3.26 seconds
**************************************************


In [15]:
# Score every Prophet model on its ticker's hold-out (MAPE and MAE of `close`).
mapes_prophet = {}
maes_prophet = {}

for ticker in data.keys():
    # `test[ticker]` starts with one context point (the last training observation),
    # which is not part of the hold-out. Drop it from the ground truth and forecast
    # exactly the hold-out length. Previously n=len(test) produced one extra step
    # past the data and the metrics relied on darts' implicit time-axis
    # intersection to ignore the misalignment.
    y_test = test[ticker]['close'][1:]
    y_pred = models_prophet[ticker].predict(
        n=len(y_test),
        show_warnings=False
    )
    mape_ = mape(y_test, y_pred)
    mae_ = mae(y_test, y_pred)
    mapes_prophet[ticker] = mape_
    maes_prophet[ticker] = mae_
    print(f"Ticker: {ticker}, MAPE: {mape_:.2f}, MAE: {mae_:.2f}")
    print("*" * 50)

Ticker: ABNB, MAPE: 7.50, MAE: 9.32
**************************************************
Ticker: AMZN, MAPE: 9.25, MAE: 18.52
**************************************************
Ticker: APTV, MAPE: 17.36, MAE: 10.62
**************************************************
Ticker: AZO, MAPE: 8.81, MAE: 290.54
**************************************************
Ticker: BBY, MAPE: 15.48, MAE: 13.27
**************************************************
Ticker: BKNG, MAPE: 8.03, MAE: 369.48
**************************************************
Ticker: CCL, MAPE: 15.54, MAE: 3.38
**************************************************
Ticker: CMG, MAPE: 14.00, MAE: 7.51
**************************************************
Ticker: CZR, MAPE: 29.23, MAE: 9.44
**************************************************
Ticker: DASH, MAPE: 17.92, MAE: 33.40
**************************************************
Ticker: DECK, MAPE: 23.14, MAE: 32.26
**************************************************
Ticker: DHI, MAPE: 22.56, MAE: 3

In [16]:
# (mean MAPE, mean MAE) over all tickers for the Prophet models.
np.mean(list(mapes_prophet.values())), np.mean(list(maes_prophet.values()))

(21.753373068911202, 67.2655245439751)

## Theta model

In [17]:
# Fit one AutoTheta model per ticker on the training `close` series.
models_theta = {}

for ticker in data:
    print(f"Training model for {ticker}")
    start = time.time()

    # NOTE(review): season_length=30 on a business-day ('B') index — confirm this
    # is the intended seasonal period.
    model = AutoTheta(season_length=30)

    close_train = train[ticker]['close']
    model.fit(series=close_train)

    end = time.time()
    print(f"Model for {ticker} trained in {end - start:.2f} seconds")
    print("*" * 50)

    models_theta[ticker] = model

Training model for ABNB
Model for ABNB trained in 0.21 seconds
**************************************************
Training model for AMZN
Model for AMZN trained in 1.05 seconds
**************************************************
Training model for APTV
Model for APTV trained in 0.90 seconds
**************************************************
Training model for AZO
Model for AZO trained in 1.27 seconds
**************************************************
Training model for BBY
Model for BBY trained in 0.81 seconds
**************************************************
Training model for BKNG
Model for BKNG trained in 0.74 seconds
**************************************************
Training model for CCL
Model for CCL trained in 0.62 seconds
**************************************************
Training model for CMG
Model for CMG trained in 1.06 seconds
**************************************************
Training model for CZR
Model for CZR trained in 0.71 seconds
***********************************

In [18]:
# Score every AutoTheta model on its ticker's hold-out (MAPE and MAE of `close`).
mapes_theta = {}
maes_theta = {}

for ticker in data.keys():
    # `test[ticker]` starts with one context point (the last training observation),
    # which is not part of the hold-out. Drop it from the ground truth and forecast
    # exactly the hold-out length. Previously n=len(test) produced one extra step
    # past the data and the metrics relied on darts' implicit time-axis
    # intersection to ignore the misalignment.
    y_test = test[ticker]['close'][1:]
    y_pred = models_theta[ticker].predict(
        n=len(y_test),
        show_warnings=False
    )
    mape_ = mape(y_test, y_pred)
    mae_ = mae(y_test, y_pred)
    mapes_theta[ticker] = mape_
    maes_theta[ticker] = mae_
    print(f"Ticker: {ticker}, MAPE: {mape_:.2f}, MAE: {mae_:.2f}")
    print("*" * 50)

Ticker: ABNB, MAPE: 8.42, MAE: 10.52
**************************************************
Ticker: AMZN, MAPE: 6.86, MAE: 14.03
**************************************************
Ticker: APTV, MAPE: 22.95, MAE: 13.97
**************************************************
Ticker: AZO, MAPE: 5.15, MAE: 172.76
**************************************************
Ticker: BBY, MAPE: 14.63, MAE: 12.49
**************************************************
Ticker: BKNG, MAPE: 19.32, MAE: 896.22
**************************************************
Ticker: CCL, MAPE: 32.07, MAE: 7.11
**************************************************
Ticker: CMG, MAPE: 15.24, MAE: 8.17
**************************************************
Ticker: CZR, MAPE: 13.13, MAE: 4.14
**************************************************
Ticker: DASH, MAPE: 5.08, MAE: 9.63
**************************************************
Ticker: DECK, MAPE: 17.03, MAE: 27.98
**************************************************
Ticker: DHI, MAPE: 13.41, MAE: 2

In [19]:
# (mean MAPE, mean MAE) over all tickers for the AutoTheta models.
np.mean(list(mapes_theta.values())), np.mean(list(maes_theta.values()))

(13.825863013585836, 58.21607323540278)

## Linear Regression

### Local model

In [20]:
# Fit one linear-regression forecaster per ticker, each on that ticker's training split only.
models_lr = {}

for ticker in data:
    print(f"Training model for {ticker}")
    start = time.time()

    # Calendar features that darts derives from the time index and feeds in as
    # future covariates.
    calendar_encoders = {
        'datetime_attribute': {
            'future': ['month', 'day', 'dayofweek', 'dayofyear', 'quarter', 'year']
        }
    }
    model = LinearRegressionModel(
        lags=15,                     # last 15 values of the target
        lags_past_covariates=1,      # previous step of the past covariates
        lags_future_covariates=[0],  # calendar features at the predicted step
        output_chunk_length=1,
        random_state=42,
        add_encoders=calendar_encoders,
    )

    ticker_train = train[ticker]
    model.fit(
        ticker_train['close'],
        past_covariates=ticker_train[['high', 'low', 'volume', 'open']],
    )

    end = time.time()
    print(f"Model for {ticker} trained in {end - start:.2f} seconds")
    print("*" * 50)

    models_lr[ticker] = model

Training model for ABNB
Model for ABNB trained in 0.08 seconds
**************************************************
Training model for AMZN
Model for AMZN trained in 0.12 seconds
**************************************************
Training model for APTV
Model for APTV trained in 0.12 seconds
**************************************************
Training model for AZO
Model for AZO trained in 0.12 seconds
**************************************************
Training model for BBY
Model for BBY trained in 0.12 seconds
**************************************************
Training model for BKNG
Model for BKNG trained in 0.17 seconds
**************************************************
Training model for CCL
Model for CCL trained in 0.12 seconds
**************************************************
Training model for CMG
Model for CMG trained in 0.12 seconds
**************************************************
Training model for CZR
Model for CZR trained in 0.12 seconds
***********************************

In [21]:
# Score every local linear-regression model on its ticker's hold-out (MAPE and MAE of `close`).
mapes_lr = {}
maes_lr = {}

# NOTE(review): the recorded output of this cell shows errors that are orders of
# magnitude off for some tickers (e.g. BBY, CZR, DECK), i.e. the multi-step
# forecast diverges for them. Possibly the unscaled features (volume, year, ...)
# get amplified through the autoregressive rollout — TODO confirm; scaling the
# inputs may help.
# NOTE(review): the past covariates are the *observed* high/low/volume/open of the
# hold-out period, so these scores are not directly comparable with the
# covariate-free models.
for ticker in data.keys():
    holdout = test[ticker]
    # `test[ticker]` starts with one context point (the last training observation).
    # It is needed as a covariate lag but is not part of the hold-out, so drop it
    # from the ground truth and forecast exactly the hold-out length. Previously
    # n=len(test) produced one extra step past the data and the metrics relied on
    # darts' implicit time-axis intersection to ignore the misalignment.
    y_test = holdout['close'][1:]
    y_pred = models_lr[ticker].predict(
        n=len(y_test),
        past_covariates=holdout[['high', 'low', 'volume', 'open']],
        show_warnings=False
    )
    mape_ = mape(y_test, y_pred)
    mae_ = mae(y_test, y_pred)
    mapes_lr[ticker] = mape_
    maes_lr[ticker] = mae_
    print(f"Ticker: {ticker}, MAPE: {mape_:.2f}, MAE: {mae_:.2f}")
    print("*" * 50)

Ticker: ABNB, MAPE: 7.43, MAE: 9.48
**************************************************
Ticker: AMZN, MAPE: 2.38, MAE: 4.64
**************************************************
Ticker: APTV, MAPE: 4.35, MAE: 2.70
**************************************************
Ticker: AZO, MAPE: 2.87, MAE: 94.43
**************************************************
Ticker: BBY, MAPE: 142861.87, MAE: 97222.34
**************************************************
Ticker: BKNG, MAPE: 16.05, MAE: 738.83
**************************************************
Ticker: CCL, MAPE: 3.65, MAE: 0.72
**************************************************
Ticker: CMG, MAPE: 4.55, MAE: 2.50
**************************************************
Ticker: CZR, MAPE: 273692.53, MAE: 73975.93
**************************************************
Ticker: DASH, MAPE: 3.67, MAE: 6.89
**************************************************
Ticker: DECK, MAPE: 223808992068.49, MAE: 246600283839.76
**************************************************
Tick

In [22]:
# (mean MAPE, mean MAE) over all tickers for the local linear-regression models.
# NOTE(review): the recorded per-ticker output contains diverged forecasts
# (e.g. BBY, CZR, DECK), so this mean is dominated by those few tickers.
np.mean(list(mapes_lr.values())), np.mean(list(maes_lr.values()))

(4389549820.414343, 4835941443.714583)

### Global model

In [23]:
# Fit a single linear-regression forecaster on the training splits of all tickers at once.
print("Training global model")
start = time.time()

# Calendar features that darts derives from the time index and feeds in as
# future covariates.
calendar_encoders = {
    'datetime_attribute': {
        'future': ['month', 'day', 'dayofweek', 'dayofyear', 'quarter', 'year']
    }
}
model = LinearRegressionModel(
    lags=15,
    lags_past_covariates=1,
    lags_future_covariates=[0],
    output_chunk_length=1,
    random_state=42,
    add_encoders=calendar_encoders,
)

# One target / covariate series per ticker, in the same (dict) order.
train_targets = [split['close'] for split in train.values()]
train_covariates = [split[['high', 'low', 'volume', 'open']] for split in train.values()]
model.fit(
    train_targets,
    past_covariates=train_covariates,
)

end = time.time()
print(f"Global model trained in {end - start:.2f} seconds")
print("*" * 50)

Training global model
Global model trained in 6.17 seconds
**************************************************


In [24]:
# Score the global linear-regression model on every ticker's hold-out (MAPE and MAE of `close`).
# NOTE(review): `mapes` / `maes` are rebound here, so the scores stored under these
# names by the XGBoost cells are no longer accessible after this cell runs.
mapes = {}
maes = {}

# NOTE(review): the past covariates are the *observed* high/low/volume/open of the
# hold-out period, so every forecast step sees the previous day's real prices.
# These scores are not directly comparable with the covariate-free models.
for ticker in data.keys():
    holdout = test[ticker]
    # `test[ticker]` starts with one context point (the last training observation).
    # It is needed as a covariate lag but is not part of the hold-out, so drop it
    # from the ground truth and forecast exactly the hold-out length. Previously
    # n=len(test) produced one extra step past the data and the metrics relied on
    # darts' implicit time-axis intersection to ignore the misalignment.
    y_test = holdout['close'][1:]
    y_pred = model.predict(
        n=len(y_test),
        series=train[ticker]['close'],  # history the global model continues from
        past_covariates=holdout[['high', 'low', 'volume', 'open']],
        show_warnings=False
    )
    mape_ = mape(y_test, y_pred)
    mae_ = mae(y_test, y_pred)

    mapes[ticker] = mape_
    maes[ticker] = mae_

    print(f"Global model for {ticker}, MAPE: {mape_:.2f}, MAE: {mae_:.2f}")
    print("*" * 50)

Global model for ABNB, MAPE: 3.31, MAE: 4.30
**************************************************
Global model for AMZN, MAPE: 2.75, MAE: 5.35
**************************************************
Global model for APTV, MAPE: 3.63, MAE: 2.27
**************************************************
Global model for AZO, MAPE: 1.67, MAE: 53.72
**************************************************
Global model for BBY, MAPE: 3.60, MAE: 2.90
**************************************************
Global model for BKNG, MAPE: 2.77, MAE: 120.58
**************************************************
Global model for CCL, MAPE: 4.34, MAE: 0.82
**************************************************
Global model for CMG, MAPE: 2.82, MAE: 1.58
**************************************************
Global model for CZR, MAPE: 4.62, MAE: 1.61
**************************************************
Global model for DASH, MAPE: 3.12, MAE: 5.79
**************************************************
Global model for DECK, MAPE: 4.63, MAE: 7.

In [25]:
# (mean MAPE, mean MAE) over all tickers for the global linear-regression model.
np.mean(list(mapes.values())), np.mean(list(maes.values()))

(3.2243200478544516, 11.6941244901704)

## ARIMA

In [32]:
# Fit one AutoARIMA model (default settings) per ticker on the training `close` series.
models_arima = {}

for ticker in data:
    print(f"Training model for {ticker}")
    start = time.time()

    model = AutoARIMA()
    close_train = train[ticker]['close']
    model.fit(series=close_train)

    end = time.time()
    print(f"Model for {ticker} trained in {end - start:.2f} seconds")
    print("*" * 50)

    models_arima[ticker] = model

Training model for ABNB
Model for ABNB trained in 0.13 seconds
**************************************************
Training model for AMZN
Model for AMZN trained in 0.21 seconds
**************************************************
Training model for APTV
Model for APTV trained in 0.23 seconds
**************************************************
Training model for AZO
Model for AZO trained in 0.21 seconds
**************************************************
Training model for BBY
Model for BBY trained in 0.24 seconds
**************************************************
Training model for BKNG
Model for BKNG trained in 0.19 seconds
**************************************************
Training model for CCL
Model for CCL trained in 0.21 seconds
**************************************************
Training model for CMG
Model for CMG trained in 0.39 seconds
**************************************************
Training model for CZR
Model for CZR trained in 1.46 seconds
***********************************

In [33]:
# Score every AutoARIMA model on its ticker's hold-out (MAPE and MAE of `close`).
mapes_arima = {}
maes_arima = {}

for ticker in data.keys():
    # `test[ticker]` starts with one context point (the last training observation),
    # which is not part of the hold-out. Drop it from the ground truth and forecast
    # exactly the hold-out length. Previously n=len(test) produced one extra step
    # past the data and the metrics relied on darts' implicit time-axis
    # intersection to ignore the misalignment.
    y_test = test[ticker]['close'][1:]
    y_pred = models_arima[ticker].predict(
        n=len(y_test),
        show_warnings=False
    )
    mape_ = mape(y_test, y_pred)
    mae_ = mae(y_test, y_pred)
    mapes_arima[ticker] = mape_
    maes_arima[ticker] = mae_
    print(f"Ticker: {ticker}, MAPE: {mape_:.2f}, MAE: {mae_:.2f}")
    print("*" * 50)

Ticker: ABNB, MAPE: 9.66, MAE: 12.02
**************************************************
Ticker: AMZN, MAPE: 6.86, MAE: 13.93
**************************************************
Ticker: APTV, MAPE: 20.94, MAE: 12.74
**************************************************
Ticker: AZO, MAPE: 5.16, MAE: 173.28
**************************************************
Ticker: BBY, MAPE: 16.77, MAE: 14.50
**************************************************
Ticker: BKNG, MAPE: 20.15, MAE: 934.94
**************************************************
Ticker: CCL, MAPE: 25.71, MAE: 5.77
**************************************************
Ticker: CMG, MAPE: 29.17, MAE: 15.82
**************************************************
Ticker: CZR, MAPE: 11.60, MAE: 3.86
**************************************************
Ticker: DASH, MAPE: 5.04, MAE: 9.52
**************************************************
Ticker: DECK, MAPE: 17.47, MAE: 25.43
**************************************************
Ticker: DHI, MAPE: 13.66, MAE: 

In [34]:
# (mean MAPE, mean MAE) over all tickers for the AutoARIMA models.
np.mean(list(mapes_arima.values())), np.mean(list(maes_arima.values()))

(13.930011477783392, 59.01241972004467)