## Model training and tuning for the consumer staples sector

In [27]:
import pandas as pd
import numpy as np

import time

from darts import TimeSeries, concatenate
from darts.utils.model_selection import train_test_split
from darts.models.forecasting.xgboost import XGBModel
from darts.dataprocessing.transformers.scaler import Scaler
from darts.metrics.metrics import mape, mae
from darts.models.forecasting.prophet_model import Prophet
from darts.models.forecasting.sf_auto_theta import AutoTheta
from darts.models.forecasting.linear_regression_model import LinearRegressionModel
from darts.dataprocessing.transformers import StaticCovariatesTransformer
from darts.models.forecasting.sf_auto_arima import AutoARIMA
from darts.dataprocessing.transformers.boxcox import BoxCox

In [2]:
# Load the cleaned S&P 500 data set and convert the date column to timestamps.
df = pd.read_csv('../../data/processed/sp500_all_companies_cleaned_with_sector.csv')
df['date'] = pd.to_datetime(df['date'])

# Keep only the Consumer Staples rows, ordered by ticker and then by date,
# and renumber the index from zero.
df_staples = (
    df.loc[df['GICS Sector'].eq('Consumer Staples')]
    .sort_values(by=['ticker', 'date'])
    .reset_index(drop=True)
)

In [3]:
# Number of rows per GICS sector in the full data set (not only the staples subset).
df['GICS Sector'].value_counts()

GICS Sector
Industrials               193459
Financials                190485
Information Technology    177149
Health Care               151348
Consumer Discretionary    130185
Consumer Staples           96649
Real Estate                79757
Utilities                  78786
Materials                  63394
Energy                     58523
Communication Services     58017
Name: count, dtype: int64

In [4]:
# Distinct tickers present in the Consumer Staples subset.
df_staples['ticker'].unique()

array(['ADM', 'BF-B', 'BG', 'CAG', 'CHD', 'CL', 'CLX', 'COST', 'CPB',
       'DG', 'DLTR', 'EL', 'GIS', 'HRL', 'HSY', 'K', 'KDP', 'KHC', 'KMB',
       'KO', 'KR', 'KVUE', 'LW', 'MDLZ', 'MKC', 'MNST', 'MO', 'PEP', 'PG',
       'PM', 'SJM', 'STZ', 'SYY', 'TAP', 'TGT', 'TSN', 'WBA', 'WMT'],
      dtype=object)

In [5]:
# NOTE(review): `static_covs` is built here but never read again in this cell;
# each series gets its static covariate from `pd.Series(ticker)` below.
static_covs = pd.DataFrame({'ticker': df_staples['ticker'].unique()})

# One multi-component TimeSeries per ticker (close, high, low, open, volume)
# on a business-day ('B') frequency, tagged with its ticker as a static covariate.
data = {
    ticker: TimeSeries.from_dataframe(
        group,
        time_col='date',
        value_cols=['close', 'high', 'low', 'open', 'volume'],
        freq='B',
        static_covariates=pd.Series(ticker),
    )
    for ticker, group in df_staples.groupby('ticker')
}

In [6]:
# Encode the ticker static covariate of every series. The transformer is
# fitted once on all series and then applied to each of them in turn.
transformer = StaticCovariatesTransformer()
ts = list(data.values())
transformer.fit(ts)

# Replace every raw series with its transformed counterpart, keeping the same keys.
data = {name: transformer.transform(raw) for name, raw in data.items()}

In [7]:
train, test = {}, {}

for ticker, series in data.items():
    # Split each series with a 10% test share.
    train_part, test_part = train_test_split(series, test_size=0.1)
    train[ticker] = train_part
    # Prefix the test part with the final training point, because the forecasts
    # further down need it as context.
    test[ticker] = train_part[-1:].append(test_part)

## Gradient Boosting

### Train a local model first

In [8]:
# Train one XGBoost model per ticker ("local" models) and keep them by ticker.
models = {}

for ticker in data:
    print(f"Training model for {ticker}")
    started_at = time.time()

    # Calendar attributes of the time index, generated automatically by darts
    # and supplied to the model as future covariates.
    calendar_encoders = {
        'datetime_attribute': {
            'future': ['month', 'day', 'dayofweek', 'dayofyear', 'quarter', 'year']
        }
    }
    model = XGBModel(
        lags=15,
        lags_past_covariates=1,
        lags_future_covariates=[0],
        output_chunk_length=1,
        random_state=42,
        add_encoders=calendar_encoders,
    )

    # Target is the closing price; the other price/volume components act as
    # past covariates.
    train_series = train[ticker]
    model.fit(
        train_series['close'],
        past_covariates=train_series[['high', 'low', 'volume', 'open']],
    )

    elapsed = time.time() - started_at
    print(f"Model for {ticker} trained in {elapsed:.2f} seconds")
    print("*" * 50)

    models[ticker] = model

Training model for ADM
Model for ADM trained in 0.28 seconds
**************************************************
Training model for BF-B
Model for BF-B trained in 0.31 seconds
**************************************************
Training model for BG
Model for BG trained in 0.32 seconds
**************************************************
Training model for CAG
Model for CAG trained in 0.30 seconds
**************************************************
Training model for CHD
Model for CHD trained in 0.32 seconds
**************************************************
Training model for CL
Model for CL trained in 0.38 seconds
**************************************************
Training model for CLX
Model for CLX trained in 0.33 seconds
**************************************************
Training model for COST
Model for COST trained in 0.50 seconds
**************************************************
Training model for CPB
Model for CPB trained in 0.34 seconds
*******************************************

In [9]:
# Evaluate every local XGBoost model on its ticker's hold-out period and
# collect MAPE / MAE per ticker.
# NOTE(review): `mapes` / `maes` are re-initialised by the global XGBoost
# evaluation cell, so these per-ticker results are lost once that cell runs.
mapes = {}
maes = {}

for ticker in data.keys():
    # `test[ticker]` begins with the last training point, which was prepended at
    # split time so that the lagged past covariates exist for the first forecast
    # step. Drop it from the ground truth so the forecast horizon equals the
    # hold-out length instead of running one step past the available data.
    y_test = test[ticker]['close'][1:]
    # No `series` is passed, so the forecast continues from the series the
    # model was fitted on.
    # NOTE(review): the model is fed the actual hold-out values of
    # high/low/volume/open, so these scores are not directly comparable with
    # models that forecast without covariates.
    y_pred = models[ticker].predict(
        n=len(y_test),
        past_covariates=test[ticker][['high', 'low', 'volume', 'open']],
        show_warnings=False
    )
    mape_ = mape(y_test, y_pred)
    mae_ = mae(y_test, y_pred)
    mapes[ticker] = mape_
    maes[ticker] = mae_
    print(f"Ticker: {ticker}, MAPE: {mape_:.2f}, MAE: {mae_:.2f}")
    print("*" * 50)

Ticker: ADM, MAPE: 4.11, MAE: 2.23
**************************************************
Ticker: BF-B, MAPE: 8.14, MAE: 3.30
**************************************************
Ticker: BG, MAPE: 7.33, MAE: 6.05
**************************************************
Ticker: CAG, MAPE: 2.84, MAE: 0.76
**************************************************
Ticker: CHD, MAPE: 2.68, MAE: 2.82
**************************************************
Ticker: CL, MAPE: 6.82, MAE: 6.61
**************************************************
Ticker: CLX, MAPE: 4.29, MAE: 6.59
**************************************************
Ticker: COST, MAPE: 20.05, MAE: 186.49
**************************************************
Ticker: CPB, MAPE: 5.99, MAE: 2.49
**************************************************
Ticker: DG, MAPE: 5.65, MAE: 5.16
**************************************************
Ticker: DLTR, MAPE: 19.09, MAE: 13.61
**************************************************
Ticker: EL, MAPE: 25.14, MAE: 17.93
*************

In [10]:
# Mean MAPE and mean MAE over all tickers currently held in `mapes` / `maes`.
tuple(np.mean(list(scores.values())) for scores in (mapes, maes))

(9.905617179092381, 11.2246734320959)

### Global model

In [11]:
# Train a single XGBoost model on all tickers at once ("global" model).
print("Training global model")
started_at = time.time()

# Calendar attributes of the time index, generated automatically by darts
# and supplied to the model as future covariates.
calendar_encoders = {
    'datetime_attribute': {
        'future': ['month', 'day', 'dayofweek', 'dayofyear', 'quarter', 'year']
    }
}
model = XGBModel(
    lags=15,
    lags_past_covariates=1,
    lags_future_covariates=[0],
    output_chunk_length=1,
    random_state=42,
    add_encoders=calendar_encoders,
    use_static_covariates=True,
    n_estimators=1000,
    eta=0.01,
    max_depth=8,
    subsample=0.8,
)

# Targets and past covariates are passed as parallel lists, one entry per ticker.
closing_prices = [series['close'] for series in train.values()]
other_components = [series[['high', 'low', 'volume', 'open']] for series in train.values()]
model.fit(
    closing_prices,
    past_covariates=other_components,
)

elapsed = time.time() - started_at
print(f"Global model trained in {elapsed:.2f} seconds")
print("*" * 50)

Training global model
Global model trained in 15.85 seconds
**************************************************


In [12]:
# Evaluate the global XGBoost model ticker by ticker on the hold-out period.
# NOTE(review): this re-initialises `mapes` / `maes`, discarding whatever an
# earlier cell stored under these names.
mapes = {}
maes = {}

for ticker in data.keys():
    # `test[ticker]` begins with the last training point, which was prepended at
    # split time so that the lagged past covariates exist for the first forecast
    # step. Drop it from the ground truth so the forecast horizon equals the
    # hold-out length instead of running one step past the available data.
    y_test = test[ticker]['close'][1:]
    # The global model needs the series to continue from, so the ticker's
    # training target is passed explicitly.
    y_pred = model.predict(
        n=len(y_test),
        series=train[ticker]['close'],
        past_covariates=test[ticker][['high', 'low', 'volume', 'open']],
        show_warnings=False
    )
    mape_ = mape(y_test, y_pred)
    mae_ = mae(y_test, y_pred)

    mapes[ticker] = mape_
    maes[ticker] = mae_

    print(f"Global model for {ticker}, MAPE: {mape_:.2f}, MAE: {mae_:.2f}")
    print("*" * 50)

Global model for ADM, MAPE: 3.38, MAE: 1.72
**************************************************
Global model for BF-B, MAPE: 15.80, MAE: 5.56
**************************************************
Global model for BG, MAPE: 3.03, MAE: 2.59
**************************************************
Global model for CAG, MAPE: 2.20, MAE: 0.60
**************************************************
Global model for CHD, MAPE: 1.46, MAE: 1.52
**************************************************
Global model for CL, MAPE: 4.81, MAE: 4.65
**************************************************
Global model for CLX, MAPE: 2.61, MAE: 3.93
**************************************************
Global model for COST, MAPE: 21.16, MAE: 196.30
**************************************************
Global model for CPB, MAPE: 8.83, MAE: 3.51
**************************************************
Global model for DG, MAPE: 4.12, MAE: 3.67
**************************************************
Global model for DLTR, MAPE: 3.61, MAE: 2.85
**

In [13]:
# Mean MAPE and mean MAE over all tickers currently held in `mapes` / `maes`.
tuple(np.mean(list(scores.values())) for scores in (mapes, maes))

(5.210549585712213, 7.648513957262321)

## Prophet model

In [14]:
# Train one Prophet model per ticker and keep them by ticker.
models_prophet = {}

for ticker in data:
    print(f"Training model for {ticker}")
    started_at = time.time()

    # Calendar attributes of the time index, generated automatically by darts
    # and supplied to the model as future covariates.
    calendar_encoders = {
        'datetime_attribute': {
            'future': ['month', 'day', 'dayofweek', 'dayofyear', 'quarter', 'year']
        }
    }
    model = Prophet(
        add_encoders=calendar_encoders,
        seasonality_mode='multiplicative',
    )

    # Fit on the closing price only; no explicit covariates are passed.
    model.fit(train[ticker]['close'])

    elapsed = time.time() - started_at
    print(f"Model for {ticker} trained in {elapsed:.2f} seconds")
    print("*" * 50)

    models_prophet[ticker] = model

Training model for ADM


21:45:42 - cmdstanpy - INFO - Chain [1] start processing
21:45:45 - cmdstanpy - INFO - Chain [1] done processing


Model for ADM trained in 3.19 seconds
**************************************************
Training model for BF-B


21:45:46 - cmdstanpy - INFO - Chain [1] start processing
21:45:48 - cmdstanpy - INFO - Chain [1] done processing


Model for BF-B trained in 2.66 seconds
**************************************************
Training model for BG


21:45:48 - cmdstanpy - INFO - Chain [1] start processing
21:45:51 - cmdstanpy - INFO - Chain [1] done processing


Model for BG trained in 3.11 seconds
**************************************************
Training model for CAG


21:45:51 - cmdstanpy - INFO - Chain [1] start processing
21:45:54 - cmdstanpy - INFO - Chain [1] done processing


Model for CAG trained in 3.12 seconds
**************************************************
Training model for CHD


21:45:54 - cmdstanpy - INFO - Chain [1] start processing
21:45:57 - cmdstanpy - INFO - Chain [1] done processing


Model for CHD trained in 2.67 seconds
**************************************************
Training model for CL


21:45:57 - cmdstanpy - INFO - Chain [1] start processing
21:46:00 - cmdstanpy - INFO - Chain [1] done processing


Model for CL trained in 3.19 seconds
**************************************************
Training model for CLX


21:46:00 - cmdstanpy - INFO - Chain [1] start processing
21:46:03 - cmdstanpy - INFO - Chain [1] done processing


Model for CLX trained in 3.08 seconds
**************************************************
Training model for COST


21:46:03 - cmdstanpy - INFO - Chain [1] start processing
21:46:05 - cmdstanpy - INFO - Chain [1] done processing


Model for COST trained in 2.35 seconds
**************************************************
Training model for CPB


21:46:06 - cmdstanpy - INFO - Chain [1] start processing
21:46:08 - cmdstanpy - INFO - Chain [1] done processing


Model for CPB trained in 3.05 seconds
**************************************************
Training model for DG


21:46:09 - cmdstanpy - INFO - Chain [1] start processing
21:46:12 - cmdstanpy - INFO - Chain [1] done processing


Model for DG trained in 3.36 seconds
**************************************************
Training model for DLTR


21:46:12 - cmdstanpy - INFO - Chain [1] start processing
21:46:14 - cmdstanpy - INFO - Chain [1] done processing


Model for DLTR trained in 2.51 seconds
**************************************************
Training model for EL


21:46:15 - cmdstanpy - INFO - Chain [1] start processing
21:46:17 - cmdstanpy - INFO - Chain [1] done processing


Model for EL trained in 3.09 seconds
**************************************************
Training model for GIS


21:46:18 - cmdstanpy - INFO - Chain [1] start processing
21:46:20 - cmdstanpy - INFO - Chain [1] done processing


Model for GIS trained in 2.95 seconds
**************************************************
Training model for HRL


21:46:21 - cmdstanpy - INFO - Chain [1] start processing
21:46:24 - cmdstanpy - INFO - Chain [1] done processing


Model for HRL trained in 3.22 seconds
**************************************************
Training model for HSY


21:46:24 - cmdstanpy - INFO - Chain [1] start processing
21:46:27 - cmdstanpy - INFO - Chain [1] done processing


Model for HSY trained in 3.15 seconds
**************************************************
Training model for K


21:46:27 - cmdstanpy - INFO - Chain [1] start processing
21:46:30 - cmdstanpy - INFO - Chain [1] done processing


Model for K trained in 3.27 seconds
**************************************************
Training model for KDP


21:46:30 - cmdstanpy - INFO - Chain [1] start processing
21:46:33 - cmdstanpy - INFO - Chain [1] done processing


Model for KDP trained in 3.09 seconds
**************************************************
Training model for KHC


21:46:33 - cmdstanpy - INFO - Chain [1] start processing
21:46:36 - cmdstanpy - INFO - Chain [1] done processing


Model for KHC trained in 3.17 seconds
**************************************************
Training model for KMB


21:46:37 - cmdstanpy - INFO - Chain [1] start processing
21:46:39 - cmdstanpy - INFO - Chain [1] done processing


Model for KMB trained in 3.22 seconds
**************************************************
Training model for KO


21:46:40 - cmdstanpy - INFO - Chain [1] start processing
21:46:43 - cmdstanpy - INFO - Chain [1] done processing


Model for KO trained in 3.19 seconds
**************************************************
Training model for KR


21:46:43 - cmdstanpy - INFO - Chain [1] start processing
21:46:45 - cmdstanpy - INFO - Chain [1] done processing
21:46:45 - cmdstanpy - INFO - Chain [1] start processing


Model for KR trained in 2.23 seconds
**************************************************
Training model for KVUE


21:46:45 - cmdstanpy - INFO - Chain [1] done processing


Model for KVUE trained in 0.50 seconds
**************************************************
Training model for LW


21:46:46 - cmdstanpy - INFO - Chain [1] start processing
21:46:48 - cmdstanpy - INFO - Chain [1] done processing


Model for LW trained in 2.65 seconds
**************************************************
Training model for MDLZ


21:46:48 - cmdstanpy - INFO - Chain [1] start processing
21:46:51 - cmdstanpy - INFO - Chain [1] done processing


Model for MDLZ trained in 3.10 seconds
**************************************************
Training model for MKC


21:46:51 - cmdstanpy - INFO - Chain [1] start processing
21:46:54 - cmdstanpy - INFO - Chain [1] done processing


Model for MKC trained in 3.13 seconds
**************************************************
Training model for MNST


21:46:55 - cmdstanpy - INFO - Chain [1] start processing
21:46:57 - cmdstanpy - INFO - Chain [1] done processing


Model for MNST trained in 3.14 seconds
**************************************************
Training model for MO


21:46:58 - cmdstanpy - INFO - Chain [1] start processing
21:47:01 - cmdstanpy - INFO - Chain [1] done processing


Model for MO trained in 3.15 seconds
**************************************************
Training model for PEP


21:47:01 - cmdstanpy - INFO - Chain [1] start processing
21:47:04 - cmdstanpy - INFO - Chain [1] done processing


Model for PEP trained in 2.98 seconds
**************************************************
Training model for PG


21:47:04 - cmdstanpy - INFO - Chain [1] start processing
21:47:07 - cmdstanpy - INFO - Chain [1] done processing


Model for PG trained in 3.09 seconds
**************************************************
Training model for PM


21:47:07 - cmdstanpy - INFO - Chain [1] start processing
21:47:10 - cmdstanpy - INFO - Chain [1] done processing


Model for PM trained in 3.25 seconds
**************************************************
Training model for SJM


21:47:10 - cmdstanpy - INFO - Chain [1] start processing
21:47:13 - cmdstanpy - INFO - Chain [1] done processing


Model for SJM trained in 3.14 seconds
**************************************************
Training model for STZ


21:47:13 - cmdstanpy - INFO - Chain [1] start processing
21:47:16 - cmdstanpy - INFO - Chain [1] done processing


Model for STZ trained in 3.23 seconds
**************************************************
Training model for SYY


21:47:17 - cmdstanpy - INFO - Chain [1] start processing
21:47:19 - cmdstanpy - INFO - Chain [1] done processing


Model for SYY trained in 3.15 seconds
**************************************************
Training model for TAP


21:47:20 - cmdstanpy - INFO - Chain [1] start processing
21:47:21 - cmdstanpy - INFO - Chain [1] done processing


Model for TAP trained in 1.62 seconds
**************************************************
Training model for TGT


21:47:21 - cmdstanpy - INFO - Chain [1] start processing
21:47:24 - cmdstanpy - INFO - Chain [1] done processing


Model for TGT trained in 3.14 seconds
**************************************************
Training model for TSN


21:47:24 - cmdstanpy - INFO - Chain [1] start processing
21:47:26 - cmdstanpy - INFO - Chain [1] done processing


Model for TSN trained in 2.30 seconds
**************************************************
Training model for WBA


21:47:27 - cmdstanpy - INFO - Chain [1] start processing
21:47:29 - cmdstanpy - INFO - Chain [1] done processing


Model for WBA trained in 2.84 seconds
**************************************************
Training model for WMT


21:47:30 - cmdstanpy - INFO - Chain [1] start processing
21:47:32 - cmdstanpy - INFO - Chain [1] done processing


Model for WMT trained in 3.09 seconds
**************************************************


In [15]:
# Evaluate every Prophet model on its ticker's hold-out period and collect
# MAPE / MAE per ticker.
mapes_prophet = {}
maes_prophet = {}

for ticker in data.keys():
    # `test[ticker]` begins with the last training point (prepended at split
    # time). Drop it from the ground truth so the forecast horizon equals the
    # hold-out length instead of running one step past the available data.
    y_test = test[ticker]['close'][1:]
    y_pred = models_prophet[ticker].predict(
        n=len(y_test),
        show_warnings=False
    )
    mape_ = mape(y_test, y_pred)
    mae_ = mae(y_test, y_pred)
    mapes_prophet[ticker] = mape_
    maes_prophet[ticker] = mae_
    print(f"Ticker: {ticker}, MAPE: {mape_:.2f}, MAE: {mae_:.2f}")
    print("*" * 50)

Ticker: ADM, MAPE: 47.05, MAE: 24.23
**************************************************
Ticker: BF-B, MAPE: 12.79, MAE: 5.20
**************************************************
Ticker: BG, MAPE: 14.41, MAE: 12.01
**************************************************
Ticker: CAG, MAPE: 52.60, MAE: 14.26
**************************************************
Ticker: CHD, MAPE: 5.45, MAE: 5.61
**************************************************
Ticker: CL, MAPE: 13.50, MAE: 13.00
**************************************************
Ticker: CLX, MAPE: 23.77, MAE: 36.59
**************************************************
Ticker: COST, MAPE: 17.64, MAE: 161.16
**************************************************
Ticker: CPB, MAPE: 47.94, MAE: 20.57
**************************************************
Ticker: DG, MAPE: 48.35, MAE: 44.62
**************************************************
Ticker: DLTR, MAPE: 28.32, MAE: 21.14
**************************************************
Ticker: EL, MAPE: 91.87, MAE: 72.3

In [16]:
# Mean MAPE and mean MAE of the Prophet models over all tickers.
tuple(np.mean(list(scores.values())) for scores in (mapes_prophet, maes_prophet))

(30.188116587585785, 27.689945705883773)

## Theta model

In [17]:
# Train one AutoTheta model per ticker and keep them by ticker.
models_theta = {}

for ticker in data:
    print(f"Training model for {ticker}")
    started_at = time.time()

    # NOTE(review): season_length=30 is hard-coded; confirm that it is the
    # intended seasonal period for a business-day series.
    model = AutoTheta(season_length=30)

    # Fit on the closing price only.
    model.fit(series=train[ticker]['close'])

    elapsed = time.time() - started_at
    print(f"Model for {ticker} trained in {elapsed:.2f} seconds")
    print("*" * 50)

    models_theta[ticker] = model

Training model for ADM
Model for ADM trained in 0.74 seconds
**************************************************
Training model for BF-B
Model for BF-B trained in 0.72 seconds
**************************************************
Training model for BG
Model for BG trained in 0.79 seconds
**************************************************
Training model for CAG
Model for CAG trained in 0.69 seconds
**************************************************
Training model for CHD
Model for CHD trained in 1.07 seconds
**************************************************
Training model for CL
Model for CL trained in 1.10 seconds
**************************************************
Training model for CLX
Model for CLX trained in 0.69 seconds
**************************************************
Training model for COST
Model for COST trained in 1.26 seconds
**************************************************
Training model for CPB
Model for CPB trained in 0.62 seconds
*******************************************

In [18]:
# Evaluate every AutoTheta model on its ticker's hold-out period and collect
# MAPE / MAE per ticker.
mapes_theta = {}
maes_theta = {}

for ticker in data.keys():
    # `test[ticker]` begins with the last training point (prepended at split
    # time). Drop it from the ground truth so the forecast horizon equals the
    # hold-out length instead of running one step past the available data.
    y_test = test[ticker]['close'][1:]
    y_pred = models_theta[ticker].predict(
        n=len(y_test),
        show_warnings=False
    )
    mape_ = mape(y_test, y_pred)
    mae_ = mae(y_test, y_pred)
    mapes_theta[ticker] = mape_
    maes_theta[ticker] = mae_
    print(f"Ticker: {ticker}, MAPE: {mape_:.2f}, MAE: {mae_:.2f}")
    print("*" * 50)

Ticker: ADM, MAPE: 12.35, MAE: 6.13
**************************************************
Ticker: BF-B, MAPE: 20.82, MAE: 7.46
**************************************************
Ticker: BG, MAPE: 17.59, MAE: 14.04
**************************************************
Ticker: CAG, MAPE: 9.36, MAE: 2.45
**************************************************
Ticker: CHD, MAPE: 3.27, MAE: 3.36
**************************************************
Ticker: CL, MAPE: 4.47, MAE: 4.32
**************************************************
Ticker: CLX, MAPE: 8.63, MAE: 13.30
**************************************************
Ticker: COST, MAPE: 15.98, MAE: 148.59
**************************************************
Ticker: CPB, MAPE: 8.41, MAE: 3.46
**************************************************
Ticker: DG, MAPE: 56.09, MAE: 45.05
**************************************************
Ticker: DLTR, MAPE: 54.53, MAE: 39.91
**************************************************
Ticker: EL, MAPE: 70.06, MAE: 51.78
******

In [19]:
# Mean MAPE and mean MAE of the AutoTheta models over all tickers.
tuple(np.mean(list(scores.values())) for scores in (mapes_theta, maes_theta))

(15.666385109203825, 15.507690227460156)

## Linear Regression

### Local model

In [20]:
# Train one linear-regression model per ticker ("local" models) and keep them
# by ticker.
models_lr = {}

for ticker in data:
    print(f"Training model for {ticker}")
    started_at = time.time()

    # Calendar attributes of the time index, generated automatically by darts
    # and supplied to the model as future covariates.
    calendar_encoders = {
        'datetime_attribute': {
            'future': ['month', 'day', 'dayofweek', 'dayofyear', 'quarter', 'year']
        }
    }
    model = LinearRegressionModel(
        lags=15,
        lags_past_covariates=1,
        lags_future_covariates=[0],
        output_chunk_length=1,
        random_state=42,
        add_encoders=calendar_encoders,
    )

    # Target is the closing price; the other price/volume components act as
    # past covariates.
    train_series = train[ticker]
    model.fit(
        train_series['close'],
        past_covariates=train_series[['high', 'low', 'volume', 'open']],
    )

    elapsed = time.time() - started_at
    print(f"Model for {ticker} trained in {elapsed:.2f} seconds")
    print("*" * 50)

    models_lr[ticker] = model

Training model for ADM
Model for ADM trained in 0.13 seconds
**************************************************
Training model for BF-B
Model for BF-B trained in 0.12 seconds
**************************************************
Training model for BG
Model for BG trained in 0.12 seconds
**************************************************
Training model for CAG
Model for CAG trained in 0.12 seconds
**************************************************
Training model for CHD
Model for CHD trained in 0.12 seconds
**************************************************
Training model for CL
Model for CL trained in 0.12 seconds
**************************************************
Training model for CLX
Model for CLX trained in 0.12 seconds
**************************************************
Training model for COST
Model for COST trained in 0.12 seconds
**************************************************
Training model for CPB
Model for CPB trained in 0.12 seconds
*******************************************

In [21]:
# Evaluate every local linear-regression model on its ticker's hold-out period
# and collect MAPE / MAE per ticker.
# NOTE(review): in the recorded output several tickers (e.g. BG, CHD, CLX, CPB)
# show errors of 1e5 and above, which suggests the multi-step forecast diverges
# for those series; investigate before relying on these scores.
mapes_lr = {}
maes_lr = {}

for ticker in data.keys():
    # `test[ticker]` begins with the last training point, which was prepended at
    # split time so that the lagged past covariates exist for the first forecast
    # step. Drop it from the ground truth so the forecast horizon equals the
    # hold-out length instead of running one step past the available data.
    y_test = test[ticker]['close'][1:]
    # No `series` is passed, so the forecast continues from the series the
    # model was fitted on.
    y_pred = models_lr[ticker].predict(
        n=len(y_test),
        past_covariates=test[ticker][['high', 'low', 'volume', 'open']],
        show_warnings=False
    )
    mape_ = mape(y_test, y_pred)
    mae_ = mae(y_test, y_pred)
    mapes_lr[ticker] = mape_
    maes_lr[ticker] = mae_
    print(f"Ticker: {ticker}, MAPE: {mape_:.2f}, MAE: {mae_:.2f}")
    print("*" * 50)

Ticker: ADM, MAPE: 2.42, MAE: 1.26
**************************************************
Ticker: BF-B, MAPE: 4.97, MAE: 1.89
**************************************************
Ticker: BG, MAPE: 406650594.14, MAE: 316128195.45
**************************************************
Ticker: CAG, MAPE: 2.70, MAE: 0.73
**************************************************
Ticker: CHD, MAPE: 2423675457.13, MAE: 2441418390.13
**************************************************
Ticker: CL, MAPE: 2.06, MAE: 1.96
**************************************************
Ticker: CLX, MAPE: 17445009.72, MAE: 24546607.73
**************************************************
Ticker: COST, MAPE: 2.75, MAE: 25.30
**************************************************
Ticker: CPB, MAPE: 975199.46, MAE: 365300.30
**************************************************
Ticker: DG, MAPE: 3.52, MAE: 3.20
**************************************************
Ticker: DLTR, MAPE: 5.87, MAE: 4.49
**********************************************

In [22]:
# Mean MAPE and mean MAE of the local linear-regression models over all tickers.
# NOTE(review): in the recorded run a few tickers have errors of 1e5 and above,
# so these means mostly reflect those outliers.
tuple(np.mean(list(scores.values())) for scores in (mapes_lr, maes_lr))

(82038505.32314461, 77356254.77204046)

### Global model

In [23]:
# Train a single linear-regression model on all tickers at once ("global" model).
print("Training global model")
started_at = time.time()

# Calendar attributes of the time index, generated automatically by darts
# and supplied to the model as future covariates.
calendar_encoders = {
    'datetime_attribute': {
        'future': ['month', 'day', 'dayofweek', 'dayofyear', 'quarter', 'year']
    }
}
model = LinearRegressionModel(
    lags=15,
    lags_past_covariates=1,
    lags_future_covariates=[0],
    output_chunk_length=1,
    random_state=42,
    add_encoders=calendar_encoders,
)

# Targets and past covariates are passed as parallel lists, one entry per ticker.
closing_prices = [series['close'] for series in train.values()]
other_components = [series[['high', 'low', 'volume', 'open']] for series in train.values()]
model.fit(
    closing_prices,
    past_covariates=other_components,
)

elapsed = time.time() - started_at
print(f"Global model trained in {elapsed:.2f} seconds")
print("*" * 50)

Training global model
Global model trained in 4.64 seconds
**************************************************


In [24]:
# Evaluate the global linear-regression model ticker by ticker on the hold-out
# period and collect MAPE / MAE per ticker.
mapes_lr_global = {}
maes_lr_global = {}

for ticker in data.keys():
    # `test[ticker]` begins with the last training point, which was prepended at
    # split time so that the lagged past covariates exist for the first forecast
    # step. Drop it from the ground truth so the forecast horizon equals the
    # hold-out length instead of running one step past the available data.
    y_test = test[ticker]['close'][1:]
    # The global model needs the series to continue from, so the ticker's
    # training target is passed explicitly.
    y_pred = model.predict(
        n=len(y_test),
        series=train[ticker]['close'],
        past_covariates=test[ticker][['high', 'low', 'volume', 'open']],
        show_warnings=False
    )
    mape_ = mape(y_test, y_pred)
    mae_ = mae(y_test, y_pred)

    mapes_lr_global[ticker] = mape_
    maes_lr_global[ticker] = mae_

    print(f"Global model for {ticker}, MAPE: {mape_:.2f}, MAE: {mae_:.2f}")
    print("*" * 50)

Global model for ADM, MAPE: 2.33, MAE: 1.22
**************************************************
Global model for BF-B, MAPE: 3.36, MAE: 1.31
**************************************************
Global model for BG, MAPE: 2.90, MAE: 2.51
**************************************************
Global model for CAG, MAPE: 2.18, MAE: 0.60
**************************************************
Global model for CHD, MAPE: 1.88, MAE: 1.96
**************************************************
Global model for CL, MAPE: 1.95, MAE: 1.83
**************************************************
Global model for CLX, MAPE: 2.20, MAE: 3.22
**************************************************
Global model for COST, MAPE: 2.55, MAE: 23.26
**************************************************
Global model for CPB, MAPE: 2.49, MAE: 1.06
**************************************************
Global model for DG, MAPE: 4.35, MAE: 3.88
**************************************************
Global model for DLTR, MAPE: 4.46, MAE: 3.50
*****

In [25]:
# Mean MAPE and mean MAE of the global linear-regression model over all tickers.
tuple(np.mean(list(scores.values())) for scores in (mapes_lr_global, maes_lr_global))

(2.677161687275213, 2.684651877776074)

## ARIMA

In [28]:
# Train one AutoARIMA model (default settings) per ticker and keep them by ticker.
models_arima = {}

for ticker in data:
    print(f"Training model for {ticker}")
    started_at = time.time()

    model = AutoARIMA()

    # Fit on the closing price only.
    model.fit(series=train[ticker]['close'])

    elapsed = time.time() - started_at
    print(f"Model for {ticker} trained in {elapsed:.2f} seconds")
    print("*" * 50)

    models_arima[ticker] = model

Training model for ADM
Model for ADM trained in 0.19 seconds
**************************************************
Training model for BF-B
Model for BF-B trained in 0.22 seconds
**************************************************
Training model for BG
Model for BG trained in 1.35 seconds
**************************************************
Training model for CAG
Model for CAG trained in 0.79 seconds
**************************************************
Training model for CHD
Model for CHD trained in 0.33 seconds
**************************************************
Training model for CL
Model for CL trained in 0.76 seconds
**************************************************
Training model for CLX
Model for CLX trained in 1.42 seconds
**************************************************
Training model for COST
Model for COST trained in 1.67 seconds
**************************************************
Training model for CPB
Model for CPB trained in 0.21 seconds
*******************************************

In [29]:
# Evaluate every AutoARIMA model on its ticker's hold-out period and collect
# MAPE / MAE per ticker.
mapes_arima = {}
maes_arima = {}

for ticker in data.keys():
    # `test[ticker]` begins with the last training point (prepended at split
    # time). Drop it from the ground truth so the forecast horizon equals the
    # hold-out length instead of running one step past the available data.
    y_test = test[ticker]['close'][1:]
    y_pred = models_arima[ticker].predict(
        n=len(y_test),
        show_warnings=False
    )
    mape_ = mape(y_test, y_pred)
    mae_ = mae(y_test, y_pred)
    mapes_arima[ticker] = mape_
    maes_arima[ticker] = mae_
    print(f"Ticker: {ticker}, MAPE: {mape_:.2f}, MAE: {mae_:.2f}")
    print("*" * 50)

Ticker: ADM, MAPE: 10.29, MAE: 5.15
**************************************************
Ticker: BF-B, MAPE: 17.54, MAE: 6.26
**************************************************
Ticker: BG, MAPE: 15.84, MAE: 12.68
**************************************************
Ticker: CAG, MAPE: 8.89, MAE: 2.33
**************************************************
Ticker: CHD, MAPE: 4.34, MAE: 4.46
**************************************************
Ticker: CL, MAPE: 4.59, MAE: 4.47
**************************************************
Ticker: CLX, MAPE: 9.30, MAE: 14.31
**************************************************
Ticker: COST, MAPE: 15.89, MAE: 147.64
**************************************************
Ticker: CPB, MAPE: 8.38, MAE: 3.45
**************************************************
Ticker: DG, MAPE: 50.12, MAE: 40.17
**************************************************
Ticker: DLTR, MAPE: 50.56, MAE: 36.93
**************************************************
Ticker: EL, MAPE: 62.52, MAE: 46.28
******

In [30]:
# Mean MAPE and mean MAE of the AutoARIMA models over all tickers.
tuple(np.mean(list(scores.values())) for scores in (mapes_arima, maes_arima))

(15.228659909266556, 14.79272391872359)