## Model training and tuning for the financials sector

In [1]:
import pandas as pd
import numpy as np

import time

from darts import TimeSeries, concatenate
from darts.utils.model_selection import train_test_split
from darts.models.forecasting.xgboost import XGBModel
from darts.dataprocessing.transformers.scaler import Scaler
from darts.metrics.metrics import mape, mae
from darts.models.forecasting.prophet_model import Prophet
from darts.models.forecasting.sf_auto_theta import AutoTheta
from darts.models.forecasting.linear_regression_model import LinearRegressionModel
from darts.dataprocessing.transformers import StaticCovariatesTransformer
from darts.dataprocessing.transformers.boxcox import BoxCox

Importing plotly failed. Interactive plots will not work.
  __import__("pkg_resources").declare_namespace(__name__)  # type: ignore


In [2]:
# Read the cleaned S&P 500 price table and convert `date` to datetimes in one chain.
df = (
    pd.read_csv('../../data/processed/sp500_all_companies_cleaned_with_sector.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

# Keep only the Financials rows, ordered by ticker and then by date, with a fresh 0..n-1 index.
df_financials = (
    df.loc[df['GICS Sector'] == 'Financials']
    .sort_values(by=['ticker', 'date'])
    .reset_index(drop=True)
)

In [30]:
# Number of rows per GICS sector in the full (unfiltered) frame.
df.loc[:, 'GICS Sector'].value_counts()

GICS Sector
Industrials               193459
Financials                190485
Information Technology    177149
Health Care               151348
Consumer Discretionary    130185
Consumer Staples           96649
Real Estate                79757
Utilities                  78786
Materials                  63394
Energy                     58523
Communication Services     58017
Name: count, dtype: int64

In [3]:
# Distinct ticker symbols present in the Financials subset.
df_financials.loc[:, 'ticker'].unique()

array(['ACGL', 'AFL', 'AIG', 'AIZ', 'AJG', 'ALL', 'AMP', 'AON', 'APO',
       'AXP', 'BAC', 'BEN', 'BK', 'BLK', 'BRK-B', 'BRO', 'BX', 'C', 'CB',
       'CBOE', 'CFG', 'CINF', 'CME', 'COF', 'CPAY', 'DFS', 'EG', 'ERIE',
       'FDS', 'FI', 'FIS', 'FITB', 'GL', 'GPN', 'GS', 'HBAN', 'HIG',
       'ICE', 'IVZ', 'JKHY', 'JPM', 'KEY', 'KKR', 'L', 'MA', 'MCO', 'MET',
       'MKTX', 'MMC', 'MS', 'MSCI', 'MTB', 'NDAQ', 'NTRS', 'PFG', 'PGR',
       'PNC', 'PRU', 'PYPL', 'RF', 'RJF', 'SCHW', 'SPGI', 'STT', 'SYF',
       'TFC', 'TROW', 'TRV', 'USB', 'V', 'WFC', 'WRB', 'WTW'],
      dtype=object)

In [4]:
# One-column frame of the distinct tickers (not referenced by the cells visible here).
static_covs = pd.DataFrame({'ticker': df_financials['ticker'].unique()})

# Map ticker -> multivariate TimeSeries (close/high/low/open/volume) on a
# business-day frequency, with the ticker symbol attached as a static covariate.
data = {
    ticker: TimeSeries.from_dataframe(
        group,
        time_col='date',
        value_cols=['close', 'high', 'low', 'open', 'volume'],
        freq='B',
        static_covariates=pd.Series(ticker),
    )
    for ticker, group in df_financials.groupby('ticker')
}

In [5]:
# Encode each ticker's static covariate.
# The transformer is fitted once on all series together, then applied to each
# series individually.
transformer = StaticCovariatesTransformer()
ts = list(data.values())
transformer.fit(ts)

# Build every transformed series first, then write them back into the same dict.
data.update({
    symbol: transformer.transform(ticker_series)
    for symbol, ticker_series in data.items()
})

In [6]:
# Per-ticker split: train[ticker] holds the first part of each series and
# test[ticker] the held-out part (test_size=0.1).
train, test = {}, {}

for ticker, series in data.items():
    train_part, test_part = train_test_split(series, test_size=0.1)
    train[ticker] = train_part
    # Prefix the test series with the last training point, as our forecast requires it.
    test[ticker] = train_part[-1:].append(test_part)

## Gradient Boosting

### Train a local model first

In [7]:
# Fit one independent ("local") XGBoost forecaster per ticker and keep them in
# `models`, keyed by ticker symbol.
models = {}

for ticker in data:
    print(f"Training model for {ticker}")
    start = time.time()

    # Calendar features are generated automatically from the time index and
    # supplied to the model as future covariates.
    calendar_encoders = {
        'datetime_attribute': {
            'future': ['month', 'day', 'dayofweek', 'dayofyear', 'quarter', 'year']
        }
    }
    model = XGBModel(
        lags=15,                     # number of target lags passed to the model
        lags_past_covariates=1,      # past-covariate lag setting
        lags_future_covariates=[0],  # future-covariate lag setting
        output_chunk_length=1,
        random_state=42,
        add_encoders=calendar_encoders,
    )

    ticker_train = train[ticker]
    model.fit(
        series=ticker_train['close'],
        past_covariates=ticker_train[['high', 'low', 'volume', 'open']],
    )

    end = time.time()
    print(f"Model for {ticker} trained in {end - start:.2f} seconds")
    print("*" * 50)

    models[ticker] = model

Training model for ACGL
Model for ACGL trained in 0.30 seconds
**************************************************
Training model for AFL
Model for AFL trained in 0.33 seconds
**************************************************
Training model for AIG
Model for AIG trained in 0.34 seconds
**************************************************
Training model for AIZ
Model for AIZ trained in 0.37 seconds
**************************************************
Training model for AJG
Model for AJG trained in 0.31 seconds
**************************************************
Training model for ALL
Model for ALL trained in 0.34 seconds
**************************************************
Training model for AMP
Model for AMP trained in 0.35 seconds
**************************************************
Training model for AON
Model for AON trained in 0.31 seconds
**************************************************
Training model for APO
Model for APO trained in 0.32 seconds
*****************************************

In [8]:
# Score every local XGBoost model on its ticker's held-out series.
# NOTE(review): test[ticker] starts with the last training point and the
# forecast horizon is len(y_test); this presumably relies on the darts metrics
# aligning both series on their shared timestamps - confirm.
mapes, maes = {}, {}

for ticker in data:
    holdout = test[ticker]
    y_test = holdout['close']
    y_pred = models[ticker].predict(
        n=len(y_test),
        past_covariates=holdout[['high', 'low', 'volume', 'open']],
        show_warnings=False,
    )

    mape_, mae_ = mape(y_test, y_pred), mae(y_test, y_pred)
    mapes[ticker], maes[ticker] = mape_, mae_

    print(f"Ticker: {ticker}, MAPE: {mape_:.2f}, MAE: {mae_:.2f}")
    print("*" * 50)

Ticker: ACGL, MAPE: 7.81, MAE: 7.75
**************************************************
Ticker: AFL, MAPE: 32.77, MAE: 34.09
**************************************************
Ticker: AIG, MAPE: 2.96, MAE: 2.34
**************************************************
Ticker: AIZ, MAPE: 18.79, MAE: 37.93
**************************************************
Ticker: AJG, MAPE: 15.83, MAE: 48.01
**************************************************
Ticker: ALL, MAPE: 10.08, MAE: 19.28
**************************************************
Ticker: AMP, MAPE: 13.20, MAE: 67.99
**************************************************
Ticker: AON, MAPE: 12.24, MAE: 45.14
**************************************************
Ticker: APO, MAPE: 16.88, MAE: 25.89
**************************************************
Ticker: AXP, MAPE: 14.37, MAE: 40.93
**************************************************
Ticker: BAC, MAPE: 12.76, MAE: 5.49
**************************************************
Ticker: BEN, MAPE: 13.99, MAE: 2.73


In [9]:
# (mean MAPE, mean MAE) across all tickers for the metrics currently held in mapes/maes.
tuple(np.mean(list(scores.values())) for scores in (mapes, maes))

(11.895948727266521, 24.677879089768687)

### Global model

In [10]:
# Train a single ("global") XGBoost forecaster on the training series of every
# ticker at once, with static covariates enabled.
print(f"Training global model")
start = time.time()

# Gradient-boosting settings passed to XGBModel as extra keyword arguments.
booster_params = {
    'n_estimators': 1000,
    'eta': 0.01,
    'max_depth': 8,
    'subsample': 0.8,
}

model = XGBModel(
    lags=15,
    lags_past_covariates=1,
    lags_future_covariates=[0],
    output_chunk_length=1,
    random_state=42,
    add_encoders={
        'datetime_attribute': {
            # calendar features generated automatically from the time index
            'future': ['month', 'day', 'dayofweek', 'dayofyear', 'quarter', 'year']
        }
    },
    use_static_covariates=True,
    **booster_params,
)

# Targets and past covariates are passed as parallel lists, one entry per ticker.
train_series = list(train.values())
model.fit(
    series=[one_series['close'] for one_series in train_series],
    past_covariates=[
        one_series[['high', 'low', 'volume', 'open']] for one_series in train_series
    ],
)

end = time.time()
print(f"Global model trained in {end - start:.2f} seconds")
print("*" * 50)

Training global model
Global model trained in 22.04 seconds
**************************************************


In [11]:
# Score the global model per ticker. Because it was fitted on many series, the
# ticker's own training series is passed explicitly to predict().
# NOTE: `mapes` / `maes` are also assigned by the local XGBoost evaluation cell,
# so running this cell replaces those results.
mapes, maes = {}, {}

for ticker in data:
    holdout = test[ticker]
    y_test = holdout['close']
    y_pred = model.predict(
        n=len(y_test),
        series=train[ticker]['close'],
        past_covariates=holdout[['high', 'low', 'volume', 'open']],
        show_warnings=False,
    )

    mape_, mae_ = mape(y_test, y_pred), mae(y_test, y_pred)
    mapes[ticker], maes[ticker] = mape_, mae_

    print(f"Global model for {ticker}, MAPE: {mape_:.2f}, MAE: {mae_:.2f}")
    print("*" * 50)

Global model for ACGL, MAPE: 2.28, MAE: 2.19
**************************************************
Global model for AFL, MAPE: 2.88, MAE: 2.94
**************************************************
Global model for AIG, MAPE: 2.04, MAE: 1.54
**************************************************
Global model for AIZ, MAPE: 1.91, MAE: 3.70
**************************************************
Global model for AJG, MAPE: 1.78, MAE: 5.24
**************************************************
Global model for ALL, MAPE: 1.76, MAE: 3.24
**************************************************
Global model for AMP, MAPE: 8.00, MAE: 39.88
**************************************************
Global model for AON, MAPE: 1.75, MAE: 6.09
**************************************************
Global model for APO, MAPE: 5.57, MAE: 7.41
**************************************************
Global model for AXP, MAPE: 2.01, MAE: 5.31
**************************************************
Global model for BAC, MAPE: 2.41, MAE: 0.98
****

In [12]:
# (mean MAPE, mean MAE) across all tickers for the metrics currently held in mapes/maes.
tuple(np.mean(list(scores.values())) for scores in (mapes, maes))

(3.1863993367901537, 8.485099203952252)

## Prophet model

In [13]:
# Fit one Prophet model per ticker on the closing price only and keep them in
# `models_prophet`, keyed by ticker symbol.
models_prophet = {}

for ticker in data:
    print(f"Training model for {ticker}")
    start = time.time()

    # Calendar features are generated automatically from the time index and
    # supplied to the model as future covariates.
    calendar_encoders = {
        'datetime_attribute': {
            'future': ['month', 'day', 'dayofweek', 'dayofyear', 'quarter', 'year']
        }
    }
    model = Prophet(
        add_encoders=calendar_encoders,
        seasonality_mode='multiplicative',
    )

    model.fit(series=train[ticker]['close'])

    end = time.time()
    print(f"Model for {ticker} trained in {end - start:.2f} seconds")
    print("*" * 50)

    models_prophet[ticker] = model

Training model for ACGL


21:15:50 - cmdstanpy - INFO - Chain [1] start processing
21:15:53 - cmdstanpy - INFO - Chain [1] done processing


Model for ACGL trained in 3.03 seconds
**************************************************
Training model for AFL


21:15:53 - cmdstanpy - INFO - Chain [1] start processing
21:15:55 - cmdstanpy - INFO - Chain [1] done processing


Model for AFL trained in 2.45 seconds
**************************************************
Training model for AIG


21:15:56 - cmdstanpy - INFO - Chain [1] start processing
21:15:58 - cmdstanpy - INFO - Chain [1] done processing


Model for AIG trained in 2.84 seconds
**************************************************
Training model for AIZ


21:15:59 - cmdstanpy - INFO - Chain [1] start processing
21:16:00 - cmdstanpy - INFO - Chain [1] done processing


Model for AIZ trained in 1.93 seconds
**************************************************
Training model for AJG


21:16:01 - cmdstanpy - INFO - Chain [1] start processing
21:16:03 - cmdstanpy - INFO - Chain [1] done processing


Model for AJG trained in 2.88 seconds
**************************************************
Training model for ALL


21:16:03 - cmdstanpy - INFO - Chain [1] start processing
21:16:06 - cmdstanpy - INFO - Chain [1] done processing


Model for ALL trained in 2.71 seconds
**************************************************
Training model for AMP


21:16:06 - cmdstanpy - INFO - Chain [1] start processing
21:16:09 - cmdstanpy - INFO - Chain [1] done processing


Model for AMP trained in 3.05 seconds
**************************************************
Training model for AON


21:16:09 - cmdstanpy - INFO - Chain [1] start processing
21:16:12 - cmdstanpy - INFO - Chain [1] done processing


Model for AON trained in 3.13 seconds
**************************************************
Training model for APO


21:16:12 - cmdstanpy - INFO - Chain [1] start processing
21:16:14 - cmdstanpy - INFO - Chain [1] done processing


Model for APO trained in 2.28 seconds
**************************************************
Training model for AXP


21:16:15 - cmdstanpy - INFO - Chain [1] start processing
21:16:17 - cmdstanpy - INFO - Chain [1] done processing


Model for AXP trained in 2.37 seconds
**************************************************
Training model for BAC


21:16:17 - cmdstanpy - INFO - Chain [1] start processing
21:16:19 - cmdstanpy - INFO - Chain [1] done processing


Model for BAC trained in 2.03 seconds
**************************************************
Training model for BEN


21:16:19 - cmdstanpy - INFO - Chain [1] start processing
21:16:21 - cmdstanpy - INFO - Chain [1] done processing


Model for BEN trained in 2.53 seconds
**************************************************
Training model for BK


21:16:21 - cmdstanpy - INFO - Chain [1] start processing
21:16:24 - cmdstanpy - INFO - Chain [1] done processing


Model for BK trained in 2.53 seconds
**************************************************
Training model for BLK


21:16:24 - cmdstanpy - INFO - Chain [1] start processing
21:16:27 - cmdstanpy - INFO - Chain [1] done processing


Model for BLK trained in 3.22 seconds
**************************************************
Training model for BRK-B


21:16:27 - cmdstanpy - INFO - Chain [1] start processing
21:16:30 - cmdstanpy - INFO - Chain [1] done processing


Model for BRK-B trained in 2.84 seconds
**************************************************
Training model for BRO


21:16:30 - cmdstanpy - INFO - Chain [1] start processing
21:16:32 - cmdstanpy - INFO - Chain [1] done processing


Model for BRO trained in 2.46 seconds
**************************************************
Training model for BX


21:16:33 - cmdstanpy - INFO - Chain [1] start processing
21:16:34 - cmdstanpy - INFO - Chain [1] done processing


Model for BX trained in 1.71 seconds
**************************************************
Training model for C


21:16:34 - cmdstanpy - INFO - Chain [1] start processing
21:16:37 - cmdstanpy - INFO - Chain [1] done processing


Model for C trained in 3.11 seconds
**************************************************
Training model for CB


21:16:37 - cmdstanpy - INFO - Chain [1] start processing
21:16:40 - cmdstanpy - INFO - Chain [1] done processing


Model for CB trained in 2.69 seconds
**************************************************
Training model for CBOE


21:16:40 - cmdstanpy - INFO - Chain [1] start processing
21:16:42 - cmdstanpy - INFO - Chain [1] done processing


Model for CBOE trained in 2.02 seconds
**************************************************
Training model for CFG


21:16:42 - cmdstanpy - INFO - Chain [1] start processing
21:16:45 - cmdstanpy - INFO - Chain [1] done processing


Model for CFG trained in 3.18 seconds
**************************************************
Training model for CINF


21:16:45 - cmdstanpy - INFO - Chain [1] start processing
21:16:48 - cmdstanpy - INFO - Chain [1] done processing


Model for CINF trained in 3.11 seconds
**************************************************
Training model for CME


21:16:48 - cmdstanpy - INFO - Chain [1] start processing
21:16:50 - cmdstanpy - INFO - Chain [1] done processing


Model for CME trained in 2.35 seconds
**************************************************
Training model for COF


21:16:51 - cmdstanpy - INFO - Chain [1] start processing
21:16:54 - cmdstanpy - INFO - Chain [1] done processing


Model for COF trained in 3.20 seconds
**************************************************
Training model for CPAY


21:16:54 - cmdstanpy - INFO - Chain [1] start processing
21:16:57 - cmdstanpy - INFO - Chain [1] done processing


Model for CPAY trained in 3.17 seconds
**************************************************
Training model for DFS


21:16:57 - cmdstanpy - INFO - Chain [1] start processing
21:17:00 - cmdstanpy - INFO - Chain [1] done processing


Model for DFS trained in 3.06 seconds
**************************************************
Training model for EG


21:17:00 - cmdstanpy - INFO - Chain [1] start processing
21:17:03 - cmdstanpy - INFO - Chain [1] done processing


Model for EG trained in 3.18 seconds
**************************************************
Training model for ERIE


21:17:03 - cmdstanpy - INFO - Chain [1] start processing
21:17:05 - cmdstanpy - INFO - Chain [1] done processing


Model for ERIE trained in 1.58 seconds
**************************************************
Training model for FDS


21:17:05 - cmdstanpy - INFO - Chain [1] start processing
21:17:08 - cmdstanpy - INFO - Chain [1] done processing


Model for FDS trained in 3.11 seconds
**************************************************
Training model for FI


21:17:08 - cmdstanpy - INFO - Chain [1] start processing
21:17:10 - cmdstanpy - INFO - Chain [1] done processing


Model for FI trained in 1.91 seconds
**************************************************
Training model for FIS


21:17:10 - cmdstanpy - INFO - Chain [1] start processing
21:17:12 - cmdstanpy - INFO - Chain [1] done processing


Model for FIS trained in 2.31 seconds
**************************************************
Training model for FITB


21:17:12 - cmdstanpy - INFO - Chain [1] start processing
21:17:15 - cmdstanpy - INFO - Chain [1] done processing


Model for FITB trained in 3.36 seconds
**************************************************
Training model for GL


21:17:16 - cmdstanpy - INFO - Chain [1] start processing
21:17:18 - cmdstanpy - INFO - Chain [1] done processing


Model for GL trained in 3.17 seconds
**************************************************
Training model for GPN


21:17:19 - cmdstanpy - INFO - Chain [1] start processing
21:17:22 - cmdstanpy - INFO - Chain [1] done processing


Model for GPN trained in 3.31 seconds
**************************************************
Training model for GS


21:17:22 - cmdstanpy - INFO - Chain [1] start processing
21:17:25 - cmdstanpy - INFO - Chain [1] done processing


Model for GS trained in 3.13 seconds
**************************************************
Training model for HBAN


21:17:25 - cmdstanpy - INFO - Chain [1] start processing
21:17:28 - cmdstanpy - INFO - Chain [1] done processing


Model for HBAN trained in 2.81 seconds
**************************************************
Training model for HIG


21:17:28 - cmdstanpy - INFO - Chain [1] start processing
21:17:29 - cmdstanpy - INFO - Chain [1] done processing


Model for HIG trained in 1.59 seconds
**************************************************
Training model for ICE


21:17:30 - cmdstanpy - INFO - Chain [1] start processing
21:17:32 - cmdstanpy - INFO - Chain [1] done processing


Model for ICE trained in 2.40 seconds
**************************************************
Training model for IVZ


21:17:32 - cmdstanpy - INFO - Chain [1] start processing
21:17:34 - cmdstanpy - INFO - Chain [1] done processing


Model for IVZ trained in 2.41 seconds
**************************************************
Training model for JKHY


21:17:34 - cmdstanpy - INFO - Chain [1] start processing
21:17:37 - cmdstanpy - INFO - Chain [1] done processing


Model for JKHY trained in 3.16 seconds
**************************************************
Training model for JPM


21:17:38 - cmdstanpy - INFO - Chain [1] start processing
21:17:40 - cmdstanpy - INFO - Chain [1] done processing


Model for JPM trained in 3.05 seconds
**************************************************
Training model for KEY


21:17:41 - cmdstanpy - INFO - Chain [1] start processing
21:17:43 - cmdstanpy - INFO - Chain [1] done processing


Model for KEY trained in 2.86 seconds
**************************************************
Training model for KKR


21:17:43 - cmdstanpy - INFO - Chain [1] start processing
21:17:46 - cmdstanpy - INFO - Chain [1] done processing


Model for KKR trained in 2.45 seconds
**************************************************
Training model for L


21:17:46 - cmdstanpy - INFO - Chain [1] start processing
21:17:49 - cmdstanpy - INFO - Chain [1] done processing


Model for L trained in 3.12 seconds
**************************************************
Training model for MA


21:17:49 - cmdstanpy - INFO - Chain [1] start processing
21:17:51 - cmdstanpy - INFO - Chain [1] done processing


Model for MA trained in 2.60 seconds
**************************************************
Training model for MCO


21:17:52 - cmdstanpy - INFO - Chain [1] start processing
21:17:54 - cmdstanpy - INFO - Chain [1] done processing


Model for MCO trained in 2.63 seconds
**************************************************
Training model for MET


21:17:54 - cmdstanpy - INFO - Chain [1] start processing
21:17:55 - cmdstanpy - INFO - Chain [1] done processing


Model for MET trained in 1.10 seconds
**************************************************
Training model for MKTX


21:17:55 - cmdstanpy - INFO - Chain [1] start processing
21:17:58 - cmdstanpy - INFO - Chain [1] done processing


Model for MKTX trained in 3.27 seconds
**************************************************
Training model for MMC


21:17:59 - cmdstanpy - INFO - Chain [1] start processing
21:18:01 - cmdstanpy - INFO - Chain [1] done processing


Model for MMC trained in 2.75 seconds
**************************************************
Training model for MS


21:18:01 - cmdstanpy - INFO - Chain [1] start processing
21:18:04 - cmdstanpy - INFO - Chain [1] done processing


Model for MS trained in 3.02 seconds
**************************************************
Training model for MSCI


21:18:04 - cmdstanpy - INFO - Chain [1] start processing
21:18:07 - cmdstanpy - INFO - Chain [1] done processing


Model for MSCI trained in 3.09 seconds
**************************************************
Training model for MTB


21:18:08 - cmdstanpy - INFO - Chain [1] start processing
21:18:10 - cmdstanpy - INFO - Chain [1] done processing


Model for MTB trained in 3.23 seconds
**************************************************
Training model for NDAQ


21:18:11 - cmdstanpy - INFO - Chain [1] start processing
21:18:13 - cmdstanpy - INFO - Chain [1] done processing


Model for NDAQ trained in 2.94 seconds
**************************************************
Training model for NTRS


21:18:14 - cmdstanpy - INFO - Chain [1] start processing
21:18:16 - cmdstanpy - INFO - Chain [1] done processing


Model for NTRS trained in 2.89 seconds
**************************************************
Training model for PFG


21:18:17 - cmdstanpy - INFO - Chain [1] start processing
21:18:19 - cmdstanpy - INFO - Chain [1] done processing


Model for PFG trained in 3.21 seconds
**************************************************
Training model for PGR


21:18:20 - cmdstanpy - INFO - Chain [1] start processing
21:18:22 - cmdstanpy - INFO - Chain [1] done processing


Model for PGR trained in 2.26 seconds
**************************************************
Training model for PNC


21:18:22 - cmdstanpy - INFO - Chain [1] start processing
21:18:25 - cmdstanpy - INFO - Chain [1] done processing


Model for PNC trained in 3.02 seconds
**************************************************
Training model for PRU


21:18:25 - cmdstanpy - INFO - Chain [1] start processing
21:18:28 - cmdstanpy - INFO - Chain [1] done processing


Model for PRU trained in 3.42 seconds
**************************************************
Training model for PYPL


21:18:28 - cmdstanpy - INFO - Chain [1] start processing
21:18:31 - cmdstanpy - INFO - Chain [1] done processing


Model for PYPL trained in 2.93 seconds
**************************************************
Training model for RF


21:18:31 - cmdstanpy - INFO - Chain [1] start processing
21:18:33 - cmdstanpy - INFO - Chain [1] done processing


Model for RF trained in 1.66 seconds
**************************************************
Training model for RJF


21:18:33 - cmdstanpy - INFO - Chain [1] start processing
21:18:35 - cmdstanpy - INFO - Chain [1] done processing


Model for RJF trained in 2.24 seconds
**************************************************
Training model for SCHW


21:18:35 - cmdstanpy - INFO - Chain [1] start processing
21:18:38 - cmdstanpy - INFO - Chain [1] done processing


Model for SCHW trained in 2.72 seconds
**************************************************
Training model for SPGI


21:18:38 - cmdstanpy - INFO - Chain [1] start processing
21:18:40 - cmdstanpy - INFO - Chain [1] done processing


Model for SPGI trained in 2.46 seconds
**************************************************
Training model for STT


21:18:41 - cmdstanpy - INFO - Chain [1] start processing
21:18:43 - cmdstanpy - INFO - Chain [1] done processing


Model for STT trained in 3.01 seconds
**************************************************
Training model for SYF


21:18:44 - cmdstanpy - INFO - Chain [1] start processing
21:18:46 - cmdstanpy - INFO - Chain [1] done processing


Model for SYF trained in 2.90 seconds
**************************************************
Training model for TFC


21:18:46 - cmdstanpy - INFO - Chain [1] start processing
21:18:49 - cmdstanpy - INFO - Chain [1] done processing


Model for TFC trained in 3.39 seconds
**************************************************
Training model for TROW


21:18:50 - cmdstanpy - INFO - Chain [1] start processing
21:18:52 - cmdstanpy - INFO - Chain [1] done processing


Model for TROW trained in 2.75 seconds
**************************************************
Training model for TRV


21:18:53 - cmdstanpy - INFO - Chain [1] start processing
21:18:55 - cmdstanpy - INFO - Chain [1] done processing


Model for TRV trained in 2.42 seconds
**************************************************
Training model for USB


21:18:55 - cmdstanpy - INFO - Chain [1] start processing
21:18:58 - cmdstanpy - INFO - Chain [1] done processing


Model for USB trained in 3.26 seconds
**************************************************
Training model for V


21:18:58 - cmdstanpy - INFO - Chain [1] start processing
21:19:01 - cmdstanpy - INFO - Chain [1] done processing


Model for V trained in 3.33 seconds
**************************************************
Training model for WFC


21:19:02 - cmdstanpy - INFO - Chain [1] start processing
21:19:04 - cmdstanpy - INFO - Chain [1] done processing


Model for WFC trained in 3.13 seconds
**************************************************
Training model for WRB


21:19:05 - cmdstanpy - INFO - Chain [1] start processing
21:19:06 - cmdstanpy - INFO - Chain [1] done processing


Model for WRB trained in 2.09 seconds
**************************************************
Training model for WTW


21:19:07 - cmdstanpy - INFO - Chain [1] start processing
21:19:10 - cmdstanpy - INFO - Chain [1] done processing


Model for WTW trained in 3.20 seconds
**************************************************


In [14]:
# Score every Prophet model on its ticker's held-out closing prices.
# NOTE(review): test[ticker] starts with the last training point and the
# forecast horizon is len(y_test); this presumably relies on the darts metrics
# aligning both series on their shared timestamps - confirm.
mapes_prophet, maes_prophet = {}, {}

for ticker in data:
    y_test = test[ticker]['close']
    horizon = len(y_test)
    y_pred = models_prophet[ticker].predict(n=horizon, show_warnings=False)

    mape_, mae_ = mape(y_test, y_pred), mae(y_test, y_pred)
    mapes_prophet[ticker], maes_prophet[ticker] = mape_, mae_

    print(f"Ticker: {ticker}, MAPE: {mape_:.2f}, MAE: {mae_:.2f}")
    print("*" * 50)

Ticker: ACGL, MAPE: 15.04, MAE: 14.68
**************************************************
Ticker: AFL, MAPE: 11.96, MAE: 12.46
**************************************************
Ticker: AIG, MAPE: 5.99, MAE: 4.55
**************************************************
Ticker: AIZ, MAPE: 11.32, MAE: 23.10
**************************************************
Ticker: AJG, MAPE: 6.90, MAE: 21.35
**************************************************
Ticker: ALL, MAPE: 22.15, MAE: 41.01
**************************************************
Ticker: AMP, MAPE: 11.69, MAE: 58.34
**************************************************
Ticker: AON, MAPE: 26.41, MAE: 97.52
**************************************************
Ticker: APO, MAPE: 10.73, MAE: 15.62
**************************************************
Ticker: AXP, MAPE: 23.32, MAE: 63.99
**************************************************
Ticker: BAC, MAPE: 28.48, MAE: 11.90
**************************************************
Ticker: BEN, MAPE: 7.54, MAE: 1.51

In [15]:
# (mean MAPE, mean MAE) of the Prophet models across all tickers.
tuple(np.mean(list(scores.values())) for scores in (mapes_prophet, maes_prophet))

(20.8301610519168, 33.46482449457745)

## Theta model

In [16]:
# Fit one AutoTheta model per ticker on the closing price only and keep them in
# `models_theta`, keyed by ticker symbol.
models_theta = {}

for ticker in data:
    print(f"Training model for {ticker}")
    start = time.time()

    model = AutoTheta(season_length=30)
    model.fit(train[ticker]['close'])

    end = time.time()
    print(f"Model for {ticker} trained in {end - start:.2f} seconds")
    print("*" * 50)

    models_theta[ticker] = model

Training model for ACGL
Model for ACGL trained in 1.23 seconds
**************************************************
Training model for AFL
Model for AFL trained in 0.98 seconds
**************************************************
Training model for AIG
Model for AIG trained in 0.61 seconds
**************************************************
Training model for AIZ
Model for AIZ trained in 1.18 seconds
**************************************************
Training model for AJG
Model for AJG trained in 2.15 seconds
**************************************************
Training model for ALL
Model for ALL trained in 1.06 seconds
**************************************************
Training model for AMP
Model for AMP trained in 1.34 seconds
**************************************************
Training model for AON
Model for AON trained in 0.70 seconds
**************************************************
Training model for APO
Model for APO trained in 1.19 seconds
*****************************************

In [17]:
# Score every Theta model on its ticker's held-out closing prices.
# NOTE(review): test[ticker] starts with the last training point and the
# forecast horizon is len(y_test); this presumably relies on the darts metrics
# aligning both series on their shared timestamps - confirm.
mapes_theta, maes_theta = {}, {}

for ticker in data:
    y_test = test[ticker]['close']
    horizon = len(y_test)
    y_pred = models_theta[ticker].predict(n=horizon, show_warnings=False)

    mape_, mae_ = mape(y_test, y_pred), mae(y_test, y_pred)
    mapes_theta[ticker], maes_theta[ticker] = mape_, mae_

    print(f"Ticker: {ticker}, MAPE: {mape_:.2f}, MAE: {mae_:.2f}")
    print("*" * 50)

Ticker: ACGL, MAPE: 5.13, MAE: 5.09
**************************************************
Ticker: AFL, MAPE: 15.98, MAE: 16.88
**************************************************
Ticker: AIG, MAPE: 4.16, MAE: 3.15
**************************************************
Ticker: AIZ, MAPE: 10.13, MAE: 20.60
**************************************************
Ticker: AJG, MAPE: 14.51, MAE: 43.73
**************************************************
Ticker: ALL, MAPE: 8.01, MAE: 15.23
**************************************************
Ticker: AMP, MAPE: 12.30, MAE: 63.63
**************************************************
Ticker: AON, MAPE: 16.13, MAE: 59.08
**************************************************
Ticker: APO, MAPE: 15.06, MAE: 23.07
**************************************************
Ticker: AXP, MAPE: 10.76, MAE: 30.83
**************************************************
Ticker: BAC, MAPE: 10.01, MAE: 4.32
**************************************************
Ticker: BEN, MAPE: 7.76, MAE: 1.51
**

In [18]:
# (mean MAPE, mean MAE) of the Theta models across all tickers.
tuple(np.mean(list(scores.values())) for scores in (mapes_theta, maes_theta))

(11.853871508914272, 23.816626048663643)

## Linear Regression

### Local model

In [19]:
# Fit one independent ("local") linear-regression forecaster per ticker, using
# the same lag and encoder configuration as the local XGBoost models, and keep
# them in `models_lr`, keyed by ticker symbol.
models_lr = {}

for ticker in data:
    print(f"Training model for {ticker}")
    start = time.time()

    # Calendar features are generated automatically from the time index and
    # supplied to the model as future covariates.
    calendar_encoders = {
        'datetime_attribute': {
            'future': ['month', 'day', 'dayofweek', 'dayofyear', 'quarter', 'year']
        }
    }
    model = LinearRegressionModel(
        lags=15,
        lags_past_covariates=1,
        lags_future_covariates=[0],
        output_chunk_length=1,
        random_state=42,
        add_encoders=calendar_encoders,
    )

    ticker_train = train[ticker]
    model.fit(
        series=ticker_train['close'],
        past_covariates=ticker_train[['high', 'low', 'volume', 'open']],
    )

    end = time.time()
    print(f"Model for {ticker} trained in {end - start:.2f} seconds")
    print("*" * 50)

    models_lr[ticker] = model

Training model for ACGL
Model for ACGL trained in 0.13 seconds
**************************************************
Training model for AFL
Model for AFL trained in 0.12 seconds
**************************************************
Training model for AIG
Model for AIG trained in 0.12 seconds
**************************************************
Training model for AIZ
Model for AIZ trained in 0.12 seconds
**************************************************
Training model for AJG
Model for AJG trained in 0.12 seconds
**************************************************
Training model for ALL
Model for ALL trained in 0.12 seconds
**************************************************
Training model for AMP
Model for AMP trained in 0.12 seconds
**************************************************
Training model for AON
Model for AON trained in 0.12 seconds
**************************************************
Training model for APO
Model for APO trained in 0.12 seconds
*****************************************

In [20]:
# Score each per-ticker linear regression on its test split and record the
# MAPE / MAE per ticker.
# NOTE(review): the recorded output contains very large errors for some tickers
# (e.g. APO), so the long-horizon forecasts look unstable for part of the
# universe -- worth confirming before comparing models.
mapes_lr, maes_lr = {}, {}

for ticker in data:
    y_test = test[ticker]['close']

    # Forecast the whole test horizon in one call, using the test-period
    # 'high'/'low'/'volume'/'open' components as past covariates.
    y_pred = models_lr[ticker].predict(
        n=len(y_test),
        past_covariates=test[ticker][['high', 'low', 'volume', 'open']],
        show_warnings=False,
    )

    mape_, mae_ = mape(y_test, y_pred), mae(y_test, y_pred)
    mapes_lr[ticker], maes_lr[ticker] = mape_, mae_

    print(f"Ticker: {ticker}, MAPE: {mape_:.2f}, MAE: {mae_:.2f}")
    print("*" * 50)

Ticker: ACGL, MAPE: 90.99, MAE: 84.84
**************************************************
Ticker: AFL, MAPE: 2.49, MAE: 2.57
**************************************************
Ticker: AIG, MAPE: 3.45, MAE: 2.67
**************************************************
Ticker: AIZ, MAPE: 3.90, MAE: 7.78
**************************************************
Ticker: AJG, MAPE: 10.15, MAE: 30.52
**************************************************
Ticker: ALL, MAPE: 118.23, MAE: 229.78
**************************************************
Ticker: AMP, MAPE: 2.92, MAE: 14.20
**************************************************
Ticker: AON, MAPE: 1.33, MAE: 4.66
**************************************************
Ticker: APO, MAPE: 5247.90, MAE: 7108.36
**************************************************
Ticker: AXP, MAPE: 5.96, MAE: 16.57
**************************************************
Ticker: BAC, MAPE: 2.13, MAE: 0.86
**************************************************
Ticker: BEN, MAPE: 96.84, MAE: 18.51


In [21]:
# Average MAPE and average MAE of the local linear-regression models across tickers.
# NOTE(review): the recorded result is on the order of 1e19, i.e. the mean is
# dominated by a few tickers with extreme errors; a median may summarise the
# typical ticker better -- confirm before drawing conclusions.
tuple(np.mean(list(scores.values())) for scores in (mapes_lr, maes_lr))

(1.0704046091313797e+19, 3.974853039434333e+19)

### Global model

In [22]:
# Global approach: a single LinearRegressionModel fitted jointly on the training
# series of every ticker, with the same hyper-parameters as the local models.
print("Training global model")
start = time.time()

# Calendar attributes generated automatically from the series' time index
# and handed to the model as future covariates.
global_lr_encoders = {
    'datetime_attribute': {
        'future': ['month', 'day', 'dayofweek', 'dayofyear', 'quarter', 'year']
    }
}
model = LinearRegressionModel(
    lags=15,
    lags_past_covariates=1,
    lags_future_covariates=[0],
    output_chunk_length=1,
    random_state=42,
    add_encoders=global_lr_encoders,
)

# One target series ('close') and one covariate series per ticker; both lists
# are built from `train.values()` so they stay aligned element by element.
train_targets = [ts['close'] for ts in train.values()]
train_covariates = [ts[['high', 'low', 'volume', 'open']] for ts in train.values()]
model.fit(train_targets, past_covariates=train_covariates)

end = time.time()
print(f"Global model trained in {end - start:.2f} seconds")
print("*" * 50)

Training global model
Global model trained in 8.88 seconds
**************************************************


In [23]:
# Score the global linear regression ticker by ticker: the model is given each
# ticker's training 'close' series as history and forecasts that ticker's whole
# test horizon.
mapes, maes = {}, {}

for ticker in data:
    y_test = test[ticker]['close']

    # `series` tells the global model which ticker's history to continue from.
    y_pred = model.predict(
        n=len(y_test),
        series=train[ticker]['close'],
        past_covariates=test[ticker][['high', 'low', 'volume', 'open']],
        show_warnings=False,
    )

    mape_, mae_ = mape(y_test, y_pred), mae(y_test, y_pred)
    mapes[ticker], maes[ticker] = mape_, mae_

    print(f"Global model for {ticker}, MAPE: {mape_:.2f}, MAE: {mae_:.2f}")
    print("*" * 50)

Global model for ACGL, MAPE: 4.94, MAE: 4.70
**************************************************
Global model for AFL, MAPE: 4.06, MAE: 4.17
**************************************************
Global model for AIG, MAPE: 4.16, MAE: 3.11
**************************************************
Global model for AIZ, MAPE: 3.72, MAE: 7.27
**************************************************
Global model for AJG, MAPE: 4.02, MAE: 11.88
**************************************************
Global model for ALL, MAPE: 3.21, MAE: 5.99
**************************************************
Global model for AMP, MAPE: 4.92, MAE: 24.32
**************************************************
Global model for AON, MAPE: 4.50, MAE: 15.92
**************************************************
Global model for APO, MAPE: 7.76, MAE: 10.81
**************************************************
Global model for AXP, MAPE: 4.84, MAE: 13.14
**************************************************
Global model for BAC, MAPE: 4.03, MAE: 1.61


In [24]:
# Average MAPE and average MAE of the global linear-regression model across tickers.
tuple(np.mean(list(scores.values())) for scores in (mapes, maes))

(4.930623811030288, 8.707990625977335)