## Model training and tuning for the information technology sector

In [1]:
import pandas as pd
import numpy as np

import time

from darts import TimeSeries, concatenate
from darts.utils.model_selection import train_test_split
from darts.models.forecasting.xgboost import XGBModel
from darts.dataprocessing.transformers.scaler import Scaler
from darts.metrics.metrics import mape, mae
from darts.models.forecasting.prophet_model import Prophet
from darts.models.forecasting.sf_auto_theta import AutoTheta
from darts.models.forecasting.linear_regression_model import LinearRegressionModel
from darts.dataprocessing.transformers import StaticCovariatesTransformer
from darts.dataprocessing.transformers.boxcox import BoxCox

Importing plotly failed. Interactive plots will not work.
  __import__("pkg_resources").declare_namespace(__name__)  # type: ignore


In [2]:
# Read the cleaned S&P 500 price table and convert the `date` column to datetimes.
df = pd.read_csv('../../data/processed/sp500_all_companies_cleaned_with_sector.csv')
df = df.assign(date=pd.to_datetime(df['date']))

# Restrict to the Information Technology sector, ordered by ticker and then by date.
df_tech = (
    df.loc[df['GICS Sector'].eq('Information Technology')]
    .sort_values(by=['ticker', 'date'])
    .reset_index(drop=True)
)

In [3]:
# Distinct tickers present in the Information Technology subset.
pd.unique(df_tech['ticker'])

array(['AAPL', 'ACN', 'ADBE', 'ADI', 'ADSK', 'AKAM', 'AMAT', 'AMD',
       'ANET', 'ANSS', 'APH', 'AVGO', 'CDNS', 'CDW', 'CRM', 'CRWD',
       'CSCO', 'CTSH', 'DELL', 'ENPH', 'EPAM', 'FFIV', 'FICO', 'FSLR',
       'FTNT', 'GDDY', 'GEN', 'GLW', 'HPE', 'HPQ', 'IBM', 'INTC', 'INTU',
       'IT', 'JBL', 'JNPR', 'KEYS', 'KLAC', 'LRCX', 'MCHP', 'MPWR',
       'MSFT', 'MSI', 'MU', 'NOW', 'NTAP', 'NVDA', 'NXPI', 'ON', 'ORCL',
       'PANW', 'PLTR', 'PTC', 'QCOM', 'ROP', 'SMCI', 'SNPS', 'STX',
       'SWKS', 'TDY', 'TEL', 'TER', 'TRMB', 'TXN', 'TYL', 'VRSN', 'WDAY',
       'WDC', 'ZBRA'], dtype=object)

In [4]:
# Table of the distinct tickers (not referenced by the other cells visible here).
static_covs = pd.DataFrame({'ticker': df_tech['ticker'].unique()})

# Build one multivariate, business-day-frequency series per ticker; the ticker
# symbol itself is attached to each series as its static covariate.
data = {
    symbol: TimeSeries.from_dataframe(
        rows,
        time_col='date',
        value_cols=['close', 'high', 'low', 'open', 'volume'],
        freq='B',
        static_covariates=pd.Series(symbol),
    )
    for symbol, rows in df_tech.groupby('ticker')
}

In [5]:
# Fit a single static-covariate transformer on all series at once, then replace
# every entry of `data` with its transformed counterpart (ticker label encoded).
transformer = StaticCovariatesTransformer()
ts = list(data.values())
transformer.fit(ts)

data.update({
    symbol: transformer.transform(raw_series)
    for symbol, raw_series in data.items()
})

In [6]:
# Per-ticker split: the last 10% of each series is held out for testing.
train, test = {}, {}

for ticker, series in data.items():
    train_part, test_part = train_test_split(series, test_size=0.1)
    train[ticker] = train_part
    # The stored test series starts with the final training observation, because
    # the forecasts made later need that point as context.
    test[ticker] = train_part[-1:].append(test_part)

## Gradient Boosting

### Train a local model first

In [7]:
# Fit one XGBoost forecaster per ticker ("local" models) and keep them in `models`.
models = {}

for ticker in data.keys():
    print(f"Training model for {ticker}")
    # perf_counter() is a monotonic clock intended for measuring elapsed time.
    start = time.perf_counter()
    # Loop-local name, so the global `model` used by the global-model cells is
    # never bound to a per-ticker model.
    local_xgb = XGBModel(
        lags=15,
        lags_past_covariates=1,
        lags_future_covariates=[0],
        output_chunk_length=1,
        random_state=42,
        add_encoders={
            'datetime_attribute': {
                'future': ['month', 'day', 'dayofweek', 'dayofyear', 'quarter', 'year'] # automatically generate future covariates from time series
            }
        }
    )

    # Target is the close price; the remaining price/volume columns are past covariates.
    local_xgb.fit(
        train[ticker]['close'],
        past_covariates=train[ticker][['high', 'low', 'volume', 'open']],
    )

    end = time.perf_counter()
    print(f"Model for {ticker} trained in {end - start:.2f} seconds")
    print("*" * 50)

    models[ticker] = local_xgb

Training model for AAPL
Model for AAPL trained in 0.24 seconds
**************************************************
Training model for ACN
Model for ACN trained in 0.25 seconds
**************************************************
Training model for ADBE
Model for ADBE trained in 0.22 seconds
**************************************************
Training model for ADI
Model for ADI trained in 0.27 seconds
**************************************************
Training model for ADSK
Model for ADSK trained in 0.28 seconds
**************************************************
Training model for AKAM
Model for AKAM trained in 0.26 seconds
**************************************************
Training model for AMAT
Model for AMAT trained in 0.23 seconds
**************************************************
Training model for AMD
Model for AMD trained in 0.24 seconds
**************************************************
Training model for ANET
Model for ANET trained in 0.27 seconds
*******************************

In [8]:
# Score every local XGBoost model on its ticker's hold-out window.
mapes = {}
maes = {}

for ticker in data.keys():
    # test[ticker] starts with the last training observation (kept as context for
    # the lagged covariates), so drop it to obtain the real hold-out targets.
    y_test = test[ticker]['close'][1:]
    y_pred = models[ticker].predict(
        n=len(y_test),  # forecast exactly the hold-out horizon, no extra step
        past_covariates=test[ticker][['high', 'low', 'volume', 'open']],
        show_warnings=False
    )
    mape_ = mape(y_test, y_pred)
    mae_ = mae(y_test, y_pred)
    mapes[ticker] = mape_
    maes[ticker] = mae_
    print(f"Ticker: {ticker}, MAPE: {mape_:.2f}, MAE: {mae_:.2f}")
    print("*" * 50)

Ticker: AAPL, MAPE: 14.19, MAE: 32.52
**************************************************
Ticker: ACN, MAPE: 4.27, MAE: 14.40
**************************************************
Ticker: ADBE, MAPE: 4.36, MAE: 21.15
**************************************************
Ticker: ADI, MAPE: 12.29, MAE: 27.16
**************************************************
Ticker: ADSK, MAPE: 25.46, MAE: 70.52
**************************************************
Ticker: AKAM, MAPE: 4.21, MAE: 3.78
**************************************************
Ticker: AMAT, MAPE: 13.95, MAE: 24.75
**************************************************
Ticker: AMD, MAPE: 9.37, MAE: 12.33
**************************************************
Ticker: ANET, MAPE: 23.06, MAE: 22.78
**************************************************
Ticker: ANSS, MAPE: 3.41, MAE: 11.27
**************************************************
Ticker: APH, MAPE: 13.84, MAE: 9.53
**************************************************
Ticker: AVGO, MAPE: 24.72, MAE: 

In [9]:
# Mean MAPE and mean MAE over all tickers for the current `mapes` / `maes` dicts.
tuple(np.mean(list(scores.values())) for scores in (mapes, maes))

(12.958482607062324, 33.217655837182576)

### Global model

In [10]:
# Fit a single XGBoost forecaster on all tickers at once ("global" model).
print("Training global model")
# perf_counter() is a monotonic clock intended for measuring elapsed time.
start = time.perf_counter()
model = XGBModel(
    lags=15,
    lags_past_covariates=1,
    lags_future_covariates=[0],
    output_chunk_length=1,
    random_state=42,
    add_encoders={
        'datetime_attribute': {
            'future': ['month', 'day', 'dayofweek', 'dayofyear', 'quarter', 'year'] # automatically generate future covariates from time series
        }
    },
    use_static_covariates=True,  # lets the model see each series' encoded ticker
    n_estimators=1000,
    eta=0.01,
    max_depth=8,
    subsample=0.8
)

# One target series and one past-covariate series per ticker, in matching order.
model.fit(
    [ts['close'] for ts in train.values()],
    past_covariates=[ts[['high', 'low', 'volume', 'open']] for ts in train.values()],
)

end = time.perf_counter()
print(f"Global model trained in {end - start:.2f} seconds")
print("*" * 50)

Training global model
Global model trained in 20.61 seconds
**************************************************


In [11]:
# Score the global XGBoost model ticker by ticker.
# Note: this rebinds `mapes` / `maes`, replacing the local-model scores.
mapes = {}
maes = {}

for ticker in data.keys():
    # test[ticker] starts with the last training observation (kept as context for
    # the lagged covariates), so drop it to obtain the real hold-out targets.
    y_test = test[ticker]['close'][1:]
    y_pred = model.predict(
        n=len(y_test),  # forecast exactly the hold-out horizon, no extra step
        series=train[ticker]['close'],  # the global model must be told which series to continue
        past_covariates=test[ticker][['high', 'low', 'volume', 'open']],
        show_warnings=False
    )
    mape_ = mape(y_test, y_pred)
    mae_ = mae(y_test, y_pred)

    mapes[ticker] = mape_
    maes[ticker] = mae_

    print(f"Global model for {ticker}, MAPE: {mape_:.2f}, MAE: {mae_:.2f}")
    print("*" * 50)

Global model for AAPL, MAPE: 2.67, MAE: 5.80
**************************************************
Global model for ACN, MAPE: 2.05, MAE: 6.80
**************************************************
Global model for ADBE, MAPE: 2.68, MAE: 12.74
**************************************************
Global model for ADI, MAPE: 3.06, MAE: 6.52
**************************************************
Global model for ADSK, MAPE: 2.27, MAE: 5.96
**************************************************
Global model for AKAM, MAPE: 3.32, MAE: 2.93
**************************************************
Global model for AMAT, MAPE: 3.81, MAE: 7.06
**************************************************
Global model for AMD, MAPE: 4.72, MAE: 5.99
**************************************************
Global model for ANET, MAPE: 4.80, MAE: 4.30
**************************************************
Global model for ANSS, MAPE: 1.59, MAE: 5.14
**************************************************
Global model for APH, MAPE: 3.09, MAE: 2.0

In [12]:
# Mean MAPE and mean MAE over all tickers for the current `mapes` / `maes` dicts.
tuple(np.mean(list(scores.values())) for scores in (mapes, maes))

(4.379203092186948, 19.712977762464575)

## Prophet model

In [13]:
# Fit one Prophet model per ticker on the close price only.
models_prophet = {}

for ticker in data.keys():
    print(f"Training model for {ticker}")
    # perf_counter() is a monotonic clock intended for measuring elapsed time.
    start = time.perf_counter()
    # Loop-local name, so the global `model` (the fitted global XGBoost model)
    # is not overwritten by the last per-ticker Prophet model.
    prophet_model = Prophet(
        add_encoders={
            'datetime_attribute': {
                'future': ['month', 'day', 'dayofweek', 'dayofyear', 'quarter', 'year']  # automatically generate future covariates from time series
            }
        },
        seasonality_mode='multiplicative',
    )

    prophet_model.fit(
        train[ticker]['close']
    )

    end = time.perf_counter()
    print(f"Model for {ticker} trained in {end - start:.2f} seconds")
    print("*" * 50)

    models_prophet[ticker] = prophet_model

Training model for AAPL


21:26:29 - cmdstanpy - INFO - Chain [1] start processing
21:26:32 - cmdstanpy - INFO - Chain [1] done processing


Model for AAPL trained in 3.11 seconds
**************************************************
Training model for ACN


21:26:32 - cmdstanpy - INFO - Chain [1] start processing
21:26:34 - cmdstanpy - INFO - Chain [1] done processing


Model for ACN trained in 2.48 seconds
**************************************************
Training model for ADBE


21:26:35 - cmdstanpy - INFO - Chain [1] start processing
21:26:36 - cmdstanpy - INFO - Chain [1] done processing


Model for ADBE trained in 1.76 seconds
**************************************************
Training model for ADI


21:26:36 - cmdstanpy - INFO - Chain [1] start processing
21:26:39 - cmdstanpy - INFO - Chain [1] done processing


Model for ADI trained in 2.67 seconds
**************************************************
Training model for ADSK


21:26:39 - cmdstanpy - INFO - Chain [1] start processing
21:26:42 - cmdstanpy - INFO - Chain [1] done processing


Model for ADSK trained in 2.74 seconds
**************************************************
Training model for AKAM


21:26:42 - cmdstanpy - INFO - Chain [1] start processing
21:26:45 - cmdstanpy - INFO - Chain [1] done processing


Model for AKAM trained in 3.20 seconds
**************************************************
Training model for AMAT


21:26:45 - cmdstanpy - INFO - Chain [1] start processing
21:26:47 - cmdstanpy - INFO - Chain [1] done processing


Model for AMAT trained in 2.25 seconds
**************************************************
Training model for AMD


21:26:47 - cmdstanpy - INFO - Chain [1] start processing
21:26:49 - cmdstanpy - INFO - Chain [1] done processing


Model for AMD trained in 1.83 seconds
**************************************************
Training model for ANET


21:26:49 - cmdstanpy - INFO - Chain [1] start processing
21:26:51 - cmdstanpy - INFO - Chain [1] done processing


Model for ANET trained in 2.60 seconds
**************************************************
Training model for ANSS


21:26:52 - cmdstanpy - INFO - Chain [1] start processing
21:26:54 - cmdstanpy - INFO - Chain [1] done processing


Model for ANSS trained in 3.12 seconds
**************************************************
Training model for APH


21:26:55 - cmdstanpy - INFO - Chain [1] start processing
21:26:57 - cmdstanpy - INFO - Chain [1] done processing


Model for APH trained in 2.66 seconds
**************************************************
Training model for AVGO


21:26:57 - cmdstanpy - INFO - Chain [1] start processing
21:26:59 - cmdstanpy - INFO - Chain [1] done processing


Model for AVGO trained in 2.03 seconds
**************************************************
Training model for CDNS


21:27:00 - cmdstanpy - INFO - Chain [1] start processing
21:27:01 - cmdstanpy - INFO - Chain [1] done processing


Model for CDNS trained in 2.07 seconds
**************************************************
Training model for CDW


21:27:02 - cmdstanpy - INFO - Chain [1] start processing
21:27:03 - cmdstanpy - INFO - Chain [1] done processing


Model for CDW trained in 1.91 seconds
**************************************************
Training model for CRM


21:27:03 - cmdstanpy - INFO - Chain [1] start processing
21:27:06 - cmdstanpy - INFO - Chain [1] done processing


Model for CRM trained in 2.90 seconds
**************************************************
Training model for CRWD


21:27:06 - cmdstanpy - INFO - Chain [1] start processing
21:27:08 - cmdstanpy - INFO - Chain [1] done processing


Model for CRWD trained in 1.96 seconds
**************************************************
Training model for CSCO


21:27:08 - cmdstanpy - INFO - Chain [1] start processing
21:27:11 - cmdstanpy - INFO - Chain [1] done processing


Model for CSCO trained in 3.10 seconds
**************************************************
Training model for CTSH


21:27:11 - cmdstanpy - INFO - Chain [1] start processing
21:27:14 - cmdstanpy - INFO - Chain [1] done processing


Model for CTSH trained in 3.11 seconds
**************************************************
Training model for DELL


21:27:15 - cmdstanpy - INFO - Chain [1] start processing
21:27:16 - cmdstanpy - INFO - Chain [1] done processing


Model for DELL trained in 2.11 seconds
**************************************************
Training model for ENPH


21:27:17 - cmdstanpy - INFO - Chain [1] start processing
21:27:19 - cmdstanpy - INFO - Chain [1] done processing


Model for ENPH trained in 2.33 seconds
**************************************************
Training model for EPAM


21:27:19 - cmdstanpy - INFO - Chain [1] start processing
21:27:22 - cmdstanpy - INFO - Chain [1] done processing


Model for EPAM trained in 2.89 seconds
**************************************************
Training model for FFIV


21:27:22 - cmdstanpy - INFO - Chain [1] start processing
21:27:25 - cmdstanpy - INFO - Chain [1] done processing


Model for FFIV trained in 3.16 seconds
**************************************************
Training model for FICO


21:27:25 - cmdstanpy - INFO - Chain [1] start processing
21:27:27 - cmdstanpy - INFO - Chain [1] done processing


Model for FICO trained in 2.67 seconds
**************************************************
Training model for FSLR


21:27:28 - cmdstanpy - INFO - Chain [1] start processing
21:27:30 - cmdstanpy - INFO - Chain [1] done processing


Model for FSLR trained in 3.05 seconds
**************************************************
Training model for FTNT


21:27:31 - cmdstanpy - INFO - Chain [1] start processing
21:27:33 - cmdstanpy - INFO - Chain [1] done processing


Model for FTNT trained in 2.72 seconds
**************************************************
Training model for GDDY


21:27:34 - cmdstanpy - INFO - Chain [1] start processing
21:27:36 - cmdstanpy - INFO - Chain [1] done processing


Model for GDDY trained in 2.98 seconds
**************************************************
Training model for GEN


21:27:36 - cmdstanpy - INFO - Chain [1] start processing
21:27:39 - cmdstanpy - INFO - Chain [1] done processing


Model for GEN trained in 3.17 seconds
**************************************************
Training model for GLW


21:27:40 - cmdstanpy - INFO - Chain [1] start processing
21:27:42 - cmdstanpy - INFO - Chain [1] done processing


Model for GLW trained in 3.07 seconds
**************************************************
Training model for HPE


21:27:43 - cmdstanpy - INFO - Chain [1] start processing
21:27:45 - cmdstanpy - INFO - Chain [1] done processing


Model for HPE trained in 3.02 seconds
**************************************************
Training model for HPQ


21:27:46 - cmdstanpy - INFO - Chain [1] start processing
21:27:49 - cmdstanpy - INFO - Chain [1] done processing


Model for HPQ trained in 3.08 seconds
**************************************************
Training model for IBM


21:27:49 - cmdstanpy - INFO - Chain [1] start processing
21:27:51 - cmdstanpy - INFO - Chain [1] done processing


Model for IBM trained in 2.23 seconds
**************************************************
Training model for INTC


21:27:51 - cmdstanpy - INFO - Chain [1] start processing
21:27:53 - cmdstanpy - INFO - Chain [1] done processing


Model for INTC trained in 2.23 seconds
**************************************************
Training model for INTU


21:27:53 - cmdstanpy - INFO - Chain [1] start processing
21:27:55 - cmdstanpy - INFO - Chain [1] done processing


Model for INTU trained in 2.51 seconds
**************************************************
Training model for IT


21:27:56 - cmdstanpy - INFO - Chain [1] start processing
21:27:59 - cmdstanpy - INFO - Chain [1] done processing


Model for IT trained in 3.12 seconds
**************************************************
Training model for JBL


21:27:59 - cmdstanpy - INFO - Chain [1] start processing
21:28:01 - cmdstanpy - INFO - Chain [1] done processing


Model for JBL trained in 2.85 seconds
**************************************************
Training model for JNPR


21:28:02 - cmdstanpy - INFO - Chain [1] start processing
21:28:05 - cmdstanpy - INFO - Chain [1] done processing


Model for JNPR trained in 3.16 seconds
**************************************************
Training model for KEYS


21:28:05 - cmdstanpy - INFO - Chain [1] start processing
21:28:08 - cmdstanpy - INFO - Chain [1] done processing


Model for KEYS trained in 3.24 seconds
**************************************************
Training model for KLAC


21:28:08 - cmdstanpy - INFO - Chain [1] start processing
21:28:10 - cmdstanpy - INFO - Chain [1] done processing


Model for KLAC trained in 2.56 seconds
**************************************************
Training model for LRCX


21:28:11 - cmdstanpy - INFO - Chain [1] start processing
21:28:13 - cmdstanpy - INFO - Chain [1] done processing


Model for LRCX trained in 2.11 seconds
**************************************************
Training model for MCHP


21:28:13 - cmdstanpy - INFO - Chain [1] start processing
21:28:15 - cmdstanpy - INFO - Chain [1] done processing


Model for MCHP trained in 2.58 seconds
**************************************************
Training model for MPWR


21:28:15 - cmdstanpy - INFO - Chain [1] start processing
21:28:18 - cmdstanpy - INFO - Chain [1] done processing


Model for MPWR trained in 2.42 seconds
**************************************************
Training model for MSFT


21:28:18 - cmdstanpy - INFO - Chain [1] start processing
21:28:20 - cmdstanpy - INFO - Chain [1] done processing


Model for MSFT trained in 2.71 seconds
**************************************************
Training model for MSI


21:28:21 - cmdstanpy - INFO - Chain [1] start processing
21:28:23 - cmdstanpy - INFO - Chain [1] done processing


Model for MSI trained in 2.97 seconds
**************************************************
Training model for MU


21:28:23 - cmdstanpy - INFO - Chain [1] start processing
21:28:26 - cmdstanpy - INFO - Chain [1] done processing


Model for MU trained in 3.06 seconds
**************************************************
Training model for NOW


21:28:27 - cmdstanpy - INFO - Chain [1] start processing
21:28:28 - cmdstanpy - INFO - Chain [1] done processing


Model for NOW trained in 1.97 seconds
**************************************************
Training model for NTAP


21:28:29 - cmdstanpy - INFO - Chain [1] start processing
21:28:31 - cmdstanpy - INFO - Chain [1] done processing


Model for NTAP trained in 3.25 seconds
**************************************************
Training model for NVDA


21:28:32 - cmdstanpy - INFO - Chain [1] start processing
21:28:33 - cmdstanpy - INFO - Chain [1] done processing


Model for NVDA trained in 1.94 seconds
**************************************************
Training model for NXPI


21:28:34 - cmdstanpy - INFO - Chain [1] start processing
21:28:36 - cmdstanpy - INFO - Chain [1] done processing


Model for NXPI trained in 2.26 seconds
**************************************************
Training model for ON


21:28:36 - cmdstanpy - INFO - Chain [1] start processing
21:28:39 - cmdstanpy - INFO - Chain [1] done processing


Model for ON trained in 3.13 seconds
**************************************************
Training model for ORCL


21:28:39 - cmdstanpy - INFO - Chain [1] start processing
21:28:41 - cmdstanpy - INFO - Chain [1] done processing


Model for ORCL trained in 2.41 seconds
**************************************************
Training model for PANW


21:28:42 - cmdstanpy - INFO - Chain [1] start processing
21:28:44 - cmdstanpy - INFO - Chain [1] done processing


Model for PANW trained in 2.43 seconds
**************************************************
Training model for PLTR


21:28:44 - cmdstanpy - INFO - Chain [1] start processing
21:28:45 - cmdstanpy - INFO - Chain [1] done processing


Model for PLTR trained in 1.17 seconds
**************************************************
Training model for PTC


21:28:45 - cmdstanpy - INFO - Chain [1] start processing
21:28:48 - cmdstanpy - INFO - Chain [1] done processing


Model for PTC trained in 3.00 seconds
**************************************************
Training model for QCOM


21:28:48 - cmdstanpy - INFO - Chain [1] start processing
21:28:51 - cmdstanpy - INFO - Chain [1] done processing


Model for QCOM trained in 2.72 seconds
**************************************************
Training model for ROP


21:28:51 - cmdstanpy - INFO - Chain [1] start processing
21:28:53 - cmdstanpy - INFO - Chain [1] done processing


Model for ROP trained in 2.58 seconds
**************************************************
Training model for SMCI


21:28:53 - cmdstanpy - INFO - Chain [1] start processing
21:28:56 - cmdstanpy - INFO - Chain [1] done processing


Model for SMCI trained in 3.28 seconds
**************************************************
Training model for SNPS


21:28:57 - cmdstanpy - INFO - Chain [1] start processing
21:28:58 - cmdstanpy - INFO - Chain [1] done processing


Model for SNPS trained in 1.72 seconds
**************************************************
Training model for STX


21:28:58 - cmdstanpy - INFO - Chain [1] start processing
21:29:00 - cmdstanpy - INFO - Chain [1] done processing


Model for STX trained in 2.12 seconds
**************************************************
Training model for SWKS


21:29:01 - cmdstanpy - INFO - Chain [1] start processing
21:29:03 - cmdstanpy - INFO - Chain [1] done processing


Model for SWKS trained in 3.28 seconds
**************************************************
Training model for TDY


21:29:04 - cmdstanpy - INFO - Chain [1] start processing
21:29:06 - cmdstanpy - INFO - Chain [1] done processing


Model for TDY trained in 2.64 seconds
**************************************************
Training model for TEL


21:29:06 - cmdstanpy - INFO - Chain [1] start processing
21:29:09 - cmdstanpy - INFO - Chain [1] done processing


Model for TEL trained in 3.17 seconds
**************************************************
Training model for TER


21:29:10 - cmdstanpy - INFO - Chain [1] start processing
21:29:12 - cmdstanpy - INFO - Chain [1] done processing


Model for TER trained in 3.15 seconds
**************************************************
Training model for TRMB


21:29:13 - cmdstanpy - INFO - Chain [1] start processing
21:29:15 - cmdstanpy - INFO - Chain [1] done processing


Model for TRMB trained in 2.91 seconds
**************************************************
Training model for TXN


21:29:16 - cmdstanpy - INFO - Chain [1] start processing
21:29:18 - cmdstanpy - INFO - Chain [1] done processing


Model for TXN trained in 2.52 seconds
**************************************************
Training model for TYL


21:29:18 - cmdstanpy - INFO - Chain [1] start processing
21:29:21 - cmdstanpy - INFO - Chain [1] done processing


Model for TYL trained in 2.96 seconds
**************************************************
Training model for VRSN


21:29:21 - cmdstanpy - INFO - Chain [1] start processing
21:29:24 - cmdstanpy - INFO - Chain [1] done processing


Model for VRSN trained in 2.71 seconds
**************************************************
Training model for WDAY


21:29:24 - cmdstanpy - INFO - Chain [1] start processing
21:29:26 - cmdstanpy - INFO - Chain [1] done processing


Model for WDAY trained in 2.82 seconds
**************************************************
Training model for WDC


21:29:27 - cmdstanpy - INFO - Chain [1] start processing
21:29:29 - cmdstanpy - INFO - Chain [1] done processing


Model for WDC trained in 2.90 seconds
**************************************************
Training model for ZBRA


21:29:30 - cmdstanpy - INFO - Chain [1] start processing
21:29:32 - cmdstanpy - INFO - Chain [1] done processing


Model for ZBRA trained in 2.84 seconds
**************************************************


In [14]:
# Score every Prophet model on its ticker's hold-out window.
mapes_prophet = {}
maes_prophet = {}

for ticker in data.keys():
    # test[ticker] starts with the last training observation, which is not part
    # of the hold-out window; drop it so targets and horizon line up.
    y_test = test[ticker]['close'][1:]
    y_pred = models_prophet[ticker].predict(
        n=len(y_test),  # forecast exactly the hold-out horizon, no extra step
        show_warnings=False
    )
    mape_ = mape(y_test, y_pred)
    mae_ = mae(y_test, y_pred)
    mapes_prophet[ticker] = mape_
    maes_prophet[ticker] = mae_
    print(f"Ticker: {ticker}, MAPE: {mape_:.2f}, MAE: {mae_:.2f}")
    print("*" * 50)

Ticker: AAPL, MAPE: 10.18, MAE: 23.60
**************************************************
Ticker: ACN, MAPE: 12.60, MAE: 39.40
**************************************************
Ticker: ADBE, MAPE: 47.09, MAE: 215.19
**************************************************
Ticker: ADI, MAPE: 17.42, MAE: 38.45
**************************************************
Ticker: ADSK, MAPE: 8.58, MAE: 23.96
**************************************************
Ticker: AKAM, MAPE: 34.12, MAE: 30.81
**************************************************
Ticker: AMAT, MAPE: 27.43, MAE: 45.54
**************************************************
Ticker: AMD, MAPE: 53.57, MAE: 60.85
**************************************************
Ticker: ANET, MAPE: 19.79, MAE: 18.46
**************************************************
Ticker: ANSS, MAPE: 8.56, MAE: 27.81
**************************************************
Ticker: APH, MAPE: 16.02, MAE: 10.90
**************************************************
Ticker: AVGO, MAPE: 13.82,

In [15]:
# Mean MAPE and mean MAE of the Prophet models over all tickers.
tuple(np.mean(list(scores.values())) for scores in (mapes_prophet, maes_prophet))

(29.977753789030036, 53.273638107463555)

## Theta model

In [16]:
# Fit one AutoTheta model per ticker on the close price only.
models_theta = {}

for ticker in data.keys():
    print(f"Training model for {ticker}")
    # perf_counter() is a monotonic clock intended for measuring elapsed time.
    start = time.perf_counter()
    # Loop-local name, so the global `model` is not overwritten by the last
    # per-ticker Theta model.
    theta_model = AutoTheta(
        season_length=30
    )

    theta_model.fit(
        series=train[ticker]['close']
    )

    end = time.perf_counter()
    print(f"Model for {ticker} trained in {end - start:.2f} seconds")
    print("*" * 50)

    models_theta[ticker] = theta_model

Training model for AAPL
Model for AAPL trained in 0.88 seconds
**************************************************
Training model for ACN
Model for ACN trained in 0.76 seconds
**************************************************
Training model for ADBE
Model for ADBE trained in 1.96 seconds
**************************************************
Training model for ADI
Model for ADI trained in 1.36 seconds
**************************************************
Training model for ADSK
Model for ADSK trained in 0.79 seconds
**************************************************
Training model for AKAM
Model for AKAM trained in 0.69 seconds
**************************************************
Training model for AMAT
Model for AMAT trained in 1.36 seconds
**************************************************
Training model for AMD
Model for AMD trained in 1.14 seconds
**************************************************
Training model for ANET
Model for ANET trained in 1.22 seconds
*******************************

In [17]:
# Score every Theta model on its ticker's hold-out window.
mapes_theta = {}
maes_theta = {}

for ticker in data.keys():
    # test[ticker] starts with the last training observation, which is not part
    # of the hold-out window; drop it so targets and horizon line up.
    y_test = test[ticker]['close'][1:]
    y_pred = models_theta[ticker].predict(
        n=len(y_test),  # forecast exactly the hold-out horizon, no extra step
        show_warnings=False
    )
    mape_ = mape(y_test, y_pred)
    mae_ = mae(y_test, y_pred)
    mapes_theta[ticker] = mape_
    maes_theta[ticker] = mae_
    print(f"Ticker: {ticker}, MAPE: {mape_:.2f}, MAE: {mae_:.2f}")
    print("*" * 50)

Ticker: AAPL, MAPE: 18.69, MAE: 42.56
**************************************************
Ticker: ACN, MAPE: 9.16, MAE: 31.58
**************************************************
Ticker: ADBE, MAPE: 13.48, MAE: 59.17
**************************************************
Ticker: ADI, MAPE: 9.92, MAE: 21.79
**************************************************
Ticker: ADSK, MAPE: 17.34, MAE: 48.64
**************************************************
Ticker: AKAM, MAPE: 10.35, MAE: 8.73
**************************************************
Ticker: AMAT, MAPE: 15.95, MAE: 27.52
**************************************************
Ticker: AMD, MAPE: 23.58, MAE: 26.88
**************************************************
Ticker: ANET, MAPE: 25.10, MAE: 24.67
**************************************************
Ticker: ANSS, MAPE: 3.64, MAE: 11.77
**************************************************
Ticker: APH, MAPE: 8.16, MAE: 5.67
**************************************************
Ticker: AVGO, MAPE: 26.93, MAE:

In [18]:
# Mean MAPE and mean MAE of the Theta models over all tickers.
tuple(np.mean(list(scores.values())) for scores in (mapes_theta, maes_theta))

(17.499806532961372, 39.930300327744135)

## Linear Regression

### Local model

In [19]:
# Fit one linear-regression forecaster per ticker ("local" models).
models_lr = {}

for ticker in data.keys():
    print(f"Training model for {ticker}")
    # perf_counter() is a monotonic clock intended for measuring elapsed time.
    start = time.perf_counter()
    # Loop-local name, so the global `model` is not overwritten by the last
    # per-ticker regression model.
    local_lr = LinearRegressionModel(
        lags=15,
        lags_past_covariates=1,
        lags_future_covariates=[0],
        output_chunk_length=1,
        random_state=42,
        add_encoders={
            'datetime_attribute': {
                'future': ['month', 'day', 'dayofweek', 'dayofyear', 'quarter', 'year'] # automatically generate future covariates from time series
            }
        }
    )

    # Target is the close price; the remaining price/volume columns are past covariates.
    local_lr.fit(
        train[ticker]['close'],
        past_covariates=train[ticker][['high', 'low', 'volume', 'open']],
    )

    end = time.perf_counter()
    print(f"Model for {ticker} trained in {end - start:.2f} seconds")
    print("*" * 50)

    models_lr[ticker] = local_lr

Training model for AAPL
Model for AAPL trained in 0.14 seconds
**************************************************
Training model for ACN
Model for ACN trained in 0.13 seconds
**************************************************
Training model for ADBE
Model for ADBE trained in 0.13 seconds
**************************************************
Training model for ADI
Model for ADI trained in 0.13 seconds
**************************************************
Training model for ADSK
Model for ADSK trained in 0.13 seconds
**************************************************
Training model for AKAM
Model for AKAM trained in 0.13 seconds
**************************************************
Training model for AMAT
Model for AMAT trained in 0.13 seconds
**************************************************
Training model for AMD
Model for AMD trained in 0.13 seconds
**************************************************
Training model for ANET
Model for ANET trained in 0.13 seconds
*******************************

In [20]:
# Score every local linear-regression model on its ticker's hold-out window.
# NOTE(review): the recorded run shows an exploding error for AMD; presumably the
# recursive forecast diverges on the unscaled inputs - confirm and consider scaling.
mapes_lr = {}
maes_lr = {}

for ticker in data.keys():
    # test[ticker] starts with the last training observation (kept as context for
    # the lagged covariates), so drop it to obtain the real hold-out targets.
    y_test = test[ticker]['close'][1:]
    y_pred = models_lr[ticker].predict(
        n=len(y_test),  # forecast exactly the hold-out horizon, no extra step
        past_covariates=test[ticker][['high', 'low', 'volume', 'open']],
        show_warnings=False
    )
    mape_ = mape(y_test, y_pred)
    mae_ = mae(y_test, y_pred)
    mapes_lr[ticker] = mape_
    maes_lr[ticker] = mae_
    print(f"Ticker: {ticker}, MAPE: {mape_:.2f}, MAE: {mae_:.2f}")
    print("*" * 50)

Ticker: AAPL, MAPE: 2.16, MAE: 4.71
**************************************************
Ticker: ACN, MAPE: 58.85, MAE: 182.03
**************************************************
Ticker: ADBE, MAPE: 2.62, MAE: 12.27
**************************************************
Ticker: ADI, MAPE: 2.53, MAE: 5.40
**************************************************
Ticker: ADSK, MAPE: 4.97, MAE: 13.30
**************************************************
Ticker: AKAM, MAPE: 2.82, MAE: 2.49
**************************************************
Ticker: AMAT, MAPE: 20.11, MAE: 33.90
**************************************************
Ticker: AMD, MAPE: 28468403915945.19, MAE: 26823372586897.35
**************************************************
Ticker: ANET, MAPE: 14.40, MAE: 13.18
**************************************************
Ticker: ANSS, MAPE: 1.71, MAE: 5.53
**************************************************
Ticker: APH, MAPE: 2.86, MAE: 1.92
**************************************************
Ticker: AVGO

In [21]:
# Mean MAPE and mean MAE of the local linear-regression models over all tickers.
# NOTE(review): in the recorded run these means are dominated by the AMD outlier.
tuple(np.mean(list(scores.values())) for scores in (mapes_lr, maes_lr))

(412718251845.9619, 388892665265.8363)

### Global model

In [22]:
# Fit a single linear-regression forecaster on all tickers at once ("global" model).
print("Training global model")
# perf_counter() is a monotonic clock intended for measuring elapsed time.
start = time.perf_counter()
model = LinearRegressionModel(
    lags=15,
    lags_past_covariates=1,
    lags_future_covariates=[0],
    output_chunk_length=1,
    random_state=42,
    add_encoders={
        'datetime_attribute': {
            'future': ['month', 'day', 'dayofweek', 'dayofyear', 'quarter', 'year'] # automatically generate future covariates from time series
        }
    }
)

# One target series and one past-covariate series per ticker, in matching order.
model.fit(
    [ts['close'] for ts in train.values()],
    past_covariates=[ts[['high', 'low', 'volume', 'open']] for ts in train.values()],
)

end = time.perf_counter()
print(f"Global model trained in {end - start:.2f} seconds")
print("*" * 50)

Training global model
Global model trained in 8.43 seconds
**************************************************


In [23]:
# Score the global linear-regression model ticker by ticker.
# Note: this rebinds `mapes` / `maes`, replacing the scores stored by earlier cells.
mapes = {}
maes = {}

for ticker in data.keys():
    # test[ticker] starts with the last training observation (kept as context for
    # the lagged covariates), so drop it to obtain the real hold-out targets.
    y_test = test[ticker]['close'][1:]
    y_pred = model.predict(
        n=len(y_test),  # forecast exactly the hold-out horizon, no extra step
        series=train[ticker]['close'],  # the global model must be told which series to continue
        past_covariates=test[ticker][['high', 'low', 'volume', 'open']],
        show_warnings=False
    )
    mape_ = mape(y_test, y_pred)
    mae_ = mae(y_test, y_pred)

    mapes[ticker] = mape_
    maes[ticker] = mae_

    print(f"Global model for {ticker}, MAPE: {mape_:.2f}, MAE: {mae_:.2f}")
    print("*" * 50)

Global model for AAPL, MAPE: 3.28, MAE: 7.06
**************************************************
Global model for ACN, MAPE: 3.28, MAE: 10.64
**************************************************
Global model for ADBE, MAPE: 4.59, MAE: 21.16
**************************************************
Global model for ADI, MAPE: 3.47, MAE: 7.27
**************************************************
Global model for ADSK, MAPE: 3.09, MAE: 8.15
**************************************************
Global model for AKAM, MAPE: 4.08, MAE: 3.57
**************************************************
Global model for AMAT, MAPE: 5.15, MAE: 9.40
**************************************************
Global model for AMD, MAPE: 5.92, MAE: 7.70
**************************************************
Global model for ANET, MAPE: 6.00, MAE: 5.30
**************************************************
Global model for ANSS, MAPE: 2.12, MAE: 6.82
**************************************************
Global model for APH, MAPE: 3.28, MAE: 2.

In [24]:
# Mean MAPE and mean MAE over all tickers for the current `mapes` / `maes` dicts.
tuple(np.mean(list(scores.values())) for scores in (mapes, maes))

(4.587321981762914, 9.835426148162545)