<a href="https://colab.research.google.com/github/zq1412/StreamlitLSTMStockPrediction/blob/main/ADBE_Hyperparameter_Optimization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
#Optuna Hyperparameter Optimization
#pip install darts optuna yfinance

import numpy as np
import optuna
import torch
from optuna.integration import PyTorchLightningPruningCallback
from pytorch_lightning.callbacks import EarlyStopping
from sklearn.preprocessing import MaxAbsScaler

from darts import TimeSeries
from darts.metrics import smape, mape
from darts.models import RNNModel
from darts.utils.likelihood_models import GaussianLikelihood
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from darts.models import forecasting
from darts.dataprocessing.transformers import Scaler, MissingValuesFiller

from datetime import datetime
from datetime import timedelta
from dateutil.relativedelta import relativedelta

import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

from darts.utils.timeseries_generation import datetime_attribute_timeseries
from darts.models import forecasting
from darts.dataprocessing.transformers import Scaler, MissingValuesFiller


import yfinance as yf



## load data and preprocess

#Getting date from one year ago
one_year_ago = datetime.now() - relativedelta(years=1)
one_year_ago = one_year_ago.strftime("%Y-%m-%d")

#Getting date today
today = datetime.now()
today = today.strftime("%Y-%m-%d")

#Adding one day to the date one year ago
one_year_ago_plus_one = datetime.now() - relativedelta(years=1) + timedelta(days=1)
one_year_ago_plus_one = one_year_ago_plus_one.strftime("%Y-%m-%d")

#Accessing YFinance
ticker= yf.Ticker("ADBE")

#Getting pandas dataframe of stock data from one year ago
df = ticker.history(start=one_year_ago, end=today, interval="1d")
idx = pd.date_range(start=one_year_ago_plus_one, end=today)


#Making index into Datetime index
df.index = pd.DatetimeIndex(df.index)
df.index = df.index.tz_localize(None)

#Reindexing dataframe to fill in missing dates due to stock market closing on weekends
df = df.reindex(idx, method = 'pad')

#Put dataframe into a Darts "Timeseries" object so that data can be fed into a Darts forecasting model. For more information on Timeseries objects, please look here: https://unit8co.github.io/darts/generated_api/darts.timeseries.html
series = TimeSeries.from_dataframe(df)

#Drop all columns besides "Close" Column
adj_series = series.drop_columns(['Open', 'High', 'Low', 'Volume', 'Dividends', 'Stock Splits'])

# split in train / validation (note: in practice we would also need a test set)
VAL_LEN = 36
TEST_LEN = 37
train, val, test = adj_series[0:292], adj_series[292:-TEST_LEN], adj_series[-TEST_LEN:]

# scale
scaler = Scaler(MaxAbsScaler())
train = scaler.fit_transform(train)
val = scaler.transform(val)

# define objective function
def objective(trial):
    # select input and output chunk lengths
    input_chunk_length = trial.suggest_int("input_chunk_length", 7, 28)


    # Other hyperparameters
    batch_size = trial.suggest_int("batch_size", 16, 64)
    n_rnn_layers = trial.suggest_int("n_rnn_layers", 2, 5)
    hidden_dim = trial.suggest_int("hidden_dim", 1, 10)
    dropout = trial.suggest_float("dropout", 0.0, 0.4)
    n_epochs = trial.suggest_float("n_epochs", 10, 100 )


    # throughout training we'll monitor the validation loss for both pruning and early stopping
    pruner = PyTorchLightningPruningCallback(trial, monitor="val_loss")
    early_stopper = EarlyStopping("val_loss", min_delta=0.001, patience=3, verbose=True)
    callbacks = [pruner, early_stopper]

    # detect if a GPU is available
    if torch.cuda.is_available():
        pl_trainer_kwargs = {
            "accelerator": "gpu",
            "gpus": -1,
            "auto_select_gpus": True,
            "callbacks": callbacks,
        }
        num_workers = 4
    else:
        pl_trainer_kwargs = {"callbacks": callbacks}
        num_workers = 0


    # reproducibility
    torch.manual_seed(42)

    # build the LSTM model, set log_tensorboard = False, random_state was = 42
    my_model = RNNModel(
    model="LSTM",
    n_rnn_layers = n_rnn_layers, # Number of LSTM layers
    hidden_dim=hidden_dim,
    dropout=dropout,
    batch_size=batch_size,
    n_epochs=50,
    optimizer_kwargs={"lr": 1e-3}, #learning rate
    model_name="Stock_Forecast",
    training_length=30,
    input_chunk_length=input_chunk_length,
    force_reset=True,
    save_checkpoints=True,
    )

    # train the model
    my_model.fit(
        series=train,
        val_series=val,
        num_loader_workers=num_workers,
    )

    # reload best model over course of training
    my_model = RNNModel.load_from_checkpoint("Stock_Forecast")

    # Evaluate how good it is on the validation set, using sMAPE
    preds = my_model.predict(series=train, n=len(test))
    smapes = smape(test, preds, n_jobs=-1, verbose=True)
    smape_val = np.mean(smapes)

    return smape_val if smape_val != np.nan else float("inf")


# for convenience, print some optimization trials information
def print_callback(study, trial):
    print(f"Current value: {trial.value}, Current params: {trial.params}")
    print(f"Best value: {study.best_value}, Best params: {study.best_trial.params}")


# optimize hyperparameters by minimizing the sMAPE on the validation set
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=100, callbacks=[print_callback])

Collecting darts
  Downloading darts-0.24.0-py3-none-any.whl (693 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/693.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m693.9/693.9 kB[0m [31m28.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting optuna
  Downloading optuna-3.2.0-py3-none-any.whl (390 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/390.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m390.6/390.6 kB[0m [31m30.4 MB/s[0m eta [36m0:00:00[0m
Collecting catboost>=1.0.6 (from darts)
  Downloading catboost-1.2-cp310-cp310-manylinux2014_x86_64.whl (98.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.6/98.6 MB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
Collecting nfoursid>=1.0.0 (from darts)
  Downloading nfoursid-1.0.1-py3-none-any.whl (16 kB)
Collecting pmdarima>=1.8.0 (from darts)
  Downloading

[I 2023-06-27 18:52:47,326] A new study created in memory with name: no-name-eac0a3d2-4953-4bad-b480-b8309790f576
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | rnn           | LSTM             | 1.2 K 
4 | V             | Linear           | 8     
---------------------------------------------------
1.2 K     Trainable params
0         Non-trainable params
1.2 K     Total params
0.005     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Predicting: 0it [00:00, ?it/s]


100%|██████████| 1/1 [00:00<00:00, 56.83it/s]
[I 2023-06-27 18:52:54,786] Trial 0 finished with value: 199.90654381835674 and parameters: {'input_chunk_length': 7, 'batch_size': 50, 'n_rnn_layers': 3, 'hidden_dim': 7, 'dropout': 0.35165689625341867, 'n_epochs': 83.64077337409636}. Best is trial 0 with value: 199.90654381835674.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | rnn           | LSTM             | 2.1 K 
4 | V             | 

Current value: 199.90654381835674, Current params: {'input_chunk_length': 7, 'batch_size': 50, 'n_rnn_layers': 3, 'hidden_dim': 7, 'dropout': 0.35165689625341867, 'n_epochs': 83.64077337409636}
Best value: 199.90654381835674, Best params: {'input_chunk_length': 7, 'batch_size': 50, 'n_rnn_layers': 3, 'hidden_dim': 7, 'dropout': 0.35165689625341867, 'n_epochs': 83.64077337409636}


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")
ERROR:darts.models.forecasting.torch_forecasting_model:FileNotFoundError: There is no file matching prefix best-* in /content/darts_logs/Stock_Forecast/checkpoints
[W 2023-06-27 18:52:55,484] Trial 1 failed with parameters: {'input_chunk_length': 15, 'batch_size': 63, 'n_rnn_layers': 4, 'hidden_dim': 8, 'dropout': 0.2908805785210279, 'n_epochs': 19.661838521014047} because of the following error: FileNotFoundError('There is no file matching prefix best-* in /content/darts_logs/Stock_Forecast/checkpoints').
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "<ipython-input-2-17244945559b>", line 144, in objective
    my_model = RNNModel.load_from_checkpoint("Stock_Forecast")
  File "/usr/local/lib/python3.10/dist-packages/darts/models/forecasting/torch_forecasting_model.py", line 1679,

FileNotFoundError: ignored