# Introduction
1. In this tutorial, we will be tuning hyperparameters for Stable baselines3 models using Optuna.
2. The default model hyperparamters may not be adequate for your custom portfolio or custom state-space. Reinforcement learning algorithms are sensitive to hyperparamters, hence tuning is an important step.
3. Hyperparamters are tuned based on an objective, which needs to be maximized or minimized. Here we tuned our hyperparamters to maximize the Sharpe Ratio 

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
#Importing the libraries
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# matplotlib.use('Agg')
import datetime
import optuna
%matplotlib inline
from finrl import config
from finrl import config_tickers
# from optuna.integration import PyTorchLightningPruningCallback

# Changed finrl_meta to meta
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.meta.env_stock_trading.env_stocktrading_np import StockTradingEnv as StockTradingEnv_numpy
# from finrl.agents.stablebaselines3.models import DRLAgent
from finrl.agents.rllib.dreamer import DRLAgent as DRLAgent
from finrl.meta.data_processor import DataProcessor
import joblib
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline
import ray
from pprint import pprint

# Not needed for local run
# import sys
# sys.path.append("../FinRL-Library")

import itertools

  from .autonotebook import tqdm as notebook_tqdm
2024-11-29 15:13:53,940	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2024-11-29 15:13:54,311	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
  gym.logger.warn(
  gym.logger.warn(
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


In [3]:
import torch
if torch.cuda.is_available():
    print("GPU is available!")
    device = torch.device("cuda")  # Set the device to GPU
elif torch.backends.mps.is_available():
    print("MPS is available!")
    device = torch.device("mps")
else:
    print("GPU is not available. Using CPU.")
    device = torch.device("cpu")  # Set the device to CPU

MPS is available!


In [4]:
import os
if not os.path.exists("./" + config.DATA_SAVE_DIR):
    os.makedirs("./" + config.DATA_SAVE_DIR)  # datasets
if not os.path.exists("./" + config.TRAINED_MODEL_DIR):
    os.makedirs("./" + config.TRAINED_MODEL_DIR)  # trained_models
if not os.path.exists("./" + config.TENSORBOARD_LOG_DIR):
    os.makedirs("./" + config.TENSORBOARD_LOG_DIR)  # tensorboard_log
if not os.path.exists("./" + config.RESULTS_DIR):
    os.makedirs("./" + config.RESULTS_DIR)  # results

## Collecting data and preprocessing

In [5]:
#Custom ticker list dataframe download
# ticker_list = config_tickers.DOW_30_TICKER
# df = YahooDownloader(start_date = '2009-01-01',
#                      end_date = '2021-10-01',
#                      ticker_list = ticker_list).fetch_data()

df_list = []
TRAIN_START_DATE = '2009-01-01'
TEST_END_DATE = '2021-10-01'
# TEST_END_DATE = '2010-10-01'
for ticker in config_tickers.DOW_30_TICKER:
    # Fetch data for each ticker
    portfolio_raw_df = YahooDownloader(start_date=TRAIN_START_DATE,
                                       end_date=TEST_END_DATE,
                                       ticker_list=[ticker]).fetch_data()
    # Append the fetched DataFrame to the list
    df_list.append(portfolio_raw_df)

# Concatenate all DataFrames row-wise
df = pd.concat(df_list, axis=0, ignore_index=True)

[*********************100%***********************]  1 of 1 completed


Shape of DataFrame:  (3209, 8)


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Shape of DataFrame:  (3209, 8)



[*********************100%***********************]  1 of 1 completed


Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Shape of DataFrame:  (3209, 8)





Shape of DataFrame:  (3209, 8)


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)


[*********************100%***********************]  1 of 1 completed


Shape of DataFrame:  (3209, 8)


[*********************100%***********************]  1 of 1 completed


Shape of DataFrame:  (3209, 8)


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)


[*********************100%***********************]  1 of 1 completed


Shape of DataFrame:  (3209, 8)


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Shape of DataFrame:  (3209, 8)





Shape of DataFrame:  (3209, 8)


[*********************100%***********************]  1 of 1 completed


Shape of DataFrame:  (3209, 8)


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)


[*********************100%***********************]  1 of 1 completed


Shape of DataFrame:  (3209, 8)


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Shape of DataFrame:  (3209, 8)


[*********************100%***********************]  1 of 1 completed


Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (640, 8)





In [6]:
print(df.head())

         date       open       high        low      close    volume  tic  day
0  2009-01-02  15.014070  19.330000  19.520000  18.570000  10955700  AXP    4
1  2009-01-05  15.495641  19.950001  20.240000  19.200001  16019200  AXP    0
2  2009-01-06  16.365566  21.070000  21.379999  20.299999  13820200  AXP    1
3  2009-01-07  15.676160  20.010000  20.719999  20.530001  15699900  AXP    2
4  2009-01-08  15.699673  20.040001  20.170000  19.799999  12255100  AXP    3


In [7]:
#You can add technical indicators and turbulence factor to dataframe
#Just set the use_technical_indicator=True, use_vix=True and use_turbulence=True
fe = FeatureEngineer(
                    use_technical_indicator=True,
                    tech_indicator_list = config.INDICATORS,
                    use_vix=True,
                    use_turbulence=True,
                    user_defined_feature = False)

processed = fe.preprocess_data(df)

[*********************100%***********************]  1 of 1 completed

Successfully added technical indicators
Shape of DataFrame:  (3208, 8)





Successfully added vix
Successfully added turbulence index


In [8]:
print(processed.head().to_markdown())

|    | date       |    open |     high |      low |    close |    volume | tic   |   day |   macd |   boll_ub |   boll_lb |   rsi_30 |   cci_30 |   dx_30 |   close_30_sma |   close_60_sma |   vix |   turbulence |
|---:|:-----------|--------:|---------:|---------:|---------:|----------:|:------|------:|-------:|----------:|----------:|---------:|---------:|--------:|---------------:|---------------:|------:|-------------:|
|  0 | 2009-01-02 |  2.734  |  3.24107 |  3.25143 |  3.06714 | 746015200 | AAPL  |     4 |      0 |   3.56552 |   2.82912 |      100 |  66.6667 |     100 |        3.06714 |        3.06714 | 39.58 |            0 |
|  1 | 2009-01-02 | 41.4844 | 58.99    | 59.08    | 58.59    |   6547900 | AMGN  |     4 |      0 |   3.56552 |   2.82912 |      100 |  66.6667 |     100 |       58.59    |       58.59    | 39.58 |            0 |
|  2 | 2009-01-02 | 15.0141 | 19.33    | 19.52    | 18.57    |  10955700 | AXP   |     4 |      0 |   3.56552 |   2.82912 |      100 |  66.6667 |   

In [9]:
list_ticker = processed["tic"].unique().tolist()
list_date = list(pd.date_range(processed['date'].min(),processed['date'].max()).astype(str))
combination = list(itertools.product(list_date,list_ticker))

processed_full = pd.DataFrame(combination,columns=["date","tic"]).merge(processed,on=["date","tic"],how="left")
processed_full = processed_full[processed_full['date'].isin(processed['date'])]
processed_full = processed_full.sort_values(['date','tic'])

processed_full = processed_full.fillna(0)
processed_full.sort_values(['date','tic'],ignore_index=True).head(5)

processed_full.to_csv('processed_full.csv')

In [10]:
train = data_split(processed_full, '2009-01-01','2020-07-01')
trade = data_split(processed_full, '2020-05-01','2021-10-01')
print(len(train))
print(len(trade))

83897
10353


In [11]:
stock_dimension = len(train.tic.unique())
state_space = 1 + 2*stock_dimension + len(config.INDICATORS) * stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 29, State Space: 291


In [12]:
#Defining the environment kwargs
num_stock_shares = [0] * stock_dimension
# From: https://github.com/AI4Finance-Foundation/FinRL/issues/540\
buy_cost_list = sell_cost_list = [0.001] * stock_dimension
num_stock_shares = [0] * stock_dimension
env_kwargs = {
    "hmax": 100, 
    "initial_amount": 1000000,
    "num_stock_shares": num_stock_shares,  # added argument
    "buy_cost_pct": buy_cost_list,  # changed to list
    "sell_cost_pct": sell_cost_list,  # changed to list
    "state_space": state_space, 
    "stock_dim": stock_dimension, 
    "tech_indicator_list": config.INDICATORS, 
    "action_space": stock_dimension, 
    "reward_scaling": 1e-4
    
}
#Instantiate the training gym compatible environment
e_train_gym = StockTradingEnv(df = train, **env_kwargs)

In [13]:
from ray.tune.registry import register_env
from dataclasses import dataclass

def env_creator(env_config):
    return StockTradingEnv(df = env_config["df"], **env_config["env_kwargs"])

register_env("finrl_stock_trading", env_creator)

In [None]:
#Instantiate the training environment
# Also instantiate our training gent
# print(type(e_train_gym))
agent = DRLAgent(env="finrl_stock_trading", env_config={"df": train, "env_kwargs": env_kwargs})

2024-11-29 15:16:47,393	INFO worker.py:1810 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8266 [39m[22m
[36m(pid=45194)[0m   gym.logger.warn(
[36m(pid=45194)[0m   gym.logger.warn(
[36m(pid=45194)[0m   logger.warn(
[36m(pid=45194)[0m   logger.warn(f"{pre} is not within the observation space.")
2024-11-29 15:16:54,375	ERROR actor_manager.py:804 -- Ray error (The actor died because of an error raised in its creation task, [36mray::DreamerV3EnvRunner.__init__()[39m (pid=45194, ip=127.0.0.1, actor_id=8db112d93f706e185f249c3f01000000, repr=<ray.rllib.algorithms.dreamerv3.utils.env_runner.DreamerV3EnvRunner object at 0x395579900>)
  File "/Users/maxwill/miniconda3/envs/finrl/lib/python3.10/site-packages/ray/rllib/algorithms/dreamerv3/utils/env_runner.py", line 199, in __init__
    self.module = self.multi_rl_module_spec.build()[DEFAULT_MODULE_ID]
  File "/Users/maxwill/miniconda3/envs/finrl/lib/python3.10/site-packages/ray/rllib/core/rl_module/multi_rl_

IndexError: list index out of range

[33m(raylet)[0m [2024-11-29 15:17:17,596 E 45182 87701746] (raylet) file_system_monitor.cc:116: /tmp/ray/session_2024-11-29_15-16-46_324959_37300 is over 95% full, available space: 5.7914 GB; capacity: 460.432 GB. Object creation will fail if spilling is required.
[33m(raylet)[0m [2024-11-29 15:17:27,689 E 45182 87701746] (raylet) file_system_monitor.cc:116: /tmp/ray/session_2024-11-29_15-16-46_324959_37300 is over 95% full, available space: 5.79105 GB; capacity: 460.432 GB. Object creation will fail if spilling is required.
[33m(raylet)[0m [2024-11-29 15:17:37,780 E 45182 87701746] (raylet) file_system_monitor.cc:116: /tmp/ray/session_2024-11-29_15-16-46_324959_37300 is over 95% full, available space: 5.79089 GB; capacity: 460.432 GB. Object creation will fail if spilling is required.
[33m(raylet)[0m [2024-11-29 15:17:47,872 E 45182 87701746] (raylet) file_system_monitor.cc:116: /tmp/ray/session_2024-11-29_15-16-46_324959_37300 is over 95% full, available space: 5.79028 GB; ca

In [None]:
#Instantiate the trading environment
e_trade_gym = StockTradingEnv(df = trade, turbulence_threshold = None, **env_kwargs)

## Tuning hyperparameters using Optuna
1. Go to this [link](https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/utils/hyperparams_opt.py), you will find all possible hyperparamters to tune for all the models.
2. For your model, grab those hyperparamters which you want to optimize and then return a dictionary of hyperparamters.
3. There is a feature in Optuna called as hyperparamters importance, you can point out those hyperparamters which are important for tuning.
4. By default Optuna use [TPESampler](https://www.youtube.com/watch?v=tdwgR1AqQ8Y) for sampling hyperparamters from the search space. 

In [None]:
def sample_ddpg_params(trial:optuna.Trial):
  # Size of the replay buffer
  buffer_size = trial.suggest_categorical("buffer_size", [int(1e4), int(1e5), int(1e6)])
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  batch_size = trial.suggest_categorical("batch_size", [32, 64, 128, 256, 512])
  
  return {"buffer_size": buffer_size,
          "learning_rate":learning_rate,
          "batch_size":batch_size}

In [None]:
def sample_a2c_params(trial:optuna.Trial):
    # learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1, log=True)  # fix deprecation
    n_steps = trial.suggest_categorical(
        "n_steps", [8, 16, 32, 64, 128, 256, 512, 1024, 2048]
    )
    # ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)
    ent_coef = trial.suggest_float("ent_coef", 0.00000001, 0.1, log=True)  # fix deprecation
    # vf_coef = trial.suggest_uniform("vf_coef", 0, 1)
    return {
        "learning_rate": learning_rate,
        "n_steps": n_steps,
        "ent_coef": ent_coef
    }


In [None]:
#Calculate the Sharpe ratio
#This is our objective for tuning
def calculate_sharpe(df):
    df['daily_return'] = df['account_value'].pct_change(1)
    if df['daily_return'].std() != 0:
        sharpe = (252 ** 0.5) * df['daily_return'].mean()/ \
            df['daily_return'].std()
        return sharpe
    else:
        return 0

## Callbacks
1. The callback will terminate if the improvement margin is below certain point
2. It will terminate after certain number of trial_number are reached, not before that
3. It will hold its patience to reach the threshold

In [None]:
class LoggingCallback:
    def __init__(self, threshold: int, trial_number: int, patience: int):
        """
        threshold:int tolerance for increase in sharpe ratio
        trial_number: int Prune after minimum number of trials
        patience: int patience for the threshold
        """
        self.threshold = threshold
        self.trial_number = trial_number
        self.patience = patience
        self.cb_list = []  # Trials list for which threshold is reached

    def __call__(self, study: optuna.study, frozen_trial: optuna.Trial):
        # Setting the best value in the current trial
        study.set_user_attr("previous_best_value", study.best_value)

        # Checking if the minimum number of trials have pass
        if frozen_trial.number > self.trial_number:
            previous_best_value = study.user_attrs.get("previous_best_value", None)
            # Checking if the previous and current objective values have the same sign
            if previous_best_value * study.best_value >= 0:
                # Checking for the threshold condition
                if abs(previous_best_value - study.best_value) < self.threshold:
                    self.cb_list.append(frozen_trial.number)
                    # If threshold is achieved for the patience amount of time
                    if len(self.cb_list) > self.patience:
                        print("The study stops now...")
                        print(
                            "With number",
                            frozen_trial.number,
                            "and value ",
                            frozen_trial.value,
                        )
                        print(
                            "The previous and current best values are {} and {} respectively".format(
                                previous_best_value, study.best_value
                            )
                        )
                        study.stop()

A2C with optuna

In [None]:
from IPython.display import clear_output
import sys   

os.makedirs("A2C_optuna_models",exist_ok=True)

def objective(trial: optuna.Trial):  # Optuna objective
    hyperparameters = sample_a2c_params(trial)
    model_a2c = agent.get_model("a2c", model_kwargs=hyperparameters)  # TODO: Could try out self-implemented A2C
    trained_model = agent.train_model(model=model_a2c, tb_log_name="a2c", total_timesteps=10000)  # train stablebaselines3's A2C
    trained_model.save('A2C_optuna_models/a2c_{}.pth'.format(trial.number))  # save model with trial number as ID
    # clear_output(wait=True)  # This will keep only the last trial tested
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=trained_model,
        environment=e_trade_gym
    )
    sharpe = calculate_sharpe(df_account_value)

    return sharpe

In [None]:
study = joblib.load("final_a2c_study__.pkl")

In [None]:
# Create a study object and specify the direction as 'maximize'
# As you want to maximize sharpe
# Pruner stops not promising iterations
# Use a pruner, else you will get error related to divergence of model
# You can also use Multivariate samplere
# sampler = optuna.samplers.TPESampler(multivarite=True,seed=42)

sampler = optuna.samplers.TPESampler(seed=1234)
study = optuna.create_study(
    study_name='a2c_study',
    direction='maximize',
    sampler=sampler,
    pruner=optuna.pruners.HyperbandPruner(),
)
logging_callback = LoggingCallback(threshold=1e-5, trial_number=5, patience=30)
study.optimize(
    objective,
    n_trials=30,
    catch=(ValueError,),
    callbacks=[logging_callback],
)

[I 2024-11-28 18:53:28,813] A new study created in memory with name: a2c_study


{'learning_rate': 9.069790423538583e-05, 'n_steps': 1024, 'ent_coef': 3.1968399196034624e-06}
Using cpu device


[W 2024-11-28 18:53:53,323] Trial 0 failed with parameters: {'learning_rate': 9.069790423538583e-05, 'n_steps': 1024, 'ent_coef': 3.1968399196034624e-06} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/Users/maxwill/miniconda3/envs/finrl/lib/python3.10/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "/var/folders/j6/50x0w2917vq9tgkn3rsv11jr0000gn/T/ipykernel_41491/1794347763.py", line 9, in objective
    trained_model = agent.train_model(model=model_a2c, tb_log_name="a2c", total_timesteps=10000)  # train stablebaselines3's A2C
  File "/Users/maxwill/Tech/FinRL-test/finrl/agents/stablebaselines3/models.py", line 117, in train_model
    model = model.learn(
  File "/Users/maxwill/miniconda3/envs/finrl/lib/python3.10/site-packages/stable_baselines3/a2c/a2c.py", line 201, in learn
    return super().learn(
  File "/Users/maxwill/miniconda3/envs/finrl/lib/python3.10/site-packages/stab

KeyboardInterrupt: 

In [None]:
joblib.dump(study, "final_a2c_study__.pkl")

['final_a2c_study__.pkl']

DDPG with optuna

In [None]:
from IPython.display import clear_output
import sys   

os.makedirs("DDPG_optuna_models",exist_ok=True)

def objective(trial:optuna.Trial):
  #Trial will suggest a set of hyperparamters from the specified range
  hyperparameters = sample_ddpg_params(trial)
  model_ddpg = agent.get_model("ddpg",model_kwargs = hyperparameters )
  #You can increase it for better comparison
  trained_ddpg = agent.train_model(model=model_ddpg,
                                  tb_log_name="ddpg" ,
                             total_timesteps=50000)
  trained_ddpg.save('models/ddpg_{}.pth'.format(trial.number))
  clear_output(wait=True)
  #For the given hyperparamters, determine the account value in the trading period
  df_account_value, df_actions = DRLAgent.DRL_prediction(
    model=trained_ddpg, 
    environment = e_trade_gym)
  #Calculate sharpe from the account value
  sharpe = calculate_sharpe(df_account_value)

  return sharpe

#Create a study object and specify the direction as 'maximize'
#As you want to maximize sharpe
#Pruner stops not promising iterations
#Use a pruner, else you will get error related to divergence of model
#You can also use Multivariate samplere
#sampler = optuna.samplers.TPESampler(multivarite=True,seed=42)
sampler = optuna.samplers.TPESampler(seed=42)
study = optuna.create_study(study_name="ddpg_study",direction='maximize',
                            sampler = sampler, pruner=optuna.pruners.HyperbandPruner())

logging_callback = LoggingCallback(threshold=1e-5,patience=30,trial_number=5)
#You can increase the n_trials for a better search space scanning
study.optimize(objective, n_trials=30,catch=(ValueError,),callbacks=[logging_callback])

In [None]:
joblib.dump(study, "final_ddpg_study__.pkl")

['final_ddpg_study__.pkl']

In [None]:
#Get the best hyperparamters
print('Hyperparameters after tuning',study.best_params)
print('Hyperparameters before tuning',config.A2C_PARAMS)

Hyperparameters after tuning {'learning_rate': 0.0720661604820144, 'n_steps': 2048, 'ent_coef': 4.009097084190781e-05}
Hyperparameters before tuning {'n_steps': 5, 'ent_coef': 0.01, 'learning_rate': 0.0007}


In [None]:
study.best_trial

FrozenTrial(number=16, state=TrialState.COMPLETE, values=[1.7893212360303676], datetime_start=datetime.datetime(2024, 11, 6, 14, 32, 12, 340339), datetime_complete=datetime.datetime(2024, 11, 6, 14, 33, 38, 233786), params={'learning_rate': 0.0720661604820144, 'n_steps': 2048, 'ent_coef': 4.009097084190781e-05}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'learning_rate': FloatDistribution(high=1.0, log=True, low=1e-05, step=None), 'n_steps': CategoricalDistribution(choices=(8, 16, 32, 64, 128, 256, 512, 1024, 2048)), 'ent_coef': FloatDistribution(high=0.1, log=True, low=1e-08, step=None)}, trial_id=16, value=None)

In [None]:
# from stable_baselines3 import DDPG
# tuned_model_ddpg = DDPG.load('models/ddpg_{}.pth'.format(study.best_trial.number),env=env_train)

from stable_baselines3 import A2C
tuned_model_a2c = A2C.load('A2C_optuna_models/a2c_{}.pth'.format(study.best_trial.number), env=env_train)

In [None]:
#Trading period account value with tuned model
df_account_value_tuned, df_actions_tuned = DRLAgent.DRL_prediction(
    model=tuned_model_a2c, 
    environment = e_trade_gym)

hit end!


In [None]:
print(df_account_value_tuned.head())
print(df_actions_tuned.head())

         date  account_value
0  2020-05-01   1.000000e+06
1  2020-05-04   9.978485e+05
2  2020-05-05   1.001751e+06
3  2020-05-06   1.003632e+06
4  2020-05-07   1.000501e+06
            AAPL  AMGN  AXP  BA  CAT  CRM  CSCO  CVX  DIS   GS  ...  MRK  \
date                                                            ...        
2020-05-01   100     0  100   0    0  100     0    0  100  100  ...    0   
2020-05-04   100     0  100   0    0  100     0    0  100  100  ...    0   
2020-05-05   100     0  100   0    0  100     0    0  100  100  ...    0   
2020-05-06   100     0  100   0    0  100     0    0  100  100  ...    0   
2020-05-07   100     0  100   0    0  100     0    0  100  100  ...    0   

            MSFT  NKE   PG  TRV  UNH  V   VZ  WBA  WMT  
date                                                    
2020-05-01     0    0  100    0  100  0  100    0  100  
2020-05-04     0    0  100    0  100  0  100    0  100  
2020-05-05     0    0  100    0  100  0  100    0  100  
2020-05-

In [None]:
#Backtesting with our pruned model
print("==============Get Backtest Results===========")
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

perf_stats_all_tuned = backtest_stats(account_value=df_account_value_tuned)
perf_stats_all_tuned = pd.DataFrame(perf_stats_all_tuned)
perf_stats_all_tuned.to_csv("./"+config.RESULTS_DIR+"/perf_stats_all_tuned_"+now+'.csv')

Annual return          0.356834
Cumulative returns     0.540800
Annual volatility      0.180128
Sharpe ratio           1.789321
Calmar ratio           3.765003
Stability              0.965409
Max drawdown          -0.094777
Omega ratio            1.360814
Sortino ratio          2.816772
Skew                        NaN
Kurtosis                    NaN
Tail ratio             1.023460
Daily value at risk   -0.021415
dtype: float64


In [None]:
#Now train with not tuned hyperaparameters
#Default config.ddpg_PARAMS
# non_tuned_model_ddpg = agent.get_model("ddpg",model_kwargs = config.DDPG_PARAMS )
# trained_ddpg = agent.train_model(model=non_tuned_model_ddpg, 
#                              tb_log_name='ddpg',
#                              total_timesteps=50000)
non_tuned_model_a2c = agent.get_model("a2c", model_kwargs=config.A2C_PARAMS)
trained_a2c = agent.train_model(model=non_tuned_model_a2c, tb_log_name='a2c', total_timesteps=10000)

{'n_steps': 5, 'ent_coef': 0.01, 'learning_rate': 0.0007}
Using cpu device
-------------------------------------
| time/                 |           |
|    fps                | 107       |
|    iterations         | 100       |
|    time_elapsed       | 4         |
|    total_timesteps    | 500       |
| train/                |           |
|    entropy_loss       | -41.5     |
|    explained_variance | -2.12     |
|    learning_rate      | 0.0007    |
|    n_updates          | 99        |
|    policy_loss        | 52.7      |
|    reward             | 0.1687497 |
|    std                | 1.01      |
|    value_loss         | 2.45      |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 113       |
|    iterations         | 200       |
|    time_elapsed       | 8         |
|    total_timesteps    | 1000      |
| train/                |           |
|    entropy_loss       | -41.5     |
|    explaine

In [None]:
df_account_value, df_actions = DRLAgent.DRL_prediction(
    model=trained_a2c, 
    environment = e_trade_gym)

hit end!


In [None]:
#Backtesting for not tuned hyperparamters
print("==============Get Backtest Results===========")
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

perf_stats_all = backtest_stats(account_value=df_account_value)
perf_stats_all = pd.DataFrame(perf_stats_all)
# perf_stats_all.to_csv("./"+config.RESULTS_DIR+"/perf_stats_all_"+now+'.csv')

Annual return          0.490339
Cumulative returns     0.759897
Annual volatility      0.253207
Sharpe ratio           1.707331
Calmar ratio           3.534638
Stability              0.896093
Max drawdown          -0.138724
Omega ratio            1.324939
Sortino ratio          2.646543
Skew                        NaN
Kurtosis                    NaN
Tail ratio             0.992514
Daily value at risk   -0.030186
dtype: float64


In [None]:
#You can see with trial, our sharpe ratio is increasing
#Certainly you can afford more number of trials for further optimization
from optuna.visualization import plot_optimization_history
plot_optimization_history(study)

In [None]:
from optuna.visualization import plot_contour
from optuna.visualization import plot_edf
from optuna.visualization import plot_intermediate_values
from optuna.visualization import plot_optimization_history
from optuna.visualization import plot_parallel_coordinate
from optuna.visualization import plot_param_importances
from optuna.visualization import plot_slice

In [None]:
#Hyperparamters importance
#Ent_coef is the most important
plot_param_importances(study)

## Further works

1.   You can tune more critical hyperparameters
2.   Multi-objective hyperparameter optimization using Optuna. Here we can maximize Sharpe and simultaneously minimize Volatility in our account value to tune our hyperparameters



In [None]:
plot_edf(study)