# Introduction
1. In this tutorial, we will be tuning hyperparameters for Stable baselines3 models using Optuna.
2. The default model hyperparamters may not be adequate for your custom portfolio or custom state-space. Reinforcement learning algorithms are sensitive to hyperparamters, hence tuning is an important step.
3. Hyperparamters are tuned based on an objective, which needs to be maximized or minimized. Here we tuned our hyperparamters to maximize the Sharpe Ratio 

In [None]:
#Installing FinRL
%%capture
!pip install git+https://github.com/AI4Finance-LLC/FinRL-Library.git

In [None]:
#Installing Optuna
%%capture
!pip3 install optuna

In [3]:
#Importing the libraries
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# matplotlib.use('Agg')
import datetime
import optuna
%matplotlib inline
from finrl import config
from finrl import config_tickers
from optuna.integration import PyTorchLightningPruningCallback

# Changed finrl_meta to meta
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.meta.env_stock_trading.env_stocktrading_np import StockTradingEnv as StockTradingEnv_numpy
from finrl.agents.stablebaselines3.models import DRLAgent
from finrl.agents.rllib.models import DRLAgent as DRLAgent_rllib
from finrl.meta.data_processor import DataProcessor
import joblib
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline
import ray
from pprint import pprint

# Not needed for local run
# import sys
# sys.path.append("../FinRL-Library")

import itertools

In [None]:
import os
if not os.path.exists("./" + config.DATA_SAVE_DIR):
    os.makedirs("./" + config.DATA_SAVE_DIR)  # datasets
if not os.path.exists("./" + config.TRAINED_MODEL_DIR):
    os.makedirs("./" + config.TRAINED_MODEL_DIR)  # trained_models
if not os.path.exists("./" + config.TENSORBOARD_LOG_DIR):
    os.makedirs("./" + config.TENSORBOARD_LOG_DIR)  # tensorboard_log
if not os.path.exists("./" + config.RESULTS_DIR):
    os.makedirs("./" + config.RESULTS_DIR)  # results

## Collecting data and preprocessing

In [6]:
#Custom ticker list dataframe download
# ticker_list = config_tickers.DOW_30_TICKER
# df = YahooDownloader(start_date = '2009-01-01',
#                      end_date = '2021-10-01',
#                      ticker_list = ticker_list).fetch_data()

df_list = []
TRAIN_START_DATE = '2009-01-01'
TEST_END_DATE = '2021-10-01'
for ticker in config_tickers.DOW_30_TICKER:
    # Fetch data for each ticker
    portfolio_raw_df = YahooDownloader(start_date=TRAIN_START_DATE,
                                       end_date=TEST_END_DATE,
                                       ticker_list=[ticker]).fetch_data()
    # Append the fetched DataFrame to the list
    df_list.append(portfolio_raw_df)

# Concatenate all DataFrames row-wise
df = pd.concat(df_list, axis=0, ignore_index=True)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)
Shape of DataFrame:  (3209, 8)





Shape of DataFrame:  (640, 8)


In [7]:
print(df.head())

         date       open       high        low      close    volume  tic  day
0  2009-01-02  15.014077  19.330000  19.520000  18.570000  10955700  AXP    4
1  2009-01-05  15.495640  19.950001  20.240000  19.200001  16019200  AXP    0
2  2009-01-06  16.365574  21.070000  21.379999  20.299999  13820200  AXP    1
3  2009-01-07  15.676163  20.010000  20.719999  20.530001  15699900  AXP    2
4  2009-01-08  15.699665  20.040001  20.170000  19.799999  12255100  AXP    3


In [8]:
#You can add technical indicators and turbulence factor to dataframe
#Just set the use_technical_indicator=True, use_vix=True and use_turbulence=True
fe = FeatureEngineer(
                    use_technical_indicator=True,
                    tech_indicator_list = config.INDICATORS,
                    use_vix=True,
                    use_turbulence=True,
                    user_defined_feature = False)

processed = fe.preprocess_data(df)

Successfully added technical indicators


[*********************100%***********************]  1 of 1 completed


Shape of DataFrame:  (3208, 8)
Successfully added vix
Successfully added turbulence index


In [10]:
print(processed.head().to_markdown())

|    | date       |    open |     high |      low |    close |    volume | tic   |   day |   macd |   boll_ub |   boll_lb |   rsi_30 |   cci_30 |   dx_30 |   close_30_sma |   close_60_sma |   vix |   turbulence |
|---:|:-----------|--------:|---------:|---------:|---------:|----------:|:------|------:|-------:|----------:|----------:|---------:|---------:|--------:|---------------:|---------------:|------:|-------------:|
|  0 | 2009-01-02 |  2.737  |  3.24107 |  3.25143 |  3.06714 | 746015200 | AAPL  |     4 |      0 |   3.56552 |   2.82912 |      100 |  66.6667 |     100 |        3.06714 |        3.06714 | 39.58 |            0 |
|  1 | 2009-01-02 | 41.8162 | 58.99    | 59.08    | 58.59    |   6547900 | AMGN  |     4 |      0 |   3.56552 |   2.82912 |      100 |  66.6667 |     100 |       58.59    |       58.59    | 39.58 |            0 |
|  2 | 2009-01-02 | 15.0141 | 19.33    | 19.52    | 18.57    |  10955700 | AXP   |     4 |      0 |   3.56552 |   2.82912 |      100 |  66.6667 |   

In [11]:
list_ticker = processed["tic"].unique().tolist()
list_date = list(pd.date_range(processed['date'].min(),processed['date'].max()).astype(str))
combination = list(itertools.product(list_date,list_ticker))

processed_full = pd.DataFrame(combination,columns=["date","tic"]).merge(processed,on=["date","tic"],how="left")
processed_full = processed_full[processed_full['date'].isin(processed['date'])]
processed_full = processed_full.sort_values(['date','tic'])

processed_full = processed_full.fillna(0)
processed_full.sort_values(['date','tic'],ignore_index=True).head(5)

processed_full.to_csv('processed_full.csv')

In [12]:
train = data_split(processed_full, '2009-01-01','2020-07-01')
trade = data_split(processed_full, '2020-05-01','2021-10-01')
print(len(train))
print(len(trade))

83897
10353


In [13]:
stock_dimension = len(train.tic.unique())
state_space = 1 + 2*stock_dimension + len(config.INDICATORS) * stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 29, State Space: 291


In [33]:
#Defining the environment kwargs
num_stock_shares = [0] * stock_dimension
# From: https://github.com/AI4Finance-Foundation/FinRL/issues/540\
buy_cost_list = sell_cost_list = [0.001] * stock_dimension
num_stock_shares = [0] * stock_dimension
env_kwargs = {
    "hmax": 100, 
    "initial_amount": 1000000,
    "num_stock_shares": num_stock_shares,  # added argument
    "buy_cost_pct": buy_cost_list,  # changed to list
    "sell_cost_pct": sell_cost_list,  # changed to list
    "state_space": state_space, 
    "stock_dim": stock_dimension, 
    "tech_indicator_list": config.INDICATORS, 
    "action_space": stock_dimension, 
    "reward_scaling": 1e-4
    
}
#Instantiate the training gym compatible environment
e_train_gym = StockTradingEnv(df = train, **env_kwargs)

In [34]:
#Instantiate the training environment
# Also instantiate our training gent
env_train, _ = e_train_gym.get_sb_env()
print(type(env_train))
agent = DRLAgent(env = env_train)

<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>


In [35]:
#Instantiate the trading environment
e_trade_gym = StockTradingEnv(df = trade, turbulence_threshold = None, **env_kwargs)

## Tuning hyperparameters using Optuna
1. Go to this [link](https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/utils/hyperparams_opt.py), you will find all possible hyperparamters to tune for all the models.
2. For your model, grab those hyperparamters which you want to optimize and then return a dictionary of hyperparamters.
3. There is a feature in Optuna called as hyperparamters importance, you can point out those hyperparamters which are important for tuning.
4. By default Optuna use [TPESampler](https://www.youtube.com/watch?v=tdwgR1AqQ8Y) for sampling hyperparamters from the search space. 

In [29]:
def sample_ddpg_params(trial:optuna.Trial):
  # Size of the replay buffer
  buffer_size = trial.suggest_categorical("buffer_size", [int(1e4), int(1e5), int(1e6)])
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  batch_size = trial.suggest_categorical("batch_size", [32, 64, 128, 256, 512])
  
  return {"buffer_size": buffer_size,
          "learning_rate":learning_rate,
          "batch_size":batch_size}

In [None]:
def sample_a2c_params(trial:optuna.Trial):
    # learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1, log=True)  # fix deprecation
    n_steps = trial.suggest_categorical(
        "n_steps", [8, 16, 32, 64, 128, 256, 512, 1024, 2048]
    )
    # ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)
    ent_coef = trial.suggest_float("ent_coef", 0.00000001, 0.1, log=True)  # fix deprecation
    # vf_coef = trial.suggest_uniform("vf_coef", 0, 1)
    return {
        "learning_rate": learning_rate,
        "n_steps": n_steps,
        "ent_coef": ent_coef
    }


In [37]:
#Calculate the Sharpe ratio
#This is our objective for tuning
def calculate_sharpe(df):
    df['daily_return'] = df['account_value'].pct_change(1)
    if df['daily_return'].std() != 0:
        sharpe = (252 ** 0.5) * df['daily_return'].mean()/ \
            df['daily_return'].std()
        return sharpe
    else:
        return 0

## Callbacks
1. The callback will terminate if the improvement margin is below certain point
2. It will terminate after certain number of trial_number are reached, not before that
3. It will hold its patience to reach the threshold

In [38]:
class LoggingCallback:
    def __init__(self, threshold: int, trial_number: int, patience: int):
        """
        threshold:int tolerance for increase in sharpe ratio
        trial_number: int Prune after minimum number of trials
        patience: int patience for the threshold
        """
        self.threshold = threshold
        self.trial_number = trial_number
        self.patience = patience
        self.cb_list = []  # Trials list for which threshold is reached

    def __call__(self, study: optuna.study, frozen_trial: optuna.Trial):
        # Setting the best value in the current trial
        study.set_user_attr("previous_best_value", study.best_value)

        # Checking if the minimum number of trials have pass
        if frozen_trial.number > self.trial_number:
            previous_best_value = study.user_attrs.get("previous_best_value", None)
            # Checking if the previous and current objective values have the same sign
            if previous_best_value * study.best_value >= 0:
                # Checking for the threshold condition
                if abs(previous_best_value - study.best_value) < self.threshold:
                    self.cb_list.append(frozen_trial.number)
                    # If threshold is achieved for the patience amount of time
                    if len(self.cb_list) > self.patience:
                        print("The study stops now...")
                        print(
                            "With number",
                            frozen_trial.number,
                            "and value ",
                            frozen_trial.value,
                        )
                        print(
                            "The previous and current best values are {} and {} respectively".format(
                                previous_best_value, study.best_value
                            )
                        )
                        study.stop()

A2C with optuna

In [39]:
from IPython.display import clear_output
import sys   

os.makedirs("A2C_optuna_models",exist_ok=True)

def objective(trial: optuna.Trial):  # Optuna objective
    hyperparameters = sample_a2c_params(trial)
    model_a2c = agent.get_model("a2c", model_kwargs=hyperparameters)  # TODO: Could try out self-implemented A2C
    trained_model = agent.train_model(model=model_a2c, tb_log_name="a2c", total_timesteps=10000)  # train stablebaselines3's A2C
    trained_model.save('A2C_optuna_models/a2c_{}.pth'.format(trial.number))  # save model with trial number as ID
    # clear_output(wait=True)  # This will keep only the last trial tested
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=trained_model,
        environment=e_trade_gym
    )
    sharpe = calculate_sharpe(df_account_value)

    return sharpe

In [40]:
# Create a study object and specify the direction as 'maximize'
# As you want to maximize sharpe
# Pruner stops not promising iterations
# Use a pruner, else you will get error related to divergence of model
# You can also use Multivariate samplere
# sampler = optuna.samplers.TPESampler(multivarite=True,seed=42)

sampler = optuna.samplers.TPESampler(seed=1234)
study = optuna.create_study(
    study_name='a2c_study',
    direction='maximize',
    sampler=sampler,
    pruner=optuna.pruners.HyperbandPruner(),
)
logging_callback = LoggingCallback(threshold=1e-5, trial_number=5, patience=30)
study.optimize(
    objective,
    n_trials=30,
    catch=(ValueError,),
    callbacks=[logging_callback],
)

[I 2024-11-06 14:08:38,145] A new study created in memory with name: a2c_study
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)


{'learning_rate': 9.069790423538583e-05, 'n_steps': 1024, 'ent_coef': 3.1968399196034683e-06}
Using cuda device


[I 2024-11-06 14:10:09,130] Trial 0 finished with value: 1.0467206287624573 and parameters: {'learning_rate': 9.069790423538583e-05, 'n_steps': 1024, 'ent_coef': 3.1968399196034683e-06}. Best is trial 0 with value: 1.0467206287624573.


hit end!
{'learning_rate': 0.0031987155943821507, 'n_steps': 1024, 'ent_coef': 0.00020312961670857716}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)
[I 2024-11-06 14:11:41,778] Trial 1 finished with value: 1.2321234462758197 and parameters: {'learning_rate': 0.0031987155943821507, 'n_steps': 1024, 'ent_coef': 0.00020312961670857716}. Best is trial 1 with value: 1.2321234462758197.


hit end!
{'learning_rate': 2.38180503070213e-05, 'n_steps': 16, 'ent_coef': 0.004121293029638071}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)


-------------------------------------
| time/                 |           |
|    fps                | 104       |
|    iterations         | 100       |
|    time_elapsed       | 15        |
|    total_timesteps    | 1600      |
| train/                |           |
|    entropy_loss       | -41.2     |
|    explained_variance | 0.173     |
|    learning_rate      | 2.38e-05  |
|    n_updates          | 99        |
|    policy_loss        | 83.4      |
|    reward             | 1.2544177 |
|    std                | 1         |
|    value_loss         | 6.32      |
-------------------------------------
day: 2892, episode: 10
begin_total_asset: 1000000.00
end_total_asset: 3551227.09
total_reward: 2551227.09
total_cost: 401478.64
total_trades: 80949
Sharpe: 0.776
---------------------------------------
| time/                 |             |
|    fps                | 101         |
|    iterations         | 200         |
|    time_elapsed       | 31          |
|    total_timesteps    | 3200

[I 2024-11-06 14:13:18,323] Trial 2 finished with value: 0.40917030757284817 and parameters: {'learning_rate': 2.38180503070213e-05, 'n_steps': 16, 'ent_coef': 0.004121293029638071}. Best is trial 1 with value: 1.2321234462758197.


hit end!
{'learning_rate': 5.2340048721427795e-05, 'n_steps': 64, 'ent_coef': 0.0005298666736139674}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)


--------------------------------------
| time/                 |            |
|    fps                | 112        |
|    iterations         | 100        |
|    time_elapsed       | 56         |
|    total_timesteps    | 6400       |
| train/                |            |
|    entropy_loss       | -41.1      |
|    explained_variance | 0.153      |
|    learning_rate      | 5.23e-05   |
|    n_updates          | 99         |
|    policy_loss        | 48.2       |
|    reward             | 0.21873954 |
|    std                | 1          |
|    value_loss         | 4.97       |
--------------------------------------


[I 2024-11-06 14:14:48,217] Trial 3 finished with value: 0.528719921300583 and parameters: {'learning_rate': 5.2340048721427795e-05, 'n_steps': 64, 'ent_coef': 0.0005298666736139674}. Best is trial 1 with value: 1.2321234462758197.


hit end!
{'learning_rate': 0.009399914420398066, 'n_steps': 512, 'ent_coef': 0.00020993843801086698}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)


day: 2892, episode: 20
begin_total_asset: 1000000.00
end_total_asset: 4911662.03
total_reward: 3911662.03
total_cost: 10391.64
total_trades: 41792
Sharpe: 0.835


[I 2024-11-06 14:16:19,541] Trial 4 finished with value: 1.5110648087592045 and parameters: {'learning_rate': 0.009399914420398066, 'n_steps': 512, 'ent_coef': 0.00020993843801086698}. Best is trial 4 with value: 1.5110648087592045.


hit end!
{'learning_rate': 0.36359209473364146, 'n_steps': 16, 'ent_coef': 2.3829547523593072e-08}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)


-------------------------------------
| time/                 |           |
|    fps                | 110       |
|    iterations         | 100       |
|    time_elapsed       | 14        |
|    total_timesteps    | 1600      |
| train/                |           |
|    entropy_loss       | -32.4     |
|    explained_variance | -1.67e-06 |
|    learning_rate      | 0.364     |
|    n_updates          | 99        |
|    policy_loss        | 77.7      |
|    reward             | 1.1691    |
|    std                | 235       |
|    value_loss         | 8.41      |
-------------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 111         |
|    iterations         | 200         |
|    time_elapsed       | 28          |
|    total_timesteps    | 3200        |
| train/                |             |
|    entropy_loss       | -42.4       |
|    explained_variance | 0           |
|    learning_rate      | 0.364 

[I 2024-11-06 14:17:51,089] Trial 5 finished with value: 1.3695975223072174 and parameters: {'learning_rate': 0.36359209473364146, 'n_steps': 16, 'ent_coef': 2.3829547523593072e-08}. Best is trial 4 with value: 1.5110648087592045.


hit end!
{'learning_rate': 0.001812349871305581, 'n_steps': 8, 'ent_coef': 8.265607234648155e-06}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)


--------------------------------------
| time/                 |            |
|    fps                | 112        |
|    iterations         | 100        |
|    time_elapsed       | 7          |
|    total_timesteps    | 800        |
| train/                |            |
|    entropy_loss       | -41.1      |
|    explained_variance | -0.0311    |
|    learning_rate      | 0.00181    |
|    n_updates          | 99         |
|    policy_loss        | 234        |
|    reward             | 0.57526433 |
|    std                | 0.999      |
|    value_loss         | 34.6       |
--------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 113       |
|    iterations         | 200       |
|    time_elapsed       | 14        |
|    total_timesteps    | 1600      |
| train/                |           |
|    entropy_loss       | -41.3     |
|    explained_variance | 1.79e-07  |
|    learning_rate      | 0.00181 

[I 2024-11-06 14:19:19,628] Trial 6 finished with value: 1.3627999357161382 and parameters: {'learning_rate': 0.001812349871305581, 'n_steps': 8, 'ent_coef': 8.265607234648155e-06}. Best is trial 4 with value: 1.5110648087592045.


day: 356, episode: 20
begin_total_asset: 1000000.00
end_total_asset: 1364529.71
total_reward: 364529.71
total_cost: 1975.88
total_trades: 6981
Sharpe: 1.363
hit end!
{'learning_rate': 0.004778133862873408, 'n_steps': 128, 'ent_coef': 0.006562192211340152}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)


day: 2892, episode: 30
begin_total_asset: 1000000.00
end_total_asset: 2346458.10
total_reward: 1346458.10
total_cost: 54797.22
total_trades: 50391
Sharpe: 0.544


[I 2024-11-06 14:20:42,372] Trial 7 finished with value: 1.2668300270488502 and parameters: {'learning_rate': 0.004778133862873408, 'n_steps': 128, 'ent_coef': 0.006562192211340152}. Best is trial 4 with value: 1.5110648087592045.


hit end!
{'learning_rate': 0.014744459088243693, 'n_steps': 128, 'ent_coef': 2.510784660114704e-08}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)
[I 2024-11-06 14:22:03,108] Trial 8 finished with value: 0.9096160921768621 and parameters: {'learning_rate': 0.014744459088243693, 'n_steps': 128, 'ent_coef': 2.510784660114704e-08}. Best is trial 4 with value: 1.5110648087592045.


hit end!
{'learning_rate': 0.022238665134707736, 'n_steps': 128, 'ent_coef': 0.04525524814651032}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)


day: 2892, episode: 40
begin_total_asset: 1000000.00
end_total_asset: 3208457.03
total_reward: 2208457.03
total_cost: 3160.22
total_trades: 54306
Sharpe: 0.710


[I 2024-11-06 14:23:28,614] Trial 9 finished with value: 1.7751415667028998 and parameters: {'learning_rate': 0.022238665134707736, 'n_steps': 128, 'ent_coef': 0.04525524814651032}. Best is trial 9 with value: 1.7751415667028998.


hit end!
{'learning_rate': 0.4579045629871961, 'n_steps': 32, 'ent_coef': 0.09498013326989069}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)


--------------------------------------
| time/                 |            |
|    fps                | 121        |
|    iterations         | 100        |
|    time_elapsed       | 26         |
|    total_timesteps    | 3200       |
| train/                |            |
|    entropy_loss       | -98.5      |
|    explained_variance | 0          |
|    learning_rate      | 0.458      |
|    n_updates          | 99         |
|    policy_loss        | -1.17e+03  |
|    reward             | 0.48748147 |
|    std                | 1.48e+03   |
|    value_loss         | 159        |
--------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 125       |
|    iterations         | 200       |
|    time_elapsed       | 50        |
|    total_timesteps    | 6400      |
| train/                |           |
|    entropy_loss       | -144      |
|    explained_variance | 0         |
|    learning_rate      | 0.458   

[I 2024-11-06 14:24:50,001] Trial 10 finished with value: 1.5154913501978873 and parameters: {'learning_rate': 0.4579045629871961, 'n_steps': 32, 'ent_coef': 0.09498013326989069}. Best is trial 9 with value: 1.7751415667028998.


hit end!
{'learning_rate': 0.6911073279866081, 'n_steps': 32, 'ent_coef': 0.07807076488793632}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)


--------------------------------------
| time/                 |            |
|    fps                | 116        |
|    iterations         | 100        |
|    time_elapsed       | 27         |
|    total_timesteps    | 3200       |
| train/                |            |
|    entropy_loss       | -103       |
|    explained_variance | 0          |
|    learning_rate      | 0.691      |
|    n_updates          | 99         |
|    policy_loss        | -921       |
|    reward             | -0.6632277 |
|    std                | 824        |
|    value_loss         | 97.8       |
--------------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 117         |
|    iterations         | 200         |
|    time_elapsed       | 54          |
|    total_timesteps    | 6400        |
| train/                |             |
|    entropy_loss       | -120        |
|    explained_variance | 0           |
|    learning_ra

[I 2024-11-06 14:26:18,524] Trial 11 finished with value: 1.4973116170530973 and parameters: {'learning_rate': 0.6911073279866081, 'n_steps': 32, 'ent_coef': 0.07807076488793632}. Best is trial 9 with value: 1.7751415667028998.


hit end!
{'learning_rate': 0.10276422167929608, 'n_steps': 32, 'ent_coef': 0.06461742577600674}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)


day: 2892, episode: 50
begin_total_asset: 1000000.00
end_total_asset: 4393603.03
total_reward: 3393603.03
total_cost: 23487.95
total_trades: 49804
Sharpe: 0.804
---------------------------------------
| time/                 |             |
|    fps                | 107         |
|    iterations         | 100         |
|    time_elapsed       | 29          |
|    total_timesteps    | 3200        |
| train/                |             |
|    entropy_loss       | -48.7       |
|    explained_variance | 0           |
|    learning_rate      | 0.103       |
|    n_updates          | 99          |
|    policy_loss        | -441        |
|    reward             | -0.38010812 |
|    std                | 14.3        |
|    value_loss         | 99.3        |
---------------------------------------
--------------------------------------
| time/                 |            |
|    fps                | 107        |
|    iterations         | 200        |
|    time_elapsed       | 59         |
|   

[I 2024-11-06 14:27:48,825] Trial 12 finished with value: 1.4927361570680404 and parameters: {'learning_rate': 0.10276422167929608, 'n_steps': 32, 'ent_coef': 0.06461742577600674}. Best is trial 9 with value: 1.7751415667028998.


hit end!
{'learning_rate': 0.037460050210520165, 'n_steps': 256, 'ent_coef': 0.006663008686116493}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)
[I 2024-11-06 14:29:16,274] Trial 13 finished with value: 1.6431194022832425 and parameters: {'learning_rate': 0.037460050210520165, 'n_steps': 256, 'ent_coef': 0.006663008686116493}. Best is trial 9 with value: 1.7751415667028998.


hit end!
{'learning_rate': 0.04131516139407045, 'n_steps': 256, 'ent_coef': 0.005879003863387715}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)


day: 2892, episode: 60
begin_total_asset: 1000000.00
end_total_asset: 3496281.81
total_reward: 2496281.81
total_cost: 2337.04
total_trades: 38845
Sharpe: 0.728


[I 2024-11-06 14:30:44,742] Trial 14 finished with value: 1.434404789325061 and parameters: {'learning_rate': 0.04131516139407045, 'n_steps': 256, 'ent_coef': 0.005879003863387715}. Best is trial 9 with value: 1.7751415667028998.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)


hit end!
{'learning_rate': 0.0005721124989524309, 'n_steps': 256, 'ent_coef': 0.0011505406271831633}
Using cuda device


[I 2024-11-06 14:32:12,339] Trial 15 finished with value: 1.3206598711777402 and parameters: {'learning_rate': 0.0005721124989524309, 'n_steps': 256, 'ent_coef': 0.0011505406271831633}. Best is trial 9 with value: 1.7751415667028998.


hit end!
{'learning_rate': 0.0720661604820144, 'n_steps': 2048, 'ent_coef': 4.009097084190781e-05}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)
[I 2024-11-06 14:33:38,234] Trial 16 finished with value: 1.7893212360303676 and parameters: {'learning_rate': 0.0720661604820144, 'n_steps': 2048, 'ent_coef': 4.009097084190781e-05}. Best is trial 16 with value: 1.7893212360303676.


day: 356, episode: 50
begin_total_asset: 1000000.00
end_total_asset: 1540799.98
total_reward: 540799.98
total_cost: 998.99
total_trades: 4272
Sharpe: 1.789
hit end!
{'learning_rate': 0.11077319352350509, 'n_steps': 2048, 'ent_coef': 2.9511016700070576e-07}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)


day: 2892, episode: 70
begin_total_asset: 1000000.00
end_total_asset: 3183461.96
total_reward: 2183461.96
total_cost: 226101.83
total_trades: 69714
Sharpe: 0.652


[I 2024-11-06 14:35:09,650] Trial 17 finished with value: 1.3579212893575994 and parameters: {'learning_rate': 0.11077319352350509, 'n_steps': 2048, 'ent_coef': 2.9511016700070576e-07}. Best is trial 16 with value: 1.7893212360303676.


hit end!
{'learning_rate': 0.1313631722525307, 'n_steps': 2048, 'ent_coef': 3.515179870104018e-05}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)
[I 2024-11-06 14:36:39,721] Trial 18 finished with value: 1.459772772250219 and parameters: {'learning_rate': 0.1313631722525307, 'n_steps': 2048, 'ent_coef': 3.515179870104018e-05}. Best is trial 16 with value: 1.7893212360303676.


hit end!
{'learning_rate': 0.0007292444504090859, 'n_steps': 128, 'ent_coef': 4.978111945260601e-07}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)


day: 2892, episode: 80
begin_total_asset: 1000000.00
end_total_asset: 5900295.27
total_reward: 4900295.27
total_cost: 216671.07
total_trades: 65757
Sharpe: 0.943


[I 2024-11-06 14:38:08,097] Trial 19 finished with value: 1.4578761421597068 and parameters: {'learning_rate': 0.0007292444504090859, 'n_steps': 128, 'ent_coef': 4.978111945260601e-07}. Best is trial 16 with value: 1.7893212360303676.


hit end!
{'learning_rate': 0.020228108874944747, 'n_steps': 2048, 'ent_coef': 1.7087499197772204e-05}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)
[I 2024-11-06 14:39:42,577] Trial 20 finished with value: 1.5274815175513323 and parameters: {'learning_rate': 0.020228108874944747, 'n_steps': 2048, 'ent_coef': 1.7087499197772204e-05}. Best is trial 16 with value: 1.7893212360303676.


hit end!
{'learning_rate': 0.03354432122000949, 'n_steps': 256, 'ent_coef': 0.015038797027987558}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)
[I 2024-11-06 14:41:10,529] Trial 21 finished with value: 1.486846644618596 and parameters: {'learning_rate': 0.03354432122000949, 'n_steps': 256, 'ent_coef': 0.015038797027987558}. Best is trial 16 with value: 1.7893212360303676.


hit end!
{'learning_rate': 0.0760536710731251, 'n_steps': 512, 'ent_coef': 0.018450789051394325}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)


day: 2892, episode: 90
begin_total_asset: 1000000.00
end_total_asset: 3413081.30
total_reward: 2413081.30
total_cost: 42965.41
total_trades: 47559
Sharpe: 0.819


[I 2024-11-06 14:42:37,190] Trial 22 finished with value: 1.6887648326819964 and parameters: {'learning_rate': 0.0760536710731251, 'n_steps': 512, 'ent_coef': 0.018450789051394325}. Best is trial 16 with value: 1.7893212360303676.


hit end!
{'learning_rate': 0.20349866496274885, 'n_steps': 512, 'ent_coef': 7.24921025913154e-05}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)
[I 2024-11-06 14:44:04,109] Trial 23 finished with value: 1.6260066771547579 and parameters: {'learning_rate': 0.20349866496274885, 'n_steps': 512, 'ent_coef': 7.24921025913154e-05}. Best is trial 16 with value: 1.7893212360303676.


hit end!
{'learning_rate': 0.06812796382784457, 'n_steps': 512, 'ent_coef': 0.001266671862863341}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)


day: 2892, episode: 100
begin_total_asset: 1000000.00
end_total_asset: 5333110.75
total_reward: 4333110.75
total_cost: 2806.67
total_trades: 33536
Sharpe: 0.943


[I 2024-11-06 14:45:33,214] Trial 24 finished with value: 1.3020355140487767 and parameters: {'learning_rate': 0.06812796382784457, 'n_steps': 512, 'ent_coef': 0.001266671862863341}. Best is trial 16 with value: 1.7893212360303676.


hit end!
{'learning_rate': 0.008592159025154643, 'n_steps': 8, 'ent_coef': 0.018743417515056645}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)


-------------------------------------
| time/                 |           |
|    fps                | 97        |
|    iterations         | 100       |
|    time_elapsed       | 8         |
|    total_timesteps    | 800       |
| train/                |           |
|    entropy_loss       | -42.8     |
|    explained_variance | 0         |
|    learning_rate      | 0.00859   |
|    n_updates          | 99        |
|    policy_loss        | 279       |
|    reward             | 1.4657485 |
|    std                | 1.08      |
|    value_loss         | 39.6      |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 96        |
|    iterations         | 200       |
|    time_elapsed       | 16        |
|    total_timesteps    | 1600      |
| train/                |           |
|    entropy_loss       | -43.7     |
|    explained_variance | 1.19e-07  |
|    learning_rate      | 0.00859   |
|    n_updat

[I 2024-11-06 14:47:09,499] Trial 25 finished with value: 1.5583527224375198 and parameters: {'learning_rate': 0.008592159025154643, 'n_steps': 8, 'ent_coef': 0.018743417515056645}. Best is trial 16 with value: 1.7893212360303676.


hit end!
{'learning_rate': 0.1878881903723904, 'n_steps': 64, 'ent_coef': 2.044250520221642e-06}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)


------------------------------------
| time/                 |          |
|    fps                | 114      |
|    iterations         | 100      |
|    time_elapsed       | 55       |
|    total_timesteps    | 6400     |
| train/                |          |
|    entropy_loss       | -35.3    |
|    explained_variance | 5.96e-08 |
|    learning_rate      | 0.188    |
|    n_updates          | 99       |
|    policy_loss        | -231     |
|    reward             | 0.339887 |
|    std                | 11.8     |
|    value_loss         | 47.7     |
------------------------------------


[I 2024-11-06 14:48:38,143] Trial 26 finished with value: 1.707410524917026 and parameters: {'learning_rate': 0.1878881903723904, 'n_steps': 64, 'ent_coef': 2.044250520221642e-06}. Best is trial 16 with value: 1.7893212360303676.


day: 356, episode: 80
begin_total_asset: 1000000.00
end_total_asset: 1550754.30
total_reward: 550754.30
total_cost: 998.97
total_trades: 5340
Sharpe: 1.707
hit end!
{'learning_rate': 0.21428442513566318, 'n_steps': 64, 'ent_coef': 8.906276757349905e-07}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)


day: 2892, episode: 110
begin_total_asset: 1000000.00
end_total_asset: 2702145.27
total_reward: 1702145.27
total_cost: 28612.67
total_trades: 42276
Sharpe: 0.592
-------------------------------------
| time/                 |           |
|    fps                | 112       |
|    iterations         | 100       |
|    time_elapsed       | 56        |
|    total_timesteps    | 6400      |
| train/                |           |
|    entropy_loss       | -27.4     |
|    explained_variance | 0         |
|    learning_rate      | 0.214     |
|    n_updates          | 99        |
|    policy_loss        | 16        |
|    reward             | 1.1665299 |
|    std                | 14.4      |
|    value_loss         | 5.83      |
-------------------------------------


[I 2024-11-06 14:50:08,157] Trial 27 finished with value: 1.5002806008378098 and parameters: {'learning_rate': 0.21428442513566318, 'n_steps': 64, 'ent_coef': 8.906276757349905e-07}. Best is trial 16 with value: 1.7893212360303676.


hit end!
{'learning_rate': 0.8926488060017259, 'n_steps': 64, 'ent_coef': 9.047331185197167e-08}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)


--------------------------------------
| time/                 |            |
|    fps                | 105        |
|    iterations         | 100        |
|    time_elapsed       | 60         |
|    total_timesteps    | 6400       |
| train/                |            |
|    entropy_loss       | 18         |
|    explained_variance | 0          |
|    learning_rate      | 0.893      |
|    n_updates          | 99         |
|    policy_loss        | 75.3       |
|    reward             | 0.26836628 |
|    std                | 2.44e+03   |
|    value_loss         | 169        |
--------------------------------------


[I 2024-11-06 14:51:42,687] Trial 28 finished with value: 1.3383390191981641 and parameters: {'learning_rate': 0.8926488060017259, 'n_steps': 64, 'ent_coef': 9.047331185197167e-08}. Best is trial 16 with value: 1.7893212360303676.


hit end!
{'learning_rate': 0.001465603914067428, 'n_steps': 1024, 'ent_coef': 2.1776324009492536e-06}
Using cuda device


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
  ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)


day: 2892, episode: 120
begin_total_asset: 1000000.00
end_total_asset: 3125653.92
total_reward: 2125653.92
total_cost: 211792.87
total_trades: 66268
Sharpe: 0.612


[I 2024-11-06 14:53:11,570] Trial 29 finished with value: 1.525426542844777 and parameters: {'learning_rate': 0.001465603914067428, 'n_steps': 1024, 'ent_coef': 2.1776324009492536e-06}. Best is trial 16 with value: 1.7893212360303676.


hit end!


In [41]:
joblib.dump(study, "final_a2c_study__.pkl")

['final_a2c_study__.pkl']

DDPG with optuna

In [None]:
from IPython.display import clear_output
import sys   

os.makedirs("DDPG_optuna_models",exist_ok=True)

def objective(trial:optuna.Trial):
  #Trial will suggest a set of hyperparamters from the specified range
  hyperparameters = sample_ddpg_params(trial)
  model_ddpg = agent.get_model("ddpg",model_kwargs = hyperparameters )
  #You can increase it for better comparison
  trained_ddpg = agent.train_model(model=model_ddpg,
                                  tb_log_name="ddpg" ,
                             total_timesteps=50000)
  trained_ddpg.save('models/ddpg_{}.pth'.format(trial.number))
  clear_output(wait=True)
  #For the given hyperparamters, determine the account value in the trading period
  df_account_value, df_actions = DRLAgent.DRL_prediction(
    model=trained_ddpg, 
    environment = e_trade_gym)
  #Calculate sharpe from the account value
  sharpe = calculate_sharpe(df_account_value)

  return sharpe

#Create a study object and specify the direction as 'maximize'
#As you want to maximize sharpe
#Pruner stops not promising iterations
#Use a pruner, else you will get error related to divergence of model
#You can also use Multivariate samplere
#sampler = optuna.samplers.TPESampler(multivarite=True,seed=42)
sampler = optuna.samplers.TPESampler(seed=42)
study = optuna.create_study(study_name="ddpg_study",direction='maximize',
                            sampler = sampler, pruner=optuna.pruners.HyperbandPruner())

logging_callback = LoggingCallback(threshold=1e-5,patience=30,trial_number=5)
#You can increase the n_trials for a better search space scanning
study.optimize(objective, n_trials=30,catch=(ValueError,),callbacks=[logging_callback])

In [None]:
joblib.dump(study, "final_ddpg_study__.pkl")

['final_ddpg_study__.pkl']

In [42]:
#Get the best hyperparamters
print('Hyperparameters after tuning',study.best_params)
print('Hyperparameters before tuning',config.A2C_PARAMS)

Hyperparameters after tuning {'learning_rate': 0.0720661604820144, 'n_steps': 2048, 'ent_coef': 4.009097084190781e-05}
Hyperparameters before tuning {'n_steps': 5, 'ent_coef': 0.01, 'learning_rate': 0.0007}


In [43]:
study.best_trial

FrozenTrial(number=16, state=TrialState.COMPLETE, values=[1.7893212360303676], datetime_start=datetime.datetime(2024, 11, 6, 14, 32, 12, 340339), datetime_complete=datetime.datetime(2024, 11, 6, 14, 33, 38, 233786), params={'learning_rate': 0.0720661604820144, 'n_steps': 2048, 'ent_coef': 4.009097084190781e-05}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'learning_rate': FloatDistribution(high=1.0, log=True, low=1e-05, step=None), 'n_steps': CategoricalDistribution(choices=(8, 16, 32, 64, 128, 256, 512, 1024, 2048)), 'ent_coef': FloatDistribution(high=0.1, log=True, low=1e-08, step=None)}, trial_id=16, value=None)

In [44]:
# from stable_baselines3 import DDPG
# tuned_model_ddpg = DDPG.load('models/ddpg_{}.pth'.format(study.best_trial.number),env=env_train)

from stable_baselines3 import A2C
tuned_model_a2c = A2C.load('A2C_optuna_models/a2c_{}.pth'.format(study.best_trial.number), env=env_train)



In [45]:
#Trading period account value with tuned model
df_account_value_tuned, df_actions_tuned = DRLAgent.DRL_prediction(
    model=tuned_model_a2c, 
    environment = e_trade_gym)

hit end!


In [46]:
print(df_account_value_tuned.head())
print(df_actions_tuned.head())

         date  account_value
0  2020-05-01   1.000000e+06
1  2020-05-04   9.978485e+05
2  2020-05-05   1.001751e+06
3  2020-05-06   1.003632e+06
4  2020-05-07   1.000501e+06
            AAPL  AMGN  AXP  BA  CAT  CRM  ...  TRV  UNH  V   VZ  WBA  WMT
date                                       ...                            
2020-05-01   100     0  100   0    0  100  ...    0  100  0  100    0  100
2020-05-04   100     0  100   0    0  100  ...    0  100  0  100    0  100
2020-05-05   100     0  100   0    0  100  ...    0  100  0  100    0  100
2020-05-06   100     0  100   0    0  100  ...    0  100  0  100    0  100
2020-05-07   100     0  100   0    0  100  ...    0  100  0  100    0  100

[5 rows x 29 columns]


In [48]:
#Backtesting with our pruned model
print("==============Get Backtest Results===========")
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

perf_stats_all_tuned = backtest_stats(account_value=df_account_value_tuned)
perf_stats_all_tuned = pd.DataFrame(perf_stats_all_tuned)
perf_stats_all_tuned.to_csv("./"+config.RESULTS_DIR+"/perf_stats_all_tuned_"+now+'.csv')

Annual return          0.356834
Cumulative returns     0.540800
Annual volatility      0.180128
Sharpe ratio           1.789321
Calmar ratio           3.765003
Stability              0.965409
Max drawdown          -0.094777
Omega ratio            1.360814
Sortino ratio          2.816772
Skew                        NaN
Kurtosis                    NaN
Tail ratio             1.023460
Daily value at risk   -0.021415
dtype: float64


In [49]:
#Now train with not tuned hyperaparameters
#Default config.ddpg_PARAMS
# non_tuned_model_ddpg = agent.get_model("ddpg",model_kwargs = config.DDPG_PARAMS )
# trained_ddpg = agent.train_model(model=non_tuned_model_ddpg, 
#                              tb_log_name='ddpg',
#                              total_timesteps=50000)
non_tuned_model_a2c = agent.get_model("a2c", model_kwargs=config.A2C_PARAMS)
trained_a2c = agent.train_model(model=non_tuned_model_a2c, tb_log_name='a2c', total_timesteps=10000)

{'n_steps': 5, 'ent_coef': 0.01, 'learning_rate': 0.0007}
Using cuda device




-------------------------------------
| time/                 |           |
|    fps                | 98        |
|    iterations         | 100       |
|    time_elapsed       | 5         |
|    total_timesteps    | 500       |
| train/                |           |
|    entropy_loss       | -41.2     |
|    explained_variance | -0.0485   |
|    learning_rate      | 0.0007    |
|    n_updates          | 99        |
|    policy_loss        | -1.4      |
|    reward             | 0.5177753 |
|    std                | 1         |
|    value_loss         | 0.308     |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 99        |
|    iterations         | 200       |
|    time_elapsed       | 10        |
|    total_timesteps    | 1000      |
| train/                |           |
|    entropy_loss       | -41.2     |
|    explained_variance | -0.0188   |
|    learning_rate      | 0.0007    |
|    n_updat

In [50]:
df_account_value, df_actions = DRLAgent.DRL_prediction(
    model=trained_a2c, 
    environment = e_trade_gym)

hit end!


In [51]:
#Backtesting for not tuned hyperparamters
print("==============Get Backtest Results===========")
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

perf_stats_all = backtest_stats(account_value=df_account_value)
perf_stats_all = pd.DataFrame(perf_stats_all)
# perf_stats_all.to_csv("./"+config.RESULTS_DIR+"/perf_stats_all_"+now+'.csv')

Annual return          0.246024
Cumulative returns     0.365618
Annual volatility      0.187158
Sharpe ratio           1.271821
Calmar ratio           2.602306
Stability              0.921153
Max drawdown          -0.094541
Omega ratio            1.255015
Sortino ratio          2.041262
Skew                        NaN
Kurtosis                    NaN
Tail ratio             1.146094
Daily value at risk   -0.022635
dtype: float64


In [None]:
#You can see with trial, our sharpe ratio is increasing
#Certainly you can afford more number of trials for further optimization
from optuna.visualization import plot_optimization_history
plot_optimization_history(study)

In [54]:
from optuna.visualization import plot_contour
from optuna.visualization import plot_edf
from optuna.visualization import plot_intermediate_values
from optuna.visualization import plot_optimization_history
from optuna.visualization import plot_parallel_coordinate
from optuna.visualization import plot_param_importances
from optuna.visualization import plot_slice

In [None]:
#Hyperparamters importance
#Ent_coef is the most important
plot_param_importances(study)

## Further works

1.   You can tune more critical hyperparameters
2.   Multi-objective hyperparameter optimization using Optuna. Here we can maximize Sharpe and simultaneously minimize Volatility in our account value to tune our hyperparameters



In [None]:
plot_edf(study)