In [49]:
"""
@Time ： 2021/11/21 16:06
@Auth ： Aaron Liang
"""

import datetime
import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import ray
import sys

sys.path.append("../FinRL-Library")

from finrl.apps import config
from finrl.drl_agents.rllib.models import DRLAgent as DRLAgent_rllib
from finrl.drl_agents.stablebaselines3.models import DRLAgent
from finrl.neo_finrl.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.neo_finrl.env_stock_trading.env_stocktrading_np import StockTradingEnv as StockTradingEnv_numpy
from finrl.neo_finrl.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline
from pprint import pprint
        
import os

# Create the working directories named in finrl's config (data, trained
# models, tensorboard logs, results) relative to the current directory.
# exist_ok=True keeps the cell idempotent on re-run and removes the
# check-then-create race of a separate os.path.exists() test. Unlike the
# previous check, it raises if one of the paths exists but is not a directory.
for _dir_name in (
    config.DATA_SAVE_DIR,
    config.TRAINED_MODEL_DIR,
    config.TENSORBOARD_LOG_DIR,
    config.RESULTS_DIR,
):
    os.makedirs("./" + _dir_name, exist_ok=True)

In [50]:
# Load the training candles: one '<symbol>.csv' group per symbol in the HDF5 file.
symbols = ['BTCUSDT', 'ETHUSDT', 'LTCUSDT', 'XRPUSDT']
ohlcv_columns = ['Open', 'High', 'Low', 'Close', 'Volume', 'tic']

data = {}
# Open the file in a context manager so the handle is closed as soon as every
# table has been copied into memory (it was previously left open).
with h5py.File(os.path.join(os.getcwd(), 'data', 'train_data_format1_week_final.h5'), 'r') as raw:
    for symbol in symbols:
        table = raw[symbol + '.csv']
        data[symbol] = pd.DataFrame(
            data=table['block0_values'][()],
            # 'axis0' stores the column labels as bytes; decode them to str.
            columns=[str(label, encoding="utf8") for label in table['axis0'][()]],
            index=table['axis1'][()])
        # Tag every row with its symbol so the frames can be stacked.
        data[symbol]['tic'] = symbol

# Stack all symbols with a single concat instead of growing a frame inside a
# loop, then pin the column order the positional rename below relies on.
train = pd.concat([data[symbol] for symbol in symbols], axis=0)[ohlcv_columns]

# Move the timestamp index into a column; 'Close' becomes lower-case 'close'.
train = train.reset_index()
train.columns = ['date', 'Open', 'High', 'Low', 'close', 'Volume', 'tic']
train = train.sort_values(by=['date', 'tic'])

# The raw index values are divided by 1e9 before fromtimestamp(), i.e. they are
# treated as nanoseconds since the epoch.
# NOTE(review): fromtimestamp() converts to the machine's local time zone, so
# the hard-coded timestamp filtered out below is only hit when the notebook runs
# in the time zone it was authored in -- confirm before re-running elsewhere.
train['date'] = train['date'].apply(
    lambda ns: datetime.datetime.fromtimestamp(ns / 1000000000).strftime('%Y-%m-%d %H:%M:%S'))
# Drop one specific bar (the notebook does not record why).
train = train[train.date != '2020-12-21 22:09:00']

# 'cci_30' is removed from finrl's shared indicator list. This mutates the
# config module in place, so every later read of
# config.TECHNICAL_INDICATORS_LIST sees the shortened list as well.
if 'cci_30' in config.TECHNICAL_INDICATORS_LIST:
    config.TECHNICAL_INDICATORS_LIST.remove('cci_30')

fe = FeatureEngineer(
    use_technical_indicator=True,
    tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
    use_vix=False,
    use_turbulence=False,
    user_defined_feature=False)

newtrain = fe.preprocess_data(train)
# Re-index so that all rows sharing a timestamp share one integer label
# (0, 1, 2, ... in order of first appearance).
newtrain.index = newtrain.date.factorize()[0]

  0%|          | 0/4 [00:00<?, ?it/s]

macd


100%|██████████| 4/4 [00:03<00:00,  1.18it/s]
  0%|          | 0/4 [00:00<?, ?it/s]

boll_ub


100%|██████████| 4/4 [00:05<00:00,  1.42s/it]
  0%|          | 0/4 [00:00<?, ?it/s]

boll_lb


100%|██████████| 4/4 [00:05<00:00,  1.40s/it]
  0%|          | 0/4 [00:00<?, ?it/s]

rsi_30


100%|██████████| 4/4 [00:03<00:00,  1.11it/s]
  0%|          | 0/4 [00:00<?, ?it/s]

dx_30


100%|██████████| 4/4 [00:04<00:00,  1.22s/it]
  0%|          | 0/4 [00:00<?, ?it/s]

close_30_sma


100%|██████████| 4/4 [00:02<00:00,  1.39it/s]
  0%|          | 0/4 [00:00<?, ?it/s]

close_60_sma


100%|██████████| 4/4 [00:02<00:00,  1.42it/s]


Successfully added technical indicators


In [51]:
## Design Environment
# One tradable asset per distinct ticker in the prepared training frame.
dimension = len(newtrain['tic'].unique())
indicator_count = len(config.TECHNICAL_INDICATORS_LIST)
# 1 scalar + 2 values per asset + one value per (indicator, asset) pair.
# NOTE(review): presumably cash balance, then price and holdings per asset --
# confirm against StockTradingEnv's state layout.
state_space = 1 + dimension * (2 + indicator_count)

env_kwargs = dict(
    hmax=100,
    initial_amount=100000,
    buy_cost_pct=0.0005,
    sell_cost_pct=0.0005,
    state_space=state_space,
    stock_dim=dimension,
    tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
    action_space=dimension,
    reward_scaling=1e-4,
)

e_train_gym = StockTradingEnv(
    df=newtrain,
    **env_kwargs,
)

# Environment for Training: only the first element returned by get_sb_env()
# is kept; the second is discarded.
env_train, _ = e_train_gym.get_sb_env()

In [4]:
## Implement DRL Algorithms
# A2C: no model_kwargs are passed, so the agent's own defaults are used.
agent = DRLAgent(env=env_train)
model_a2c = agent.get_model("a2c")
trained_a2c = agent.train_model(
    tb_log_name="a2c",
    model=model_a2c,
)

{'n_steps': 5, 'ent_coef': 0.01, 'learning_rate': 0.0007}
Using cpu device
Logging to tensorboard_log/a2c/a2c_3
-----------------------------------------
| time/                 |               |
|    fps                | 2             |
|    iterations         | 100           |
|    time_elapsed       | 184           |
|    total_timesteps    | 500           |
| train/                |               |
|    entropy_loss       | -5.81         |
|    explained_variance | 5.96e-08      |
|    learning_rate      | 0.0007        |
|    n_updates          | 99            |
|    policy_loss        | -0.0314       |
|    reward             | -0.0036038684 |
|    std                | 1.04          |
|    value_loss         | 0.000114      |
-----------------------------------------
-----------------------------------------
| time/                 |               |
|    fps                | 2             |
|    iterations         | 200           |
|    time_elapsed       | 367           |
|    t

In [52]:
# PPO: a new agent wrapper around the same training environment, with
# explicit hyper-parameters.
PPO_PARAMS = dict(
    n_steps=2048,
    ent_coef=0.01,
    learning_rate=0.00025,
    batch_size=128,
)
agent = DRLAgent(env=env_train)
model_ppo = agent.get_model("ppo", model_kwargs=PPO_PARAMS)
trained_ppo = agent.train_model(
    tb_log_name="ppo",
    model=model_ppo,
)

{'n_steps': 2048, 'ent_coef': 0.01, 'learning_rate': 0.00025, 'batch_size': 128}
Using cpu device
Logging to tensorboard_log/ppo/ppo_2
--------------------------------------
| time/              |               |
|    fps             | 2             |
|    iterations      | 1             |
|    time_elapsed    | 855           |
|    total_timesteps | 2048          |
| train/             |               |
|    reward          | -0.0010924032 |
--------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 2            |
|    iterations           | 2            |
|    time_elapsed         | 1676         |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.007758527  |
|    clip_fraction        | 0.0868       |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | -8.08        

In [57]:
# SAC: a new agent wrapper around the same training environment, with
# explicit hyper-parameters.
SAC_PARAMS = dict(
    batch_size=128,
    buffer_size=1000000,
    learning_rate=0.0001,
    learning_starts=100,
    ent_coef="auto_0.1",
)
agent = DRLAgent(env=env_train)
model_sac = agent.get_model("sac", model_kwargs=SAC_PARAMS)
trained_sac = agent.train_model(
    tb_log_name="sac",
    model=model_sac,
)

{'batch_size': 128, 'buffer_size': 1000000, 'learning_rate': 0.0001, 'learning_starts': 100, 'ent_coef': 'auto_0.1'}
Using cpu device
Logging to tensorboard_log/sac/sac_1


In [37]:
# BacktestData
# Load the backtest candles: one '<symbol>.csv' group per symbol in the HDF5 file.
symbols = ['BTCUSDT', 'ETHUSDT', 'LTCUSDT', 'XRPUSDT']
ohlcv_columns = ['Open', 'High', 'Low', 'Close', 'Volume', 'tic']

tradeDict = {}
# Keep the file handle under its own name and close it via the context manager.
# Previously `trade` was bound to the open file and then rebound to a DataFrame,
# which lost the handle without closing it.
with h5py.File(os.path.join(os.getcwd(), 'data', 'backtest_data_format1_week_final.h5'), 'r') as backtest_file:
    for symbol in symbols:
        table = backtest_file[symbol + '.csv']
        tradeDict[symbol] = pd.DataFrame(
            data=table['block0_values'][()],
            # 'axis0' stores the column labels as bytes; decode them to str.
            columns=[str(label, encoding="utf8") for label in table['axis0'][()]],
            index=table['axis1'][()])
        # Tag every row with its symbol so the frames can be stacked.
        tradeDict[symbol]['tic'] = symbol

# Stack all symbols with a single concat instead of growing a frame inside a
# loop, then pin the column order the positional rename below relies on.
trade = pd.concat([tradeDict[symbol] for symbol in symbols], axis=0)[ohlcv_columns]

# The raw index values are divided by 1e9 before fromtimestamp(), i.e. they are
# treated as nanoseconds since the epoch.
# NOTE(review): fromtimestamp() converts to the machine's local time zone, so
# the two hard-coded labels dropped below only exist (and drop() only avoids a
# KeyError) when run in the time zone the notebook was authored in -- confirm.
trade.index = trade.index.map(
    lambda ns: datetime.datetime.fromtimestamp(ns / 1000000000).strftime('%Y-%m-%d %H:%M:%S'))
# Drop two specific bars (the notebook does not record why).
trade = trade.drop('2021-08-14 06:00:00')
trade = trade.drop('2021-09-29 18:40:00')

# Move the timestamp index into a column; 'Close' becomes lower-case 'close'.
trade = trade.reset_index()
trade.columns = ['date', 'Open', 'High', 'Low', 'close', 'Volume', 'tic']
trade = trade.sort_values(by=['date', 'tic'])

# Remove 'cci_30' BEFORE building the FeatureEngineer, as the training cell
# does, so the result does not depend on the engineer holding a reference to
# the very list object that is mutated here.
if 'cci_30' in config.TECHNICAL_INDICATORS_LIST:
    config.TECHNICAL_INDICATORS_LIST.remove('cci_30')

fe = FeatureEngineer(
    use_technical_indicator=True,
    tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
    use_vix=False,
    use_turbulence=False,
    user_defined_feature=False)

newtrade = fe.preprocess_data(trade)
# Re-index so that all rows sharing a timestamp share one integer label
# (0, 1, 2, ... in order of first appearance).
newtrade.index = newtrade.date.factorize()[0]

 25%|██▌       | 1/4 [00:00<00:00,  5.37it/s]

macd


100%|██████████| 4/4 [00:00<00:00,  5.60it/s]
  0%|          | 0/4 [00:00<?, ?it/s]

boll_ub


100%|██████████| 4/4 [00:01<00:00,  3.40it/s]
  0%|          | 0/4 [00:00<?, ?it/s]

boll_lb


100%|██████████| 4/4 [00:01<00:00,  3.55it/s]
 25%|██▌       | 1/4 [00:00<00:00,  4.96it/s]

rsi_30


100%|██████████| 4/4 [00:00<00:00,  5.05it/s]
  0%|          | 0/4 [00:00<?, ?it/s]

dx_30


100%|██████████| 4/4 [00:00<00:00,  4.36it/s]
 25%|██▌       | 1/4 [00:00<00:00,  7.23it/s]

close_30_sma


100%|██████████| 4/4 [00:00<00:00,  6.93it/s]
 25%|██▌       | 1/4 [00:00<00:00,  7.26it/s]

close_60_sma


100%|██████████| 4/4 [00:00<00:00,  7.12it/s]


Successfully added technical indicators


In [42]:
# Build the backtest environment from the prepared backtest frame using the
# same keyword settings as the training environment, then run the trained
# A2C model through it.
e_trade_gym = StockTradingEnv(
    df=newtrade,
    **env_kwargs,
)
df_account_value, df_actions = DRLAgent.DRL_prediction(
    environment=e_trade_gym,
    model=trained_a2c,
)

100%|█████████▉| 300887/300888 [9:23:20<00:00,  8.90it/s]  

hit end!





In [45]:
## Backtesting
# A2C
print("==============Get Backtest Results===========")
# Minute-resolution timestamp string for this backtest run.
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

# NOTE(review): the recorded output shows NaN for annual return -- presumably
# the statistics assume daily periods while this data is intraday; confirm.
perf_stats_all = pd.DataFrame(
    backtest_stats(account_value=df_account_value)
)

Annual return                NaN
Cumulative returns     -9.254218
Annual volatility       0.106177
Sharpe ratio            0.018853
Calmar ratio                 NaN
Stability                    NaN
Max drawdown          -10.279980
Omega ratio             1.017095
Sortino ratio           0.022678
Skew                         NaN
Kurtosis                     NaN
Tail ratio              1.023641
Daily value at risk    -0.013369
dtype: float64


  return ending_value ** (1 / num_years) - 1
  cum_log_returns = np.log1p(returns).cumsum()


In [53]:
# PPO backtest
# Use a dedicated environment instance instead of re-using `e_trade_gym`, which
# the A2C backtest has already stepped through to the end. This keeps the cell
# independent of whatever state that earlier run left behind and of the order
# in which the backtest cells are executed.
e_trade_gym_ppo = StockTradingEnv(df=newtrade, **env_kwargs)
df_account_value_ppo, df_actions_ppo = DRLAgent.DRL_prediction(
    model=trained_ppo,
    environment=e_trade_gym_ppo,
)

100%|█████████▉| 300887/300888 [10:24:39<00:00,  8.03it/s]  

hit end!





In [54]:
# Backtesting (PPO)
print("==============Get Backtest Results===========")

# NOTE: this rebinds `perf_stats_all`, replacing the A2C statistics computed
# in the earlier backtesting cell.
perf_stats_all = pd.DataFrame(
    backtest_stats(account_value=df_account_value_ppo)
)

Annual return               NaN
Cumulative returns    -4.540072
Annual volatility      5.403767
Sharpe ratio           0.030649
Calmar ratio                NaN
Stability                   NaN
Max drawdown          -5.057766
Omega ratio            2.334322
Sortino ratio          0.927480
Skew                        NaN
Kurtosis                    NaN
Tail ratio             1.012889
Daily value at risk   -0.680153
dtype: float64


  return ending_value ** (1 / num_years) - 1
  cum_log_returns = np.log1p(returns).cumsum()
