# **_Reinforcement Learning tools for Auto-Stock Trading_**  

### 1. Importing Necessary Libraries

In [1]:
#Basic Data Science Libraries
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
matplotlib.use('Agg')
%matplotlib inline

In [2]:
#Finrl utilities
from finrl import config
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import data_split
from finrl.agents.stablebaselines3.models import DRLAgent

  align: pd.Timedelta | str = pd.Timedelta(1, "T"),


In [3]:
#Processing Utilities
import datetime
import itertools


In [4]:
#Make finrl imports accessible
import sys
sys.path.append("../FinRL-Library")

In [5]:
#Setup libraries
from __future__ import annotations
#postponed evaluation of type annotations and evaluation available at runtime

In [6]:
#other imports will be used wherever applicable

In [7]:
#Symbols of BSE SENSEX30 whose data is to be downloaded
symbols = [
    "MEDINOVA", "RJBIOTECH", "ADDICTIVELE", "SWADESHPOLY", "STERLGUAR", "SBEC", "TRIVENI", "RELHOME", "MODERNSTEEL", 
    "KRISHNAFIL", "TRF", "SERVOTECH", "YAARIDIGI", "BASILIC", "KSOLVES", "LIFEINSUR", "NESTLEIND", "GAYAPROJECT", 
    "ABIRAMIFI", "RAJKAMSYN", "SONAMACH", "BLUEPEB", "PRESSNS", "IBINFOTECH", "REMEDIES", "EXHICON", "PGHH", "ADHBHUT", 
    "IEL", "INFRONIC"
]




In [8]:
#Globally accesible training and trading s/e
TRAIN_START_DATE = '2010-01-01'
TRAIN_END_DATE = '2020-07-01'
TRADE_START_DATE = '2020-07-01'
TRADE_END_DATE = '2023-05-01'

In [9]:

df_raw = YahooDownloader(start_date = TRAIN_START_DATE,
                                end_date = TRADE_END_DATE,
                                ticker_list = symbols).fetch_data()

[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['MEDINOVA']: Exception('%ticker%: No timezone found, symbol may be delisted')
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['RJBIOTECH']: Exception('%ticker%: No timezone found, symbol may be delisted')
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['ADDICTIVELE']: Exception('%ticker%: No timezone found, symbol may be delisted')
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['SWADESHPOLY']: Exception('%ticker%: No timezone found, symbol may be delisted')
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['STERLGUAR']: Exception('%ticker%: No timezone found, symbol may be delisted')
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['SBEC']: Exception('%ticker%: No timezone found, symbol may be

Shape of DataFrame:  (2, 8)


In [10]:
df_raw.to_csv("indianpennystocks30.csv")

In [11]:
df_raw['tic'].value_counts()

tic
NTES         3353
TCEHY        3352
0883.HK      3285
0941.HK      3285
1398.HK      3285
3968.HK      3285
0728.HK      3285
000858.SZ    3236
601628.SS    3236
601988.SS    3236
601899.SS    3236
601857.SS    3236
601939.SS    3236
601328.SS    3236
601318.SS    3236
601088.SS    3236
600900.SS    3236
600519.SS    3236
600028.SS    3236
601288.SS    3107
002594.SZ    2874
000333.SZ    2336
BABA         2167
PDD          1198
300750.SZ    1186
601138.SS    1184
3690.HK      1133
601658.SS     822
9633.HK       649
XIACF         535
Name: count, dtype: int64

### 2. Loading the Data

In [12]:
df_raw=pd.read_csv('datasets/chinese30.csv')

In [13]:
df_raw.head()

Unnamed: 0.1,Unnamed: 0,date,open,high,low,close,volume,tic,day
0,0,2010-01-04,31.799999,32.0,31.360001,24.145697,26409039,000858.SZ,0
1,1,2010-01-04,3.23,3.27,3.18,1.953881,52124000,0728.HK,0
2,2,2010-01-04,12.22,12.36,12.16,5.35486,31983302,0883.HK,0
3,3,2010-01-04,72.849998,73.550003,72.0,36.793877,14919781,0941.HK,0
4,4,2010-01-04,6.318309,6.386986,6.200576,2.901017,161950404,1398.HK,0


In [14]:
df_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 79153 entries, 0 to 79152
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  79153 non-null  int64  
 1   date        79153 non-null  object 
 2   open        79153 non-null  float64
 3   high        79153 non-null  float64
 4   low         79153 non-null  float64
 5   close       79153 non-null  float64
 6   volume      79153 non-null  int64  
 7   tic         79153 non-null  object 
 8   day         79153 non-null  int64  
dtypes: float64(4), int64(3), object(2)
memory usage: 5.4+ MB


### 3. Preprocessing the data

In [15]:
from finrl.config import INDICATORS
from dataprocessing import FeatureEngineer, load_dataset, data_split, convert_to_datetime

fe = FeatureEngineer(use_technical_indicator=True,
                      tech_indicator_list = INDICATORS,
                      use_vix=False,
                      use_turbulence=True,
                      user_defined_feature = False)

processed = fe.preprocess_data(df_raw)

Successfully added technical indicators


  df_price_pivot = df_price_pivot.pct_change()


Successfully added turbulence index


In [16]:
processed

Unnamed: 0.1,Unnamed: 0,date,open,high,low,close,volume,tic,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,turbulence
0,0,2010-01-04,31.799999,32.000000,31.360001,24.145697,26409039,000858.SZ,0,0.000000,24.630672,23.914076,100.000000,66.666667,100.000000,24.145697,24.145697,0.00000
1,1,2010-01-04,3.230000,3.270000,3.180000,1.953881,52124000,0728.HK,0,0.000000,24.630672,23.914076,100.000000,66.666667,100.000000,1.953881,1.953881,0.00000
2,2,2010-01-04,12.220000,12.360000,12.160000,5.354860,31983302,0883.HK,0,0.000000,24.630672,23.914076,100.000000,66.666667,100.000000,5.354860,5.354860,0.00000
3,3,2010-01-04,72.849998,73.550003,72.000000,36.793877,14919781,0941.HK,0,0.000000,24.630672,23.914076,100.000000,66.666667,100.000000,36.793877,36.793877,0.00000
4,4,2010-01-04,6.318309,6.386986,6.200576,2.901017,161950404,1398.HK,0,0.000000,24.630672,23.914076,100.000000,66.666667,100.000000,2.901017,2.901017,0.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79148,79148,2023-04-28,84.180000,85.080002,83.870003,83.566643,18109300,BABA,4,-2.450452,105.660563,79.401707,43.215749,-79.054358,27.470570,91.002023,92.223602,30.11322
79149,79149,2023-04-28,88.599998,89.919998,88.400002,86.956947,799100,NTES,4,0.251507,91.559780,83.229253,52.779820,3.494252,2.712654,86.832397,84.851831,30.11322
79150,79150,2023-04-28,65.400002,68.809998,65.400002,68.150002,6107700,PDD,4,-3.619802,75.740592,63.510407,41.628790,-80.314258,18.641053,72.103333,81.958500,30.11322
79151,79151,2023-04-28,43.740002,44.439999,43.740002,44.055988,2618700,TCEHY,4,-0.778918,50.455900,41.542259,47.930947,-72.481164,18.516555,46.084987,45.965147,30.11322


In [17]:
df=processed

In [18]:
list_ticker = df["tic"].unique().tolist()
# only apply to daily level data, need to fix for minute level
list_date = list(pd.date_range(df['date'].min(),df['date'].max()).astype(str))
combination = list(itertools.product(list_date,list_ticker))

df_full = pd.DataFrame(combination,columns=["date","tic"]).merge(df,on=["date","tic"],how="left")
df_full = df_full[df_full['date'].isin(df['date'])]
df_full = df_full.sort_values(['date','tic'])
df_full = df_full.fillna(1)

In [19]:
df_full.info()

<class 'pandas.core.frame.DataFrame'>
Index: 103830 entries, 21 to 145889
Data columns (total 18 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   date          103830 non-null  object 
 1   tic           103830 non-null  object 
 2   Unnamed: 0    103830 non-null  float64
 3   open          103830 non-null  float64
 4   high          103830 non-null  float64
 5   low           103830 non-null  float64
 6   close         103830 non-null  float64
 7   volume        103830 non-null  float64
 8   day           103830 non-null  float64
 9   macd          103830 non-null  float64
 10  boll_ub       103830 non-null  float64
 11  boll_lb       103830 non-null  float64
 12  rsi_30        103830 non-null  float64
 13  cci_30        103830 non-null  float64
 14  dx_30         103830 non-null  float64
 15  close_30_sma  103830 non-null  float64
 16  close_60_sma  103830 non-null  float64
 17  turbulence    103830 non-null  float64
dtypes: float

In [20]:
df=df_full

In [21]:
df.head()

Unnamed: 0.1,date,tic,Unnamed: 0,open,high,low,close,volume,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,turbulence
21,2010-01-04,000333.SZ,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
0,2010-01-04,000858.SZ,0.0,31.799999,32.0,31.360001,24.145697,26409039.0,0.0,0.0,24.630672,23.914076,100.0,66.666667,100.0,24.145697,24.145697,0.0
20,2010-01-04,002594.SZ,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,2010-01-04,0728.HK,1.0,3.23,3.27,3.18,1.953881,52124000.0,0.0,0.0,24.630672,23.914076,100.0,66.666667,100.0,1.953881,1.953881,0.0
2,2010-01-04,0883.HK,2.0,12.22,12.36,12.16,5.35486,31983302.0,0.0,0.0,24.630672,23.914076,100.0,66.666667,100.0,5.35486,5.35486,0.0


In [22]:
df.shape

(103830, 18)

### 4.Splitting Training and Trading Data

In [23]:
train = data_split(df, TRAIN_START_DATE,TRAIN_END_DATE)
trade = data_split(df, TRADE_START_DATE,TRADE_END_DATE)
print(len(train))
print(len(trade))
     

81780
22050


In [24]:
train.to_csv('train_data.csv')
trade.to_csv('trade_data.csv')

### 5. Construction of Trading Environment

In [25]:
from TradingEnv import StockTradingEnv

In [26]:


# Set the corresponding values to 'True' for the algorithms that you want to use
if_using_a2c = True
if_using_ddpg = True
if_using_ppo = True
if_using_td3 = True
if_using_sac = True

### Testing A2C

In [27]:
from stable_baselines3 import A2C, DDPG, PPO, SAC, TD3

trained_a2c = A2C.load("trained_models/agent_a2c") if if_using_a2c else None

In [28]:
stock_dimension = len(trade.tic.unique())
state_space = 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 30, State Space: 301


In [29]:
buy_cost_list = sell_cost_list = [0.001] * stock_dimension
num_stock_shares = [0] * stock_dimension

env_kwargs = {
    "hmax": 100,
    "initial_amount": 100000,
    "num_stock_shares": num_stock_shares,
    "buy_cost_pct": buy_cost_list,
    "sell_cost_pct": sell_cost_list,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": INDICATORS,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}

In [30]:
e_trade_gym = StockTradingEnv(df = trade, turbulence_threshold = 70, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

In [31]:
df_account_value_a2c, df_actions_a2c = DRLAgent.DRL_prediction(
    model=trained_a2c, 
    environment = e_trade_gym) if if_using_a2c else (None, None)

hit end!


In [32]:
df_account_value_a2c.tail()

Unnamed: 0,date,account_value
730,2023-04-24,27534.776958
731,2023-04-25,26358.978586
732,2023-04-26,26524.980761
733,2023-04-27,27046.885301
734,2023-04-28,27019.838481


In [33]:
from finrl.plot import backtest_stats
print("==============Get Backtest Results===========")
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

perf_stats_all = backtest_stats(account_value=df_account_value_a2c)
perf_stats_all = pd.DataFrame(perf_stats_all)




Annual return          -0.361518
Cumulative returns     -0.729802
Annual volatility      37.717795
Sharpe ratio            1.084126
Calmar ratio           -0.363140
Stability               0.556695
Max drawdown           -0.995534
Omega ratio             5.893613
Sortino ratio          19.160290
Skew                         NaN
Kurtosis                     NaN
Tail ratio              1.099884
Daily value at risk    -4.589730
dtype: float64


## Testing DDPG

In [34]:
stock_dimension = len(trade.tic.unique())
state_space = 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 30, State Space: 301


In [35]:
buy_cost_list = sell_cost_list = [0.001] * stock_dimension
num_stock_shares = [0] * stock_dimension

env_kwargs = {
    "hmax": 100,
    "initial_amount": 100000,
    "num_stock_shares": num_stock_shares,
    "buy_cost_pct": buy_cost_list,
    "sell_cost_pct": sell_cost_list,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": INDICATORS,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}

In [36]:
e_trade_gym = StockTradingEnv(df = trade, turbulence_threshold = 70, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

In [37]:
trained_ddpg = DDPG.load("trained_models/agent_ddpg") if if_using_ddpg else None

In [38]:
df_account_value_ddpg, df_actions_ddpg = DRLAgent.DRL_prediction(
    model=trained_ddpg, 
    environment = e_trade_gym) if if_using_ddpg else (None, None)

hit end!


In [39]:
df_account_value_ddpg.tail()

Unnamed: 0,date,account_value
730,2023-04-24,77096.002535
731,2023-04-25,78210.612436
732,2023-04-26,78822.725904
733,2023-04-27,79459.809576
734,2023-04-28,79380.35299


In [40]:
from finrl.plot import backtest_stats
print("==============Get Backtest Results===========")
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

perf_stats_all = backtest_stats(account_value=df_account_value_ddpg)
perf_stats_all = pd.DataFrame(perf_stats_all)


Annual return           -0.076119
Cumulative returns      -0.206196
Annual volatility      618.903077
Sharpe ratio             1.245201
Calmar ratio            -0.076162
Stability                0.001921
Max drawdown            -0.999433
Omega ratio            125.813579
Sortino ratio          366.120939
Skew                          NaN
Kurtosis                      NaN
Tail ratio               0.925581
Daily value at risk    -74.916288
dtype: float64


## Testing PPO

In [41]:
stock_dimension = len(trade.tic.unique())
state_space = 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 30, State Space: 301


In [42]:
buy_cost_list = sell_cost_list = [0.001] * stock_dimension
num_stock_shares = [0] * stock_dimension

env_kwargs = {
    "hmax": 100,
    "initial_amount": 100000,
    "num_stock_shares": num_stock_shares,
    "buy_cost_pct": buy_cost_list,
    "sell_cost_pct": sell_cost_list,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": INDICATORS,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}

In [43]:
e_trade_gym = StockTradingEnv(df = trade, turbulence_threshold = 70, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

In [44]:
trained_ppo = PPO.load("trained_models/agent_ppo") if if_using_ppo else None

In [45]:
df_account_value_ppo, df_actions_ppo = DRLAgent.DRL_prediction(
    model=trained_ppo, 
    environment = e_trade_gym) if if_using_ppo else (None, None)

hit end!


In [46]:
df_account_value_ppo.tail()

Unnamed: 0,date,account_value
730,2023-04-24,57436.667444
731,2023-04-25,56015.424748
732,2023-04-26,56611.29053
733,2023-04-27,57700.474226
734,2023-04-28,57642.775346


In [47]:
from finrl.plot import backtest_stats
print("==============Get Backtest Results===========")
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

perf_stats_all = backtest_stats(account_value=df_account_value_ppo)
perf_stats_all = pd.DataFrame(perf_stats_all)


Annual return         -0.172116
Cumulative returns    -0.423572
Annual volatility      3.327070
Sharpe ratio           1.246701
Calmar ratio          -0.211429
Stability              0.502685
Max drawdown          -0.814059
Omega ratio            1.671939
Sortino ratio          2.790702
Skew                        NaN
Kurtosis                    NaN
Tail ratio             0.710849
Daily value at risk   -0.402712
dtype: float64


## Testing TD3

In [46]:
#retrieve from github

In [47]:
trained_td3 = agent.train_model(model=model_td3, 
                             tb_log_name='td3',
                             total_timesteps=50000) if if_using_td3 else None

NameError: name 'agent' is not defined

In [None]:
trained_td3.save(TRAINED_MODEL_DIR + "/agent_td3") if if_using_td3 else None

In [None]:
df_account_value_td3, df_actions_td3 = DRLAgent.DRL_prediction(
    model=trained_td3, 
    environment = e_trade_gym) if if_using_td3 else (None, None)

In [None]:
df_account_value_td3.tail()

In [None]:
df_account_value_td3, df_actions_td3 = DRLAgent.DRL_prediction(
    model=trained_td3, 
    environment = e_trade_gym) if if_using_td3 else (None, None)

In [None]:
df_account_value_td3.tail()

In [None]:
agent = DRLAgent(env = env_train)
SAC_PARAMS = {
    "batch_size": 128,
    "buffer_size": 100000,
    "learning_rate": 0.0001,
    "learning_starts": 100,
    "ent_coef": "auto_0.1",
}

model_sac = agent.get_model("sac",model_kwargs = SAC_PARAMS)

if if_using_sac:
  # set up logger
  tmp_path = RESULTS_DIR + '/sac'
  new_logger_sac = configure(tmp_path, ["stdout", "csv", "tensorboard"])
  # Set new logger
  model_sac.set_logger(new_logger_sac)

In [None]:
trained_sac = agent.train_model(model=model_sac, 
                             tb_log_name='sac',
                             total_timesteps=70000) if if_using_sac else None

In [None]:
trained_sac.save(TRAINED_MODEL_DIR + "/agent_sac") if if_using_sac else None

In [None]:
df_account_value_sac, df_actions_sac = DRLAgent.DRL_prediction(
    model=trained_sac, 
    environment = e_trade_gym) if if_using_sac else (None, None)

In [None]:
df_account_value_sac.tail()