# **_Reinforcement Learning tools for Auto-Stock Trading_**  

### 1. Importing Necessary Libraries

In [1]:
#Basic Data Science Libraries
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
matplotlib.use('Agg')
%matplotlib inline

In [2]:
#Finrl utilities
from finrl import config
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import data_split
from finrl.agents.stablebaselines3.models import DRLAgent

  _empty_series = pd.Series()


In [3]:
#Processing Utilities
import datetime
import itertools


In [4]:
#Make finrl imports accessible
import sys
sys.path.append("../FinRL-Library")

In [5]:
#Setup libraries
from __future__ import annotations
#postponed evaluation of type annotations and evaluation available at runtime

In [6]:
#other imports will be used wherever applicable

In [7]:
#Symbols of BSE SENSEX30 whose data is to be downloaded
# symbols = [
#     'AXISBANK.BO', 'BAJAJ-AUTO.BO', 'BAJFINANCE.BO', 'BAJAJFINSV.BO',
#     'BHARTIARTL.BO', 'DRREDDY.BO', 'HCLTECH.BO', 'JSWSTEEL.BO', 'HDFCBANK.BO',
#     'HINDUNILVR.BO', 'ICICIBANK.BO', 'INDUSINDBK.BO', 'INFY.BO', 'ITC.BO',
#     'KOTAKBANK.BO', 'LT.BO', 'M&M.BO', 'MARUTI.BO', 'NESTLEIND.BO',
#     'NTPC.BO', 'ONGC.BO', 'POWERGRID.BO', 'RELIANCE.BO', 'SBIN.BO',
#     'SUNPHARMA.BO', 'TCS.BO', 'TECHM.BO', 'TITAN.BO', 'ULTRACEMCO.BO','ASIANPAINT.BO'
# ]


In [6]:
#Globally accesible training and trading s/e
TRAIN_START_DATE = '2024-08-16 09:15:00+05:30'
TRAIN_END_DATE = '2024-09-25 15:25:00+05:30'
TRADE_START_DATE = '2024-09-25 15:25:00+05:30'
TRADE_END_DATE = '2024-10-08 14:00:00+05:30'

In [9]:
#How we downloaded the data
# df_raw = YahooDownloader(start_date = TRAIN_START_DATE,
#                                 end_date = TRADE_END_DATE,
#                                 ticker_list = symbols).fetch_data()

### 2. Loading the Data

In [10]:
df_raw=pd.read_csv('datasets/intraday_data_by_ticker.csv')

In [11]:
df_raw.head()

Unnamed: 0,date,open,high,low,close,volume,tic
0,2024-08-16 09:15:00+05:30,3056.0,3056.0,3026.600098,3028.350098,323.0,ASIANPAINT.BO
1,2024-08-16 09:15:00+05:30,1159.599976,1163.550049,1156.75,1162.949951,4737.0,AXISBANK.BO
2,2024-08-16 09:15:00+05:30,9770.0,9775.299805,9740.0,9770.0,105.0,BAJAJ-AUTO.BO
3,2024-08-16 09:15:00+05:30,1535.900024,1539.900024,1535.199951,1538.400024,1689.0,BAJAJFINSV.BO
4,2024-08-16 09:15:00+05:30,6509.299805,6509.299805,6472.450195,6487.700195,1716.0,BAJFINANCE.BO


In [12]:
df_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 82740 entries, 0 to 82739
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   date    82740 non-null  object 
 1   open    82473 non-null  float64
 2   high    82473 non-null  float64
 3   low     82473 non-null  float64
 4   close   82473 non-null  float64
 5   volume  82473 non-null  float64
 6   tic     82740 non-null  object 
dtypes: float64(5), object(2)
memory usage: 4.4+ MB


### 3. Preprocessing the data

In [13]:
from finrl.config import INDICATORS
from dataprocessing import FeatureEngineer, load_dataset, data_split, convert_to_datetime

fe = FeatureEngineer(use_technical_indicator=True,
                      tech_indicator_list = INDICATORS,
                      use_vix=False,
                      use_turbulence=True,
                      user_defined_feature = False)

processed = fe.preprocess_data(df_raw)

Successfully added technical indicators
Successfully added turbulence index


In [7]:
processed=pd.read_csv("intraday_data_processed.csv")

In [8]:
processed.head()

Unnamed: 0.1,Unnamed: 0,date,open,high,low,close,volume,tic,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,turbulence
0,0,2024-08-16 09:15:00+05:30,3056.0,3056.0,3026.600098,3028.350098,323.0,ASIANPAINT.BO,0.0,3054.287317,3015.962683,100.0,66.666667,100.0,3028.350098,3028.350098,0.0
1,1,2024-08-16 09:15:00+05:30,1159.599976,1163.550049,1156.75,1162.949951,4737.0,AXISBANK.BO,0.0,3054.287317,3015.962683,100.0,66.666667,100.0,1162.949951,1162.949951,0.0
2,2,2024-08-16 09:15:00+05:30,9770.0,9775.299805,9740.0,9770.0,105.0,BAJAJ-AUTO.BO,0.0,3054.287317,3015.962683,100.0,66.666667,100.0,9770.0,9770.0,0.0
3,3,2024-08-16 09:15:00+05:30,1535.900024,1539.900024,1535.199951,1538.400024,1689.0,BAJAJFINSV.BO,0.0,3054.287317,3015.962683,100.0,66.666667,100.0,1538.400024,1538.400024,0.0
4,4,2024-08-16 09:15:00+05:30,6509.299805,6509.299805,6472.450195,6487.700195,1716.0,BAJFINANCE.BO,0.0,3054.287317,3015.962683,100.0,66.666667,100.0,6487.700195,6487.700195,0.0


In [9]:
processed.drop('Unnamed: 0',axis=1,inplace=True)

In [10]:
processed.head()

Unnamed: 0,date,open,high,low,close,volume,tic,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,turbulence
0,2024-08-16 09:15:00+05:30,3056.0,3056.0,3026.600098,3028.350098,323.0,ASIANPAINT.BO,0.0,3054.287317,3015.962683,100.0,66.666667,100.0,3028.350098,3028.350098,0.0
1,2024-08-16 09:15:00+05:30,1159.599976,1163.550049,1156.75,1162.949951,4737.0,AXISBANK.BO,0.0,3054.287317,3015.962683,100.0,66.666667,100.0,1162.949951,1162.949951,0.0
2,2024-08-16 09:15:00+05:30,9770.0,9775.299805,9740.0,9770.0,105.0,BAJAJ-AUTO.BO,0.0,3054.287317,3015.962683,100.0,66.666667,100.0,9770.0,9770.0,0.0
3,2024-08-16 09:15:00+05:30,1535.900024,1539.900024,1535.199951,1538.400024,1689.0,BAJAJFINSV.BO,0.0,3054.287317,3015.962683,100.0,66.666667,100.0,1538.400024,1538.400024,0.0
4,2024-08-16 09:15:00+05:30,6509.299805,6509.299805,6472.450195,6487.700195,1716.0,BAJFINANCE.BO,0.0,3054.287317,3015.962683,100.0,66.666667,100.0,6487.700195,6487.700195,0.0


In [11]:
df=processed

In [12]:
list_ticker = df["tic"].unique().tolist()
# only apply to daily level data, need to fix for minute level
list_date = list(pd.date_range(df['date'].min(),df['date'].max()).astype(str))
combination = list(itertools.product(list_date,list_ticker))

df_full = pd.DataFrame(combination,columns=["date","tic"]).merge(df,on=["date","tic"],how="left")
df_full = df_full[df_full['date'].isin(df['date'])]
df_full = df_full.sort_values(['date','tic'])
df_full = df_full.fillna(1)

In [13]:
df_full.head()

Unnamed: 0,date,tic,open,high,low,close,volume,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,turbulence
0,2024-08-16 09:15:00+05:30,ASIANPAINT.BO,3056.0,3056.0,3026.600098,3028.350098,323.0,0.0,3054.287317,3015.962683,100.0,66.666667,100.0,3028.350098,3028.350098,0.0
1,2024-08-16 09:15:00+05:30,AXISBANK.BO,1159.599976,1163.550049,1156.75,1162.949951,4737.0,0.0,3054.287317,3015.962683,100.0,66.666667,100.0,1162.949951,1162.949951,0.0
2,2024-08-16 09:15:00+05:30,BAJAJ-AUTO.BO,9770.0,9775.299805,9740.0,9770.0,105.0,0.0,3054.287317,3015.962683,100.0,66.666667,100.0,9770.0,9770.0,0.0
3,2024-08-16 09:15:00+05:30,BAJAJFINSV.BO,1535.900024,1539.900024,1535.199951,1538.400024,1689.0,0.0,3054.287317,3015.962683,100.0,66.666667,100.0,1538.400024,1538.400024,0.0
4,2024-08-16 09:15:00+05:30,BAJFINANCE.BO,6509.299805,6509.299805,6472.450195,6487.700195,1716.0,0.0,3054.287317,3015.962683,100.0,66.666667,100.0,6487.700195,6487.700195,0.0


In [14]:
df=df_full

In [15]:
df.head()

Unnamed: 0,date,tic,open,high,low,close,volume,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,turbulence
0,2024-08-16 09:15:00+05:30,ASIANPAINT.BO,3056.0,3056.0,3026.600098,3028.350098,323.0,0.0,3054.287317,3015.962683,100.0,66.666667,100.0,3028.350098,3028.350098,0.0
1,2024-08-16 09:15:00+05:30,AXISBANK.BO,1159.599976,1163.550049,1156.75,1162.949951,4737.0,0.0,3054.287317,3015.962683,100.0,66.666667,100.0,1162.949951,1162.949951,0.0
2,2024-08-16 09:15:00+05:30,BAJAJ-AUTO.BO,9770.0,9775.299805,9740.0,9770.0,105.0,0.0,3054.287317,3015.962683,100.0,66.666667,100.0,9770.0,9770.0,0.0
3,2024-08-16 09:15:00+05:30,BAJAJFINSV.BO,1535.900024,1539.900024,1535.199951,1538.400024,1689.0,0.0,3054.287317,3015.962683,100.0,66.666667,100.0,1538.400024,1538.400024,0.0
4,2024-08-16 09:15:00+05:30,BAJFINANCE.BO,6509.299805,6509.299805,6472.450195,6487.700195,1716.0,0.0,3054.287317,3015.962683,100.0,66.666667,100.0,6487.700195,6487.700195,0.0


In [16]:
df.shape

(1110, 16)

In [17]:
def data_split1(df, start, end, target_date_col="date"):
    """
    Split the dataset into training or testing using date and time with timezone.
    
    :param df: pandas dataframe
    :param start: start date in 'YYYY-MM-DD HH:MM:SS+TZ' format
    :param end: end date in 'YYYY-MM-DD HH:MM:SS+TZ' format
    :param target_date_col: the column name representing the date
    :return: pandas dataframe
    """
    # Ensure the target_date_col is in datetime format with timezone
    df[target_date_col] = pd.to_datetime(df[target_date_col])
    
    # Filtering the data between start and end dates
    data = df[(df[target_date_col] >= pd.Timestamp(start)) & (df[target_date_col] < pd.Timestamp(end))]
    
    # Sorting the data by date and "tic" column
    data = data.sort_values([target_date_col, "tic"], ignore_index=True)
    
    # Re-indexing based on the factorized target_date_col
    data.index = data[target_date_col].factorize()[0]
    
    return data

### 4.Splitting Training and Trading Data

In [76]:
train=pd.read_csv('train_data.csv')
trade=pd.read_csv('trade_data.csv')

FileNotFoundError: [Errno 2] No such file or directory: 'train_datalala.csv'

In [71]:
trade.head()

Unnamed: 0.1,Unnamed: 0,date,open,high,low,close,volume,tic,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,turbulence
0,65250,2024-09-26 09:15:00+05:30,3250.0,3262.199951,3244.149902,3260.850098,1336,ASIANPAINT.BO,5.69377,3253.649352,3224.380653,63.582353,266.933037,47.135347,3236.245003,3227.441671,365.729838
1,65251,2024-09-26 09:15:00+05:30,1266.150024,1269.449951,1262.199951,1265.050049,7455,AXISBANK.BO,3.291383,1271.731478,1252.63853,61.074894,89.119652,17.176047,1259.620003,1256.490835,365.729838
2,65252,2024-09-26 09:15:00+05:30,12427.2002,12460.0,12396.40039,12396.40039,346,BAJAJ-AUTO.BO,12.589037,12412.23048,12335.73945,55.121774,182.692533,46.939163,12359.55827,12362.73249,365.729838
3,65253,2024-09-26 09:15:00+05:30,1940.300049,1947.449951,1933.5,1946.650024,2017,BAJAJFINSV.BO,6.185544,1937.594372,1896.865637,75.55257,301.805853,60.999199,1915.625,1912.0275,365.729838
4,65254,2024-09-26 09:15:00+05:30,7619.350098,7626.700195,7606.549805,7609.450195,811,BAJFINANCE.BO,5.184029,7634.889583,7581.715398,52.200986,51.249181,2.922274,7608.25166,7603.052515,365.729838


In [72]:
train.drop('Unnamed: 0',axis=1,inplace=True)
trade.drop('Unnamed: 0',axis=1,inplace=True)

In [28]:
train.shape[0]

69240

In [75]:


trade.head()

Unnamed: 0,date,open,high,low,close,volume,tic,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,turbulence
0,2024-09-26 09:15:00+05:30,3250.0,3262.199951,3244.149902,3260.850098,1336,ASIANPAINT.BO,5.69377,3253.649352,3224.380653,63.582353,266.933037,47.135347,3236.245003,3227.441671,365.729838
1,2024-09-26 09:15:00+05:30,1266.150024,1269.449951,1262.199951,1265.050049,7455,AXISBANK.BO,3.291383,1271.731478,1252.63853,61.074894,89.119652,17.176047,1259.620003,1256.490835,365.729838
2,2024-09-26 09:15:00+05:30,12427.2002,12460.0,12396.40039,12396.40039,346,BAJAJ-AUTO.BO,12.589037,12412.23048,12335.73945,55.121774,182.692533,46.939163,12359.55827,12362.73249,365.729838
3,2024-09-26 09:15:00+05:30,1940.300049,1947.449951,1933.5,1946.650024,2017,BAJAJFINSV.BO,6.185544,1937.594372,1896.865637,75.55257,301.805853,60.999199,1915.625,1912.0275,365.729838
4,2024-09-26 09:15:00+05:30,7619.350098,7626.700195,7606.549805,7609.450195,811,BAJFINANCE.BO,5.184029,7634.889583,7581.715398,52.200986,51.249181,2.922274,7608.25166,7603.052515,365.729838


In [60]:
trade.drop('Unnamed: 0',axis=1,inplace=True)

In [66]:
# train['transformed_index'] = train.groupby('date').ngroup()

# # Set transformed index as the new DataFrame index if needed
# train.set_index('transformed_index', drop=True,inplace=True)
# train.index.name = None
# print(train)

trade['transformed_index'] = trade.groupby('date').ngroup()

# Set transformed index as the new DataFrame index if needed
trade.set_index('transformed_index', drop=True,inplace=True)
trade.index.name = None
# print(train)

In [68]:
trade.head()

Unnamed: 0,date,open,high,low,close,volume,tic,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,turbulence
1592,2024-09-26 09:15:00+05:30,3250.0,3262.199951,3244.149902,3260.850098,1336,ASIANPAINT.BO,5.69377,3253.649352,3224.380653,63.582353,266.933037,47.135347,3236.245003,3227.441671,365.729838
1592,2024-09-26 09:15:00+05:30,1266.150024,1269.449951,1262.199951,1265.050049,7455,AXISBANK.BO,3.291383,1271.731478,1252.63853,61.074894,89.119652,17.176047,1259.620003,1256.490835,365.729838
1592,2024-09-26 09:15:00+05:30,12427.2002,12460.0,12396.40039,12396.40039,346,BAJAJ-AUTO.BO,12.589037,12412.23048,12335.73945,55.121774,182.692533,46.939163,12359.55827,12362.73249,365.729838
1592,2024-09-26 09:15:00+05:30,1940.300049,1947.449951,1933.5,1946.650024,2017,BAJAJFINSV.BO,6.185544,1937.594372,1896.865637,75.55257,301.805853,60.999199,1915.625,1912.0275,365.729838
1592,2024-09-26 09:15:00+05:30,7619.350098,7626.700195,7606.549805,7609.450195,811,BAJFINANCE.BO,5.184029,7634.889583,7581.715398,52.200986,51.249181,2.922274,7608.25166,7603.052515,365.729838


In [82]:
trade.to_csv('tradelala.csv')

In [81]:
train.to_csv("trainlala.csv")

In [84]:
trade.head()

Unnamed: 0,date,open,high,low,close,volume,tic,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,turbulence
0,2024-09-26 09:15:00+05:30,3250.0,3262.199951,3244.149902,3260.850098,1336,ASIANPAINT.BO,5.69377,3253.649352,3224.380653,63.582353,266.933037,47.135347,3236.245003,3227.441671,365.729838
1,2024-09-26 09:15:00+05:30,1266.150024,1269.449951,1262.199951,1265.050049,7455,AXISBANK.BO,3.291383,1271.731478,1252.63853,61.074894,89.119652,17.176047,1259.620003,1256.490835,365.729838
2,2024-09-26 09:15:00+05:30,12427.2002,12460.0,12396.40039,12396.40039,346,BAJAJ-AUTO.BO,12.589037,12412.23048,12335.73945,55.121774,182.692533,46.939163,12359.55827,12362.73249,365.729838
3,2024-09-26 09:15:00+05:30,1940.300049,1947.449951,1933.5,1946.650024,2017,BAJAJFINSV.BO,6.185544,1937.594372,1896.865637,75.55257,301.805853,60.999199,1915.625,1912.0275,365.729838
4,2024-09-26 09:15:00+05:30,7619.350098,7626.700195,7606.549805,7609.450195,811,BAJFINANCE.BO,5.184029,7634.889583,7581.715398,52.200986,51.249181,2.922274,7608.25166,7603.052515,365.729838


In [107]:
# train = data_split(df, TRAIN_START_DATE,TRAIN_END_DATE)
# trade = data_split(df, TRADE_START_DATE,TRADE_END_DATE)
print(len(train))
print(len(trade))

69240
69240


### 5. Construction of Trading Environment

In [35]:
from TradingEnv import StockTradingEnv

In [37]:
from finrl.config import INDICATORS

In [38]:
stock_dimension = len(train.tic.unique())
state_space = 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 30, State Space: 301


In [104]:
train.head()

Unnamed: 0,date,tic,open,high,low,close,volume,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,turbulence
0,2024-08-26 09:15:00+05:30,3160.0,3164.949951,3144.600098,3144.600098,1149,ASIANPAINT.BO,-3.100598,3168.335605,3147.54938,38.554848,-185.609583,15.263966,3158.422405,3161.633048,1018.954971
0,2024-08-26 09:15:00+05:30,1169.400024,1172.5,1166.949951,1172.099976,14169,AXISBANK.BO,-0.461603,1171.318389,1164.286616,55.705405,89.148906,13.97506,1168.573336,1169.387286,1018.954971
0,2024-08-26 09:15:00+05:30,10390.40039,10492.0,10336.59961,10492.0,968,BAJAJ-AUTO.BO,28.281197,10451.07382,10326.4713,69.437647,203.898645,51.19603,10373.01504,10312.92754,1018.954971
0,2024-08-26 09:15:00+05:30,1650.0,1651.5,1642.949951,1643.900024,1830,BAJAJFINSV.BO,2.261813,1643.801025,1630.268957,66.122814,242.847447,70.50437,1635.731661,1634.464164,1018.954971
0,2024-08-26 09:15:00+05:30,6758.799805,6774.0,6753.299805,6768.25,440,BAJFINANCE.BO,-0.942347,6767.529586,6727.705326,59.941815,197.308194,39.900623,6748.049967,6742.945833,1018.954971


In [39]:
buy_cost_list = sell_cost_list = [0.001] * stock_dimension
num_stock_shares = [0] * stock_dimension

env_kwargs = {
    "hmax": 100,
    "initial_amount": 1000000,
    "min_portfolio_value": 100000,
    "num_stock_shares": num_stock_shares,
    "buy_cost_pct": buy_cost_list,
    "sell_cost_pct": sell_cost_list,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": INDICATORS,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}


e_train_gym = StockTradingEnv(df = train, **env_kwargs)

In [40]:
env_train, _ = e_train_gym.get_sb_env()
print(type(env_train))

<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>


In [41]:
agent = DRLAgent(env = env_train)

# Set the corresponding values to 'True' for the algorithms that you want to use
if_using_a2c = True
if_using_ddpg = False
if_using_ppo = False
if_using_td3 = False
if_using_sac = False

In [42]:
from finrl.main import check_and_make_directories
from finrl.config import INDICATORS, TRAINED_MODEL_DIR, RESULTS_DIR
from stable_baselines3.common.logger import configure
check_and_make_directories([TRAINED_MODEL_DIR])

## A2C

In [43]:
agent = DRLAgent(env = env_train)
model_a2c = agent.get_model("a2c")

if if_using_a2c:
  # set up logger
  tmp_path = RESULTS_DIR + '/a2c'
  new_logger_a2c = configure(tmp_path, ["stdout", "csv", "tensorboard"])
  # Set new logger
  model_a2c.set_logger(new_logger_a2c)

{'n_steps': 5, 'ent_coef': 0.01, 'learning_rate': 0.0007}
Using cpu device
Logging to results/a2c


In [44]:
trained_a2c = agent.train_model(model=model_a2c, 
                             tb_log_name='a2c',
                             total_timesteps=50000) if if_using_a2c else None

-----------------------------------------
| time/                 |               |
|    fps                | 90            |
|    iterations         | 100           |
|    time_elapsed       | 5             |
|    total_timesteps    | 500           |
| train/                |               |
|    entropy_loss       | -43.3         |
|    explained_variance | -0.745        |
|    learning_rate      | 0.0007        |
|    n_updates          | 99            |
|    policy_loss        | -11.1         |
|    reward             | -0.0064009377 |
|    std                | 1.02          |
|    value_loss         | 0.0675        |
-----------------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 91          |
|    iterations         | 200         |
|    time_elapsed       | 10          |
|    total_timesteps    | 1000        |
| train/                |             |
|    entropy_loss       | -43.7       |
|    exp

In [52]:
trained_a2c.save(TRAINED_MODEL_DIR + "/agent_a2c_intraday") if if_using_a2c else None

### Testing A2C

In [59]:
train.head()

Unnamed: 0,date,open,high,low,close,volume,tic,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,turbulence
0,2024-08-16 09:15:00+05:30,3056.0,3056.0,3026.600098,3028.350098,323,ASIANPAINT.BO,0.0,3054.287317,3015.962683,100.0,66.666667,100.0,3028.350098,3028.350098,0.0
0,2024-08-16 09:15:00+05:30,1159.599976,1163.550049,1156.75,1162.949951,4737,AXISBANK.BO,0.0,3054.287317,3015.962683,100.0,66.666667,100.0,1162.949951,1162.949951,0.0
0,2024-08-16 09:15:00+05:30,9770.0,9775.299805,9740.0,9770.0,105,BAJAJ-AUTO.BO,0.0,3054.287317,3015.962683,100.0,66.666667,100.0,9770.0,9770.0,0.0
0,2024-08-16 09:15:00+05:30,1535.900024,1539.900024,1535.199951,1538.400024,1689,BAJAJFINSV.BO,0.0,3054.287317,3015.962683,100.0,66.666667,100.0,1538.400024,1538.400024,0.0
0,2024-08-16 09:15:00+05:30,6509.299805,6509.299805,6472.450195,6487.700195,1716,BAJFINANCE.BO,0.0,3054.287317,3015.962683,100.0,66.666667,100.0,6487.700195,6487.700195,0.0


In [85]:
from stable_baselines3 import A2C, DDPG, PPO, SAC, TD3

trained_a2c = A2C.load("trained_models/agent_a2c_intraday") if if_using_a2c else None

In [87]:
stock_dimension = len(trade.tic.unique())
state_space = 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 30, State Space: 301


In [88]:
buy_cost_list = sell_cost_list = [0.001] * stock_dimension
num_stock_shares = [0] * stock_dimension

env_kwargs = {
    "hmax": 100,
    "initial_amount": 1000000,
    "min_portfolio_value": 100000,
    "num_stock_shares": num_stock_shares,
    "buy_cost_pct": buy_cost_list,
    "sell_cost_pct": sell_cost_list,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": INDICATORS,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}

In [89]:
e_trade_gym = StockTradingEnv(df = trade, turbulence_threshold = 70, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

AttributeError: 'numpy.float64' object has no attribute 'values'

In [57]:
df_account_value_a2c, df_actions_a2c = DRLAgent.DRL_prediction(
    model=trained_a2c, 
    environment = e_trade_gym) if if_using_a2c else (None, None)

ValueError: could not broadcast input array from shape (571,) into shape (301,)

In [38]:
df_account_value_a2c.tail()

Unnamed: 0,date,account_value
4,2024-10-01 09:15:00+05:30,1000879.0
5,2024-10-03 09:15:00+05:30,1000879.0
6,2024-10-04 09:15:00+05:30,1000879.0
7,2024-10-07 09:15:00+05:30,1000879.0
8,2024-10-08 09:15:00+05:30,1000879.0


## DDPG

In [39]:
agent = DRLAgent(env = env_train)
model_ddpg = agent.get_model("ddpg")

if if_using_ddpg:
  # set up logger
  tmp_path = RESULTS_DIR + '/ddpg'
  new_logger_ddpg = configure(tmp_path, ["stdout", "csv", "tensorboard"])
  # Set new logger
  model_ddpg.set_logger(new_logger_ddpg)

{'batch_size': 128, 'buffer_size': 50000, 'learning_rate': 0.001}
Using cpu device


In [40]:
trained_ddpg = agent.train_model(model=model_ddpg, 
                             tb_log_name='ddpg',
                             total_timesteps=50000) if if_using_ddpg else None

In [41]:
trained_ddpg.save("trained_models/agent_ddpg") if if_using_ddpg else None

In [42]:
df_account_value_ddpg, df_actions_ddpg = DRLAgent.DRL_prediction(
    model=trained_ddpg, 
    environment = e_trade_gym) if if_using_ddpg else (None, None)

In [43]:
df_account_value_ddpg.tail()

AttributeError: 'NoneType' object has no attribute 'tail'

In [37]:
agent = DRLAgent(env = env_train)
PPO_PARAMS = {
    "n_steps": 2048,
    "ent_coef": 0.01,
    "learning_rate": 0.00025,
    "batch_size": 128,
}
model_ppo = agent.get_model("ppo",model_kwargs = PPO_PARAMS)

if if_using_ppo:
  # set up logger
  tmp_path = RESULTS_DIR + '/ppo'
  new_logger_ppo = configure(tmp_path, ["stdout", "csv", "tensorboard"])
  # Set new logger
  model_ppo.set_logger(new_logger_ppo)

{'n_steps': 2048, 'ent_coef': 0.01, 'learning_rate': 0.00025, 'batch_size': 128}
Using cpu device
Logging to results/ppo


In [38]:
trained_ppo = agent.train_model(model=model_ppo, 
                             tb_log_name='ppo',
                             total_timesteps=200000) if if_using_ppo else None

----------------------------------
| time/              |           |
|    fps             | 87        |
|    iterations      | 1         |
|    time_elapsed    | 23        |
|    total_timesteps | 2048      |
| train/             |           |
|    reward          | 4.0592203 |
----------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 91          |
|    iterations           | 2           |
|    time_elapsed         | 44          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.018641617 |
|    clip_fraction        | 0.201       |
|    clip_range           | 0.2         |
|    entropy_loss         | -42.7       |
|    explained_variance   | -0.0099     |
|    learning_rate        | 0.00025     |
|    loss                 | 16.8        |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.0225     |
|    reward  

In [39]:
trained_ppo.save("/agent_ppo") if if_using_ppo else None

In [None]:
df_account_value_ppo, df_actions_ppo = DRLAgent.DRL_prediction(
    model=trained_ppo, 
    environment = e_trade_gym) if if_using_ppo else (None, None)

In [None]:
df_account_value_ppo.tail()

In [None]:
agent = DRLAgent(env = env_train)
TD3_PARAMS = {"batch_size": 100, 
              "buffer_size": 1000000, 
              "learning_rate": 0.001}

model_td3 = agent.get_model("td3",model_kwargs = TD3_PARAMS)

if if_using_td3:
  # set up logger
  tmp_path = RESULTS_DIR + '/td3'
  new_logger_td3 = configure(tmp_path, ["stdout", "csv", "tensorboard"])
  # Set new logger
  model_td3.set_logger(new_logger_td3)

In [None]:
trained_td3 = agent.train_model(model=model_td3, 
                             tb_log_name='td3',
                             total_timesteps=50000) if if_using_td3 else None

In [None]:
trained_td3.save(TRAINED_MODEL_DIR + "/agent_td3") if if_using_td3 else None

In [None]:
df_account_value_td3, df_actions_td3 = DRLAgent.DRL_prediction(
    model=trained_td3, 
    environment = e_trade_gym) if if_using_td3 else (None, None)

In [None]:
df_account_value_td3.tail()

In [None]:
df_account_value_td3, df_actions_td3 = DRLAgent.DRL_prediction(
    model=trained_td3, 
    environment = e_trade_gym) if if_using_td3 else (None, None)

In [None]:
df_account_value_td3.tail()

In [None]:
agent = DRLAgent(env = env_train)
SAC_PARAMS = {
    "batch_size": 128,
    "buffer_size": 100000,
    "learning_rate": 0.0001,
    "learning_starts": 100,
    "ent_coef": "auto_0.1",
}

model_sac = agent.get_model("sac",model_kwargs = SAC_PARAMS)

if if_using_sac:
  # set up logger
  tmp_path = RESULTS_DIR + '/sac'
  new_logger_sac = configure(tmp_path, ["stdout", "csv", "tensorboard"])
  # Set new logger
  model_sac.set_logger(new_logger_sac)

In [None]:
trained_sac = agent.train_model(model=model_sac, 
                             tb_log_name='sac',
                             total_timesteps=70000) if if_using_sac else None

In [None]:
trained_sac.save(TRAINED_MODEL_DIR + "/agent_sac") if if_using_sac else None

In [None]:
df_account_value_sac, df_actions_sac = DRLAgent.DRL_prediction(
    model=trained_sac, 
    environment = e_trade_gym) if if_using_sac else (None, None)

In [None]:
df_account_value_sac.tail()