In [1]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
matplotlib.use('Agg')
%matplotlib inline
import datetime

from finrl import config
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import data_split
from finrl.agents.stablebaselines3.models import DRLAgent

import sys
sys.path.append("../FinRL-Library")

import itertools

In [None]:
# company_symbols = [
#     'ADANIENT', 'ADANIPORTS', 'APOLLOHOSP', 'ASIANPAINT', 'AXISBANK',
#     'BAJAJ-AUTO', 'BAJFINANCE', 'BAJAJFINSV', 'BPCL', 'BHARTIARTL',
#     'BRITANNIA', 'CIPLA', 'COALINDIA', 'DIVISLAB', 'DRREDDY', 'EICHERMOT',
#     'GRASIM', 'HCLTECH', 'HDFCBANK', 'HDFCLIFE', 'HEROMOTOCO', 'HINDALCO',
#     'HINDUNILVR', 'ICICIBANK', 'INDUSINDBK', 'INFY', 'ITC', 'JSWSTEEL',
#     'KOTAKBANK', 'LT', 'LTIM', 'M&M', 'MARUTI', 'NESTLEIND', 'NTPC', 'ONGC',
#     'POWERGRID', 'RELIANCE', 'SBILIFE', 'SBIN', 'SUNPHARMA', 'TATAMOTORS',
#     'TATASTEEL', 'TCS', 'TATACONSUM', 'TECHM', 'TITAN', 'ULTRACEMCO', 'UPL',
#     'WIPRO'
# ]

# ns_company_symbols = [symbol + '.NS' for symbol in company_symbols]

# print(ns_company_symbols)
# symbols=ns_company_symbols

In [2]:
TRAIN_START_DATE = '2010-01-01'
TRAIN_END_DATE = '2020-07-01'
TRADE_START_DATE = '2020-07-01'
TRADE_END_DATE = '2023-05-01'

In [3]:
# df_raw = YahooDownloader(start_date = TRAIN_START_DATE,
#                                 end_date = TRADE_END_DATE,
#                                 ticker_list = symbols).fetch_data()

In [4]:
df_raw=pd.read_csv('datasets/BSE30.csv')

In [4]:
df_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 105703 entries, 0 to 105702
Data columns (total 8 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   date    105703 non-null  object 
 1   open    105703 non-null  float64
 2   high    105703 non-null  float64
 3   low     105703 non-null  float64
 4   close   105703 non-null  float64
 5   volume  105703 non-null  int64  
 6   tic     105703 non-null  object 
 7   day     105703 non-null  int64  
dtypes: float64(4), int64(2), object(2)
memory usage: 6.5+ MB


In [5]:
from __future__ import annotations




from stockstats import StockDataFrame as Sdf



def load_dataset(*, file_name: str) -> pd.DataFrame:
    """
    load csv dataset from path
    :return: (df) pandas dataframe
    """
    # _data = pd.read_csv(f"{config.DATASET_DIR}/{file_name}")
    _data = pd.read_csv(file_name)
    return _data


def data_split(df, start, end, target_date_col="date"):
    """
    split the dataset into training or testing using date
    :param data: (df) pandas dataframe, start, end
    :return: (df) pandas dataframe
    """
    data = df[(df[target_date_col] >= start) & (df[target_date_col] < end)]
    data = data.sort_values([target_date_col, "tic"], ignore_index=True)
    data.index = data[target_date_col].factorize()[0]
    return data


def convert_to_datetime(time):
    time_fmt = "%Y-%m-%dT%H:%M:%S"
    if isinstance(time, str):
        return datetime.datetime.strptime(time, time_fmt)


class FeatureEngineer:
    """Provides methods for preprocessing the stock price data

    Attributes
    ----------
        use_technical_indicator : boolean
            we technical indicator or not
        tech_indicator_list : list
            a list of technical indicator names (modified from neofinrl_config.py)
        use_turbulence : boolean
            use turbulence index or not
        user_defined_feature:boolean
            use user defined features or not

    Methods
    -------
    preprocess_data()
        main method to do the feature engineering

    """

    def __init__(
        self,
        use_technical_indicator=True,
        tech_indicator_list=config.INDICATORS,
        use_vix=False,
        use_turbulence=False,
        user_defined_feature=False,
    ):
        self.use_technical_indicator = use_technical_indicator
        self.tech_indicator_list = tech_indicator_list
        self.use_vix = use_vix
        self.use_turbulence = use_turbulence
        self.user_defined_feature = user_defined_feature

    def preprocess_data(self, df):
        """main method to do the feature engineering
        @:param config: source dataframe
        @:return: a DataMatrices object
        """
        # clean data
        df = self.clean_data(df)

        # add technical indicators using stockstats
        if self.use_technical_indicator:
            df = self.add_technical_indicator(df)
            print("Successfully added technical indicators")

        # add vix for multiple stock
        if self.use_vix:
            df = self.add_vix(df)
            print("Successfully added vix")

        # add turbulence index for multiple stock
        if self.use_turbulence:
            df = self.add_turbulence(df)
            print("Successfully added turbulence index")

        # add user defined feature
        if self.user_defined_feature:
            df = self.add_user_defined_feature(df)
            print("Successfully added user defined features")

        # fill the missing values at the beginning and the end
        df = df.ffill().bfill()
        return df

    def clean_data(self, data):
        """
        clean the raw data
        deal with missing values
        reasons: stocks could be delisted, not incorporated at the time step
        :param data: (df) pandas dataframe
        :return: (df) pandas dataframe
        """
        df = data.copy()
        df = df.sort_values(["date", "tic"], ignore_index=True)
        df.index = df.date.factorize()[0]
        merged_closes = df.pivot_table(index="date", columns="tic", values="close")
        merged_closes = merged_closes.fillna(merged_closes.mean())
        # merged_closes = merged_closes.fillna(merged_closes.mean())
        tics = merged_closes.columns
        df = df[df.tic.isin(tics)]
        
        # df = data.copy()
        # list_ticker = df["tic"].unique().tolist()
        # # only apply to daily level data, need to fix for minute level
        # list_date = list(pd.date_range(df['date'].min(),df['date'].max()).astype(str))
        # combination = list(itertools.product(list_date,list_ticker))

        # df_full = pd.DataFrame(combination,columns=["date","tic"]).merge(df,on=["date","tic"],how="left")
        # df_full = df_full[df_full['date'].isin(df['date'])]
        # df_full = df_full.sort_values(['date','tic'])
        # df_full = df_full.fillna(0)
        return df

    def add_technical_indicator(self, data):
        """
        calculate technical indicators
        use stockstats package to add technical inidactors
        :param data: (df) pandas dataframe
        :return: (df) pandas dataframe
        """
        df = data.copy()
        df = df.sort_values(by=["tic", "date"])
        stock = Sdf.retype(df.copy())
        unique_ticker = stock.tic.unique()

        for indicator in self.tech_indicator_list:
            indicator_df = pd.DataFrame()
            for i in range(len(unique_ticker)):
                try:
                    temp_indicator = stock[stock.tic == unique_ticker[i]][indicator]
                    temp_indicator = pd.DataFrame(temp_indicator)
                    temp_indicator["tic"] = unique_ticker[i]
                    temp_indicator["date"] = df[df.tic == unique_ticker[i]][
                        "date"
                    ].to_list()
                    # indicator_df = indicator_df.append(
                    #     temp_indicator, ignore_index=True
                    # )
                    indicator_df = pd.concat(
                        [indicator_df, temp_indicator], axis=0, ignore_index=True
                    )
                except Exception as e:
                    print(e)
            df = df.merge(
                indicator_df[["tic", "date", indicator]], on=["tic", "date"], how="left"
            )
        df = df.sort_values(by=["date", "tic"])
        return df
        # df = data.set_index(['date','tic']).sort_index()
        # df = df.join(df.groupby(level=0, group_keys=False).apply(lambda x, y: Sdf.retype(x)[y], y=self.tech_indicator_list))
        # return df.reset_index()

    def add_user_defined_feature(self, data):
        """
         add user defined features
        :param data: (df) pandas dataframe
        :return: (df) pandas dataframe
        """
        df = data.copy()
        df["daily_return"] = df.close.pct_change(1)
        # df['return_lag_1']=df.close.pct_change(2)
        # df['return_lag_2']=df.close.pct_change(3)
        # df['return_lag_3']=df.close.pct_change(4)
        # df['return_lag_4']=df.close.pct_change(5)
        return df

    def add_vix(self, data):
        """
        add vix from yahoo finance
        :param data: (df) pandas dataframe
        :return: (df) pandas dataframe
        """
        df = data.copy()
        df_vix = YahooDownloader(
            start_date=df.date.min(), end_date=df.date.max(), ticker_list=["^VIX"]
        ).fetch_data()
        vix = df_vix[["date", "close"]]
        vix.columns = ["date", "vix"]

        df = df.merge(vix, on="date")
        df = df.sort_values(["date", "tic"]).reset_index(drop=True)
        return df

    def add_turbulence(self, data):
        """
        add turbulence index from a precalcualted dataframe
        :param data: (df) pandas dataframe
        :return: (df) pandas dataframe
        """
        df = data.copy()
        turbulence_index = self.calculate_turbulence(df)
        df = df.merge(turbulence_index, on="date")
        df = df.sort_values(["date", "tic"]).reset_index(drop=True)
        return df

    def calculate_turbulence(self, data):
        """calculate turbulence index based on dow 30"""
        # can add other market assets
        df = data.copy()
        df_price_pivot = df.pivot(index="date", columns="tic", values="close")
        # use returns to calculate turbulence
        df_price_pivot = df_price_pivot.pct_change()

        unique_date = df.date.unique()
        # start after a year
        start = 252
        turbulence_index = [0] * start
        # turbulence_index = [0]
        count = 0
        for i in range(start, len(unique_date)):
            current_price = df_price_pivot[df_price_pivot.index == unique_date[i]]
            # use one year rolling window to calcualte covariance
            hist_price = df_price_pivot[
                (df_price_pivot.index < unique_date[i])
                & (df_price_pivot.index >= unique_date[i - 252])
            ]
            # Drop tickers which has number missing values more than the "oldest" ticker
            filtered_hist_price = hist_price.iloc[
                hist_price.isna().sum().min() :
            ].dropna(axis=1)

            cov_temp = filtered_hist_price.cov()
            current_temp = current_price[[x for x in filtered_hist_price]] - np.mean(
                filtered_hist_price, axis=0
            )
            # cov_temp = hist_price.cov()
            # current_temp=(current_price - np.mean(hist_price,axis=0))

            temp = current_temp.values.dot(np.linalg.pinv(cov_temp)).dot(
                current_temp.values.T
            )
            if temp > 0:
                count += 1
                if count > 2:
                    turbulence_temp = temp[0][0]
                else:
                    # avoid large outlier because of the calculation just begins
                    turbulence_temp = 0
            else:
                turbulence_temp = 0
            turbulence_index.append(turbulence_temp)
        try:
            turbulence_index = pd.DataFrame(
                {"date": df_price_pivot.index, "turbulence": turbulence_index}
            )
        except ValueError:
            raise Exception("Turbulence information could not be added.")
        return turbulence_index

In [6]:
df_raw.head()

Unnamed: 0,date,open,high,low,close,volume,tic,day
0,2009-01-02,90.75,90.75,88.550003,48.861801,19140,ASIANPAINT.BO,4
1,2009-01-02,105.800003,109.599998,103.459999,71.914917,4536215,AXISBANK.BO,4
2,2009-01-02,206.050003,210.5,196.5,158.413025,52648,BAJAJ-AUTO.BO,4
3,2009-01-02,15.14,15.8,14.975,13.401811,136590,BAJAJFINSV.BO,4
4,2009-01-02,6.66,6.97,6.35,2.746401,274220,BAJFINANCE.BO,4


In [7]:
from finrl.config import INDICATORS
fe = FeatureEngineer(use_technical_indicator=True,
                      tech_indicator_list = INDICATORS,
                      use_vix=False,
                      use_turbulence=True,
                      user_defined_feature = False)

processed = fe.preprocess_data(df_raw)

Successfully added technical indicators


  df_price_pivot = df_price_pivot.pct_change()


Successfully added turbulence index


In [8]:
processed

Unnamed: 0,date,open,high,low,close,volume,tic,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,turbulence
0,2009-01-02,90.750000,90.750000,88.550003,48.861801,19140,ASIANPAINT.BO,4,0.000000,50.523346,48.068260,100.000000,66.666667,100.000000,48.861801,48.861801,0.000000
1,2009-01-02,105.800003,109.599998,103.459999,71.914917,4536215,AXISBANK.BO,4,0.000000,50.523346,48.068260,100.000000,66.666667,100.000000,71.914917,71.914917,0.000000
2,2009-01-02,206.050003,210.500000,196.500000,158.413025,52648,BAJAJ-AUTO.BO,4,0.000000,50.523346,48.068260,100.000000,66.666667,100.000000,158.413025,158.413025,0.000000
3,2009-01-02,15.140000,15.800000,14.975000,13.401811,136590,BAJAJFINSV.BO,4,0.000000,50.523346,48.068260,100.000000,66.666667,100.000000,13.401811,13.401811,0.000000
4,2009-01-02,6.660000,6.970000,6.350000,2.746401,274220,BAJFINANCE.BO,4,0.000000,50.523346,48.068260,100.000000,66.666667,100.000000,2.746401,2.746401,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105698,2023-04-28,981.000000,992.500000,979.250000,986.799988,26056,SUNPHARMA.BO,4,-0.263414,1019.314408,965.265603,50.085294,14.481255,1.567920,983.446670,985.046100,43.069415
105699,2023-04-28,3208.000000,3227.199951,3197.149902,3175.769043,51644,TCS.BO,4,-15.398183,3235.633708,3045.249324,48.649310,67.966063,0.407494,3131.238102,3257.234477,43.069415
105700,2023-04-28,983.000000,1026.650024,982.950012,986.955139,279514,TECHM.BO,4,-22.941437,1102.074200,929.293964,44.970681,-99.119890,22.233939,1033.226742,1032.633037,43.069415
105701,2023-04-28,2663.500000,2679.300049,2620.050049,2640.399902,32742,TITAN.BO,4,43.161331,2669.834479,2492.325506,60.306098,116.653875,37.463255,2542.391650,2482.099988,43.069415


In [9]:
df=processed

In [10]:
list_ticker = df["tic"].unique().tolist()
# only apply to daily level data, need to fix for minute level
list_date = list(pd.date_range(df['date'].min(),df['date'].max()).astype(str))
combination = list(itertools.product(list_date,list_ticker))

df_full = pd.DataFrame(combination,columns=["date","tic"]).merge(df,on=["date","tic"],how="left")
df_full = df_full[df_full['date'].isin(df['date'])]
df_full = df_full.sort_values(['date','tic'])
df_full = df_full.fillna(1)

In [11]:
df_full.info()

<class 'pandas.core.frame.DataFrame'>
Index: 105900 entries, 0 to 156899
Data columns (total 17 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   date          105900 non-null  object 
 1   tic           105900 non-null  object 
 2   open          105900 non-null  float64
 3   high          105900 non-null  float64
 4   low           105900 non-null  float64
 5   close         105900 non-null  float64
 6   volume        105900 non-null  float64
 7   day           105900 non-null  float64
 8   macd          105900 non-null  float64
 9   boll_ub       105900 non-null  float64
 10  boll_lb       105900 non-null  float64
 11  rsi_30        105900 non-null  float64
 12  cci_30        105900 non-null  float64
 13  dx_30         105900 non-null  float64
 14  close_30_sma  105900 non-null  float64
 15  close_60_sma  105900 non-null  float64
 16  turbulence    105900 non-null  float64
dtypes: float64(15), object(2)
memory usage: 14.5+ MB


In [12]:
df=df_full

In [13]:
df.head()

Unnamed: 0,date,tic,open,high,low,close,volume,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,turbulence
0,2009-01-02,ASIANPAINT.BO,90.75,90.75,88.550003,48.861801,19140.0,4.0,0.0,50.523346,48.06826,100.0,66.666667,100.0,48.861801,48.861801,0.0
1,2009-01-02,AXISBANK.BO,105.800003,109.599998,103.459999,71.914917,4536215.0,4.0,0.0,50.523346,48.06826,100.0,66.666667,100.0,71.914917,71.914917,0.0
2,2009-01-02,BAJAJ-AUTO.BO,206.050003,210.5,196.5,158.413025,52648.0,4.0,0.0,50.523346,48.06826,100.0,66.666667,100.0,158.413025,158.413025,0.0
3,2009-01-02,BAJAJFINSV.BO,15.14,15.8,14.975,13.401811,136590.0,4.0,0.0,50.523346,48.06826,100.0,66.666667,100.0,13.401811,13.401811,0.0
4,2009-01-02,BAJFINANCE.BO,6.66,6.97,6.35,2.746401,274220.0,4.0,0.0,50.523346,48.06826,100.0,66.666667,100.0,2.746401,2.746401,0.0


In [14]:
df.shape

(105900, 17)

In [16]:
df.to_csv("main_processed.csv")

In [17]:
df.head()

Unnamed: 0,date,tic,open,high,low,close,volume,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,turbulence
0,2009-01-02,ASIANPAINT.BO,90.75,90.75,88.550003,48.861801,19140.0,4.0,0.0,50.523346,48.06826,100.0,66.666667,100.0,48.861801,48.861801,0.0
1,2009-01-02,AXISBANK.BO,105.800003,109.599998,103.459999,71.914917,4536215.0,4.0,0.0,50.523346,48.06826,100.0,66.666667,100.0,71.914917,71.914917,0.0
2,2009-01-02,BAJAJ-AUTO.BO,206.050003,210.5,196.5,158.413025,52648.0,4.0,0.0,50.523346,48.06826,100.0,66.666667,100.0,158.413025,158.413025,0.0
3,2009-01-02,BAJAJFINSV.BO,15.14,15.8,14.975,13.401811,136590.0,4.0,0.0,50.523346,48.06826,100.0,66.666667,100.0,13.401811,13.401811,0.0
4,2009-01-02,BAJFINANCE.BO,6.66,6.97,6.35,2.746401,274220.0,4.0,0.0,50.523346,48.06826,100.0,66.666667,100.0,2.746401,2.746401,0.0


In [16]:

from gym.utils import seeding
import gym
from gym import spaces

import matplotlib.pyplot as plt
from stable_baselines3.common.vec_env import DummyVecEnv

In [17]:





class StockTradingEnv(gym.Env):
    """A stock trading environment for OpenAI gym"""

    metadata = {"render.modes": ["human"]}

    def __init__(
        self,
        df,
        stock_dim,
        hmax,
        initial_amount,
        buy_cost_pct,
        sell_cost_pct,
        reward_scaling,
        state_space,
        action_space,
        tech_indicator_list,
        turbulence_threshold=None,
        make_plots=False,
        print_verbosity=2,
        day=0,
        initial=True,
        previous_state=[],
        model_name="",
        mode="",
        iteration="",
        initial_buy=False,  # Use half of initial amount to buy
        hundred_each_trade=True,  # The number of shares per lot must be an integer multiple of 100
    ):
        self.day = day
        self.df = df
        self.stock_dim = stock_dim
        self.hmax = hmax
        self.initial_amount = initial_amount
        self.buy_cost_pct = buy_cost_pct
        self.sell_cost_pct = sell_cost_pct
        self.reward_scaling = reward_scaling
        self.state_space = state_space
        self.action_space = action_space
        self.tech_indicator_list = tech_indicator_list
        self.action_space = spaces.Box(low=-1, high=1, shape=(self.action_space,))
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(self.state_space,)
        )
        self.data = self.df.loc[self.day, :]
        self.terminal = False
        self.make_plots = make_plots
        self.print_verbosity = print_verbosity
        self.turbulence_threshold = turbulence_threshold
        self.initial = initial
        self.previous_state = previous_state
        self.model_name = model_name
        self.mode = mode
        self.iteration = iteration
        # initalize state
        self.initial_buy = initial_buy
        self.hundred_each_trade = hundred_each_trade
        self.state = self._initiate_state()

        # initialize reward
        self.reward = 0
        self.turbulence = 0
        self.cost = 0
        self.trades = 0
        self.episode = 0
        # memorize all the total balance change
        self.portfolio_memory = []
        self.actions_memory = []
        self.date_memory = [self._get_date()]
        self._seed()

    def _sell_stock(self, index, action):
        def _do_sell_normal():
            if self.state[index + 1] > 0:
                # Sell only if the price is > 0 (no missing data in this particular date)
                # perform sell action based on the sign of the action
                if self.state[index + self.stock_dim + 1] > 0:
                    # Sell only if current asset is > 0
                    sell_num_shares = min(
                        abs(action), self.state[index + self.stock_dim + 1]
                    )
                    if self.hundred_each_trade:
                        sell_num_shares = sell_num_shares // 100 * 100

                    sell_amount = self.state[index + 1] * sell_num_shares
                    cost_amount = sell_amount * self.sell_cost_pct
                    self.state[0] += sell_amount - cost_amount
                    self.state[index + self.stock_dim + 1] -= sell_num_shares
                    self.cost += cost_amount
                    self.trades += 1
                else:
                    sell_num_shares = 0
            else:
                sell_num_shares = 0

            return sell_num_shares

        # perform sell action based on the sign of the action
        if self.turbulence_threshold is not None:
            if self.turbulence >= self.turbulence_threshold:
                if self.state[index + 1] > 0:
                    # Sell only if the price is > 0 (no missing data in this particular date)
                    # if turbulence goes over threshold, just clear out all positions
                    if self.state[index + self.stock_dim + 1] > 0:
                        # Sell only if current asset is > 0
                        sell_num_shares = self.state[index + self.stock_dim + 1]
                        sell_amount = self.state[index + 1] * sell_num_shares
                        cost_amount = sell_amount * self.sell_cost_pct

                        self.state[0] += sell_amount - cost_amount

                        self.state[index + self.stock_dim + 1] = 0
                        self.cost += cost_amount
                        self.trades += 1
                    else:
                        sell_num_shares = 0
                else:
                    sell_num_shares = 0
            else:
                sell_num_shares = _do_sell_normal()
        else:
            sell_num_shares = _do_sell_normal()

        return sell_num_shares

    def _buy_stock(self, index, action):
        def _do_buy():
            if self.state[index + 1] > 0:
                # Buy only if the price is > 0 (no missing data in this particular date)
                available_amount = self.state[0] // self.state[index + 1]

                # update balance
                buy_num_shares = min(available_amount, action)
                if self.hundred_each_trade:
                    buy_num_shares = buy_num_shares // 100 * 100

                if buy_num_shares > 0:
                    buy_amount = self.state[index + 1] * buy_num_shares
                    cost_amount = buy_amount * self.buy_cost_pct

                    self.state[0] -= buy_amount + cost_amount

                    self.state[index + self.stock_dim + 1] += buy_num_shares

                    self.cost += cost_amount
                    self.trades += 1
                else:
                    buy_num_shares = 0
            else:
                buy_num_shares = 0

            return buy_num_shares

        # perform buy action based on the sign of the action
        if self.turbulence_threshold is None:
            buy_num_shares = _do_buy()
        else:
            if self.turbulence < self.turbulence_threshold:
                buy_num_shares = _do_buy()
            else:
                buy_num_shares = 0
                pass

        return buy_num_shares

    def _make_plot(self):
        portfolio_df = self.get_portfolio_df()
        plt.plot(portfolio_df["date"], portfolio_df["total_asset"], color="r")
        plt.savefig(f"results/account_value_trade_{self.episode}.png")
        plt.close()

    def step(self, actions):
        self.terminal = self.day >= len(self.df.index.unique()) - 1
        if self.terminal:
            print(f"Episode: {self.episode}")
            if self.make_plots:
                self._make_plot()

            portfolio_df = self.get_portfolio_df()
            begin_total_asset = portfolio_df["prev_total_asset"].iloc[0]
            end_total_asset = portfolio_df["total_asset"].iloc[-1]
            tot_reward = end_total_asset - begin_total_asset

            portfolio_df["daily_return"] = portfolio_df["total_asset"].pct_change(1)

            sharpe = None
            if portfolio_df["daily_return"].std() != 0:
                sharpe = (
                    (252**0.5)
                    * portfolio_df["daily_return"].mean()
                    / portfolio_df["daily_return"].std()
                )

            if self.episode % self.print_verbosity == 0:
                print(f"day: {self.day}, episode: {self.episode}")
                print(f"begin_total_asset: {begin_total_asset:0.2f}")
                print(f"end_total_asset: {end_total_asset:0.2f}")
                print(f"total_reward: {tot_reward:0.2f}")
                print(f"total_cost: {self.cost:0.2f}")
                print(f"total_trades: {self.trades}")
                if sharpe is not None:
                    print(f"Sharpe: {sharpe:0.3f}")
                print("=================================")

            if (self.model_name != "") and (self.mode != ""):
                df_actions = self.save_action_memory()
                df_actions.to_csv(
                    f"results/actions_{self.mode}_{self.model_name}_{self.episode}.csv"
                )
                portfolio_df.to_csv(
                    f"results/portfolio_{self.mode}_{self.model_name}_{self.episode}.csv",
                    index=False,
                )

            # Add outputs to logger interface
            # logger.record(key="environment/portfolio_value", value=end_total_asset)
            # logger.record(key="environment/total_reward", value=tot_reward)
            # logger.record(key="environment/total_reward_pct", value=(tot_reward / (end_total_asset - tot_reward)) * 100)
            # logger.record(key="environment/total_cost", value=self.cost)
            # logger.record(key="environment/total_trades", value=self.trades)

            return self.state, self.reward, self.terminal, {}

        else:
            actions = actions * self.hmax  # actions initially is scaled between 0 to 1
            actions = actions.astype(
                int
            )  # convert into integer because we can't by fraction of shares
            if self.turbulence_threshold is not None:
                if self.turbulence >= self.turbulence_threshold:
                    actions = np.array([-self.hmax] * self.stock_dim)

            # calculate information before trading
            begin_cash = self.state[0]
            begin_market_value = self._get_market_value()
            begin_total_asset = begin_cash + begin_market_value
            begin_cost = self.cost
            begin_trades = self.trades
            begin_stock = self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)]

            argsort_actions = np.argsort(actions)

            sell_index = argsort_actions[: np.where(actions < 0)[0].shape[0]]
            buy_index = argsort_actions[::-1][: np.where(actions > 0)[0].shape[0]]

            for index in sell_index:
                actions[index] = self._sell_stock(index, actions[index]) * (-1)

            for index in buy_index:
                actions[index] = self._buy_stock(index, actions[index])

            if self.turbulence_threshold is not None:
                self.turbulence = self.data["turbulence"].values[0]

            # calculate information after trading
            end_cash = self.state[0]
            end_market_value = self._get_market_value()
            end_total_asset = end_cash + end_market_value
            end_cost = self.cost
            end_trades = self.trades
            end_stock = self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)]

            self.actions_memory.append(actions)

            i_list = []
            for i in range(self.stock_dim):
                if begin_stock[i] - end_stock[i] == 0:
                    i_list.append(i)

            self.reward = end_total_asset - begin_total_asset
            for i in i_list:
                self.reward -= (
                    self.state[i + 1] * self.state[self.stock_dim + 1 + i]
                ) * 0.001

            date = self._get_date()

            self.portfolio_memory.append(
                {
                    "date": date,
                    "prev_total_asset": begin_total_asset,
                    "prev_cash": begin_cash,
                    "prev_market_value": begin_market_value,
                    "total_asset": end_total_asset,
                    "cash": end_cash,
                    "market_value": end_market_value,
                    "cost": end_cost - begin_cost,
                    "trades": end_trades - begin_trades,
                    "reward": self.reward,
                }
            )
            self.date_memory.append(date)

            self.reward = self.reward * self.reward_scaling

            # update next state
            self.day += 1
            self.data = self.df.loc[self.day, :]
            self.state = self._update_state()

        return self.state, self.reward, self.terminal, {}

    def reset(self):
        # initiate state
        self.day = 0
        self.data = self.df.loc[self.day, :]

        self.state = self._initiate_state()
        self.turbulence = 0
        self.cost = 0
        self.trades = 0
        self.terminal = False
        # self.iteration=self.iteration
        self.actions_memory = []
        self.date_memory = [self._get_date()]
        self.portfolio_memory = []

        self.episode += 1

        return self.state

    def render(self, mode="human", close=False):
        return self.state

    def _initiate_state(self):
        if self.initial:
            # For Initial State
            if len(self.df.tic.unique()) > 1:
                # for multiple stock
                state = (
                    [self.initial_amount]
                    + self.data.close.values.tolist()
                    + [0] * self.stock_dim
                    + sum(
                        [
                            self.data[tech].values.tolist()
                            for tech in self.tech_indicator_list
                        ],
                        [],
                    )
                )

                if self.initial_buy:
                    state = self.initial_buy_()
            else:
                # for single stock
                state = (
                    [self.initial_amount]
                    + [self.data.close]
                    + [0] * self.stock_dim
                    + sum(
                        [[self.data[tech]] for tech in self.tech_indicator_list],
                        [],
                    )
                )
        else:
            # Using Previous State
            if len(self.df.tic.unique()) > 1:
                # for multiple stock
                state = (
                    [self.previous_state[0]]
                    + self.data.close.values.tolist()
                    + self.previous_state[
                        (self.stock_dim + 1) : (self.stock_dim * 2 + 1)
                    ]
                    + sum(
                        [
                            self.data[tech].values.tolist()
                            for tech in self.tech_indicator_list
                        ],
                        [],
                    )
                )
            else:
                # for single stock
                state = (
                    [self.previous_state[0]]
                    + [self.data.close]
                    + self.previous_state[
                        (self.stock_dim + 1) : (self.stock_dim * 2 + 1)
                    ]
                    + sum(
                        [[self.data[tech]] for tech in self.tech_indicator_list],
                        [],
                    )
                )
        return state

    def _update_state(self):
        if len(self.df.tic.unique()) > 1:
            # for multiple stock
            state = (
                [self.state[0]]
                + self.data.close.values.tolist()
                + list(self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)])
                + sum(
                    [
                        self.data[tech].values.tolist()
                        for tech in self.tech_indicator_list
                    ],
                    [],
                )
            )

        else:
            # for single stock
            state = (
                [self.state[0]]
                + [self.data.close]
                + list(self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)])
                + sum(
                    [[self.data[tech]] for tech in self.tech_indicator_list],
                    [],
                )
            )

        return state

    def _get_date(self):
        if len(self.df.tic.unique()) > 1:
            date = self.data.date.unique()[0]
        else:
            date = self.data.date
        return date

    def get_portfolio_df(self):
        portfolio_df = pd.DataFrame(self.portfolio_memory)
        portfolio_df["date"] = pd.to_datetime(portfolio_df["date"])
        portfolio_df.sort_values("date", inplace=True)
        return portfolio_df[
            [
                "date",
                "prev_total_asset",
                "prev_cash",
                "prev_market_value",
                "total_asset",
                "cash",
                "market_value",
                "cost",
                "trades",
                "reward",
            ]
        ]

    def _get_total_asset(self):
        """
        get current total asset value
        """
        return self.state[0] + self._get_market_value()

    def _get_market_value(self):
        """
        get current market value
        """
        return sum(
            np.array(self.state[1 : (self.stock_dim + 1)])
            * np.array(self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)])
        )

    def save_asset_memory(self):
        portfolio_df = self.get_portfolio_df()
        df_account_value = portfolio_df[["date", "total_asset"]].rename(
            columns={"total_asset": "account_value"}
        )
        return df_account_value

    def save_action_memory(self):
        if len(self.df.tic.unique()) > 1:
            # date and close price length must match actions length
            date_list = self.date_memory[:-1]
            df_date = pd.DataFrame(date_list)
            df_date.columns = ["date"]

            action_list = self.actions_memory
            df_actions = pd.DataFrame(action_list)
            df_actions.columns = self.data.tic.values
            df_actions.index = df_date.date
            # df_actions = pd.DataFrame({'date':date_list,'actions':action_list})
        else:
            date_list = self.date_memory[:-1]
            action_list = self.actions_memory
            df_actions = pd.DataFrame({"date": date_list, "actions": action_list})
        return df_actions

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def get_sb_env(self):
        e = DummyVecEnv([lambda: self])
        obs = e.reset()
        return e, obs

    def initial_buy_(self):
        """Initialize the state, already bought some"""
        prices = self.data.close.values.tolist()
        # only use half of the initial amount
        market_values_each_tic = 0.5 * self.initial_amount // len(prices)
        buy_nums_each_tic = [int(market_values_each_tic // p) for p in prices]
        if self.hundred_each_trade:
            buy_nums_each_tic = buy_nums_each_tic // 100 * 100


        buy_amount = sum(np.array(prices) * np.array(buy_nums_each_tic))

        state = (
            [self.initial_amount - buy_amount]
            + prices
            + buy_nums_each_tic
            + sum(
                [self.data[tech].values.tolist() for tech in self.tech_indicator_list],
                [],
            )
        )

        return state

In [18]:
train = data_split(df, TRAIN_START_DATE,TRAIN_END_DATE)
trade = data_split(df, TRADE_START_DATE,TRADE_END_DATE)
print(len(train))
print(len(trade))
     

77550
21120


In [19]:
train.to_csv('train_data.csv')
trade.to_csv('trade_data.csv')

In [20]:
stock_dimension = len(train.tic.unique())
state_space = 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 30, State Space: 301


In [21]:
# from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
buy_cost_list = sell_cost_list = [0.001] * stock_dimension
num_stock_shares = [0] * stock_dimension

env_kwargs = {
    "stock_dim": stock_dimension,
    "hmax": 1000, 
    "initial_amount": 100, 
    "buy_cost_pct":6.87e-5,
    "sell_cost_pct":1.0687e-3,
    "reward_scaling": 1e-4,
    "state_space": state_space, 
    "action_space": stock_dimension,
    "tech_indicator_list": config.INDICATORS, 
    "print_verbosity": 1,
    "initial_buy":True,
    "hundred_each_trade":False
}

e_train_gym = StockTradingEnv(df = train, **env_kwargs)

In [22]:
env_train, _ = e_train_gym.get_sb_env()
print(type(env_train))

<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>




In [23]:
agent = DRLAgent(env = env_train)

# Set the corresponding values to 'True' for the algorithms that you want to use
if_using_a2c = True
if_using_ddpg = True
if_using_ppo = True
if_using_td3 = True
if_using_sac = True

In [24]:
from finrl.main import check_and_make_directories
from finrl.config import INDICATORS, TRAINED_MODEL_DIR, RESULTS_DIR
from stable_baselines3.common.logger import configure
check_and_make_directories([TRAINED_MODEL_DIR])

In [None]:
agent = DRLAgent(env = env_train)
model_a2c = agent.get_model("a2c")

if if_using_a2c:
  # set up logger
  tmp_path = RESULTS_DIR + '/a2c'
  new_logger_a2c = configure(tmp_path, ["stdout", "csv", "tensorboard"])
  # Set new logger
  model_a2c.set_logger(new_logger_a2c)

In [None]:
trained_a2c = agent.train_model(model=model_a2c, 
                             tb_log_name='a2c',
                             total_timesteps=50000) if if_using_a2c else None

In [None]:
trained_a2c.save(TRAINED_MODEL_DIR + "/agent_a2c") if if_using_a2c else None

In [31]:
from stable_baselines3 import A2C, DDPG, PPO, SAC, TD3

# trained_a2c = A2C.load(TRAINED_MODEL_DIR + "/agent_a2c") if if_using_a2c else None

## ddpg

In [25]:
env_train, _ = e_train_gym.get_sb_env()
print(type(env_train))

<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>


In [26]:
agent = DRLAgent(env = env_train)
DDPG_PARAMS = {
                "batch_size": 256, 
               "buffer_size": 50000, 
               "learning_rate": 0.0005,
               "action_noise":"normal",
                }
POLICY_KWARGS = dict(net_arch=dict(pi=[64, 64], qf=[400, 300]))
model_ddpg = agent.get_model("ddpg", model_kwargs = DDPG_PARAMS, policy_kwargs=POLICY_KWARGS)

if if_using_ddpg:
  # set up logger
  tmp_path = RESULTS_DIR + '/ddpg'
  new_logger_ddpg = configure(tmp_path, ["stdout", "csv", "tensorboard"])
  # Set new logger
  model_ddpg.set_logger(new_logger_ddpg)

{'batch_size': 256, 'buffer_size': 50000, 'learning_rate': 0.0005, 'action_noise': NormalActionNoise(mu=[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0.], sigma=[0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1
 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1])}
Using cpu device
Logging to results/ddpg


In [47]:
trained_ddpg = agent.train_model(model=model_ddpg, 
                             tb_log_name='ddpg',
                             total_timesteps=50000) if if_using_ddpg else None

KeyboardInterrupt: 

In [None]:
trained_ddpg.save("agent_ddpg") if if_using_ddpg else None


In [35]:
env_kwargs = {
    "stock_dim": stock_dimension,
    "hmax": 1000, 
    "initial_amount": 10000, 
    "buy_cost_pct":6.87e-5,
    "sell_cost_pct":1.0687e-3,
    "reward_scaling": 1e-4,
    "state_space": state_space, 
    "action_space": stock_dimension,
    "tech_indicator_list": config.INDICATORS, 
    "print_verbosity": 1,
    
    
}


In [36]:
e_trade_gym = StockTradingEnv(df = trade, turbulence_threshold = 70, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()



In [37]:
trained_ddpg=DDPG.load("agent_ddpg")

In [38]:
df_account_value_ddpg, df_actions_ddpg = DRLAgent.DRL_prediction(
    model=trained_ddpg, 
    environment = e_trade_gym) if if_using_ddpg else (None, None)

Episode: 3
day: 703, episode: 3
begin_total_asset: 10000.00
end_total_asset: 37865865.49
total_reward: 37855865.49
total_cost: 531796.00
total_trades: 500
Sharpe: 0.851
hit end!


In [39]:
df_account_value_ddpg.tail()

Unnamed: 0,date,account_value
698,2023-04-21,37489600.0
699,2023-04-24,37484480.0
700,2023-04-25,37551060.0
701,2023-04-26,37632930.0
702,2023-04-27,37865870.0


## PPO

In [40]:
agent = DRLAgent(env = env_train)
PPO_PARAMS = {
    "n_steps": 2048,
    "ent_coef": 0.01,
    "learning_rate": 0.00025,
    "batch_size": 128,
}
model_ppo = agent.get_model("ppo",model_kwargs = PPO_PARAMS)

if if_using_ppo:
  # set up logger
  tmp_path = RESULTS_DIR + '/ppo'
  new_logger_ppo = configure(tmp_path, ["stdout", "csv", "tensorboard"])
  # Set new logger
  model_ppo.set_logger(new_logger_ppo)

{'n_steps': 2048, 'ent_coef': 0.01, 'learning_rate': 0.00025, 'batch_size': 128}
Using cpu device
Logging to results/ppo


In [None]:
trained_ppo = agent.train_model(model=model_ppo, 
                             tb_log_name='ppo',
                             total_timesteps=200000) if if_using_ppo else None

-------------------------------------
| time/              |              |
|    fps             | 77           |
|    iterations      | 1            |
|    time_elapsed    | 26           |
|    total_timesteps | 2048         |
| train/             |              |
|    reward          | -0.012801195 |
-------------------------------------
Episode: 3
day: 2584, episode: 3
begin_total_asset: 100.00
end_total_asset: 220926.77
total_reward: 220826.77
total_cost: 52131.21
total_trades: 7047
Sharpe: 0.513
------------------------------------------
| time/                   |              |
|    fps                  | 75           |
|    iterations           | 2            |
|    time_elapsed         | 53           |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.013154267  |
|    clip_fraction        | 0.173        |
|    clip_range           | 0.2          |
|    entropy_loss         | -42.6        |
|    explained_varian

------------------------------------------
| time/                   |              |
|    fps                  | 73           |
|    iterations           | 10           |
|    time_elapsed         | 280          |
|    total_timesteps      | 20480        |
| train/                  |              |
|    approx_kl            | 0.020195201  |
|    clip_fraction        | 0.231        |
|    clip_range           | 0.2          |
|    entropy_loss         | -43.1        |
|    explained_variance   | 0.982        |
|    learning_rate        | 0.00025      |
|    loss                 | -0.398       |
|    n_updates            | 90           |
|    policy_gradient_loss | -0.00905     |
|    reward               | -0.034887977 |
|    std                  | 1.02         |
|    value_loss           | 0.0705       |
------------------------------------------
Episode: 10
day: 2584, episode: 10
begin_total_asset: 100.00
end_total_asset: 896395.30
total_reward: 896295.30
total_cost: 188509.23
total_

Episode: 16
day: 2584, episode: 16
begin_total_asset: 100.00
end_total_asset: 15920176.39
total_reward: 15920076.39
total_cost: 2052230.08
total_trades: 11486
Sharpe: 0.439
--------------------------------------------
| time/                   |                |
|    fps                  | 72             |
|    iterations           | 18             |
|    time_elapsed         | 505            |
|    total_timesteps      | 36864          |
| train/                  |                |
|    approx_kl            | 0.018419063    |
|    clip_fraction        | 0.155          |
|    clip_range           | 0.2            |
|    entropy_loss         | -43.8          |
|    explained_variance   | 0.979          |
|    learning_rate        | 0.00025        |
|    loss                 | -0.445         |
|    n_updates            | 170            |
|    policy_gradient_loss | -0.00805       |
|    reward               | -0.00014046287 |
|    std                  | 1.04           |
|    value_loss  

Episode: 22
day: 2584, episode: 22
begin_total_asset: 100.00
end_total_asset: 29235.51
total_reward: 29135.51
total_cost: 28580.80
total_trades: 5589
Sharpe: 0.472
--------------------------------------------
| time/                   |                |
|    fps                  | 70             |
|    iterations           | 26             |
|    time_elapsed         | 753            |
|    total_timesteps      | 53248          |
| train/                  |                |
|    approx_kl            | 0.010851881    |
|    clip_fraction        | 0.156          |
|    clip_range           | 0.2            |
|    entropy_loss         | -44.1          |
|    explained_variance   | 0.964          |
|    learning_rate        | 0.00025        |
|    loss                 | -0.275         |
|    n_updates            | 250            |
|    policy_gradient_loss | -0.0139        |
|    reward               | -0.00020357504 |
|    std                  | 1.05           |
|    value_loss           

-------------------------------------------
| time/                   |               |
|    fps                  | 71            |
|    iterations           | 34            |
|    time_elapsed         | 980           |
|    total_timesteps      | 69632         |
| train/                  |               |
|    approx_kl            | 0.017162492   |
|    clip_fraction        | 0.189         |
|    clip_range           | 0.2           |
|    entropy_loss         | -44.7         |
|    explained_variance   | 0.941         |
|    learning_rate        | 0.00025       |
|    loss                 | -0.437        |
|    n_updates            | 330           |
|    policy_gradient_loss | -0.0126       |
|    reward               | -0.0065130456 |
|    std                  | 1.08          |
|    value_loss           | 0.149         |
-------------------------------------------
Episode: 29
day: 2584, episode: 29
begin_total_asset: 100.00
end_total_asset: 271828.76
total_reward: 271728.76
total_co

Episode: 35
day: 2584, episode: 35
begin_total_asset: 100.00
end_total_asset: 46829.96
total_reward: 46729.96
total_cost: 23409.65
total_trades: 5632
Sharpe: 0.459
--------------------------------------------
| time/                   |                |
|    fps                  | 71             |
|    iterations           | 42             |
|    time_elapsed         | 1204           |
|    total_timesteps      | 86016          |
| train/                  |                |
|    approx_kl            | 0.0104141645   |
|    clip_fraction        | 0.115          |
|    clip_range           | 0.2            |
|    entropy_loss         | -45.2          |
|    explained_variance   | 0.958          |
|    learning_rate        | 0.00025        |
|    loss                 | -0.347         |
|    n_updates            | 410            |
|    policy_gradient_loss | -0.0139        |
|    reward               | -0.00038228836 |
|    std                  | 1.09           |
|    value_loss           

Episode: 41
day: 2584, episode: 41
begin_total_asset: 100.00
end_total_asset: 42400.00
total_reward: 42300.00
total_cost: 16291.98
total_trades: 5922
Sharpe: 0.544
--------------------------------------------
| time/                   |                |
|    fps                  | 71             |
|    iterations           | 50             |
|    time_elapsed         | 1428           |
|    total_timesteps      | 102400         |
| train/                  |                |
|    approx_kl            | 0.015066218    |
|    clip_fraction        | 0.192          |
|    clip_range           | 0.2            |
|    entropy_loss         | -45.7          |
|    explained_variance   | 0.97           |
|    learning_rate        | 0.00025        |
|    loss                 | -0.163         |
|    n_updates            | 490            |
|    policy_gradient_loss | -0.0145        |
|    reward               | -0.00015870566 |
|    std                  | 1.11           |
|    value_loss           

------------------------------------------
| time/                   |              |
|    fps                  | 71           |
|    iterations           | 58           |
|    time_elapsed         | 1654         |
|    total_timesteps      | 118784       |
| train/                  |              |
|    approx_kl            | 0.0071790265 |
|    clip_fraction        | 0.0609       |
|    clip_range           | 0.2          |
|    entropy_loss         | -46.2        |
|    explained_variance   | 0.716        |
|    learning_rate        | 0.00025      |
|    loss                 | 0.557        |
|    n_updates            | 570          |
|    policy_gradient_loss | -0.00528     |
|    reward               | -0.005816503 |
|    std                  | 1.13         |
|    value_loss           | 5.55         |
------------------------------------------
Episode: 48
day: 2584, episode: 48
begin_total_asset: 100.00
end_total_asset: 1435262.55
total_reward: 1435162.55
total_cost: 28173.25
total

Episode: 54
day: 2584, episode: 54
begin_total_asset: 100.00
end_total_asset: 1333432.42
total_reward: 1333332.42
total_cost: 92952.96
total_trades: 5867
Sharpe: 0.432
--------------------------------------------
| time/                   |                |
|    fps                  | 71             |
|    iterations           | 66             |
|    time_elapsed         | 1879           |
|    total_timesteps      | 135168         |
| train/                  |                |
|    approx_kl            | 0.010111347    |
|    clip_fraction        | 0.11           |
|    clip_range           | 0.2            |
|    entropy_loss         | -46.6          |
|    explained_variance   | 0.944          |
|    learning_rate        | 0.00025        |
|    loss                 | -0.441         |
|    n_updates            | 650            |
|    policy_gradient_loss | -0.011         |
|    reward               | -0.00065063185 |
|    std                  | 1.15           |
|    value_loss       

In [None]:
trained_ppo.save(TRAINED_MODEL_DIR + "/agent_ppo") if if_using_ppo else None

In [None]:
e_trade_gym = StockTradingEnv(df = trade, turbulence_threshold = 70,risk_indicator_col='vix', **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

In [None]:
df_account_value_ppo, df_actions_ppo = DRLAgent.DRL_prediction(
    model=trained_ppo, 
    environment = e_trade_gym) if if_using_ppo else (None, None)

In [None]:
df_account_value_ppo.tail()

## TD3


In [None]:
agent = DRLAgent(env = env_train)
TD3_PARAMS = {"batch_size": 100, 
              "buffer_size": 1000000, 
              "learning_rate": 0.001}

model_td3 = agent.get_model("td3",model_kwargs = TD3_PARAMS)

if if_using_td3:
  # set up logger
  tmp_path = RESULTS_DIR + '/td3'
  new_logger_td3 = configure(tmp_path, ["stdout", "csv", "tensorboard"])
  # Set new logger
  model_td3.set_logger(new_logger_td3)

In [None]:
trained_td3 = agent.train_model(model=model_td3, 
                             tb_log_name='td3',
                             total_timesteps=50000) if if_using_td3 else None

In [None]:
trained_td3.save(TRAINED_MODEL_DIR + "/agent_td3") if if_using_td3 else None

In [None]:
e_trade_gym = StockTradingEnv(df = trade, turbulence_threshold = 70,risk_indicator_col='vix', **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

In [None]:
df_account_value_td3, df_actions_td3 = DRLAgent.DRL_prediction(
    model=trained_td3, 
    environment = e_trade_gym) if if_using_td3 else (None, None)

In [None]:
df_account_value_td3.tail()

## SAC

In [None]:
agent = DRLAgent(env = env_train)
SAC_PARAMS = {
    "batch_size": 128,
    "buffer_size": 100000,
    "learning_rate": 0.0001,
    "learning_starts": 100,
    "ent_coef": "auto_0.1",
}

model_sac = agent.get_model("sac",model_kwargs = SAC_PARAMS)

if if_using_sac:
  # set up logger
  tmp_path = RESULTS_DIR + '/sac'
  new_logger_sac = configure(tmp_path, ["stdout", "csv", "tensorboard"])
  # Set new logger
  model_sac.set_logger(new_logger_sac)

In [None]:
trained_sac = agent.train_model(model=model_sac, 
                             tb_log_name='sac',
                             total_timesteps=70000) if if_using_sac else None

In [None]:
trained_sac.save(TRAINED_MODEL_DIR + "/agent_sac") if if_using_sac else None

In [None]:
e_trade_gym = StockTradingEnv(df = trade, turbulence_threshold = 70,risk_indicator_col='vix', **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

In [None]:
df_account_value_sac, df_actions_sac = DRLAgent.DRL_prediction(
    model=trained_sac, 
    environment = e_trade_gym) if if_using_sac else (None, None)

In [None]:
df_account_value_sac.tail()