In [1]:
import datetime
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
matplotlib.use('Agg')
%matplotlib inline
from dateutil.relativedelta import relativedelta

from finrl.apps import config
from finrl.finrl_meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.finrl_meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.finrl_meta.env_portfolio_allocation.env_portfolio import StockPortfolioEnv
from finrl.drl_agents.stablebaselines3.models import DRLAgent
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline, get_baseline_tdx,convert_daily_return_to_pyfolio_ts

  'Module "zipline.assets" not found; multipliers will not be applied'


In [2]:
from lutils.stock import LTdxHq

In [3]:
ddf = pd.read_pickle('d:/ddf.pkl').dropna()

In [4]:
ddf

Unnamed: 0,open,close,high,low,volume,amount,tic,date,day
1103,95.00,82.92,95.00,80.00,552103872.0,4.796717e+10,688981,2020-07-16,3
1104,79.00,77.06,84.90,75.00,219471584.0,1.738815e+10,688981,2020-07-17,4
1105,77.19,79.17,80.51,70.02,228583344.0,1.700523e+10,688981,2020-07-20,0
1106,78.30,78.63,82.89,77.77,161873264.0,1.297766e+10,688981,2020-07-21,1
1107,77.80,79.57,81.78,77.20,133942792.0,1.068249e+10,688981,2020-07-22,2
...,...,...,...,...,...,...,...,...,...
1467,1.15,1.14,1.16,1.12,41564420.0,4.722449e+07,600145,2022-01-12,2
1468,1.14,1.13,1.15,1.12,26707056.0,3.029393e+07,600145,2022-01-13,3
1469,1.08,1.07,1.10,1.07,68835016.0,7.400834e+07,600145,2022-01-14,4
1470,1.02,1.02,1.02,1.02,5638200.0,5.750964e+06,600145,2022-01-17,0


In [5]:
ddf = ddf.sort_values(['date','tic'], ignore_index=True)
ddf.index = ddf.date.factorize()[0]

In [6]:
dl = ddf.loc[-252:,:]
dd = dl.pivot_table(index = 'date',columns = 'tic', values = 'close').pct_change().dropna()
corr = dd.cov().corr()
# corr.style.background_gradient(cmap='coolwarm')

# c1 = corr.abs().unstack().sort_values(ascending = True)
# c1 = corr.unstack().sort_values(ascending = True)

In [7]:
# corr.style.background_gradient(cmap='coolwarm')

In [8]:
# corr.sum().sort_values()

In [9]:
stock_codes = list(corr.sum().sort_values().index.values[:6])

In [10]:
stock_codes

['000921', '600365', '000609', '601868', '002372', '001965']

In [11]:
# stock_codes = ['000921', '002032', '300406', '603789']
# stock_codes = ['000609', '000921', '001965', '002372']

In [12]:
ltdxhq = LTdxHq()

indexs = None
dfs = []
for code in stock_codes:
    df = ltdxhq.get_k_data_daily(code, start='2018-01-01') # 2014-01-01
    
    if indexs is None:
        indexs = df.index
    else:
        indexs = indexs.union(df.index)
    
#     df = df.assign(date = df.index)
#     df = df.assign(day = df.index.weekday)
#     df.date = df.date.dt.strftime('%Y-%m-%d')
    df = df.assign(tic = code)
#     df.index = range(df.shape[0])
    
    dfs.append(df)
    print('----------- over %s min: %s max: %s -----------' % (code, df.index.min(), df.index.max()))

for i, df in enumerate(dfs):
    df = df.reindex(indexs)
    df = df.assign(date = df.index)
    df = df.assign(day = df.index.weekday)
    df.index = range(df.shape[0])
    
    dfs[i] = df.ffill()
    
df = pd.concat(dfs)
# df.index = range(df.shape[0])

ltdxhq.close()

----------- over 000921 min: 2018-01-02 00:00:00 max: 2022-01-19 00:00:00 -----------
----------- over 600365 min: 2018-03-20 00:00:00 max: 2022-01-19 00:00:00 -----------
----------- over 000609 min: 2018-01-02 00:00:00 max: 2022-01-19 00:00:00 -----------
----------- over 601868 min: 2021-09-28 00:00:00 max: 2022-01-19 00:00:00 -----------
----------- over 002372 min: 2018-01-02 00:00:00 max: 2022-01-19 00:00:00 -----------
----------- over 001965 min: 2018-01-02 00:00:00 max: 2022-01-19 00:00:00 -----------


In [13]:
fe = FeatureEngineer(use_technical_indicator=True,
                     use_turbulence=False,
                     user_defined_feature = False)

df = fe.preprocess_data(df)

Successfully added technical indicators


In [14]:
# add covariance matrix as states
df=df.sort_values(['date','tic'], ignore_index=True)
df.index = df.date.factorize()[0]

cov_list = []
return_list = []

# look back is one year
lookback=252
for i in range(lookback, len(df.index.unique())):
    data_lookback = df.loc[i-lookback:i,:]
    price_lookback = data_lookback.pivot_table(index = 'date',columns = 'tic', values = 'close')
    return_lookback = price_lookback.pct_change().dropna()
    return_list.append(return_lookback)

    covs = return_lookback.cov().values
    cov_list.append(covs)


df_cov = pd.DataFrame({'date':df.date.unique()[lookback:],'cov_list': cov_list,'return_list': return_list})
df = df.merge(df_cov, on='date')
df = df.sort_values(['date','tic']).reset_index(drop=True)

In [15]:
df['daily_variance'] = (df.high-df.low) / df.close

In [16]:
dl = df.loc[-252:, :]
dd = dl.pivot_table(index = 'date',columns = 'tic', values = 'close').pct_change().dropna()
dd.cov().corr().style.background_gradient(cmap='coolwarm')

tic,000609,000921,001965,002372
tic,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
609,1.0,-0.229306,-0.229676,-0.292968
921,-0.229306,1.0,-0.396903,0.106719
1965,-0.229676,-0.396903,1.0,-0.769836
2372,-0.292968,0.106719,-0.769836,1.0


In [17]:
train = data_split(df, '2019-01-01','2022-01-01') # 2021-07-01 2022-01-01

In [18]:
train.head()

Unnamed: 0,open,close,high,low,volume,amount,tic,date,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,cov_list,return_list,daily_variance
0,5.27,5.29,5.32,5.25,5759311.0,30399482.0,609,2019-01-15,1,-0.08255,5.946131,4.840869,48.0639,-59.971539,0.911034,5.441,5.467333,"[[0.0009595613956845235, 0.0003124109814202052...",tic 000609 000921 001965 00...,0.013233
0,7.34,7.55,7.55,7.22,20004778.0,161089488.0,921,2019-01-15,1,0.171956,7.579382,5.955618,57.9551,171.516755,60.304291,6.799667,6.725167,"[[0.0009595613956845235, 0.0003124109814202052...",tic 000609 000921 001965 00...,0.043709
0,7.39,7.42,7.43,7.34,1422853.0,11541926.0,1965,2019-01-15,1,-0.0086,7.496838,7.139162,49.760481,27.822945,12.144795,7.363,7.4935,"[[0.0009595613956845235, 0.0003124109814202052...",tic 000609 000921 001965 00...,0.012129
0,11.7,12.04,12.08,11.68,4112392.0,64759288.0,2372,2019-01-15,1,0.160269,12.085685,10.869315,56.642725,165.038307,13.980941,11.421,11.001833,"[[0.0009595613956845235, 0.0003124109814202052...",tic 000609 000921 001965 00...,0.033223
1,5.28,5.3,5.59,5.16,13215401.0,70665424.0,609,2019-01-16,2,-0.076507,5.937906,4.832094,48.18694,-33.431536,13.031968,5.425,5.483167,"[[0.000959630829728534, 0.00031221297698058804...",tic 000609 000921 001965 00...,0.081132


In [19]:
import numpy as np
import pandas as pd
from gym.utils import seeding
import gym
from gym import spaces
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from stable_baselines3.common.vec_env import DummyVecEnv


class StockPortfolioEnv(gym.Env):
    """A portfolio allocation environment for OpenAI gym

    Attributes
    ----------
        df: DataFrame
            input data
        stock_dim : int
            number of unique stocks
        hmax : int
            maximum number of shares to trade
        initial_amount : int
            start money
        transaction_cost_pct: float
            transaction cost percentage per trade
        reward_scaling: float
            scaling factor for reward, good for training
        state_space: int
            the dimension of input features
        action_space: int
            equals stock dimension
        tech_indicator_list: list
            a list of technical indicator names
        turbulence_threshold: int
            a threshold to control risk aversion
        day: int
            an increment number to control date

    Methods
    -------
    _sell_stock()
        perform sell action based on the sign of the action
    _buy_stock()
        perform buy action based on the sign of the action
    step()
        at each step the agent will return actions, then 
        we will calculate the reward, and return the next observation.
    reset()
        reset the environment
    render()
        use render to return other functions
    save_asset_memory()
        return account value at each time step
    save_action_memory()
        return actions/positions at each time step
        

    """
    metadata = {'render.modes': ['human']}

    def __init__(self,
                 df,
                 stock_dim,
                 hmax,
                 initial_amount,
                 transaction_cost_pct,
                 reward_scaling,
                 state_space,
                 action_space,
                 tech_indicator_list,
                 turbulence_threshold=None,
                 lookback=252,
                 day=0):
        #super(StockEnv, self).__init__()
        #money = 10 , scope = 1
        self.day = day
        self.lookback=lookback
        self.df = df
        self.stock_dim = stock_dim
        self.hmax = hmax
        self.initial_amount = initial_amount
#         self.reward = initial_amount
        self.transaction_cost_pct =transaction_cost_pct
        self.reward_scaling = reward_scaling
        self.state_space = state_space
        self.action_space = action_space
        self.tech_indicator_list = tech_indicator_list

        # action_space normalization and shape is self.stock_dim
        self.action_space = spaces.Box(low=0, high=1,shape=(self.action_space,))
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape = (self.state_space+len(self.tech_indicator_list),self.state_space))

        # load data from a pandas dataframe
        self.data = self.df.loc[self.day,:]
        self.covs = self.data['cov_list'].values[0]
        self.state =  np.append(np.array(self.covs), [self.data[tech].values.tolist() for tech in self.tech_indicator_list ], axis=0)
        self.terminal = False
        self.turbulence_threshold = turbulence_threshold
        # initalize state: inital portfolio return + individual stock return + individual weights
        self.portfolio_value = self.initial_amount

        # memorize portfolio value each step
        self.asset_memory = [self.initial_amount]
        # memorize portfolio return each step
        self.portfolio_return_memory = [0]
        self.actions_memory=[[1 / self.stock_dim] * self.stock_dim]
        self.date_memory=[self.data.date.unique()[0]]

        
    def step(self, actions):
        self.terminal = self.day >= len(self.df.index.unique()) - 1

        if self.terminal:
            df = pd.DataFrame(self.portfolio_return_memory)
            df.columns = ['daily_return']
#             plt.plot(df.daily_return.cumsum(),'r')
#             plt.savefig('results/cumulative_reward.png')
#             plt.close()
            
#             plt.plot(self.portfolio_return_memory,'r')
#             plt.savefig('results/rewards.png')
#             plt.close()

            print("=================================")
            print("begin_total_asset:{}".format(self.asset_memory[0]))
            print("end_total_asset:{}".format(self.portfolio_value))

            df_daily_return = pd.DataFrame(self.portfolio_return_memory)
            df_daily_return.columns = ['daily_return']
            if df_daily_return['daily_return'].std() != 0:
                sharpe = (252 ** 0.5) * df_daily_return['daily_return'].mean() / df_daily_return['daily_return'].std()
                print("Sharpe: ", sharpe)
            print("=================================")
            
            return self.state, self.reward, self.terminal,{}

        else:
            weights = self.softmax_normalization(actions)
            self.actions_memory.append(weights)
            last_day_memory = self.data

            #load next state
            self.day += 1
            self.data = self.df.loc[self.day,:]
            self.covs = self.data['cov_list'].values[0]
            self.state = np.append(np.array(self.covs), [self.data[tech].values.tolist() for tech in self.tech_indicator_list], axis=0)
            portfolio_return = sum(((self.data.close.values / last_day_memory.close.values) - 1) * weights)
#             log_portfolio_return = np.log(sum((self.data.close.values / last_day_memory.close.values) * weights))
            # update portfolio value
            new_portfolio_value = self.portfolio_value * (1 + portfolio_return)
#             new_portfolio_value = self.portfolio_value * (1 + log_portfolio_return)
            self.portfolio_value = new_portfolio_value

            # save into memory
            self.portfolio_return_memory.append(portfolio_return)
            self.date_memory.append(self.data.date.unique()[0])
            self.asset_memory.append(new_portfolio_value)

            # the reward is the new portfolio value or end portfolo value
            self.reward = new_portfolio_value

        return self.state, self.reward, self.terminal, {}

    def reset(self):
        self.asset_memory = [self.initial_amount]
        self.day = 0
        self.data = self.df.loc[self.day,:]
        # load states
        self.covs = self.data['cov_list'].values[0]
        self.state =  np.append(np.array(self.covs), [self.data[tech].values.tolist() for tech in self.tech_indicator_list ], axis=0)
        self.portfolio_value = self.initial_amount
        #self.cost = 0
        #self.trades = 0
        self.terminal = False
        self.portfolio_return_memory = [0]
        self.actions_memory=[[1/self.stock_dim]*self.stock_dim]
        self.date_memory=[self.data.date.unique()[0]]
        return self.state
    
    def render(self, mode='human'):
        return self.state
        
    def softmax_normalization(self, actions):
        numerator = np.exp(actions)
        denominator = np.sum(np.exp(actions))
        softmax_output = numerator / denominator
        return softmax_output

    
    def save_asset_memory(self):
        date_list = self.date_memory
        portfolio_return = self.portfolio_return_memory
        #print(len(date_list))
        #print(len(asset_list))
        df_account_value = pd.DataFrame({'date':date_list,'daily_return':portfolio_return})
        return df_account_value

    def save_action_memory(self):
        # date and close price length must match actions length
        date_list = self.date_memory
        df_date = pd.DataFrame(date_list)
        df_date.columns = ['date']
        
        action_list = self.actions_memory
        df_actions = pd.DataFrame(action_list)
        df_actions.columns = self.data.tic.values
        df_actions.index = df_date.date
        #df_actions = pd.DataFrame({'date':date_list,'actions':action_list})
        return df_actions

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def get_sb_env(self):
        e = DummyVecEnv([lambda: self])
        obs = e.reset()
        return e, obs

In [20]:
stock_dimension = len(train.tic.unique())
state_space = stock_dimension
print(f'Stock Dimension: {stock_dimension}, State Space: {state_space}')

Stock Dimension: 4, State Space: 4


In [21]:
# ['daily_variance', 'change', 'log_volume', 'close','day', 'macd', 'rsi_30', 'boll_ub', 'dx_30']
# ['macd', 'boll_ub', 'boll_lb', 'rsi_30', 'boll_ub', 'dx_30', 'close_30_sma', 'close_60_sma'] # cci_30
tech_indicator_list = ['macd', 'boll_ub', 'boll_lb', 'rsi_30', 'daily_variance', 'dx_30', 'close_30_sma', 'close_60_sma']
env_kwargs = {
    "hmax": 100, 
    "initial_amount": 100000, 
    "transaction_cost_pct": 0, 
    "state_space": state_space, 
    "stock_dim": stock_dimension, 
    "tech_indicator_list": tech_indicator_list, # config.TECHNICAL_INDICATORS_LIST, 
    "action_space": stock_dimension, 
    "reward_scaling": 1e-1
    
}

e_train_gym = StockPortfolioEnv(df = train, **env_kwargs)

In [22]:
env_train, _ = e_train_gym.get_sb_env()
print(type(env_train))

<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>


In [23]:
agent = DRLAgent(env = env_train)
PPO_PARAMS = {
    "n_steps": 2048,
    "ent_coef": 0.005,
    "learning_rate": 0.0001,
    "batch_size": 128,
}
model_ppo = agent.get_model("ppo", model_kwargs = PPO_PARAMS, tensorboard_log='D:/code/python/logs')

{'n_steps': 2048, 'ent_coef': 0.005, 'learning_rate': 0.0001, 'batch_size': 128}
Using cuda device


In [24]:
agent.train_model(model=model_ppo, tb_log_name='ppo', total_timesteps=1000000)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Logging to D:/code/python/logs\ppo_1
begin_total_asset:100000
end_total_asset:154398.87560139806
Sharpe:  0.6851663684815609
begin_total_asset:100000
end_total_asset:145915.03048088079
Sharpe:  0.6199735532756807
----------------------------------
| time/              |           |
|    fps             | 367       |
|    iterations      | 1         |
|    time_elapsed    | 5         |
|    total_timesteps | 2048      |
| train/             |           |
|    reward          | 165634.42 |
----------------------------------
begin_total_asset:100000
end_total_asset:202472.0945188156
Sharpe:  1.0182071371881092
begin_total_asset:100000
end_total_asset:171553.46141976325
Sharpe:  0.8250735086226509
begin_total_asset:100000
end_total_asset:173184.2382231426
Sharpe:  0.8447714393297109
-----------------------------------------
| time/                   |             |
|    fps                  | 419         |
|    iterations           | 2           |
|    time_elapsed         | 9           |


begin_total_asset:100000
end_total_asset:136876.7907475024
Sharpe:  0.5341634557033907
begin_total_asset:100000
end_total_asset:153356.7971899355
Sharpe:  0.6884131481028288
begin_total_asset:100000
end_total_asset:138103.33651064863
Sharpe:  0.543533894307107
------------------------------------------
| time/                   |              |
|    fps                  | 472          |
|    iterations           | 8            |
|    time_elapsed         | 34           |
|    total_timesteps      | 16384        |
| train/                  |              |
|    approx_kl            | 9.284122e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | -2.38e-07    |
|    learning_rate        | 0.0001       |
|    loss                 | 2.1e+12      |
|    n_updates            | 70           |
|    policy_gradient_loss | -2.26e-06    |
|    reward               | 124695.95    |
|    std

-------------------------------------------
| time/                   |               |
|    fps                  | 475           |
|    iterations           | 14            |
|    time_elapsed         | 60            |
|    total_timesteps      | 28672         |
| train/                  |               |
|    approx_kl            | 1.1554221e-08 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -5.68         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0001        |
|    loss                 | 2.2e+12       |
|    n_updates            | 130           |
|    policy_gradient_loss | -3.37e-06     |
|    reward               | 152552.33     |
|    std                  | 1             |
|    value_loss           | 4.18e+12      |
-------------------------------------------
begin_total_asset:100000
end_total_asset:176007.3386063175
Sharpe:  0.8518452022815037
begin_total_asset:100000
end_tota

begin_total_asset:100000
end_total_asset:168427.78421051367
Sharpe:  0.800539990319413
begin_total_asset:100000
end_total_asset:166768.73890896264
Sharpe:  0.7783185668641426
begin_total_asset:100000
end_total_asset:182084.1283297226
Sharpe:  0.8924107900814207
------------------------------------------
| time/                   |              |
|    fps                  | 478          |
|    iterations           | 21           |
|    time_elapsed         | 89           |
|    total_timesteps      | 43008        |
| train/                  |              |
|    approx_kl            | 8.760253e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 1.79e-07     |
|    learning_rate        | 0.0001       |
|    loss                 | 2.11e+12     |
|    n_updates            | 200          |
|    policy_gradient_loss | -1.45e-06    |
|    reward               | 115693.086   |
|    st

------------------------------------------
| time/                   |              |
|    fps                  | 478          |
|    iterations           | 27           |
|    time_elapsed         | 115          |
|    total_timesteps      | 55296        |
| train/                  |              |
|    approx_kl            | 8.731149e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0001       |
|    loss                 | 2.21e+12     |
|    n_updates            | 260          |
|    policy_gradient_loss | -9.39e-07    |
|    reward               | 115620.23    |
|    std                  | 1            |
|    value_loss           | 4.51e+12     |
------------------------------------------
begin_total_asset:100000
end_total_asset:127435.91494248173
Sharpe:  0.4466512641718706
begin_total_asset:100000
end_total_asset:119653.8867

begin_total_asset:100000
end_total_asset:146700.63657825417
Sharpe:  0.6226161253919161
begin_total_asset:100000
end_total_asset:161299.3508456163
Sharpe:  0.747562127083497
begin_total_asset:100000
end_total_asset:124109.27705960989
Sharpe:  0.4131078690414391
------------------------------------------
| time/                   |              |
|    fps                  | 480          |
|    iterations           | 34           |
|    time_elapsed         | 144          |
|    total_timesteps      | 69632        |
| train/                  |              |
|    approx_kl            | 9.895302e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0001       |
|    loss                 | 2.06e+12     |
|    n_updates            | 330          |
|    policy_gradient_loss | -1.52e-06    |
|    reward               | 123392.06    |
|    st

------------------------------------------
| time/                   |              |
|    fps                  | 482          |
|    iterations           | 40           |
|    time_elapsed         | 169          |
|    total_timesteps      | 81920        |
| train/                  |              |
|    approx_kl            | 9.225914e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 5.96e-08     |
|    learning_rate        | 0.0001       |
|    loss                 | 1.93e+12     |
|    n_updates            | 390          |
|    policy_gradient_loss | -1.81e-06    |
|    reward               | 133260.72    |
|    std                  | 1            |
|    value_loss           | 3.82e+12     |
------------------------------------------
begin_total_asset:100000
end_total_asset:145789.01875064988
Sharpe:  0.6169648164183057
begin_total_asset:100000
end_total_asset:161611.7768

begin_total_asset:100000
end_total_asset:159208.95457790303
Sharpe:  0.7310014894911449
begin_total_asset:100000
end_total_asset:169683.2600279773
Sharpe:  0.8123336680859725
begin_total_asset:100000
end_total_asset:165326.32675749538
Sharpe:  0.7821486923289346
-------------------------------------------
| time/                   |               |
|    fps                  | 483           |
|    iterations           | 47            |
|    time_elapsed         | 198           |
|    total_timesteps      | 96256         |
| train/                  |               |
|    approx_kl            | 8.6729415e-09 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -5.68         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0001        |
|    loss                 | 2.01e+12      |
|    n_updates            | 460           |
|    policy_gradient_loss | -1.15e-06     |
|    reward               | 12312

------------------------------------------
| time/                   |              |
|    fps                  | 484          |
|    iterations           | 53           |
|    time_elapsed         | 223          |
|    total_timesteps      | 108544       |
| train/                  |              |
|    approx_kl            | 8.090865e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0001       |
|    loss                 | 2.08e+12     |
|    n_updates            | 520          |
|    policy_gradient_loss | -6.32e-07    |
|    reward               | 144560.66    |
|    std                  | 1            |
|    value_loss           | 4.24e+12     |
------------------------------------------
begin_total_asset:100000
end_total_asset:175189.66385485439
Sharpe:  0.8400173399981576
begin_total_asset:100000
end_total_asset:143192.6363

begin_total_asset:100000
end_total_asset:121818.40222837483
Sharpe:  0.38885093856168096
begin_total_asset:100000
end_total_asset:184402.72258562935
Sharpe:  0.9098897003689738
begin_total_asset:100000
end_total_asset:182607.82478577152
Sharpe:  0.8905053864666885
-------------------------------------------
| time/                   |               |
|    fps                  | 485           |
|    iterations           | 60            |
|    time_elapsed         | 253           |
|    total_timesteps      | 122880        |
| train/                  |               |
|    approx_kl            | 1.0419171e-08 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -5.68         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0001        |
|    loss                 | 2.23e+12      |
|    n_updates            | 590           |
|    policy_gradient_loss | -3.92e-06     |
|    reward               | 969

begin_total_asset:100000
end_total_asset:157973.58489657103
Sharpe:  0.7143348579908847
------------------------------------------
| time/                   |              |
|    fps                  | 485          |
|    iterations           | 66           |
|    time_elapsed         | 278          |
|    total_timesteps      | 135168       |
| train/                  |              |
|    approx_kl            | 9.924406e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0001       |
|    loss                 | 2e+12        |
|    n_updates            | 650          |
|    policy_gradient_loss | -2.67e-06    |
|    reward               | 116241.04    |
|    std                  | 1            |
|    value_loss           | 4.03e+12     |
------------------------------------------
begin_total_asset:100000
end_total_asset:177707.0715

begin_total_asset:100000
end_total_asset:169932.70150005753
Sharpe:  0.8062816043291265
begin_total_asset:100000
end_total_asset:158121.3295334008
Sharpe:  0.7275757418748117
begin_total_asset:100000
end_total_asset:171470.14023382645
Sharpe:  0.827255572309056
------------------------------------------
| time/                   |              |
|    fps                  | 485          |
|    iterations           | 73           |
|    time_elapsed         | 307          |
|    total_timesteps      | 149504       |
| train/                  |              |
|    approx_kl            | 8.090865e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0001       |
|    loss                 | 2.41e+12     |
|    n_updates            | 720          |
|    policy_gradient_loss | -8.47e-07    |
|    reward               | 104547.81    |
|    st

------------------------------------------
| time/                   |              |
|    fps                  | 485          |
|    iterations           | 79           |
|    time_elapsed         | 332          |
|    total_timesteps      | 161792       |
| train/                  |              |
|    approx_kl            | 9.284122e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0001       |
|    loss                 | 2.31e+12     |
|    n_updates            | 780          |
|    policy_gradient_loss | -1.54e-06    |
|    reward               | 114439.05    |
|    std                  | 1            |
|    value_loss           | 4.61e+12     |
------------------------------------------
begin_total_asset:100000
end_total_asset:201081.90782050835
Sharpe:  1.0293635562593446
begin_total_asset:100000
end_total_asset:139187.7526

begin_total_asset:100000
end_total_asset:195999.10883267815
Sharpe:  0.9947318592149528
begin_total_asset:100000
end_total_asset:126745.70280036991
Sharpe:  0.4386994316717973
begin_total_asset:100000
end_total_asset:181478.69352452585
Sharpe:  0.8940201466464357
------------------------------------------
| time/                   |              |
|    fps                  | 486          |
|    iterations           | 86           |
|    time_elapsed         | 362          |
|    total_timesteps      | 176128       |
| train/                  |              |
|    approx_kl            | 8.440111e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | -2.38e-07    |
|    learning_rate        | 0.0001       |
|    loss                 | 2.21e+12     |
|    n_updates            | 850          |
|    policy_gradient_loss | -5.68e-07    |
|    reward               | 110169.53    |
|    

------------------------------------------
| time/                   |              |
|    fps                  | 486          |
|    iterations           | 92           |
|    time_elapsed         | 387          |
|    total_timesteps      | 188416       |
| train/                  |              |
|    approx_kl            | 8.236384e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | -2.38e-07    |
|    learning_rate        | 0.0001       |
|    loss                 | 2.22e+12     |
|    n_updates            | 910          |
|    policy_gradient_loss | -9.18e-07    |
|    reward               | 125501.1     |
|    std                  | 1            |
|    value_loss           | 4.51e+12     |
------------------------------------------
begin_total_asset:100000
end_total_asset:143333.91138956658
Sharpe:  0.5904219809258241
begin_total_asset:100000
end_total_asset:151111.6579

begin_total_asset:100000
end_total_asset:142351.22935597564
Sharpe:  0.5845949860253384
begin_total_asset:100000
end_total_asset:170112.39200983458
Sharpe:  0.8166419760584817
begin_total_asset:100000
end_total_asset:145934.9858888829
Sharpe:  0.6200045640930097
------------------------------------------
| time/                   |              |
|    fps                  | 486          |
|    iterations           | 99           |
|    time_elapsed         | 416          |
|    total_timesteps      | 202752       |
| train/                  |              |
|    approx_kl            | 7.741619e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | -2.38e-07    |
|    learning_rate        | 0.0001       |
|    loss                 | 2.05e+12     |
|    n_updates            | 980          |
|    policy_gradient_loss | -4.09e-07    |
|    reward               | 121102.65    |
|    s

------------------------------------------
| time/                   |              |
|    fps                  | 486          |
|    iterations           | 105          |
|    time_elapsed         | 441          |
|    total_timesteps      | 215040       |
| train/                  |              |
|    approx_kl            | 8.993084e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | -2.38e-07    |
|    learning_rate        | 0.0001       |
|    loss                 | 2.48e+12     |
|    n_updates            | 1040         |
|    policy_gradient_loss | -7.8e-07     |
|    reward               | 111018.91    |
|    std                  | 1            |
|    value_loss           | 4.77e+12     |
------------------------------------------
begin_total_asset:100000
end_total_asset:142828.1002547142
Sharpe:  0.5910356662349978
begin_total_asset:100000
end_total_asset:188627.69172

begin_total_asset:100000
end_total_asset:163776.48731634088
Sharpe:  0.7645095997735296
begin_total_asset:100000
end_total_asset:152976.5816441503
Sharpe:  0.6755507848001474
begin_total_asset:100000
end_total_asset:169574.2806863287
Sharpe:  0.8212970204347414
------------------------------------------
| time/                   |              |
|    fps                  | 486          |
|    iterations           | 112          |
|    time_elapsed         | 471          |
|    total_timesteps      | 229376       |
| train/                  |              |
|    approx_kl            | 8.760253e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | -3.58e-07    |
|    learning_rate        | 0.0001       |
|    loss                 | 1.94e+12     |
|    n_updates            | 1110         |
|    policy_gradient_loss | -1.2e-06     |
|    reward               | 128939.28    |
|    st

-------------------------------------------
| time/                   |               |
|    fps                  | 487           |
|    iterations           | 118           |
|    time_elapsed         | 495           |
|    total_timesteps      | 241664        |
| train/                  |               |
|    approx_kl            | 1.0099029e-08 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -5.68         |
|    explained_variance   | -2.38e-07     |
|    learning_rate        | 0.0001        |
|    loss                 | 2.35e+12      |
|    n_updates            | 1170          |
|    policy_gradient_loss | -1.3e-06      |
|    reward               | 115237.59     |
|    std                  | 1             |
|    value_loss           | 4.52e+12      |
-------------------------------------------
begin_total_asset:100000
end_total_asset:135029.80960293772
Sharpe:  0.5247182518033245
begin_total_asset:100000
end_tot

begin_total_asset:100000
end_total_asset:169371.81191502442
Sharpe:  0.8052567521819166
begin_total_asset:100000
end_total_asset:165747.29672084932
Sharpe:  0.784428752505217
begin_total_asset:100000
end_total_asset:164505.47352270898
Sharpe:  0.7652359398425601
------------------------------------------
| time/                   |              |
|    fps                  | 487          |
|    iterations           | 125          |
|    time_elapsed         | 524          |
|    total_timesteps      | 256000       |
| train/                  |              |
|    approx_kl            | 9.837095e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 2.98e-07     |
|    learning_rate        | 0.0001       |
|    loss                 | 2.07e+12     |
|    n_updates            | 1240         |
|    policy_gradient_loss | -1.34e-06    |
|    reward               | 125717.65    |
|    s

begin_total_asset:100000
end_total_asset:184681.75238326698
Sharpe:  0.9065492558146856
begin_total_asset:100000
end_total_asset:158130.6493582602
Sharpe:  0.7203999996981222
------------------------------------------
| time/                   |              |
|    fps                  | 487          |
|    iterations           | 132          |
|    time_elapsed         | 554          |
|    total_timesteps      | 270336       |
| train/                  |              |
|    approx_kl            | 8.381903e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 1.79e-07     |
|    learning_rate        | 0.0001       |
|    loss                 | 2.32e+12     |
|    n_updates            | 1310         |
|    policy_gradient_loss | -7.42e-07    |
|    reward               | 122762.05    |
|    std                  | 1            |
|    value_loss           | 4.49e+12     |
--------

begin_total_asset:100000
end_total_asset:161110.78032837764
Sharpe:  0.7439487927267577
begin_total_asset:100000
end_total_asset:194691.1679433765
Sharpe:  0.9783246661959053
begin_total_asset:100000
end_total_asset:151167.7595216276
Sharpe:  0.6565317470449659
------------------------------------------
| time/                   |              |
|    fps                  | 487          |
|    iterations           | 139          |
|    time_elapsed         | 583          |
|    total_timesteps      | 284672       |
| train/                  |              |
|    approx_kl            | 9.022187e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 1.79e-07     |
|    learning_rate        | 0.0001       |
|    loss                 | 2.24e+12     |
|    n_updates            | 1380         |
|    policy_gradient_loss | -1.39e-06    |
|    reward               | 123132.125   |
|    st

------------------------------------------
| time/                   |              |
|    fps                  | 487          |
|    iterations           | 145          |
|    time_elapsed         | 609          |
|    total_timesteps      | 296960       |
| train/                  |              |
|    approx_kl            | 8.993084e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 2.38e-07     |
|    learning_rate        | 0.0001       |
|    loss                 | 2.38e+12     |
|    n_updates            | 1440         |
|    policy_gradient_loss | -6.78e-07    |
|    reward               | 119747.914   |
|    std                  | 1            |
|    value_loss           | 4.53e+12     |
------------------------------------------
begin_total_asset:100000
end_total_asset:146125.99683619876
Sharpe:  0.6209775123161136
begin_total_asset:100000
end_total_asset:177848.0625

begin_total_asset:100000
end_total_asset:163669.51588234448
Sharpe:  0.7639083254931346
begin_total_asset:100000
end_total_asset:170629.33049376684
Sharpe:  0.808842774250955
begin_total_asset:100000
end_total_asset:158272.95213278348
Sharpe:  0.7265873525818327
------------------------------------------
| time/                   |              |
|    fps                  | 487          |
|    iterations           | 152          |
|    time_elapsed         | 638          |
|    total_timesteps      | 311296       |
| train/                  |              |
|    approx_kl            | 8.003553e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 2.98e-07     |
|    learning_rate        | 0.0001       |
|    loss                 | 2.29e+12     |
|    n_updates            | 1510         |
|    policy_gradient_loss | -8.88e-07    |
|    reward               | 135851.25    |
|    s

------------------------------------------
| time/                   |              |
|    fps                  | 488          |
|    iterations           | 158          |
|    time_elapsed         | 662          |
|    total_timesteps      | 323584       |
| train/                  |              |
|    approx_kl            | 9.807991e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 2.98e-07     |
|    learning_rate        | 0.0001       |
|    loss                 | 1.86e+12     |
|    n_updates            | 1570         |
|    policy_gradient_loss | -2.07e-06    |
|    reward               | 114110.21    |
|    std                  | 1            |
|    value_loss           | 3.76e+12     |
------------------------------------------
begin_total_asset:100000
end_total_asset:137542.5302415742
Sharpe:  0.5397445490427787
begin_total_asset:100000
end_total_asset:143795.77565

begin_total_asset:100000
end_total_asset:123895.16222405549
Sharpe:  0.40968915458563204
begin_total_asset:100000
end_total_asset:136856.0677718238
Sharpe:  0.5412686790556951
begin_total_asset:100000
end_total_asset:176982.12660057182
Sharpe:  0.8627318171757118
------------------------------------------
| time/                   |              |
|    fps                  | 488          |
|    iterations           | 165          |
|    time_elapsed         | 692          |
|    total_timesteps      | 337920       |
| train/                  |              |
|    approx_kl            | 9.167707e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0001       |
|    loss                 | 2.09e+12     |
|    n_updates            | 1640         |
|    policy_gradient_loss | -1.22e-06    |
|    reward               | 124898.336   |
|    

------------------------------------------
| time/                   |              |
|    fps                  | 488          |
|    iterations           | 171          |
|    time_elapsed         | 717          |
|    total_timesteps      | 350208       |
| train/                  |              |
|    approx_kl            | 7.741619e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 1.79e-07     |
|    learning_rate        | 0.0001       |
|    loss                 | 2.03e+12     |
|    n_updates            | 1700         |
|    policy_gradient_loss | -9.84e-07    |
|    reward               | 144113.67    |
|    std                  | 1            |
|    value_loss           | 4.04e+12     |
------------------------------------------
begin_total_asset:100000
end_total_asset:155014.81238435788
Sharpe:  0.7088389362334491
begin_total_asset:100000
end_total_asset:157194.6066

begin_total_asset:100000
end_total_asset:177226.89862929034
Sharpe:  0.8655616741345643
begin_total_asset:100000
end_total_asset:173300.10212260744
Sharpe:  0.8327231622502391
begin_total_asset:100000
end_total_asset:177329.53068645572
Sharpe:  0.8628473992185619
-----------------------------------------
| time/                   |             |
|    fps                  | 488         |
|    iterations           | 178         |
|    time_elapsed         | 746         |
|    total_timesteps      | 364544      |
| train/                  |             |
|    approx_kl            | 8.96398e-09 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -5.68       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0001      |
|    loss                 | 2.26e+12    |
|    n_updates            | 1770        |
|    policy_gradient_loss | -1.67e-06   |
|    reward               | 120743.375  |
|    std              

------------------------------------------
| time/                   |              |
|    fps                  | 488          |
|    iterations           | 184          |
|    time_elapsed         | 771          |
|    total_timesteps      | 376832       |
| train/                  |              |
|    approx_kl            | 8.411007e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 2.98e-07     |
|    learning_rate        | 0.0001       |
|    loss                 | 2.47e+12     |
|    n_updates            | 1830         |
|    policy_gradient_loss | -1.78e-06    |
|    reward               | 133757.75    |
|    std                  | 1            |
|    value_loss           | 4.92e+12     |
------------------------------------------
begin_total_asset:100000
end_total_asset:181117.13107115225
Sharpe:  0.897368706409297
begin_total_asset:100000
end_total_asset:162209.36556

begin_total_asset:100000
end_total_asset:123694.48522607259
Sharpe:  0.40908841645910804
begin_total_asset:100000
end_total_asset:162617.46733553958
Sharpe:  0.7513422802914305
begin_total_asset:100000
end_total_asset:200206.04945843018
Sharpe:  1.0026078009922657
-------------------------------------------
| time/                   |               |
|    fps                  | 488           |
|    iterations           | 191           |
|    time_elapsed         | 799           |
|    total_timesteps      | 391168        |
| train/                  |               |
|    approx_kl            | 9.8661985e-09 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -5.68         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0001        |
|    loss                 | 2.15e+12      |
|    n_updates            | 1900          |
|    policy_gradient_loss | -1.89e-06     |
|    reward               | 130

------------------------------------------
| time/                   |              |
|    fps                  | 489          |
|    iterations           | 197          |
|    time_elapsed         | 824          |
|    total_timesteps      | 403456       |
| train/                  |              |
|    approx_kl            | 9.167707e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 1.79e-07     |
|    learning_rate        | 0.0001       |
|    loss                 | 2.18e+12     |
|    n_updates            | 1960         |
|    policy_gradient_loss | -1.81e-06    |
|    reward               | 125464.06    |
|    std                  | 1            |
|    value_loss           | 4.23e+12     |
------------------------------------------
begin_total_asset:100000
end_total_asset:188931.3266215502
Sharpe:  0.9283724370605096
begin_total_asset:100000
end_total_asset:153457.99468

begin_total_asset:100000
end_total_asset:157332.87814658965
Sharpe:  0.7224808462172521
begin_total_asset:100000
end_total_asset:173550.56711123392
Sharpe:  0.8383616996122735
begin_total_asset:100000
end_total_asset:161942.94189837738
Sharpe:  0.7503361031004517
------------------------------------------
| time/                   |              |
|    fps                  | 489          |
|    iterations           | 204          |
|    time_elapsed         | 854          |
|    total_timesteps      | 417792       |
| train/                  |              |
|    approx_kl            | 7.945346e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0001       |
|    loss                 | 2.25e+12     |
|    n_updates            | 2030         |
|    policy_gradient_loss | -6.79e-07    |
|    reward               | 123583.46    |
|    

------------------------------------------
| time/                   |              |
|    fps                  | 489          |
|    iterations           | 210          |
|    time_elapsed         | 879          |
|    total_timesteps      | 430080       |
| train/                  |              |
|    approx_kl            | 9.051291e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 5.96e-08     |
|    learning_rate        | 0.0001       |
|    loss                 | 2.21e+12     |
|    n_updates            | 2090         |
|    policy_gradient_loss | -1.13e-06    |
|    reward               | 137575.11    |
|    std                  | 1            |
|    value_loss           | 4.38e+12     |
------------------------------------------
begin_total_asset:100000
end_total_asset:186066.649436553
Sharpe:  0.925333338180591
begin_total_asset:100000
end_total_asset:181288.8558640

begin_total_asset:100000
end_total_asset:144466.2269380744
Sharpe:  0.6060183897237643
begin_total_asset:100000
end_total_asset:173119.51830857314
Sharpe:  0.8472085986317371
begin_total_asset:100000
end_total_asset:173259.0735575942
Sharpe:  0.8407048055861991
------------------------------------------
| time/                   |              |
|    fps                  | 489          |
|    iterations           | 217          |
|    time_elapsed         | 908          |
|    total_timesteps      | 444416       |
| train/                  |              |
|    approx_kl            | 8.731149e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0001       |
|    loss                 | 2.22e+12     |
|    n_updates            | 2160         |
|    policy_gradient_loss | -7.83e-07    |
|    reward               | 104595.414   |
|    st

begin_total_asset:100000
end_total_asset:162055.5685323604
Sharpe:  0.7419566765784383
------------------------------------------
| time/                   |              |
|    fps                  | 489          |
|    iterations           | 223          |
|    time_elapsed         | 933          |
|    total_timesteps      | 456704       |
| train/                  |              |
|    approx_kl            | 6.722985e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0001       |
|    loss                 | 2.02e+12     |
|    n_updates            | 2220         |
|    policy_gradient_loss | -6.2e-07     |
|    reward               | 112688.29    |
|    std                  | 1            |
|    value_loss           | 4.15e+12     |
------------------------------------------
begin_total_asset:100000
end_total_asset:167156.52567

begin_total_asset:100000
end_total_asset:175718.92784865844
Sharpe:  0.8493341300333148
begin_total_asset:100000
end_total_asset:146325.44939423195
Sharpe:  0.6259438077107294
begin_total_asset:100000
end_total_asset:169364.5682650019
Sharpe:  0.7984966872137593
------------------------------------------
| time/                   |              |
|    fps                  | 489          |
|    iterations           | 230          |
|    time_elapsed         | 962          |
|    total_timesteps      | 471040       |
| train/                  |              |
|    approx_kl            | 8.702045e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0001       |
|    loss                 | 2.21e+12     |
|    n_updates            | 2290         |
|    policy_gradient_loss | -7.79e-07    |
|    reward               | 117660.06    |
|    s

-------------------------------------------
| time/                   |               |
|    fps                  | 489           |
|    iterations           | 236           |
|    time_elapsed         | 987           |
|    total_timesteps      | 483328        |
| train/                  |               |
|    approx_kl            | 1.0448275e-08 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -5.68         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0001        |
|    loss                 | 2.33e+12      |
|    n_updates            | 2350          |
|    policy_gradient_loss | -1.74e-06     |
|    reward               | 103125.82     |
|    std                  | 1             |
|    value_loss           | 4.56e+12      |
-------------------------------------------
begin_total_asset:100000
end_total_asset:131240.40245249186
Sharpe:  0.48836066789357074
begin_total_asset:100000
end_to

begin_total_asset:100000
end_total_asset:157441.77817620768
Sharpe:  0.7142937294319944
begin_total_asset:100000
end_total_asset:159838.37995309642
Sharpe:  0.7304172175414688
begin_total_asset:100000
end_total_asset:175930.79055128025
Sharpe:  0.8537381955288363
------------------------------------------
| time/                   |              |
|    fps                  | 489          |
|    iterations           | 243          |
|    time_elapsed         | 1016         |
|    total_timesteps      | 497664       |
| train/                  |              |
|    approx_kl            | 7.537892e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0001       |
|    loss                 | 2.03e+12     |
|    n_updates            | 2420         |
|    policy_gradient_loss | -6.62e-07    |
|    reward               | 111243.7     |
|    

------------------------------------------
| time/                   |              |
|    fps                  | 489          |
|    iterations           | 249          |
|    time_elapsed         | 1041         |
|    total_timesteps      | 509952       |
| train/                  |              |
|    approx_kl            | 9.546056e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0001       |
|    loss                 | 1.8e+12      |
|    n_updates            | 2480         |
|    policy_gradient_loss | -2.36e-06    |
|    reward               | 93905.13     |
|    std                  | 1            |
|    value_loss           | 3.73e+12     |
------------------------------------------
begin_total_asset:100000
end_total_asset:161179.58247950525
Sharpe:  0.7384986130574486
begin_total_asset:100000
end_total_asset:135580.3694

begin_total_asset:100000
end_total_asset:132993.44414321776
Sharpe:  0.4988638560534244
begin_total_asset:100000
end_total_asset:138242.1821262821
Sharpe:  0.5502487196663891
begin_total_asset:100000
end_total_asset:173214.3289057152
Sharpe:  0.8265263184338657
------------------------------------------
| time/                   |              |
|    fps                  | 489          |
|    iterations           | 256          |
|    time_elapsed         | 1070         |
|    total_timesteps      | 524288       |
| train/                  |              |
|    approx_kl            | 8.789357e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0001       |
|    loss                 | 2.22e+12     |
|    n_updates            | 2550         |
|    policy_gradient_loss | -1.56e-06    |
|    reward               | 114852.98    |
|    st

begin_total_asset:100000
end_total_asset:173569.04308800236
Sharpe:  0.8306319220942835
------------------------------------------
| time/                   |              |
|    fps                  | 489          |
|    iterations           | 262          |
|    time_elapsed         | 1095         |
|    total_timesteps      | 536576       |
| train/                  |              |
|    approx_kl            | 9.837095e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0001       |
|    loss                 | 2.38e+12     |
|    n_updates            | 2610         |
|    policy_gradient_loss | -1.53e-06    |
|    reward               | 112163.3     |
|    std                  | 1            |
|    value_loss           | 4.76e+12     |
------------------------------------------
begin_total_asset:100000
end_total_asset:181113.8865

begin_total_asset:100000
end_total_asset:156867.127143887
Sharpe:  0.7036170881879545
begin_total_asset:100000
end_total_asset:172558.45331686456
Sharpe:  0.8372609370913937
begin_total_asset:100000
end_total_asset:155981.8916294389
Sharpe:  0.692949719510134
-------------------------------------------
| time/                   |               |
|    fps                  | 489           |
|    iterations           | 269           |
|    time_elapsed         | 1124          |
|    total_timesteps      | 550912        |
| train/                  |               |
|    approx_kl            | 1.0128133e-08 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -5.68         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0001        |
|    loss                 | 2.15e+12      |
|    n_updates            | 2680          |
|    policy_gradient_loss | -1.1e-06      |
|    reward               | 124611.4

begin_total_asset:100000
end_total_asset:136295.88902150947
Sharpe:  0.5350328681925105
begin_total_asset:100000
end_total_asset:154540.5374978911
Sharpe:  0.690767628183267
------------------------------------------
| time/                   |              |
|    fps                  | 489          |
|    iterations           | 276          |
|    time_elapsed         | 1153         |
|    total_timesteps      | 565248       |
| train/                  |              |
|    approx_kl            | 9.778887e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | -3.58e-07    |
|    learning_rate        | 0.0001       |
|    loss                 | 2.23e+12     |
|    n_updates            | 2750         |
|    policy_gradient_loss | -1.76e-06    |
|    reward               | 133472.7     |
|    std                  | 1            |
|    value_loss           | 4.5e+12      |
---------

begin_total_asset:100000
end_total_asset:158251.55072832268
Sharpe:  0.7129523714657168
begin_total_asset:100000
end_total_asset:149289.3843089804
Sharpe:  0.6526060343674035
------------------------------------------
| time/                   |              |
|    fps                  | 489          |
|    iterations           | 283          |
|    time_elapsed         | 1182         |
|    total_timesteps      | 579584       |
| train/                  |              |
|    approx_kl            | 9.022187e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 2.98e-07     |
|    learning_rate        | 0.0001       |
|    loss                 | 2.11e+12     |
|    n_updates            | 2820         |
|    policy_gradient_loss | -1.68e-06    |
|    reward               | 140985.25    |
|    std                  | 1            |
|    value_loss           | 4.21e+12     |
--------

begin_total_asset:100000
end_total_asset:174728.8141352655
Sharpe:  0.8327001012339219
begin_total_asset:100000
end_total_asset:165407.0962821413
Sharpe:  0.7746033998794045
begin_total_asset:100000
end_total_asset:175991.53961223783
Sharpe:  0.861513657456867
------------------------------------------
| time/                   |              |
|    fps                  | 490          |
|    iterations           | 290          |
|    time_elapsed         | 1211         |
|    total_timesteps      | 593920       |
| train/                  |              |
|    approx_kl            | 9.458745e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 3.58e-07     |
|    learning_rate        | 0.0001       |
|    loss                 | 2.11e+12     |
|    n_updates            | 2890         |
|    policy_gradient_loss | -1.73e-06    |
|    reward               | 160881.27    |
|    std

-----------------------------------------
| time/                   |             |
|    fps                  | 490         |
|    iterations           | 296         |
|    time_elapsed         | 1236        |
|    total_timesteps      | 606208      |
| train/                  |             |
|    approx_kl            | 9.34233e-09 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -5.68       |
|    explained_variance   | 1.79e-07    |
|    learning_rate        | 0.0001      |
|    loss                 | 1.92e+12    |
|    n_updates            | 2950        |
|    policy_gradient_loss | -1.38e-06   |
|    reward               | 135100.97   |
|    std                  | 1           |
|    value_loss           | 3.86e+12    |
-----------------------------------------
begin_total_asset:100000
end_total_asset:179447.4159277561
Sharpe:  0.8777201578402587
begin_total_asset:100000
end_total_asset:164900.91788150923
Sharpe:  0.75

begin_total_asset:100000
end_total_asset:163223.84822874743
Sharpe:  0.7587900450510793
begin_total_asset:100000
end_total_asset:169644.28919436169
Sharpe:  0.8028702690150042
begin_total_asset:100000
end_total_asset:143412.97976303808
Sharpe:  0.5959092364253309
-------------------------------------------
| time/                   |               |
|    fps                  | 490           |
|    iterations           | 303           |
|    time_elapsed         | 1266          |
|    total_timesteps      | 620544        |
| train/                  |               |
|    approx_kl            | 1.2048986e-08 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -5.68         |
|    explained_variance   | 5.96e-08      |
|    learning_rate        | 0.0001        |
|    loss                 | 1.96e+12      |
|    n_updates            | 3020          |
|    policy_gradient_loss | -3.97e-06     |
|    reward               | 1390

------------------------------------------
| time/                   |              |
|    fps                  | 490          |
|    iterations           | 309          |
|    time_elapsed         | 1291         |
|    total_timesteps      | 632832       |
| train/                  |              |
|    approx_kl            | 9.604264e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0001       |
|    loss                 | 1.98e+12     |
|    n_updates            | 3080         |
|    policy_gradient_loss | -1.07e-06    |
|    reward               | 147807.44    |
|    std                  | 1            |
|    value_loss           | 3.95e+12     |
------------------------------------------
begin_total_asset:100000
end_total_asset:172626.8355014966
Sharpe:  0.8498745680851907
begin_total_asset:100000
end_total_asset:157433.67567

begin_total_asset:100000
end_total_asset:143697.17649755298
Sharpe:  0.6007380552838915
begin_total_asset:100000
end_total_asset:122384.69461910645
Sharpe:  0.3955223286426032
begin_total_asset:100000
end_total_asset:135088.31108369562
Sharpe:  0.5208077966283022
------------------------------------------
| time/                   |              |
|    fps                  | 490          |
|    iterations           | 316          |
|    time_elapsed         | 1320         |
|    total_timesteps      | 647168       |
| train/                  |              |
|    approx_kl            | 7.741619e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0001       |
|    loss                 | 2.27e+12     |
|    n_updates            | 3150         |
|    policy_gradient_loss | -4.92e-07    |
|    reward               | 144895.73    |
|    

-----------------------------------------
| time/                   |             |
|    fps                  | 490         |
|    iterations           | 322         |
|    time_elapsed         | 1345        |
|    total_timesteps      | 659456      |
| train/                  |             |
|    approx_kl            | 9.95351e-09 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -5.68       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0001      |
|    loss                 | 1.99e+12    |
|    n_updates            | 3210        |
|    policy_gradient_loss | -1.45e-06   |
|    reward               | 127153.02   |
|    std                  | 1           |
|    value_loss           | 3.98e+12    |
-----------------------------------------
begin_total_asset:100000
end_total_asset:149074.88344167368
Sharpe:  0.6423941344202325
begin_total_asset:100000
end_total_asset:99237.301775057
Sharpe:  0.1269

begin_total_asset:100000
end_total_asset:190376.84482017488
Sharpe:  0.9473053278504863
begin_total_asset:100000
end_total_asset:138726.52694758633
Sharpe:  0.5501954730159416
begin_total_asset:100000
end_total_asset:156512.7701918088
Sharpe:  0.7103964445952146
-------------------------------------------
| time/                   |               |
|    fps                  | 490           |
|    iterations           | 329           |
|    time_elapsed         | 1374          |
|    total_timesteps      | 673792        |
| train/                  |               |
|    approx_kl            | 1.0273652e-08 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -5.68         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0001        |
|    loss                 | 2.21e+12      |
|    n_updates            | 3280          |
|    policy_gradient_loss | -2.02e-06     |
|    reward               | 11675

------------------------------------------
| time/                   |              |
|    fps                  | 490          |
|    iterations           | 335          |
|    time_elapsed         | 1399         |
|    total_timesteps      | 686080       |
| train/                  |              |
|    approx_kl            | 7.392373e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0001       |
|    loss                 | 2.26e+12     |
|    n_updates            | 3340         |
|    policy_gradient_loss | -7.94e-07    |
|    reward               | 124399.08    |
|    std                  | 1            |
|    value_loss           | 4.43e+12     |
------------------------------------------
begin_total_asset:100000
end_total_asset:151220.25208511375
Sharpe:  0.6675571513440529
begin_total_asset:100000
end_total_asset:129703.9259

begin_total_asset:100000
end_total_asset:149776.12299708845
Sharpe:  0.6482368417257041
begin_total_asset:100000
end_total_asset:155934.9462021604
Sharpe:  0.706210416997822
begin_total_asset:100000
end_total_asset:168192.5146235518
Sharpe:  0.7887208250946008
-------------------------------------------
| time/                   |               |
|    fps                  | 490           |
|    iterations           | 342           |
|    time_elapsed         | 1427          |
|    total_timesteps      | 700416        |
| train/                  |               |
|    approx_kl            | 1.0186341e-08 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -5.68         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0001        |
|    loss                 | 2.12e+12      |
|    n_updates            | 3410          |
|    policy_gradient_loss | -1.87e-06     |
|    reward               | 110011.

------------------------------------------
| time/                   |              |
|    fps                  | 490          |
|    iterations           | 348          |
|    time_elapsed         | 1452         |
|    total_timesteps      | 712704       |
| train/                  |              |
|    approx_kl            | 9.313226e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | -2.38e-07    |
|    learning_rate        | 0.0001       |
|    loss                 | 2.14e+12     |
|    n_updates            | 3470         |
|    policy_gradient_loss | -1.06e-06    |
|    reward               | 120330.92    |
|    std                  | 1            |
|    value_loss           | 4.34e+12     |
------------------------------------------
begin_total_asset:100000
end_total_asset:147046.27297451787
Sharpe:  0.6314933190848688
begin_total_asset:100000
end_total_asset:157472.1134

begin_total_asset:100000
end_total_asset:162462.39256535936
Sharpe:  0.7471945200085157
begin_total_asset:100000
end_total_asset:154480.9445219372
Sharpe:  0.6905602838923552
begin_total_asset:100000
end_total_asset:158626.9728787126
Sharpe:  0.7221175039355334
-----------------------------------------
| time/                   |             |
|    fps                  | 490         |
|    iterations           | 355         |
|    time_elapsed         | 1481        |
|    total_timesteps      | 727040      |
| train/                  |             |
|    approx_kl            | 8.96398e-09 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -5.68       |
|    explained_variance   | -3.58e-07   |
|    learning_rate        | 0.0001      |
|    loss                 | 2.28e+12    |
|    n_updates            | 3540        |
|    policy_gradient_loss | -1.81e-06   |
|    reward               | 130943.34   |
|    std                

-------------------------------------------
| time/                   |               |
|    fps                  | 490           |
|    iterations           | 361           |
|    time_elapsed         | 1506          |
|    total_timesteps      | 739328        |
| train/                  |               |
|    approx_kl            | 1.0681106e-08 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -5.68         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0001        |
|    loss                 | 2.01e+12      |
|    n_updates            | 3600          |
|    policy_gradient_loss | -2.44e-06     |
|    reward               | 94296.695     |
|    std                  | 1             |
|    value_loss           | 3.99e+12      |
-------------------------------------------
begin_total_asset:100000
end_total_asset:164547.3032131341
Sharpe:  0.7698655993236264
begin_total_asset:100000
end_tota

begin_total_asset:100000
end_total_asset:155763.70308189216
Sharpe:  0.6989136255175987
begin_total_asset:100000
end_total_asset:205171.8151776215
Sharpe:  1.0384660479008694
begin_total_asset:100000
end_total_asset:135829.52213077663
Sharpe:  0.524889693288529
-------------------------------------------
| time/                   |               |
|    fps                  | 490           |
|    iterations           | 368           |
|    time_elapsed         | 1535          |
|    total_timesteps      | 753664        |
| train/                  |               |
|    approx_kl            | 1.0972144e-08 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -5.68         |
|    explained_variance   | 1.79e-07      |
|    learning_rate        | 0.0001        |
|    loss                 | 2.1e+12       |
|    n_updates            | 3670          |
|    policy_gradient_loss | -2.53e-06     |
|    reward               | 118224

------------------------------------------
| time/                   |              |
|    fps                  | 490          |
|    iterations           | 374          |
|    time_elapsed         | 1560         |
|    total_timesteps      | 765952       |
| train/                  |              |
|    approx_kl            | 8.934876e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 2.98e-07     |
|    learning_rate        | 0.0001       |
|    loss                 | 2.18e+12     |
|    n_updates            | 3730         |
|    policy_gradient_loss | -1.45e-06    |
|    reward               | 100183.39    |
|    std                  | 1            |
|    value_loss           | 4.28e+12     |
------------------------------------------
begin_total_asset:100000
end_total_asset:147466.7566208081
Sharpe:  0.6281453161087397
begin_total_asset:100000
end_total_asset:167848.44060

begin_total_asset:100000
end_total_asset:144434.55049488417
Sharpe:  0.6036130588291659
begin_total_asset:100000
end_total_asset:162116.38242020522
Sharpe:  0.753382182895307
begin_total_asset:100000
end_total_asset:144213.25434850762
Sharpe:  0.6103679822771766
------------------------------------------
| time/                   |              |
|    fps                  | 490          |
|    iterations           | 381          |
|    time_elapsed         | 1589         |
|    total_timesteps      | 780288       |
| train/                  |              |
|    approx_kl            | 8.614734e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 1.19e-07     |
|    learning_rate        | 0.0001       |
|    loss                 | 2.49e+12     |
|    n_updates            | 3800         |
|    policy_gradient_loss | -8.29e-07    |
|    reward               | 125145.96    |
|    s

------------------------------------------
| time/                   |              |
|    fps                  | 491          |
|    iterations           | 387          |
|    time_elapsed         | 1614         |
|    total_timesteps      | 792576       |
| train/                  |              |
|    approx_kl            | 9.400537e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 2.98e-07     |
|    learning_rate        | 0.0001       |
|    loss                 | 1.91e+12     |
|    n_updates            | 3860         |
|    policy_gradient_loss | -1.34e-06    |
|    reward               | 110024.72    |
|    std                  | 1            |
|    value_loss           | 3.73e+12     |
------------------------------------------
begin_total_asset:100000
end_total_asset:193998.44580261863
Sharpe:  0.978490209229538
begin_total_asset:100000
end_total_asset:143397.55251

begin_total_asset:100000
end_total_asset:179060.06308428888
Sharpe:  0.8726432816276551
begin_total_asset:100000
end_total_asset:147662.9826521135
Sharpe:  0.6406778063783093
begin_total_asset:100000
end_total_asset:166963.29154119416
Sharpe:  0.7728488417534671
------------------------------------------
| time/                   |              |
|    fps                  | 491          |
|    iterations           | 394          |
|    time_elapsed         | 1643         |
|    total_timesteps      | 806912       |
| train/                  |              |
|    approx_kl            | 8.789357e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0001       |
|    loss                 | 1.97e+12     |
|    n_updates            | 3930         |
|    policy_gradient_loss | -4.66e-07    |
|    reward               | 127396.83    |
|    s

------------------------------------------
| time/                   |              |
|    fps                  | 491          |
|    iterations           | 400          |
|    time_elapsed         | 1668         |
|    total_timesteps      | 819200       |
| train/                  |              |
|    approx_kl            | 9.662472e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0001       |
|    loss                 | 2.29e+12     |
|    n_updates            | 3990         |
|    policy_gradient_loss | -1.46e-06    |
|    reward               | 110979.766   |
|    std                  | 1            |
|    value_loss           | 4.65e+12     |
------------------------------------------
begin_total_asset:100000
end_total_asset:169741.73231706346
Sharpe:  0.8079495259860366
begin_total_asset:100000
end_total_asset:148562.7497

begin_total_asset:100000
end_total_asset:189268.22476473392
Sharpe:  0.9432117933242846
begin_total_asset:100000
end_total_asset:165886.8610145206
Sharpe:  0.7780608541496283
begin_total_asset:100000
end_total_asset:149243.27142536317
Sharpe:  0.6427806467037337
------------------------------------------
| time/                   |              |
|    fps                  | 491          |
|    iterations           | 407          |
|    time_elapsed         | 1697         |
|    total_timesteps      | 833536       |
| train/                  |              |
|    approx_kl            | 8.993084e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 1.19e-07     |
|    learning_rate        | 0.0001       |
|    loss                 | 2.35e+12     |
|    n_updates            | 4060         |
|    policy_gradient_loss | -1.89e-06    |
|    reward               | 135806.53    |
|    s

begin_total_asset:100000
end_total_asset:169272.09123297164
Sharpe:  0.8041129093385057
begin_total_asset:100000
end_total_asset:150372.28174768895
Sharpe:  0.6544878499633588
------------------------------------------
| time/                   |              |
|    fps                  | 491          |
|    iterations           | 414          |
|    time_elapsed         | 1726         |
|    total_timesteps      | 847872       |
| train/                  |              |
|    approx_kl            | 9.400537e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0001       |
|    loss                 | 2.19e+12     |
|    n_updates            | 4130         |
|    policy_gradient_loss | -9.79e-07    |
|    reward               | 168454.2     |
|    std                  | 1            |
|    value_loss           | 4.55e+12     |
-------

begin_total_asset:100000
end_total_asset:172517.5278305483
Sharpe:  0.838893019376578
begin_total_asset:100000
end_total_asset:156122.57029226108
Sharpe:  0.7075284492982233
------------------------------------------
| time/                   |              |
|    fps                  | 491          |
|    iterations           | 421          |
|    time_elapsed         | 1755         |
|    total_timesteps      | 862208       |
| train/                  |              |
|    approx_kl            | 9.982614e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | -3.58e-07    |
|    learning_rate        | 0.0001       |
|    loss                 | 2.38e+12     |
|    n_updates            | 4200         |
|    policy_gradient_loss | -1.92e-06    |
|    reward               | 98864.05     |
|    std                  | 1            |
|    value_loss           | 4.83e+12     |
---------

begin_total_asset:100000
end_total_asset:160214.122520035
Sharpe:  0.7311874046040072
begin_total_asset:100000
end_total_asset:152960.4513088518
Sharpe:  0.6694760581063312
begin_total_asset:100000
end_total_asset:131818.40470854708
Sharpe:  0.48772395828973636
-------------------------------------------
| time/                   |               |
|    fps                  | 491           |
|    iterations           | 428           |
|    time_elapsed         | 1784          |
|    total_timesteps      | 876544        |
| train/                  |               |
|    approx_kl            | 1.0477379e-08 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -5.68         |
|    explained_variance   | 3.58e-07      |
|    learning_rate        | 0.0001        |
|    loss                 | 2.3e+12       |
|    n_updates            | 4270          |
|    policy_gradient_loss | -3.19e-06     |
|    reward               | 133520

------------------------------------------
| time/                   |              |
|    fps                  | 490          |
|    iterations           | 434          |
|    time_elapsed         | 1810         |
|    total_timesteps      | 888832       |
| train/                  |              |
|    approx_kl            | 7.566996e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 3.58e-07     |
|    learning_rate        | 0.0001       |
|    loss                 | 2.08e+12     |
|    n_updates            | 4330         |
|    policy_gradient_loss | -3.9e-07     |
|    reward               | 131744.8     |
|    std                  | 1            |
|    value_loss           | 4.05e+12     |
------------------------------------------
begin_total_asset:100000
end_total_asset:171446.04964698432
Sharpe:  0.8222260561009386
begin_total_asset:100000
end_total_asset:143038.2830

begin_total_asset:100000
end_total_asset:147948.91339075944
Sharpe:  0.6343805235965095
begin_total_asset:100000
end_total_asset:143640.77159385898
Sharpe:  0.5986142990172975
begin_total_asset:100000
end_total_asset:208440.06984754273
Sharpe:  1.0783164437581227
-------------------------------------------
| time/                   |               |
|    fps                  | 490           |
|    iterations           | 441           |
|    time_elapsed         | 1839          |
|    total_timesteps      | 903168        |
| train/                  |               |
|    approx_kl            | 1.0273652e-08 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -5.68         |
|    explained_variance   | 1.79e-07      |
|    learning_rate        | 0.0001        |
|    loss                 | 2.12e+12      |
|    n_updates            | 4400          |
|    policy_gradient_loss | -2.77e-06     |
|    reward               | 1426

------------------------------------------
| time/                   |              |
|    fps                  | 490          |
|    iterations           | 447          |
|    time_elapsed         | 1865         |
|    total_timesteps      | 915456       |
| train/                  |              |
|    approx_kl            | 8.993084e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 2.98e-07     |
|    learning_rate        | 0.0001       |
|    loss                 | 2.15e+12     |
|    n_updates            | 4460         |
|    policy_gradient_loss | -1.61e-06    |
|    reward               | 135158.67    |
|    std                  | 1            |
|    value_loss           | 4.14e+12     |
------------------------------------------
begin_total_asset:100000
end_total_asset:154434.10576950043
Sharpe:  0.6858538648328686
begin_total_asset:100000
end_total_asset:144525.7092

begin_total_asset:100000
end_total_asset:151675.02642582278
Sharpe:  0.666674987561476
begin_total_asset:100000
end_total_asset:133074.50208428878
Sharpe:  0.5048160067602195
begin_total_asset:100000
end_total_asset:151023.5599382307
Sharpe:  0.6633043046447133
-------------------------------------------
| time/                   |               |
|    fps                  | 490           |
|    iterations           | 454           |
|    time_elapsed         | 1894          |
|    total_timesteps      | 929792        |
| train/                  |               |
|    approx_kl            | 7.3050614e-09 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -5.68         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0001        |
|    loss                 | 2.16e+12      |
|    n_updates            | 4530          |
|    policy_gradient_loss | -5.08e-07     |
|    reward               | 125958

-------------------------------------------
| time/                   |               |
|    fps                  | 490           |
|    iterations           | 460           |
|    time_elapsed         | 1919          |
|    total_timesteps      | 942080        |
| train/                  |               |
|    approx_kl            | 1.0797521e-08 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -5.68         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 0.0001        |
|    loss                 | 1.74e+12      |
|    n_updates            | 4590          |
|    policy_gradient_loss | -3.22e-06     |
|    reward               | 133276.94     |
|    std                  | 1             |
|    value_loss           | 3.34e+12      |
-------------------------------------------
begin_total_asset:100000
end_total_asset:147485.80143542794
Sharpe:  0.63600072627849
begin_total_asset:100000
end_total

begin_total_asset:100000
end_total_asset:180767.56248886397
Sharpe:  0.889598393701881
begin_total_asset:100000
end_total_asset:140643.25073663474
Sharpe:  0.5761956013356071
begin_total_asset:100000
end_total_asset:174911.19736074298
Sharpe:  0.8478652408014601
------------------------------------------
| time/                   |              |
|    fps                  | 490          |
|    iterations           | 467          |
|    time_elapsed         | 1948         |
|    total_timesteps      | 956416       |
| train/                  |              |
|    approx_kl            | 8.032657e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0001       |
|    loss                 | 2.19e+12     |
|    n_updates            | 4660         |
|    policy_gradient_loss | -5.82e-07    |
|    reward               | 116251.07    |
|    s

------------------------------------------
| time/                   |              |
|    fps                  | 490          |
|    iterations           | 473          |
|    time_elapsed         | 1973         |
|    total_timesteps      | 968704       |
| train/                  |              |
|    approx_kl            | 6.693881e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0001       |
|    loss                 | 2.29e+12     |
|    n_updates            | 4720         |
|    policy_gradient_loss | -3.57e-07    |
|    reward               | 125587.24    |
|    std                  | 1            |
|    value_loss           | 4.52e+12     |
------------------------------------------
begin_total_asset:100000
end_total_asset:188908.758946689
Sharpe:  0.9458465157662833
begin_total_asset:100000
end_total_asset:153039.844190

begin_total_asset:100000
end_total_asset:147683.19745685608
Sharpe:  0.633652089103628
begin_total_asset:100000
end_total_asset:171482.03491258057
Sharpe:  0.8254320738772173
begin_total_asset:100000
end_total_asset:166386.54775121994
Sharpe:  0.7830960586957251
--------------------------------------------
| time/                   |                |
|    fps                  | 490            |
|    iterations           | 480            |
|    time_elapsed         | 2002           |
|    total_timesteps      | 983040         |
| train/                  |                |
|    approx_kl            | 1.00408215e-08 |
|    clip_fraction        | 0              |
|    clip_range           | 0.2            |
|    entropy_loss         | -5.68          |
|    explained_variance   | 0              |
|    learning_rate        | 0.0001         |
|    loss                 | 2.17e+12       |
|    n_updates            | 4790           |
|    policy_gradient_loss | -9.71e-07      |
|    reward      

-------------------------------------------
| time/                   |               |
|    fps                  | 491           |
|    iterations           | 486           |
|    time_elapsed         | 2026          |
|    total_timesteps      | 995328        |
| train/                  |               |
|    approx_kl            | 1.0273652e-08 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -5.68         |
|    explained_variance   | 5.96e-08      |
|    learning_rate        | 0.0001        |
|    loss                 | 1.97e+12      |
|    n_updates            | 4850          |
|    policy_gradient_loss | -1.65e-06     |
|    reward               | 113130.21     |
|    std                  | 1             |
|    value_loss           | 3.88e+12      |
-------------------------------------------
begin_total_asset:100000
end_total_asset:157853.7280768776
Sharpe:  0.7196904027783227
begin_total_asset:100000
end_tota

<stable_baselines3.ppo.ppo.PPO at 0x1ad1e794c48>

In [25]:
trade = data_split(df, '2022-01-01', '2022-01-09') # '2021-12-20', '2023-01-01'
e_trade_gym = StockPortfolioEnv(df = trade, **env_kwargs)

In [26]:
import torch
%matplotlib inline
import plotly.express as px

In [27]:
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models
import pandas as pd
from pypfopt import EfficientFrontier
from pypfopt import expected_returns
from pypfopt import objective_functions
unique_tic = trade.tic.unique()
unique_trade_date = trade.date.unique()

In [39]:
from pyfolio import timeseries

df_daily_return_ppo, df_actions_ppo = DRLAgent.DRL_prediction(model=model_ppo, environment=e_trade_gym)
time_ind = pd.Series(df_daily_return_ppo.date)
ppo_cumpod =(df_daily_return_ppo.daily_return + 1).cumprod() - 1
DRL_strat_ppo = convert_daily_return_to_pyfolio_ts(df_daily_return_ppo)

perf_func = timeseries.perf_stats 

perf_stats_all_ppo = perf_func(returns=DRL_strat_ppo, 
                               factor_returns=DRL_strat_ppo, 
                               positions=None, transactions=None, turnover_denom="AGB")

begin_total_asset:100000
end_total_asset:104816.57097641527
Sharpe:  17.621255298064995
hit end!
begin_total_asset:100000
end_total_asset:103857.8987910049
Sharpe:  14.683056946413192
hit end!
begin_total_asset:100000
end_total_asset:103802.51261331959
Sharpe:  15.690859111164913
hit end!
begin_total_asset:100000
end_total_asset:104538.84880694697
Sharpe:  19.298962222094822
hit end!
begin_total_asset:100000
end_total_asset:103648.74320888754
Sharpe:  17.797315989519284
hit end!
begin_total_asset:100000
end_total_asset:106073.68777071327
Sharpe:  23.58452376563577
hit end!
begin_total_asset:100000
end_total_asset:105689.83159249542
Sharpe:  22.98023863621066
hit end!
begin_total_asset:100000
end_total_asset:103840.9803000659
Sharpe:  13.413210819828803
hit end!
begin_total_asset:100000
end_total_asset:104593.19311717042
Sharpe:  18.212104249784414
hit end!
begin_total_asset:100000
end_total_asset:104096.4428924973
Sharpe:  14.48664870429435
hit end!
begin_total_asset:100000
end_total_a