In [1]:
from src.stok.finrl_slim.preprocessing.preprocessors import Preprocessor
from src.stok.finrl_slim.trainer import Trainer
from src.stok.finrl_slim.optimiser import SharpeOptimiser
from src.stok.finrl_slim.env import StockTradingEnv

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
TRAIN_START_DATE = '2019-01-01'
TRAIN_END_DATE = '2021-10-01'
TRADE_START_DATE = '2021-10-01'
TRADE_END_DATE = '2023-03-01'
TICKER_LIST = ['AAPL']
stock_dims = len(TICKER_LIST)
state_dims = 1 + (2 * stock_dims)

In [3]:
p = Preprocessor(TICKER_LIST, TRAIN_START_DATE, TRAIN_END_DATE, TRADE_START_DATE, TRADE_END_DATE)
train_data, trade_data = p.get_train_test()

loading processed data from cache...


In [4]:
print("TRAIN SHAPE: ", train_data.shape)
print("TRADE SHAPE: ", trade_data.shape)

TRAIN SHAPE:  (693, 18)
TRADE SHAPE:  (353, 18)


In [5]:
# initialise single stock (GOOG) environment
train_env = StockTradingEnv(
    df=train_data,
    stock_dims=1,
    hmax=100,
    initial_amount=10000,
    num_stock_shares=[0],
    buy_cost_pct=[0.001],
    sell_cost_pct=[0.001],
    tech_indicator_list=[],
)
trade_env = StockTradingEnv(
    df=train_data,
    stock_dims=1,
    hmax=100,
    initial_amount=10000,
    num_stock_shares=[0],
    buy_cost_pct=[0.001],
    sell_cost_pct=[0.001],
    tech_indicator_list=[],
    turbulence_threshold=70,
    risk_indicator_col='vix',
)

In [6]:
trainer = Trainer(
    train_env=train_env,
    eval_env=trade_env,
    train_id='trial_1',
    model_name='ppo',
    checkpoint_save_freq=2000,
    use_sb_callbacks=True,
    use_mlflow=False,
)
# optimiser = SharpeOptimiser(
#     study_name='test-optimisation',
#     train_env=train_env,
#     eval_env=trade_env,
#     train_id='trial_1',
#     model_name='ppo',
#     save_freq=250,
# )

In [7]:
trainer.train(total_timesteps=1000)

Using cuda device
Logging to /home/tscelsi/Programming/stok/results/ppo/AAPL/trial_1/log
Registering 2 training callbacks...




Eval num_timesteps=100, episode_reward=0.00 +/- 0.00
Episode length: 693.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 693      |
|    mean_reward     | 0        |
| time/              |          |
|    total_timesteps | 100      |
---------------------------------
New best mean reward!
LogEvalMetricsCallback CALLED!




Eval num_timesteps=200, episode_reward=0.00 +/- 0.00
Episode length: 693.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 693      |
|    mean_reward     | 0        |
| time/              |          |
|    total_timesteps | 200      |
---------------------------------
LogEvalMetricsCallback CALLED!




Eval num_timesteps=300, episode_reward=0.00 +/- 0.00
Episode length: 693.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 693      |
|    mean_reward     | 0        |
| time/              |          |
|    total_timesteps | 300      |
---------------------------------
LogEvalMetricsCallback CALLED!




Eval num_timesteps=400, episode_reward=0.00 +/- 0.00
Episode length: 693.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 693      |
|    mean_reward     | 0        |
| time/              |          |
|    total_timesteps | 400      |
---------------------------------
LogEvalMetricsCallback CALLED!




KeyboardInterrupt: 

In [12]:
from src.stok.finrl_slim.config import RESULTS_DIR
from stable_baselines3.common.logger import configure
tmp_path = RESULTS_DIR + '/ppo'
new_logger_ppo = configure(tmp_path, ["stdout", "csv"])
model_ppo.set_logger(new_logger_ppo)

Logging to results/ppo
