In [110]:
import random

import gym
import numpy as np
import pandas as pd
import torch
import vectorbtpro as vbt
from gym import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

from vctr.data.data_loader import get_data
from vctr.features.feature_engineering import add_features


In [121]:
from typing import Optional, Tuple

import gym
import numpy as np
from gym import spaces


class TradingEnvironment(gym.Env):
    """Single-asset, long-only trading environment backed by a DataFrame.

    Each row of ``data`` is one time step. The ``'close'`` column is the
    execution price; every other column is exposed to the agent as a feature.

    Actions (``spaces.Discrete(3)``):
        0 - buy:  spend as much cash as buys whole units at the current close.
        1 - hold: do nothing.
        2 - sell: liquidate the whole position at the current close.

    Observation (1-D ``float32`` vector matching ``observation_space``):
        all non-``'close'`` columns of the current row, followed by
        ``cash / initial_balance`` and the current position size.

    Reward:
        change in portfolio value (``cash + position * close``) between the
        previous and the current step. No transaction costs are modelled here.

    Attributes:
        signals: ``(3, len(data))`` array; rows mark the steps at which a
            buy (row 0), sell (row 1) or hold (row 2) action was taken during
            the current episode.
    """

    def __init__(self, data, initial_balance):
        """
        Args:
            data: DataFrame with a ``'close'`` column plus feature columns.
            initial_balance: Cash available at the start of every episode.
        """
        super().__init__()
        self.initial_balance = initial_balance
        self.data = data
        self.signals = np.zeros((3, len(data)))  # rows: buy, sell, hold
        self.current_step = 0
        self.position = 0  # current position (number of shares)
        self.cash = initial_balance
        self.done = False

        # Define the action space: 0 - buy, 1 - hold, 2 - sell
        self.action_space = spaces.Discrete(3)

        # Define the observation space
        self.features = data.drop(['close'], axis=1).columns
        num_features = len(self.features) + 2  # Adding 2 for cash and position
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(num_features,), dtype=np.float32)

    def step(self, action):
        """Advance one row, apply ``action`` and return the transition.

        Args:
            action: 0 (buy), 1 (hold) or 2 (sell). A Python scalar or a
                0-d / single-element array is accepted.

        Returns:
            ``(obs, reward, done, truncated, info)``; ``truncated`` is always
            ``False`` and ``info`` is an empty dict.

        Raises:
            IndexError: if called after the last row has been reached.
        """
        last_index = len(self.data) - 1
        if self.current_step >= last_index:
            # Fail before touching any state instead of half-way through the update.
            raise IndexError('step() called past the end of the data; call reset() first')

        # Policies hand back numpy arrays; unwrap to a plain scalar for the comparisons below.
        action = np.asarray(action).item()

        closes = self.data['close']
        prev_price = closes.iloc[self.current_step]
        prev_portfolio_value = self.cash + self.position * prev_price

        self.current_step += 1
        price = closes.iloc[self.current_step]

        if action == 0:  # buy
            self.signals[0, self.current_step] = 1
            shares_to_buy = self.cash // price
            self.position += shares_to_buy
            self.cash -= shares_to_buy * price
        elif action == 2:  # sell
            self.signals[1, self.current_step] = 1
            self.cash += self.position * price
            self.position = 0
        else:  # hold (any other action value is treated as a no-op)
            self.signals[2, self.current_step] = 1

        current_portfolio_value = self.cash + self.position * price

        obs = self._next_observation()
        reward = float(current_portfolio_value - prev_portfolio_value)
        done = self.current_step >= last_index
        self.done = done
        truncated = False
        info = {}

        return obs, reward, done, truncated, info

    def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None) -> Tuple[np.ndarray, dict]:
        """Start a new episode at the first row with the initial cash balance.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset``. The environment
                itself draws no random numbers.
            options: Passed back unchanged in the returned info dict.

        Returns:
            ``(obs, info)`` where ``info`` is ``{'options': options}``.
        """
        if seed is not None:
            # `if seed:` would skip seed 0, and this class defines no `seed()` method.
            super().reset(seed=seed)

        self.current_step = 0
        self.done = False
        self.position = 0
        self.cash = self.initial_balance
        # Drop signals recorded by a previous episode so they do not leak into this one.
        self.signals = np.zeros((3, len(self.data)))

        obs = self._next_observation()
        return obs, {'options': options}

    def _next_observation(self):
        """Build the observation for ``current_step``.

        Returns:
            1-D ``float32`` array: feature columns, cash as a fraction of the
            initial balance, and the position size. Its shape and dtype match
            ``observation_space``.
        """
        feature_values = self.data.iloc[self.current_step].drop(['close']).values
        obs = np.hstack(
            (
                feature_values,
                [self.cash / self.initial_balance],
                [self.position],
            )
        )
        return obs.astype(np.float32)


##### **Training**

In [122]:
# Seed for PPO's pseudo-random generators so that re-running this cell is repeatable.
SEED = 42

# Load a training dataset
data = get_data('FTM', '15m')
data = add_features(data)

# Set the initial balance
initial_balance = 10000

# Create the trading environment with the training data.
# The factory builds the environment itself rather than closing over a name
# that is rebound on the same line.
train_env = DummyVecEnv([lambda: TradingEnvironment(data, initial_balance)])

# Create the PPO model: two hidden layers of 64 units with ReLU activations
policy_kwargs = dict(activation_fn=torch.nn.ReLU, net_arch=[64, 64])
model = PPO("MlpPolicy", train_env, verbose=1, policy_kwargs=policy_kwargs, seed=SEED)

# Train the PPO model
model.learn(total_timesteps=20000)

# Save the trained PPO model
model.save("ppo_trading_model")


Using cpu device
-----------------------------
| time/              |      |
|    fps             | 2581 |
|    iterations      | 1    |
|    time_elapsed    | 0    |
|    total_timesteps | 2048 |
-----------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 2207       |
|    iterations           | 2          |
|    time_elapsed         | 1          |
|    total_timesteps      | 4096       |
| train/                  |            |
|    approx_kl            | 0.64862895 |
|    clip_fraction        | 0.0911     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.0335    |
|    explained_variance   | -12.2      |
|    learning_rate        | 0.0003     |
|    loss                 | 9.27e+03   |
|    n_updates            | 10         |
|    policy_gradient_loss | 0.0189     |
|    value_loss           | 2.27e+04   |
----------------------------------------
-----------------------------------

##### **Testing**

In [123]:
initial_balance = 100 * 1000


def vectorbt_backtest(data, signals, init_cash=None, fees=0.001, freq='15min'):
    """Backtest buy/sell signals against ``data['close']`` with vectorbt.

    Args:
        data: DataFrame with a ``'close'`` column; its index is reused for the
            signal series so entries/exits line up with prices row by row.
        signals: Array-like whose row 0 holds entry flags and row 1 holds exit
            flags, one value per row of ``data``.
        init_cash: Starting cash. Defaults to the module-level
            ``initial_balance`` when omitted.
        fees: Proportional fee per order (0.001 = 0.1%).
        freq: Bar frequency passed to vectorbt.

    Returns:
        The portfolio object returned by ``vbt.Portfolio.from_signals``.
    """
    if init_cash is None:
        init_cash = initial_balance

    # Give the signals the same index as the prices instead of a default RangeIndex.
    entries = pd.Series(np.asarray(signals[0]), index=data.index).astype(bool)
    exits = pd.Series(np.asarray(signals[1]), index=data.index).astype(bool)

    portfolio = vbt.Portfolio.from_signals(
        data['close'],  # price data
        entries,
        exits,
        init_cash=init_cash,
        fees=fees,
        freq=freq,  # '15min' rather than the deprecated 't' alias
    )
    return portfolio


In [124]:
# Load the evaluation dataset.
# NOTE(review): this is the same get_data('FTM', '15m') call used for training,
# so the backtest below is in-sample. Use a held-out period or symbol to measure
# generalisation.
data = get_data('FTM', '15m')
data = add_features(data)

# Create a new trading environment with the evaluation data
test_env = TradingEnvironment(data, initial_balance)

# Load the saved model
trained_model = PPO.load('ppo_trading_model')

obs, _ = test_env.reset()
done = False

while not done:
    # deterministic=True: evaluate the learned policy itself, not a random sample from it.
    action, _ = trained_model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = test_env.step(action)
    done = terminated or truncated

# Perform backtesting with vectorbt on the signals recorded by the environment
vectorbt_portfolio = vectorbt_backtest(data, test_env.signals)

# Performance metrics for this run are computed in the next cell via `vectorbt_portfolio.stats()`.

In [125]:
# Summary statistics of the vectorbt backtest (returns, drawdown, order/trade counts).
vectorbt_portfolio.stats()

Start                         2022-01-03 00:00:00
End                           2023-03-17 11:00:00
Period                          432 days 09:30:00
Start Value                              100000.0
Min Value                             6732.637538
Max Value                           136990.812153
End Value                            19088.047516
Total Return [%]                       -80.911952
Benchmark Return [%]                   -80.931806
Total Time Exposure [%]                 99.997591
Max Gross Exposure [%]                      100.0
Max Drawdown [%]                        95.085336
Max Drawdown Duration           418 days 09:15:00
Total Orders                                    1
Total Fees Paid                           99.9001
Total Trades                                    1
Win Rate [%]                                  NaN
Best Trade [%]                                NaN
Worst Trade [%]                               NaN
Avg Winning Trade [%]                         NaN
