# Basic experiment

In [1]:
from LocalVolEnv import LocalVol
import gymnasium as gym # type: ignore
from stable_baselines3 import PPO # type: ignore
from stable_baselines3.common.evaluation import evaluate_policy # type: ignore
from stable_baselines3.common.vec_env import DummyVecEnv # type: ignore
from stable_baselines3.common.env_checker import check_env # type: ignore
from stable_baselines3.common.monitor import Monitor # type: ignore
import numpy as np # type: ignore
import random
import os




In [98]:
# Low-volatility setup: build the Local Volatility environment with Black-Scholes
# dynamics, load the matching pre-trained PPO agent, and print its greedy action
# for two observations.

Dynamics  = 'BS'
star_time = 0  # NOTE(review): not used in this cell; name looks like a typo for "start_time"
T = 1
dT = 1/252
r = 0          # NOTE(review): not used in this cell
mu = [0.05,-0.02]     # one entry per regime (len(mu) is written into the model name as n_regimes)
sigma = [0.01,0.01]   # low-volatility case
P = [[0.9,0.0,0.0,0.1],[0.9,0.0,0.0,0.1],[0.5,0.0,0.0,0.5],[0.5,0.0,0.0,0.5]]  # presumably a transition matrix - confirm against LocalVolEnv
cuda = True #Use cuda device and larger network architecture (3 layers, 256 neurons per layer) and larger batch size

SEED = 10

LVol = LocalVol(Dynamics = Dynamics, T = T, dT = dT, mu = mu, sigma = sigma, P = P)
# random.seed() returns None, so nesting it inside LVol.seed(...) handed the
# environment seed=None. Seed the stdlib RNG and the environment separately.
random.seed(SEED)
LVol.seed(seed=SEED)

# NOTE(review): max_episode_steps=T is 1 here, so any rollout stepped through
# `env` is truncated after a single step - confirm this is intended.
env = gym.wrappers.TimeLimit(LVol, max_episode_steps=T)
env = Monitor(env, allow_early_resets=True)

steps = 1000000

# The saved model file name encodes the step count and the per-regime parameters (in percent).
base_path = os.path.dirname(os.getcwd())
path_folder = os.path.join(base_path,'Optimal-Trading-Frequency','BS_PPO') # folder holding the saved BS_PPO models
path = f"{path_folder}/BS_PPO_{steps}_n_regimes_{len(mu)}"

for k in range(len(mu)):
    # int(x*100) truncates; it must stay in sync with however the model file was named when saved.
    path += f"_mu[{k}]={int(mu[k]*100)}_P[{k}]={int(P[0][k]*100)}_sigma[{k}]={int(sigma[k]*100)}"

if cuda:
    path += 'cuda'

model = PPO.load(path, env = DummyVecEnv([lambda: env]), print_system_info=False)

# Print best action for each signal
# Observation 0 is shown next to mu[0] and observation 2 next to mu[1] -
# presumably the signal indices of the two regimes; verify against LocalVolEnv.
print(mu[0],model.predict([0], deterministic = True))
print(mu[1],model.predict([2], deterministic = True))


0.05 (array([2]), None)
-0.02 (array([12]), None)


In [97]:
# Mid-volatility setup: build the Local Volatility environment with Black-Scholes
# dynamics, load the matching pre-trained PPO agent, and print its greedy action
# for two observations.

Dynamics  = 'BS'
star_time = 0  # NOTE(review): not used in this cell; name looks like a typo for "start_time"
T = 1
dT = 1/252
r = 0          # NOTE(review): not used in this cell
mu = [0.05,-0.02]   # one entry per regime (len(mu) is written into the model name as n_regimes)
sigma = [0.1,0.1]   # mid-volatility case
P = [[0.9,0.0,0.0,0.1],[0.9,0.0,0.0,0.1],[0.5,0.0,0.0,0.5],[0.5,0.0,0.0,0.5]]  # presumably a transition matrix - confirm against LocalVolEnv
cuda = True #Use cuda device and larger network architecture (3 layers, 256 neurons per layer) and larger batch size

SEED = 10

LVol = LocalVol(Dynamics = Dynamics, T = T, dT = dT, mu = mu, sigma = sigma, P = P)
# random.seed() returns None, so nesting it inside LVol.seed(...) handed the
# environment seed=None. Seed the stdlib RNG and the environment separately.
random.seed(SEED)
LVol.seed(seed=SEED)

# NOTE(review): max_episode_steps=T is 1 here, so any rollout stepped through
# `env` is truncated after a single step - confirm this is intended.
env = gym.wrappers.TimeLimit(LVol, max_episode_steps=T)
env = Monitor(env, allow_early_resets=True)

steps = 1000000

# The saved model file name encodes the step count and the per-regime parameters (in percent).
base_path = os.path.dirname(os.getcwd())
path_folder = os.path.join(base_path,'Optimal-Trading-Frequency','BS_PPO') # folder holding the saved BS_PPO models
path = f"{path_folder}/BS_PPO_{steps}_n_regimes_{len(mu)}"

for k in range(len(mu)):
    # int(x*100) truncates; it must stay in sync with however the model file was named when saved.
    path += f"_mu[{k}]={int(mu[k]*100)}_P[{k}]={int(P[0][k]*100)}_sigma[{k}]={int(sigma[k]*100)}"

if cuda:
    path += 'cuda'

model = PPO.load(path, env = DummyVecEnv([lambda: env]), print_system_info=False)


# Print best action for each signal
# Observation 0 is shown next to mu[0] and observation 2 next to mu[1] -
# presumably the signal indices of the two regimes; verify against LocalVolEnv.
print(mu[0],model.predict([0], deterministic = True))
print(mu[1],model.predict([2], deterministic = True))

0.05 (array([5]), None)
-0.02 (array([11]), None)


In [94]:
# Run experiment: roll the loaded policy out greedily for Nepisodes episodes and
# report the mean and standard deviation of the per-episode total reward.
#
# This cell uses whichever `model`, `env` and `LVol` were defined by the most
# recently executed configuration cell, so run exactly one of them first.

Nepisodes = 100
rew = []           # total reward of each episode
act = []           # per episode: the raw model.predict() results, one per step
tradingtimes = []  # per episode: env.unwrapped.tradingtimes captured when the episode ends

for _ in range(Nepisodes):
    # reset() returns a tuple whose first element is the initial observation.
    obs = env.reset()[0]
    act.append([])
    tradingtimes.append([])
    reward_episode = 0
    while True:
        action = model.predict([obs], deterministic = True)
        # Stepping LVol directly (not `env`) bypasses the TimeLimit and Monitor
        # wrappers, so the episode only ends when LVol itself reports it.
        # NOTE(review): confirm this bypass is intentional.
        obs, reward, terminated, truncated, info = LVol.step(action[0][0])
        act[-1].append(action)
        reward_episode += reward
        if terminated or truncated:
            tradingtimes[-1].append(env.unwrapped.tradingtimes)
            rew.append(reward_episode)
            break

print(np.mean(rew),np.std(rew))

1.973001648523032e-05 9.186758404222063e-06
