# Basic experiment

In [1]:
from LocalVolEnv import LocalVol
import gymnasium as gym # type: ignore
from stable_baselines3 import PPO # type: ignore
from stable_baselines3.common.evaluation import evaluate_policy # type: ignore
from stable_baselines3.common.vec_env import DummyVecEnv # type: ignore
from stable_baselines3.common.env_checker import check_env # type: ignore
from stable_baselines3.common.monitor import Monitor # type: ignore
import numpy as np # type: ignore
import random
import os




In [7]:
N = 10
Dynamics  = 'BS'
star_time = 0
T = 1
dT = 1/252
r = 0
mu = [0.05,-0.02]
sigma = [0.1,0.1]
P = [[0.9,0.0,0.0,0.1],[0.9,0.0,0.0,0.1],[0.5,0.0,0.0,0.5],[0.5,0.0,0.0,0.5]]
cuda = True #Use cuda device and larger network architecture (3 layers, 256 neurons per layer) and larger batch size

LVol = LocalVol(Dynamics = Dynamics, T = T, dT = dT, mu = mu, sigma = sigma, P = P)
LVol.seed(seed=random.seed(10))

env = gym.wrappers.TimeLimit(LVol, max_episode_steps=T)
env = Monitor(env, allow_early_resets=True)

steps = 1000000

base_path = os.path.dirname(os.getcwd()) 
path_folder = os.path.join(base_path,'Optimal-Trading-Frequency','BS_PPO') # PATH to the BS_PPO_Models folder
path = f"{path_folder}/BS_PPO_{str(steps)}_n_regimes_{str(len(mu))}"

for k in range(len(mu)):
    path += f"_mu[{str(k)}]={str(int(mu[k]*100))}+P[{str(k)}]={str(int(P[0][k]*100))}"

if cuda:
    path += 'cuda'

model = PPO.load(path, env = DummyVecEnv([lambda: env]), print_system_info=True)

Nepisodes = 100
rew = []
act = []
tradingtimes = []

for i in range(Nepisodes):
    obs = env.reset()
    obs = obs[0]
    cont = True
    i = 0
    act.append([])
    tradingtimes.append([])
    reward_episode = 0
    while cont:
        action = model.predict([obs], deterministic = True)
        obs, reward, terminated, truncated, info = LVol.step(action[0][0])
        act[-1].append(action)
        reward_episode += reward
        i += 1
        if any([terminated, truncated]):
            cont = False
            tradingtimes[-1].append(env.unwrapped.tradingtimes)
            rew.append(reward_episode)

print(np.mean(rew),np.std(rew))

== CURRENT SYSTEM INFO ==
- OS: Windows-11-10.0.22631-SP0 10.0.22631
- Python: 3.12.9
- Stable-Baselines3: 2.6.0
- PyTorch: 2.6.0+cu126
- GPU Enabled: True
- Numpy: 2.1.3
- Cloudpickle: 3.1.1
- Gymnasium: 1.1.1
- OpenAI Gym: 0.22.0

== SAVED MODEL SYSTEM INFO ==
- OS: Windows-11-10.0.22631-SP0 10.0.22631
- Python: 3.12.9
- Stable-Baselines3: 2.6.0
- PyTorch: 2.6.0+cu126
- GPU Enabled: True
- Numpy: 2.1.3
- Cloudpickle: 3.1.1
- Gymnasium: 1.1.1
- OpenAI Gym: 0.22.0

1.9887029727417184e-05 9.634641353624996e-06


In [17]:
obs = env.observation_space.sample()
print(0,model.predict([0], deterministic = True))
print(1,model.predict([1], deterministic = True))
print(2,model.predict([2], deterministic = True))
print(3,model.predict([3], deterministic = True))

0 (array([5]), None)
1 (array([0]), None)
2 (array([11]), None)
3 (array([0]), None)


In [6]:
steps = 100000

path_folder = f"C:/Users/yoshi/OneDrive/Desktop/Research/Optimal Trading Frequency/Optimal-Trading-Frequency/BS_PPO" # PATH to the BS_PPO_Models folder
path = f"{path_folder}/BS_PPO_{str(steps)}_n_regimes_{str(len(mu))}"
for k in range(len(mu)):
    path += f"mu[{str(k)}]_mu{str(int(mu[k]*100))}"

model = PPO.load(path, env = DummyVecEnv([lambda: env]), print_system_info=True)

== CURRENT SYSTEM INFO ==
- OS: Windows-11-10.0.26100-SP0 10.0.26100
- Python: 3.12.9
- Stable-Baselines3: 2.3.2
- PyTorch: 2.6.0+cpu
- GPU Enabled: False
- Numpy: 2.0.2
- Cloudpickle: 3.1.1
- Gymnasium: 0.29.1
- OpenAI Gym: 0.26.2

== SAVED MODEL SYSTEM INFO ==
- OS: Windows-11-10.0.26100-SP0 10.0.26100
- Python: 3.12.9
- Stable-Baselines3: 2.3.2
- PyTorch: 2.6.0+cpu
- GPU Enabled: False
- Numpy: 2.0.2
- Cloudpickle: 3.1.1
- Gymnasium: 0.29.1
- OpenAI Gym: 0.26.2



In [31]:
Nepisodes = 100
rew = []
act = []
tradingtimes = []

for i in range(Nepisodes):
    obs = env.reset()
    obs = obs[0]
    
    #obs = [[obs[0][i] for i in range(len(obs[0]))]]
    cont = True
    i = 0
    act.append([])
    tradingtimes.append([])
    reward_episode = 0
    while cont:
        action = model.predict([obs], deterministic = True)
        obs, reward, terminated, truncated, info = LVol.step(action[0][0])
        act[-1].append(action)
        reward_episode += reward
        i += 1
        if any([terminated, truncated]):
            cont = False
            tradingtimes[-1].append(env.unwrapped.tradingtimes)
            rew.append(reward_episode)

print(np.mean(rew),np.std(rew))


0.0005033821799006766 6.204861222379161e-05


In [32]:
print(act)

[[(array([250]), None), (array([250]), None)], [(array([250]), None), (array([250]), None)], [(array([250]), None), (array([250]), None)], [(array([250]), None), (array([250]), None)], [(array([250]), None), (array([250]), None)], [(array([250]), None), (array([250]), None)], [(array([250]), None), (array([250]), None)], [(array([250]), None), (array([250]), None)], [(array([250]), None), (array([250]), None)], [(array([250]), None), (array([250]), None)], [(array([250]), None), (array([250]), None)], [(array([250]), None), (array([250]), None)], [(array([250]), None), (array([250]), None)], [(array([250]), None), (array([250]), None)], [(array([250]), None), (array([250]), None)], [(array([250]), None), (array([250]), None)], [(array([250]), None), (array([250]), None)], [(array([250]), None), (array([250]), None)], [(array([250]), None), (array([250]), None)], [(array([250]), None), (array([250]), None)], [(array([250]), None), (array([250]), None)], [(array([250]), None), (array([25