# Basic experiment

In [6]:
from LocalVolEnv import LocalVol
import gymnasium as gym # type: ignore
from stable_baselines3 import PPO # type: ignore
from stable_baselines3.common.evaluation import evaluate_policy # type: ignore
from stable_baselines3.common.vec_env import DummyVecEnv # type: ignore
from stable_baselines3.common.env_checker import check_env # type: ignore
from stable_baselines3.common.monitor import Monitor # type: ignore
import numpy as np # type: ignore
import random
import os

In [9]:
# Low volatility environment for testing the PPO model on a Local Volatility model with Black-Scholes dynamics

Dynamics  = 'BS'
star_time = 0
T = 1
dT = 1/252
r = 0
mu = [0.05,-0.02]
sigma = [0.01,0.01]
P = [[0.9,0.0,0.0,0.1],[0.9,0.0,0.0,0.1],[0.5,0.0,0.0,0.5],[0.5,0.0,0.0,0.5]]
cuda = True #Use cuda device and larger network architecture (3 layers, 256 neurons per layer) and larger batch size

LVol = LocalVol(Dynamics = Dynamics, T = T, dT = dT, mu = mu, sigma = sigma, P = P)
LVol.seed(seed=random.seed(10))

env = gym.wrappers.TimeLimit(LVol, max_episode_steps=T)
env = Monitor(env, allow_early_resets=True)

steps = 1000000

base_path = os.path.dirname(os.getcwd()) 
path_folder = os.path.join(base_path,'Optimal-Trading-Frequency','BS_PPO') # PATH to the BS_PPO_Models folder
path = f"{path_folder}/BS_PPO_{str(steps)}_n_regimes_{str(len(mu))}"

for k in range(len(mu)):
    path += f"_mu[{str(k)}]={str(int(mu[k]*100))}_P[{str(k)}]={str(int(P[0][k]*100))}_sigma[{str(k)}]={str(int(sigma[k]*100))}"

if cuda:
    path += 'cuda'

model = PPO.load(path, env = DummyVecEnv([lambda: env]), print_system_info=False)

# Print best action for each signal
print(mu[0],model.predict([0], deterministic = True))
print(mu[1],model.predict([2], deterministic = True))


0.05 (array([14]), None)
-0.02 (array([6]), None)


In [8]:
# Mid volatility environment for testing the PPO model on a Local Volatility model with Black-Scholes dynamics

Dynamics  = 'BS'
star_time = 0
T = 1
dT = 1/252
r = 0
mu = [0.05,-0.02]
sigma = [0.3,0.3]
P = [[0.9,0.0,0.0,0.1],[0.9,0.0,0.0,0.1],[0.5,0.0,0.0,0.5],[0.5,0.0,0.0,0.5]]
cuda = True #Use cuda device and larger network architecture (3 layers, 256 neurons per layer) and larger batch size

LVol = LocalVol(Dynamics = Dynamics, T = T, dT = dT, mu = mu, sigma = sigma, P = P)
LVol.seed(seed=random.seed(10))

env = gym.wrappers.TimeLimit(LVol, max_episode_steps=T)
env = Monitor(env, allow_early_resets=True)

steps = 1000000

base_path = os.path.dirname(os.getcwd()) 
path_folder = os.path.join(base_path,'Optimal-Trading-Frequency','BS_PPO') # PATH to the BS_PPO_Models folder
path = f"{path_folder}/BS_PPO_{str(steps)}_n_regimes_{str(len(mu))}"

for k in range(len(mu)):
    path += f"_mu[{str(k)}]={str(int(mu[k]*100))}_P[{str(k)}]={str(int(P[0][k]*100))}_sigma[{str(k)}]={str(int(sigma[k]*100))}"

if cuda:
    path += 'cuda'

model = PPO.load(path, env = DummyVecEnv([lambda: env]), print_system_info=False)


# Print best action for each signal
print(mu[0],model.predict([0], deterministic = True))
print(mu[1],model.predict([3], deterministic = True))

0.05 (array([14]), None)
-0.02 (array([13]), None)


In [4]:
# Run experiment

Nepisodes = 100
rew = []
act = []
tradingtimes = []

for i in range(Nepisodes):
    obs = env.reset()
    obs = obs[0]
    cont = True
    i = 0
    act.append([])
    tradingtimes.append([])
    reward_episode = 0
    while cont:
        action = model.predict([obs], deterministic = True)
        obs, reward, terminated, truncated, info = LVol.step(action[0][0])
        act[-1].append(action)
        reward_episode += reward
        i += 1
        if any([terminated, truncated]):
            cont = False
            tradingtimes[-1].append(env.unwrapped.tradingtimes)
            rew.append(reward_episode)

print(np.mean(rew),np.std(rew))

0.00013508578689874575 3.516655516800691e-05


In [5]:
print(env.unwrapped.MP)
print(env.unwrapped.P)

[[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [1, 1], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [1, 1], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [1, 1], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [1, 1], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [1, 1], [0, 0], [0, 0], [0, 0], [0, 0], [1, 1], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [1, 1], [1, 1], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [1, 1], [1, 1], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0],

In [46]:
env.unwrapped.MP = [[0,0]]
for i in range(1,env.unwrapped.M+1):
    q = env.unwrapped.P[env.unwrapped.MP[i-1][0]*env.unwrapped.MP[i-1][1] + env.unwrapped.MP[i-1][1]] # transition probabilities
    U = env.unwrapped.np_random.uniform() # random number
    j = 0 # state index
    while U > 0 and j <= env.unwrapped.N**2: # transition to the j-th state
        if U < sum(q[:j+1])-sum(q[:j]):
            print(U,sum(q[:j+1]),sum(q[:j]))
            U = U - sum(q[:j+1])
            print(U)
        else:
            j += 1
    if j < env.unwrapped.N**2:
        print(i,j,U,sum(q[:j]),env.unwrapped.MP[i-1])
        print(j // env.unwrapped.N, j % env.unwrapped.N)
        env.unwrapped.MP.append([j // env.unwrapped.N, j % env.unwrapped.N])
        print(env.unwrapped.MP[i])
    else:
        print(i,j,U,sum(q[:j]),env.unwrapped.MP[i-1])
        env.unwrapped.MP.append([env.unwrapped.N-1,env.unwrapped.N-1])
        print(env.unwrapped.MP[i])
            
eps = [dT*(mu[env.unwrapped.MP[i][0]]+env.unwrapped.np_random.normal(0,sigma[env.unwrapped.MP[i][1]]**2)) for i in range(env.unwrapped.M+1)] # log returns increments

0.1221440456376146 0.9 0
-0.7778559543623854
1 0 -0.7778559543623854 0 [0, 0]
0 0
[0, 0]
0.7978674900512754 0.9 0
-0.10213250994872458
2 0 -0.10213250994872458 0 [0, 0]
0 0
[0, 0]
0.4307094090501098 0.9 0
-0.46929059094989023
3 0 -0.46929059094989023 0 [0, 0]
0 0
[0, 0]
0.7369132612390678 0.9 0
-0.16308673876093227
4 0 -0.16308673876093227 0 [0, 0]
0 0
[0, 0]
0.7155188131395103 0.9 0
-0.18448118686048975
5 0 -0.18448118686048975 0 [0, 0]
0 0
[0, 0]
0.4813086600208363 0.9 0
-0.41869133997916375
6 0 -0.41869133997916375 0 [0, 0]
0 0
[0, 0]
0.897945447373759 0.9 0
-0.0020545526262409908
7 0 -0.0020545526262409908 0 [0, 0]
0 0
[0, 0]
0.19415635739543746 0.9 0
-0.7058436426045626
8 0 -0.7058436426045626 0 [0, 0]
0 0
[0, 0]
0.3555295767364933 0.9 0
-0.5444704232635067
9 0 -0.5444704232635067 0 [0, 0]
0 0
[0, 0]
0.8611077847338227 0.9 0
-0.038892215266177366
10 0 -0.038892215266177366 0 [0, 0]
0 0
[0, 0]
0.31304878474960374 0.9 0
-0.5869512152503963
11 0 -0.5869512152503963 0 [0, 0]
0 0
[0, 0

In [48]:
print(env.unwrapped.M)
print(env.unwrapped.N)
print(env.unwrapped.MP)
print(q)
print(U)

252
2
[[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [1, 1], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [1, 1], [0, 0], [0, 0], [0, 0], [1, 1], [1, 1], [1, 1], [1, 1], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [1, 1], [1, 1], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [1, 1], [1, 1], [0, 0], [1, 1], [1, 1], [0, 0], [0, 0], [0, 0], [1, 1], [1, 1], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [1, 1], [1, 1], [1, 1], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [1, 1], [0, 0], [0, 0], [1, 1], [