In [None]:
import numpy as np
import pandas as pd

from utils.formulas import net_liability_bs_cfm
from utils.formulas import delta_bs_cfm
from utils.formulas import integral_evaluator
from utils.envs import TradingEnvUnderBSCFM
from utils.agent import DeltaAgentBSCFM, DeltaAgentBSIFM, DeltaAgentHestonCFM, DeltaAgentHestonIFM
import matplotlib.pyplot as plt
from tqdm import tqdm
import time
from stable_baselines import PPO2
import pickle

### Test Section

#### Integral Evaluator Test

In [None]:
from scipy.integrate import quad_vec

In [None]:
# Reference values from SciPy: integrate s**p over [0, 1] for 100 exponents p
# evenly spaced on [0, 2]. The exact answer is 1 / (p + 1).
pts = np.linspace(0, 2, num=100)


def func(s):
    """Vector-valued integrand: ``s`` raised to every exponent in ``pts``."""
    return np.power(s, pts)


# quad_vec returns (integral, error estimate); only the integral is kept.
quad_result = quad_vec(func, 0, 1)[0]

In [None]:
# Result from the project's integral evaluator, called with the same
# integrand `func` and the same bounds [0, 1] as the quad_vec reference so
# the two results can be compared element by element.
int_eval_result = integral_evaluator(func, 0, 1)

In [None]:
# Overlay both integration results (x-axis is the index into `pts`) so any
# disagreement between the two methods is visible at a glance.
fig, ax = plt.subplots()
for series, series_label in ((quad_result, 'Quad_Package'), (int_eval_result, 'Evaluator')):
    ax.plot(series, label=series_label)
ax.legend()
plt.show()

In [None]:
# Max norm (sup norm) of the difference between the two results.
# The absolute value must be taken element-wise BEFORE the maximum: taking
# abs(max(diff)) instead would hide large negative deviations and report only
# the magnitude of the largest signed difference.
np.max(np.abs(quad_result - int_eval_result))

In [None]:
# Running time for 1000 quad_vec evaluations.
# time.perf_counter() is monotonic and high-resolution, so the measured
# interval cannot be distorted by system clock adjustments the way
# wall-clock time.time() can.
time_start_quad = time.perf_counter()
for _ in range(1000):
    quad_result = quad_vec(func, 0, 1)[0]
time_end_quad = time.perf_counter()
print('time taken by quad_vec: ', time_end_quad - time_start_quad)

In [None]:
# Running time for 1000 integral_evaluator evaluations.
# time.perf_counter() is monotonic and high-resolution, so the measured
# interval cannot be distorted by system clock adjustments the way
# wall-clock time.time() can.
time_start_int = time.perf_counter()
for _ in range(1000):
    int_result = integral_evaluator(func, 0, 1)
time_end_int = time.perf_counter()
print('time taken by integral evaluator: ', time_end_int - time_start_int)

#### Net Liability Calculator and Delta Calculator Test

In [None]:
# Spot check of net_liability_bs_cfm on five Ft values, each paired with t = 1.
Ft = np.array([80, 90, 100, 150, 200])
t = np.ones_like(Ft)
net_liability_bs_cfm(
    Ft, t,
    100, 100,
    0.03, 0.03, 0.03 * 0.95,
    1, 0.02, 0.1,
)

In [None]:
# Rebuild the Ft grid used by the calculator checks and display it.
Ft = np.asarray([80, 90, 100, 150, 200])
Ft

In [None]:
# Spot check of delta_bs_cfm on five Ft values, each paired with t = 0.
Ft = np.array([80, 90, 100, 150, 200])
t = np.zeros_like(Ft)
delta_bs_cfm(
    Ft, t,
    100, 100,
    0.0, 0.0, 0.0,
    1, 0.02, 0.1, 1,
)

### Baseline Evaluation Section

#### Trading Environment Setup

In [None]:
# --- values passed to the environment as financial-market parameters ---
S0, mu, sigma, r = 100, 0.08, 0.2, 0.02
num_steps = 252

# --- values passed to the environment as actuarial-market parameters ---
T, N0 = 1, 1
Gm = Gd = 100
rho, fom = 1.19, 0.02

# Bundle the scalars into the two dictionaries the trading environment takes.
financial_market_params = dict(S0=S0, r=r, mu=mu, sigma=sigma, num_steps=num_steps)
actuarial_market_params = dict(N0=N0, Gm=Gm, Gd=Gd, rho=rho, fom=fom, T=T)

In [None]:
# Number of evaluation episodes run by each performance loop below.
n_episode = 5000

# One shared environment instance, built from the two parameter dictionaries
# and configured with the 'evaluation' reward type.
env = TradingEnvUnderBSCFM(
    financial_market_params,
    actuarial_market_params,
    reward_type='evaluation',
)

#### Performance of Delta

In [None]:
%%time
pl_delta = []
for _ in tqdm(range(n_episode)):
    env.seed(_)
    env.reset()
    Ft_traj = env._Ft_traj
    timeline = np.arange(env._total_steps + 1) * env._dt
    delta_agent = DeltaAgentBSCFM(Ft_traj, timeline, Gm, Gd, fom, env._m, env._m * 0.95, T, r, sigma, rho)
    done = False
    t_idx = 0
    while not done:
        action = delta_agent.predict(t_idx)
        obs, reward, done, info = env.step(action)
        t_idx += 1
    pl_delta.append(reward)
pl_delta = np.array(pl_delta)

In [None]:
# a_file = open("./data_delta_bs_cfm.pkl", "wb")
# pickle.dump(pl_delta, a_file)
# a_file.close()

#### Performance of RL Agent

In [None]:
# Path of the saved model archive to evaluate, relative to the working
# directory the notebook is run from.
eval_model_id = './pilot_model_trial_1.zip'
# Load the saved PPO2 model; it is used by the RL evaluation loop below.
model = PPO2.load(eval_model_id)

In [None]:
%%time
pl_rl = []
for _ in tqdm(range(n_episode)):
    env.seed(_)
    obs = env.reset()
    done = False
    while not done:
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, done, info = env.step(action)
    pl_rl.append(reward)
pl_rl = np.array(pl_rl)

In [None]:
# a_file = open("./data_rl.pkl", "wb")
# pickle.dump(pl_rl, a_file)
# a_file.close()

#### Performance of Misspecified Deltas

##### BS + IFM

In [None]:
%%time
pl_delta = []
for _ in tqdm(range(n_episode)):
    env.seed(_)
    env.reset()
    Ft_traj = env._Ft_traj
    timeline = np.arange(env._total_steps + 1) * env._dt
    delta_agent = DeltaAgentBSIFM(Ft_traj, timeline, Gm, Gd, 50, env._m, env._m * 0.95, T, r, sigma, rho)
    done = False
    t_idx = 0
    while not done:
        action = delta_agent.predict(t_idx)
        obs, reward, done, info = env.step(action)
        t_idx += 1
    pl_delta.append(reward)
pl_delta = np.array(pl_delta)

In [None]:
# a_file = open("./data_delta_bs_ifm.pkl", "wb")
# pickle.dump(pl_delta, a_file)
# a_file.close()

##### Heston + CFM

In [None]:
%%time
pl_delta = []
for _ in tqdm(range(n_episode)):
    env.seed(_)
    obs = env.reset()
    delta_agent = DeltaAgentHestonCFM(Gm, Gd, fom, env._m, env._m * 0.95, T, r, sigma, rho,
                                  0.2, 0.04, 0.1, -0.5, rho * S0)
    done = False
    while not done:
        Ft = np.exp(obs[0])
        t = T - obs[-1]
        action = delta_agent.predict(Ft, t)
        obs, reward, done, info = env.step(action)
    pl_delta.append(reward)
pl_delta = np.array(pl_delta)

In [None]:
# a_file = open("./data_delta_heston_cfm.pkl", "wb")
# pickle.dump(pl_delta, a_file)
# a_file.close()

##### Heston + IFM

In [None]:
%%time
pl_delta = []
for _ in tqdm(range(n_episode)):
    env.seed(_)
    obs = env.reset()
    delta_agent = DeltaAgentHestonIFM(Gm, Gd, 50, env._m, env._m * 0.95, T, r, sigma, rho,
                                  0.2, 0.04, 0.1, -0.5, rho * S0)
    done = False
    while not done:
        Ft = np.exp(obs[0])
        t = T - obs[-1]
        action = delta_agent.predict(Ft, t)
        obs, reward, done, info = env.step(action)
    pl_delta.append(reward)
pl_delta = np.array(pl_delta)

In [None]:
# a_file = open("./data_delta_heston_ifm.pkl", "wb")
# pickle.dump(pl_delta, a_file)
# a_file.close()