# **Deep Hedging**
# Buchkov Viacheslav

In [1]:
!pip install stable_baselines3
!pip install sb3-contrib

In [2]:
import warnings
from pathlib import Path

from deep_hedging import ExperimentConfig, EuropeanCall, seed_everything
from deep_hedging.rl import DerivativeEnvStep, RLTrainer

from sb3_contrib import RecurrentPPO
from stable_baselines3 import SAC, PPO

RANDOM_SEED = 12

In [3]:
# Experiment-wide settings: the shared seed plus the input/output locations
# (data is read from ./data, outputs go to the current directory).
config = ExperimentConfig(
    RANDOM_SEED=RANDOM_SEED,
    DATA_ROOT=Path("data"),
    OUTPUT_ROOT=Path("."),
)

# Apply the same seed through the project's helper.
# NOTE(review): presumably seeds the global RNGs (random / numpy / torch);
# confirm against deep_hedging.seed_everything.
seed_everything(RANDOM_SEED)

# Trailing expression: shown as the cell output to reveal the selected device.
config.DEVICE

In [4]:
# Display the full experiment configuration as the cell output.
config

## Environment.

In [5]:
# Build the hedging environment for a European call, with the number of days
# taken from the experiment configuration.
env = DerivativeEnvStep(
    instrument_cls=EuropeanCall,
    n_days=config.N_DAYS,
)

# Reset the environment; the value returned by reset() is shown as the
# cell output.
env.reset()

## Training.

In [6]:
# Silence every UserWarning from here on. The filter is installed globally,
# so it also stays in effect for the cells that follow.
warnings.filterwarnings("ignore", category=UserWarning)

# Recurrent PPO agent using the "MlpLstmPolicy" policy, bound to the
# environment created above and logging at verbosity level 1.
# NOTE(review): no explicit `seed=` or `device=` is passed to the model;
# confirm that seed_everything() and the library's default device selection
# are what is intended here.
lstm_agent = RecurrentPPO("MlpLstmPolicy", env, verbose=1)

# The trainer receives the agent together with the instrument/environment
# classes and the experiment configuration.
trainer = RLTrainer(
    model=lstm_agent,
    instrument_cls=EuropeanCall,
    environment_cls=DerivativeEnvStep,
    config=config,
)

# Run training; 10_000 is presumably the number of training timesteps
# (verify against RLTrainer.learn).
trainer.learn(10_000)

In [7]:
# Evaluate the trained agent; the result of assess() is shown as the cell
# output. NOTE(review): 3_000 is presumably the number of evaluation
# steps/samples — confirm against RLTrainer.assess.
trainer.assess(3_000)