# **Deep Hedging**
# Buchkov Viacheslav

In [None]:
import abc
import math
import os
import random
import sys
from functools import lru_cache
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from tqdm import tqdm

# You may add any imports you need
from torch.cuda.amp import GradScaler

RANDOM_SEED = 12

In [None]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and every CUDA device), and puts cuDNN into deterministic mode.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # `manual_seed_all` seeds every CUDA device rather than only the current
    # one; it is silently ignored when CUDA is not available.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # cuDNN autotuning may select different kernels from run to run, which
    # undermines `deterministic = True`, so switch it off as well.
    torch.backends.cudnn.benchmark = False


# Apply the notebook-wide seed (RANDOM_SEED) right after the helper is defined.
seed_everything(RANDOM_SEED)

In [None]:
# Horizon in days; later cells pass N_DAYS / 365 as the term when querying
# the RUB and USD rate curves.
N_DAYS = 5

## Getting spot data.

In [None]:
# Colab-only: mount Google Drive so the input archives and CSV files under
# /content/gdrive/MyDrive/ can be read by the cells below.
from google.colab import drive

drive.mount("/content/gdrive", force_remount=True)

# NOTE(review): ROOT_PATH is not referenced in the visible part of the
# notebook - confirm it is still needed.
ROOT_PATH = Path("dataset")
# Drive folder that the final pickled datasets are written to.
PATH = Path("/content/gdrive/MyDrive/")

Mounted at /content/gdrive


In [None]:
# Quietly (-q) extract the USD/RUB quotes archive from Drive into the current directory.
!unzip -q '/content/gdrive/MyDrive/quotes_USDRUB_resample.zip' -d './'

In [None]:
# Quietly (-q) extract the EUR/RUB quotes archive from Drive into the current directory.
!unzip -q '/content/gdrive/MyDrive/quotes_EURRUB_resample.zip' -d './'

In [None]:
# Load the resampled USD/RUB quotes and index them by their parsed timestamp.
usdrub = (
    pd.read_csv("quotes_USDRUB_resample_1min.csv")
    .assign(timestamp=lambda quotes: pd.to_datetime(quotes["timestamp"]))
    .set_index("timestamp")
)
usdrub.head()

Unnamed: 0_level_0,Bid,Ask
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-01-03 07:00:00+00:00,60.97376,61.0165
2017-01-03 07:01:00+00:00,61.010855,61.068045
2017-01-03 07:02:00+00:00,61.03722,61.06985
2017-01-03 07:03:00+00:00,61.011955,61.04869
2017-01-03 07:04:00+00:00,61.02125,61.052835


In [None]:
# Load the resampled EUR/RUB quotes and index them by their parsed timestamp.
eurrub = (
    pd.read_csv("quotes_EURRUB_resample_1min.csv")
    .assign(timestamp=lambda quotes: pd.to_datetime(quotes["timestamp"]))
    .set_index("timestamp")
)
eurrub.head()

Unnamed: 0_level_0,Bid,Ask
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-01-03 07:00:00+00:00,63.90315,64.34355
2017-01-03 07:01:00+00:00,63.881855,64.289995
2017-01-03 07:02:00+00:00,63.914725,65.362045
2017-01-03 07:03:00+00:00,63.8792,64.28493
2017-01-03 07:04:00+00:00,63.88679,64.03944


In [None]:
# Align EUR/RUB quotes to the USD/RUB timestamps: every USD/RUB row takes the
# EUR/RUB row with the closest timestamp, provided it is at most `tol` away.
# The overlapping Bid/Ask columns get the default suffixes: _x (USD/RUB, left)
# and _y (EUR/RUB, right).
tol = pd.Timedelta("1 min")
spot = pd.merge_asof(
    usdrub,
    eurrub,
    left_index=True,
    right_index=True,
    direction="nearest",
    tolerance=tol,
)

In [None]:
# Preview the merged quotes: *_x columns come from usdrub, *_y columns from eurrub.
spot.head()

Unnamed: 0_level_0,Bid_x,Ask_x,Bid_y,Ask_y
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-01-03 07:00:00+00:00,60.97376,61.0165,63.90315,64.34355
2017-01-03 07:01:00+00:00,61.010855,61.068045,63.881855,64.289995
2017-01-03 07:02:00+00:00,61.03722,61.06985,63.914725,65.362045
2017-01-03 07:03:00+00:00,61.011955,61.04869,63.8792,64.28493
2017-01-03 07:04:00+00:00,61.02125,61.052835,63.88679,64.03944


In [None]:
# First and last timestamp covered by the merged spot data.
spot.index.min(), spot.index.max()

(Timestamp('2017-01-03 07:00:00+0000', tz='UTC'),
 Timestamp('2024-04-30 15:51:00+0000', tz='UTC'))

## Getting rates data.

In [None]:
# Load the RUB curve parameters (semicolon-separated file with decimal commas).
rub_rates = pd.read_csv("/content/gdrive/MyDrive/dynamic.csv", sep=";")

# Combine the separate date and time columns into a single timestamp index.
stamp_text = rub_rates["tradedate"] + " " + rub_rates["tradetime"]
rub_rates["timestamp"] = pd.to_datetime(stamp_text, format="%d.%m.%Y %H:%M:%S")
rub_rates = rub_rates.set_index("timestamp").drop(["tradedate", "tradetime"], axis=1)

# `apply` walks the frame column by column; every remaining column is expected
# to hold strings with a decimal comma, which is swapped for a dot before the
# conversion to float.
rub_rates = rub_rates.apply(lambda column: column.str.replace(",", ".")).astype(float)
rub_rates.head()

Unnamed: 0_level_0,B1,B2,B3,T1,G1,G2,G3,G4,G5,G6,G7,G8,G9
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2014-01-06 12:21:16,877.951361,-311.324633,51.105265,4.836731,0.0,0.0,-0.23543,-0.602083,-0.72534,-0.341294,0.683989,0.0,0.0
2014-01-08 12:41:22,879.619947,-312.611788,51.560662,4.824178,-0.005661,-0.005661,-0.214698,-1.64129,-4.821431,0.32879,3.391207,0.0,0.0
2014-01-09 18:38:19,876.971884,-320.83162,56.936812,4.448947,0.065004,-3.996982,-4.358512,0.140076,0.524525,4.56052,1.350378,0.0,0.0
2014-01-10 18:38:17,875.118031,-329.716005,61.089853,4.148542,0.018092,-2.428168,-3.204718,0.605912,2.373884,3.420259,0.019879,0.0,0.0
2014-01-13 18:37:26,878.212981,-325.573772,60.784188,4.195013,2.689991,3.672235,-8.583068,-1.823289,-0.645657,1.413068,-0.751966,0.0,0.0


In [None]:
# Load the USD curve parameter file and index it by the parsed "Date" column.
usd_rates = (
    pd.read_csv("/content/gdrive/MyDrive/feds200628.csv")
    .assign(timestamp=lambda rates: pd.to_datetime(rates["Date"]))
    .drop(["Date"], axis=1)
    .set_index("timestamp")
)
usd_rates.head()

Unnamed: 0_level_0,BETA0,BETA1,BETA2,BETA3,SVEN1F01,SVEN1F04,SVEN1F09,SVENF01,SVENF02,SVENF03,...,SVENY23,SVENY24,SVENY25,SVENY26,SVENY27,SVENY28,SVENY29,SVENY30,TAU1,TAU2
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1961-06-14,3.917606,-1.277955,-1.949397,0.0,3.8067,3.9562,,3.5492,3.8825,3.9149,...,,,,,,,,,0.339218,-999.99
1961-06-15,3.978498,-1.257404,-2.247617,0.0,3.8694,4.0183,,3.5997,3.946,3.9763,...,,,,,,,,,0.325775,-999.99
1961-06-16,3.98435,-1.429538,-1.885024,0.0,3.8634,4.0242,,3.5957,3.9448,3.9811,...,,,,,,,,,0.348817,-999.99
1961-06-19,4.004379,-0.723311,-3.310743,0.0,3.9196,4.0447,,3.6447,3.9842,4.0035,...,,,,,,,,,0.282087,-999.99
1961-06-20,3.985789,-0.900432,-2.844809,0.0,3.8732,4.0257,,3.5845,3.9552,3.984,...,,,,,,,,,0.310316,-999.99


In [None]:
class YieldCurve(abc.ABC):
    """Base class for a yield curve tabulated on a grid of terms.

    Subclasses implement :meth:`get_rates`. On construction the curve is
    evaluated on ``initial_terms`` and stored as a one-column DataFrame
    (column ``TARGET_COLUMN``) indexed by term. Discount factors and
    instantaneous forward rates are derived from that table on demand.

    All ``get_*`` point lookups return the tabulated value at the grid term
    closest to the requested one; no interpolation is performed.
    """

    TARGET_COLUMN = "ytm"
    DISCOUNT_FACTOR_COLUMN = "discount_factor"
    FWD_RATE_COLUMN = "fwd_rate"

    def __init__(self, initial_terms: np.ndarray, *args, **kwargs) -> None:
        """Tabulate the curve on ``initial_terms``.

        Extra positional and keyword arguments are accepted and ignored.
        """
        self._rates_df = None
        self._discount_factors = None
        self._instant_fwd_rate = None
        # Per-instance memo of point lookups. A module-level `lru_cache` on
        # the methods would keep every instance alive and would not notice
        # when the curve is rebuilt.
        self._lookup_cache = {}

        self._initialize(initial_terms)

    def _initialize(self, terms: np.ndarray) -> None:
        self.create_curve(terms=terms)

    @abc.abstractmethod
    def get_rates(self, terms: list[float]) -> np.ndarray:
        """Return the curve's rate for each of ``terms`` (one value per term)."""

    def create_curve(self, terms: list[float]) -> None:
        """(Re)build the rate table on ``terms``.

        Derived tables and memoised lookups are dropped because they would
        describe the previous grid; they are rebuilt lazily when requested.
        """
        self._rates_df = pd.DataFrame(self.get_rates(terms), index=terms, columns=[self.TARGET_COLUMN])
        self._discount_factors = None
        self._instant_fwd_rate = None
        self._lookup_cache = {}

    def _create_discount_factors(self) -> pd.DataFrame:
        """Build and store ``exp(-rate * term)`` for every tabulated term.

        Raises:
            ValueError: if the rate table has not been created yet.
        """
        if self._rates_df is None:
            raise ValueError("Rate data is not fitted yet!")
        terms = self._rates_df.index
        rates = self._rates_df[self.TARGET_COLUMN].to_numpy()
        # Build the frame from a plain array: handing pandas a Series named
        # "ytm" together with columns=["discount_factor"] selects a column
        # that does not exist and produces NaN only.
        self._discount_factors = pd.DataFrame(
            {self.DISCOUNT_FACTOR_COLUMN: np.exp(-rates * terms.to_numpy())},
            index=terms,
        )
        return self._discount_factors

    def _create_instant_fwd_rates(self) -> pd.DataFrame:
        """Build and store forward rates between consecutive tabulated terms.

        For neighbouring terms ``t_old < t`` the value stored at ``t`` is
        ``-1 / (t - t_old) * log(DF(t) / DF(t_old))``. The first term has no
        predecessor and therefore no entry.

        Raises:
            ValueError: if the discount factors have not been created yet.
        """
        if self._discount_factors is None:
            raise ValueError("Discount factor data is not fitted yet!")

        terms = self._rates_df.index
        factors = self._discount_factors[self.DISCOUNT_FACTOR_COLUMN].to_numpy(dtype=float)
        step_lengths = np.diff(terms.to_numpy(dtype=float))
        fwd_rates = -1 / step_lengths * np.log(factors[1:] / factors[:-1])
        self._instant_fwd_rate = pd.DataFrame({self.FWD_RATE_COLUMN: fwd_rates}, index=terms[1:])
        return self._instant_fwd_rate

    @property
    def curve_df(self) -> pd.DataFrame:
        """Rate table indexed by term.

        Raises:
            ValueError: if the table has not been created yet.
        """
        if self._rates_df is None:
            raise ValueError("Rate data is not fitted yet! Call .create_curve() first.")
        return self._rates_df

    @property
    def discount_factors_df(self) -> pd.DataFrame:
        """Stored discount-factor table, or ``None`` if it has not been built."""
        return self._discount_factors

    @property
    def instant_fwd_rates_df(self) -> pd.DataFrame:
        """Stored forward-rate table, or ``None`` if it has not been built."""
        return self._instant_fwd_rate

    @staticmethod
    def _find_point(curve: pd.DataFrame, term: float) -> float:
        """Return the first column's value at the index entry closest to ``term``."""
        index = np.absolute(curve.index - term).argmin()
        return curve.iloc[index].values[0]

    def _lookup(self, kind: str, table: pd.DataFrame, term: float) -> float:
        """Memoised nearest-term lookup in ``table``, keyed by ``(kind, term)``."""
        cache = self.__dict__.setdefault("_lookup_cache", {})
        key = (kind, term)
        if key not in cache:
            cache[key] = self._find_point(table, term)
        return cache[key]

    def get_rate(self, term: float) -> float:
        """Rate at the tabulated term closest to ``term``."""
        return self._lookup(self.TARGET_COLUMN, self.curve_df, term)

    def get_discount_factor(self, term: float) -> float:
        """Discount factor at the tabulated term closest to ``term``.

        The discount-factor table is built on first use.
        """
        if self._discount_factors is None:
            self._create_discount_factors()
        return self._lookup(self.DISCOUNT_FACTOR_COLUMN, self._discount_factors, term)

    def get_instant_fwd_rate(self, term: float) -> float:
        """Forward rate at the tabulated term closest to ``term``.

        The discount-factor and forward-rate tables are built on first use.
        """
        if self._instant_fwd_rate is None:
            if self._discount_factors is None:
                self._create_discount_factors()
            self._create_instant_fwd_rates()
        return self._lookup(self.FWD_RATE_COLUMN, self._instant_fwd_rate, term)

In [None]:
class NelsonSiegelCurve(YieldCurve):
    """Yield curve given by the Nelson-Siegel parametric form.

    For a term ``t`` the rate is

        b0 + (b1 + b2) * tau / t * (1 - exp(-t / tau)) - b2 * exp(-t / tau)

    divided by 100 before it is returned.
    """

    # The default grid is created once, when the class body is executed, and
    # shared by every instance that relies on it; it must not be mutated.
    def __init__(self, b0: float, b1: float, b2: float, tau: float,
                 initial_terms: np.ndarray = np.linspace(1 / 365, 25., 100)) -> None:
        """Store the four parameters and tabulate the curve on ``initial_terms``.

        The parameters are assigned before the base-class constructor runs
        because that constructor already calls :meth:`get_rates`.
        """
        self.b0 = b0
        self.b1 = b1
        self.b2 = b2
        self.tau = tau

        super().__init__(initial_terms)

    def get_rates(self, terms: list[float]) -> np.ndarray:
        """Evaluate the curve at ``terms``.

        Args:
            terms: Terms at which to evaluate the curve.

        Returns:
            One rate per term: the formula value divided by 100. A term of
            exactly 0 yields the analytic limit ``(b0 + b1) / 100`` instead
            of the NaN that the raw ``0 / 0`` would produce.
        """
        terms = np.asarray(terms, dtype=float)
        at_zero = terms == 0
        # Evaluate the formula on a dummy non-zero term where t == 0 so that no
        # division by zero occurs; those entries are overwritten just below.
        safe_terms = np.where(at_zero, 1.0, terms)
        decay = np.exp(-safe_terms / self.tau)
        rates = self.b0 + (self.b1 + self.b2) * self.tau / safe_terms * (1 - decay) - self.b2 * decay
        # As t -> 0: tau / t * (1 - exp(-t / tau)) -> 1 and exp(-t / tau) -> 1.
        rates = np.where(at_zero, self.b0 + self.b1, rates)
        return rates / 100

## Merging data.

In [None]:
# Attach to every spot row the RUB and then the USD curve parameters whose
# timestamp is closest, as long as it lies within `tol` of the spot timestamp.
# NOTE(review): direction="nearest" may pick a curve observation stamped later
# than the spot row - confirm that this look-ahead is acceptable downstream.
tol = pd.Timedelta("24 hours")
df = pd.merge_asof(
    spot,
    rub_rates,
    left_index=True,
    right_index=True,
    direction="nearest",
    tolerance=tol,
)
df = pd.merge_asof(
    df,
    usd_rates,
    left_index=True,
    right_index=True,
    direction="nearest",
    tolerance=tol,
)
df.head()

Unnamed: 0_level_0,Bid_x,Ask_x,Bid_y,Ask_y,B1,B2,B3,T1,G1,G2,...,SVENY23,SVENY24,SVENY25,SVENY26,SVENY27,SVENY28,SVENY29,SVENY30,TAU1,TAU2
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-01-03 07:00:00+00:00,60.97376,61.0165,63.90315,64.34355,924.070641,-111.376396,-260.204466,4.411702,-5.858851,8.954092,...,3.0356,3.0709,3.1064,3.142,3.1779,3.2137,3.2496,3.2853,1.776206,11.530964
2017-01-03 07:01:00+00:00,61.010855,61.068045,63.881855,64.289995,924.070641,-111.376396,-260.204466,4.411702,-5.858851,8.954092,...,3.0356,3.0709,3.1064,3.142,3.1779,3.2137,3.2496,3.2853,1.776206,11.530964
2017-01-03 07:02:00+00:00,61.03722,61.06985,63.914725,65.362045,924.070641,-111.376396,-260.204466,4.411702,-5.858851,8.954092,...,3.0356,3.0709,3.1064,3.142,3.1779,3.2137,3.2496,3.2853,1.776206,11.530964
2017-01-03 07:03:00+00:00,61.011955,61.04869,63.8792,64.28493,924.070641,-111.376396,-260.204466,4.411702,-5.858851,8.954092,...,3.0356,3.0709,3.1064,3.142,3.1779,3.2137,3.2496,3.2853,1.776206,11.530964
2017-01-03 07:04:00+00:00,61.02125,61.052835,63.88679,64.03944,924.070641,-111.376396,-260.204466,4.411702,-5.858851,8.954092,...,3.0356,3.0709,3.1064,3.142,3.1779,3.2137,3.2496,3.2853,1.776206,11.530964


In [None]:
from tqdm.auto import tqdm

# Register `progress_apply` on pandas objects.
tqdm.pandas()


def _usd_curve_from_row(row):
    """Build the USD curve for one row from its BETA0, BETA1, BETA2 and TAU1 values."""
    return NelsonSiegelCurve(b0=row["BETA0"], b1=row["BETA1"], b2=row["BETA2"], tau=row["TAU1"])


# One curve object per row of the merged frame.
df["usd_curve"] = df.progress_apply(_usd_curve_from_row, axis=1)

  0%|          | 0/1488719 [00:00<?, ?it/s]

In [None]:
tqdm.pandas()


def _rub_curve_from_row(row):
    """Build the RUB curve for one row from its B1, B2, B3 and T1 values."""
    return NelsonSiegelCurve(b0=row["B1"], b1=row["B2"], b2=row["B3"], tau=row["T1"])


# One curve object per row of the merged frame.
df["rub_curve"] = df.progress_apply(_rub_curve_from_row, axis=1)

  0%|          | 0/1488719 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [None]:
tqdm.pandas()
# Read the RUB rate for the N_DAYS horizon from every row's curve.
# `get_rate` returns the value at the tabulated term closest to the request;
# on the curves' default grid, np.linspace(1 / 365, 25., 100), the node closest
# to 5 / 365 is the first one, 1 / 365.
# The result is divided by 100 once more on top of the division already done
# inside `get_rates` - presumably because the RUB parameters are quoted in
# basis points; confirm against the data source.
df["rub_rate"] = (
    df["rub_curve"]
    .progress_apply(lambda c, term=N_DAYS / 365: c.get_rate(term))
    .div(100)
)

  0%|          | 0/1488719 [00:00<?, ?it/s]

In [None]:
# Read the USD rate for the N_DAYS horizon from every row's curve.
# `get_rate` returns the value at the tabulated term closest to the request;
# on the curves' default grid, np.linspace(1 / 365, 25., 100), the node closest
# to 5 / 365 is the first one, 1 / 365.
# Relies on `tqdm.pandas()` having been called in an earlier cell.
df["usd_rate"] = df["usd_curve"].progress_apply(
    lambda c, term=N_DAYS / 365: c.get_rate(term)
)

  0%|          | 0/1488719 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



## Saving final dataset.

In [None]:
# Two-asset dataset: both currency pairs' quotes plus the two short rates.
data = df.loc[:, ["Bid_x", "Ask_x", "Bid_y", "Ask_y", "rub_rate", "usd_rate"]]
data.head()

Unnamed: 0_level_0,Bid_x,Ask_x,Bid_y,Ask_y,rub_rate,usd_rate
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-01-03 07:00:00+00:00,60.97376,61.0165,63.90315,64.34355,0.081265,0.00672
2017-01-03 07:01:00+00:00,61.010855,61.068045,63.881855,64.289995,0.081265,0.00672
2017-01-03 07:02:00+00:00,61.03722,61.06985,63.914725,65.362045,0.081265,0.00672
2017-01-03 07:03:00+00:00,61.011955,61.04869,63.8792,64.28493,0.081265,0.00672
2017-01-03 07:04:00+00:00,61.02125,61.052835,63.88679,64.03944,0.081265,0.00672


In [None]:
# Persist the two-asset dataset to the Drive folder.
data.to_pickle(PATH / "two_assets.pkl")

In [None]:
# Single-asset dataset: USD/RUB quotes (renamed to bid/ask) plus the two rates.
# The rename is chained and returns a new frame; renaming the column subset
# in place is what raised pandas' SettingWithCopyWarning.
data = df[["Bid_x", "Ask_x", "rub_rate", "usd_rate"]].rename(columns={"Bid_x": "bid", "Ask_x": "ask"})
data.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.rename(columns={"Bid_x": "bid", "Ask_x": "ask"}, inplace=True)


Unnamed: 0_level_0,bid,ask,rub_rate,usd_rate
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-01-03 07:00:00+00:00,60.97376,61.0165,0.081265,0.00672
2017-01-03 07:01:00+00:00,61.010855,61.068045,0.081265,0.00672
2017-01-03 07:02:00+00:00,61.03722,61.06985,0.081265,0.00672
2017-01-03 07:03:00+00:00,61.011955,61.04869,0.081265,0.00672
2017-01-03 07:04:00+00:00,61.02125,61.052835,0.081265,0.00672


In [None]:
# Persist the single-asset (bid/ask + rates) dataset to the Drive folder.
data.to_pickle(PATH / "2024.pkl")