# **Deep Hedging**
# Buchkov Viacheslav

In [3]:
import abc
import math
import os
import random
import sys
from functools import lru_cache
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from tqdm import tqdm

# You may add any imports you need
from torch.cuda.amp import GradScaler

# Seed passed to seed_everything() below so that runs are repeatable.
RANDOM_SEED = 12

In [4]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU and all CUDA devices), and switches cuDNN into its
    reproducible configuration.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one; a no-op without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Autotuning may pick a different convolution algorithm on each run, which
    # defeats the deterministic flag above, so it is turned off as well.
    torch.backends.cudnn.benchmark = False


# Seed all generators once, before any data processing happens.
seed_everything(RANDOM_SEED)

In [5]:
# Tenor in days; the rate cells below read each curve at N_DAYS / 365 years.
N_DAYS = 5

## Getting spot data.

In [7]:
# Colab-only: mount Google Drive so the input files under MyDrive are readable.
from google.colab import drive

# force_remount=True re-mounts even if the drive is already attached.
drive.mount("/content/gdrive", force_remount=True)

# ROOT_PATH is not referenced in the visible part of this notebook.
ROOT_PATH = Path("dataset")
# Drive folder used for the final dataset pickle at the end of the notebook.
PATH = Path("/content/gdrive/MyDrive/")

In [8]:
# IPython shell escape: quietly extract the quotes archive into the working
# directory, where the next cell reads quotes_USDRUB_resample_1min.csv.
!unzip -q '/content/gdrive/MyDrive/quotes_USDRUB_resample_1min.zip' -d './'

In [9]:
# Load the spot quotes and index them by their parsed timestamp.
spot = (
    pd.read_csv("quotes_USDRUB_resample_1min.csv")
    .assign(timestamp=lambda frame: pd.to_datetime(frame["timestamp"]))
    .set_index("timestamp")
)
spot.head()

In [10]:
# Show the first and last timestamps covered by the spot data.
spot.index.min(), spot.index.max()

## Getting rates data.

In [27]:
# Load the RUB curve parameters (semicolon-separated file).
rub_rates = pd.read_csv("/content/gdrive/MyDrive/rub_nss.csv", sep=";")

# Date and time live in separate columns; combine them into one timestamp index.
rub_rates["timestamp"] = pd.to_datetime(
    rub_rates["tradedate"] + " " + rub_rates["tradetime"], format="%d.%m.%Y %H:%M:%S"
)
rub_rates = rub_rates.set_index("timestamp").drop(columns=["tradedate", "tradetime"])

# Values use a decimal comma. DataFrame.apply runs once per *column*; casting to
# str first keeps this working for columns read_csv already parsed as numbers
# (for which the .str accessor would otherwise raise AttributeError).
rub_rates = rub_rates.apply(
    lambda column: column.astype(str).str.replace(",", ".", regex=False)
).astype(float)
rub_rates.head()

In [13]:
# Load the USD curve parameters and index them by the parsed "Date" column.
usd_rates = pd.read_csv("/content/gdrive/MyDrive/usd_nss.csv")
usd_rates = (
    usd_rates.assign(timestamp=pd.to_datetime(usd_rates["Date"]))
    .drop(columns=["Date"])
    .set_index("timestamp")
)
usd_rates.head()

In [19]:
class YieldCurve(abc.ABC):
    """Abstract yield curve tabulated on a grid of terms.

    Subclasses implement :meth:`get_rates`. The base class stores the rates in
    a one-column DataFrame indexed by term and derives discount factors
    ``exp(-rate * term)`` and forward rates between neighbouring grid points
    from it. Point lookups return the value at the *nearest* grid term; no
    interpolation is performed.
    """

    TARGET_COLUMN = "ytm"
    DISCOUNT_FACTOR_COLUMN = "discount_factor"
    FWD_RATE_COLUMN = "fwd_rate"

    def __init__(self, initial_terms: np.ndarray, *args, **kwargs) -> None:
        """Build the curve on ``initial_terms``.

        Extra positional and keyword arguments are accepted and ignored.
        """
        self._rates_df = None
        self._discount_factors = None
        self._instant_fwd_rate = None

        self._initialize(initial_terms)

    def _initialize(self, terms: np.ndarray) -> None:
        """Create the initial rate table."""
        self.create_curve(terms=terms)

    @abc.abstractmethod
    def get_rates(self, terms: list[float]) -> np.ndarray:
        """Return the curve's rate for every entry of ``terms``."""

    def create_curve(self, terms: list[float]) -> None:
        """(Re)build the rate table on ``terms``.

        Tables derived from a previous grid are discarded so that later
        lookups cannot return values belonging to the old grid; they are
        rebuilt on demand.
        """
        self._rates_df = pd.DataFrame(
            self.get_rates(terms), index=terms, columns=[self.TARGET_COLUMN]
        )
        self._discount_factors = None
        self._instant_fwd_rate = None

    def _create_discount_factors(self) -> pd.DataFrame:
        """Compute ``exp(-rate * term)`` for every grid term and store it.

        Raises:
            ValueError: If the rate table has not been created.
        """
        if self._rates_df is None:
            raise ValueError("Rate data is not fitted yet!")
        rates = self._rates_df[self.TARGET_COLUMN].to_numpy()
        terms = self._rates_df.index.to_numpy()
        # Plain arrays are passed on purpose: a *named* Series combined with a
        # different ``columns`` label would be reindexed into an all-NaN column.
        self._discount_factors = pd.DataFrame(
            np.exp(-rates * terms),
            index=self._rates_df.index,
            columns=[self.DISCOUNT_FACTOR_COLUMN],
        )
        return self._discount_factors

    def _create_instant_fwd_rates(self) -> pd.DataFrame:
        """Compute forward rates between consecutive grid terms and store them.

        For neighbouring terms ``t_old < t`` the value stored at ``t`` is
        ``-1 / (t - t_old) * log(DF(t) / DF(t_old))``. The first grid term has
        no predecessor and therefore no entry.

        Raises:
            ValueError: If discount factors have not been created.
        """
        if self._discount_factors is None:
            raise ValueError("Discount factor data is not fitted yet!")

        terms = self._rates_df.index.to_numpy()
        factors = self._discount_factors[self.DISCOUNT_FACTOR_COLUMN].to_numpy()
        step = np.diff(terms)
        instant_fwd_rates = -1 / step * np.log(factors[1:] / factors[:-1])
        self._instant_fwd_rate = pd.DataFrame(
            instant_fwd_rates,
            index=self._rates_df.index[1:],
            columns=[self.FWD_RATE_COLUMN],
        )
        return self._instant_fwd_rate

    def _ensure_discount_factors(self) -> pd.DataFrame:
        """Return the discount-factor table, building it on first use."""
        if self._discount_factors is None:
            self._create_discount_factors()
        return self._discount_factors

    def _ensure_instant_fwd_rates(self) -> pd.DataFrame:
        """Return the forward-rate table, building it on first use."""
        if self._instant_fwd_rate is None:
            self._ensure_discount_factors()
            self._create_instant_fwd_rates()
        return self._instant_fwd_rate

    @property
    def curve_df(self) -> pd.DataFrame:
        """Rate table indexed by term.

        Raises:
            ValueError: If the rate table has not been created.
        """
        if self._rates_df is None:
            raise ValueError("Rate data is not fitted yet! Call .create_curve() first.")
        return self._rates_df

    @property
    def discount_factors_df(self) -> pd.DataFrame:
        """Discount-factor table indexed by term (built on first access)."""
        return self._ensure_discount_factors()

    @property
    def instant_fwd_rates_df(self) -> pd.DataFrame:
        """Forward-rate table indexed by term (built on first access)."""
        return self._ensure_instant_fwd_rates()

    @staticmethod
    def _find_point(curve: pd.DataFrame, term: float) -> float:
        """Return the first-column value at the grid term closest to ``term``."""
        nearest = np.absolute(curve.index - term).argmin()
        return curve.iloc[nearest].values[0]

    # NOTE: these lookups are deliberately not wrapped in functools.lru_cache.
    # A cache on an instance method is shared by all instances, keeps every
    # curve alive for the lifetime of the process, and keeps returning values
    # from the old grid after create_curve() is called again.
    def get_rate(self, term: float) -> float:
        """Return the rate at the grid term nearest to ``term``."""
        return self._find_point(self.curve_df, term)

    def get_discount_factor(self, term: float) -> float:
        """Return the discount factor at the grid term nearest to ``term``."""
        return self._find_point(self._ensure_discount_factors(), term)

    def get_instant_fwd_rate(self, term: float) -> float:
        """Return the forward rate at the grid term nearest to ``term``."""
        return self._find_point(self._ensure_instant_fwd_rates(), term)

In [20]:
class NelsonSiegelCurve(YieldCurve):
    """Yield curve given by the Nelson-Siegel formula.

    For a term ``t`` the rate, before the final division by 100, is::

        b0 + (b1 + b2) * tau / t * (1 - exp(-t / tau)) - b2 * exp(-t / tau)
    """

    def __init__(
        self,
        b0: float,
        b1: float,
        b2: float,
        tau: float,
        # The default grid is created once and shared by all instances; it is
        # only read here, never modified.
        initial_terms: np.ndarray = np.linspace(1 / 365, 25.0, 100),
    ) -> None:
        """Store the four parameters and tabulate the curve on ``initial_terms``."""
        # Parameters must be set before the base constructor runs, because it
        # calls get_rates() straight away.
        self.b0 = b0
        self.b1 = b1
        self.b2 = b2
        self.tau = tau

        super().__init__(initial_terms)

    def get_rates(self, terms: list[float]) -> np.ndarray:
        """Evaluate the curve at ``terms``.

        Args:
            terms: Terms at which to evaluate the curve.

        Returns:
            The formula value divided by 100 for each term (presumably a
            percent-to-fraction conversion; confirm against the input data).
            A term of exactly 0 yields the finite limit ``(b0 + b1) / 100``
            rather than NaN.
        """
        terms = np.asarray(terms, dtype=float)
        decay = np.exp(-terms / self.tau)
        # At t == 0 the middle term is inf * 0; silence the warning here and
        # substitute the analytic limit below.
        with np.errstate(divide="ignore", invalid="ignore"):
            rates = (
                self.b0
                + (self.b1 + self.b2) * self.tau / terms * (1 - decay)
                - self.b2 * decay
            )
        rates = np.where(terms == 0, self.b0 + self.b1, rates)
        return rates / 100

## Merging data.

In [29]:
# Attach to every spot row the nearest-in-time rate parameters: first the RUB
# table, then the USD table. A match further than 24 hours away is not used.
tol = pd.Timedelta("24 hours")
df = spot
for rates in (rub_rates, usd_rates):
    df = pd.merge_asof(
        left=df,
        right=rates,
        left_index=True,
        right_index=True,
        direction="nearest",
        tolerance=tol,
    )
df.head()

In [33]:
from tqdm.auto import tqdm

# Registers DataFrame.progress_apply / Series.progress_apply.
tqdm.pandas()

# One curve object per row, built from that row's USD parameters
# (arguments are positional: b0, b1, b2, tau).
df["usd_curve"] = df.progress_apply(
    lambda quote: NelsonSiegelCurve(
        quote["BETA0"], quote["BETA1"], quote["BETA2"], quote["TAU1"]
    ),
    axis=1,
)

In [32]:
tqdm.pandas()

# One curve object per row, built from that row's RUB parameters
# (arguments are positional: b0, b1, b2, tau).
df["rub_curve"] = df.progress_apply(
    lambda quote: NelsonSiegelCurve(quote["B1"], quote["B2"], quote["B3"], quote["T1"]),
    axis=1,
)

In [35]:
# Checkpoint the merged frame in the same Drive folder as the final dataset.
# The frame holds curve objects, so loading this pickle requires the curve
# classes to be defined first.
df.to_pickle(PATH / "df.pkl")

In [36]:
tqdm.pandas()
# Read each RUB curve at the N_DAYS tenor (expressed in years).
# NOTE(review): get_rates() already divides by 100; the extra "/ 100" here is
# applied to RUB only, presumably because its parameters are quoted in basis
# points. Confirm against the input file.
df["rub_rate"] = df["rub_curve"].progress_apply(
    lambda rub_curve: rub_curve.get_rate(N_DAYS / 365) / 100
)

In [37]:
# Read each USD curve at the N_DAYS tenor (expressed in years).
df["usd_rate"] = df["usd_curve"].progress_apply(
    lambda usd_curve: usd_curve.get_rate(N_DAYS / 365)
)

## Saving final dataset.

In [46]:
# Keep only the quote and rate columns. rename() returns a new frame, which
# avoids renaming in place on a slice of df (SettingWithCopyWarning).
data = df[["Bid", "Ask", "rub_rate", "usd_rate"]].rename(
    columns={"Bid": "bid", "Ask": "ask"}
)
data.head()

In [48]:
# Persist the final dataset (bid, ask and the two rates) to the Drive folder.
data.to_pickle(PATH / "data.pkl")