In [118]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use("seaborn-v0_8-whitegrid")
import levenberg_marquardt
from matplotlib.pyplot import figure
import heston 
from importlib import reload
reload(heston)
from heston import calibrate_heston, get_tick
import datetime
from sklearn.linear_model import LinearRegression
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)
from scipy import stats as sps
from typing import Union
import h5py
import matplotlib.dates as mdates
from tqdm.notebook import tqdm

In [3]:
%%time
raw = pd.HDFStore('../datasets/eth-20230224.h5','r').get("/eth")

CPU times: user 21.4 s, sys: 3.91 s, total: 25.3 s
Wall time: 28.4 s


In [4]:
def process_data(data):
    # only options
    df = data.copy()
    df = df[(df["instrument"].str.endswith("C")) | (df["instrument"].str.endswith("P"))].sort_values("dt")
    df["type"] = np.where(df["instrument"].str.endswith("C"), "call", "put")
    
    perpetuals = data[data["instrument"].str.endswith("PERPETUAL")][["dt", "price"]].copy()
    perpetuals = perpetuals.rename(columns = {"price": "underlying_price"}).sort_values("dt")
    
    def get_strike(x):
        return int(x.split("-")[2])
    
    def get_expiration(x):
        return x.split("-")[1]
    

    df["strike_price"] = df["instrument"].apply(get_strike)
    df["expiration"] = df["instrument"].apply(get_expiration)
    
    def unix_time_millis(dt):
        epoch = datetime.datetime.utcfromtimestamp(0)
        return int((dt - epoch).total_seconds() * 1000_000)
    
    def get_normal_date(s):
        """Function to convert date to find years to maturity"""
        monthToNum = {
            "JAN": 1,
            "FEB": 2,
            "MAR": 3,
            "APR": 4,
            "MAY": 5,
            "JUN": 6,
            "JUL": 7,
            "AUG": 8,
            "SEP": 9,
            "OCT": 10,
            "NOV": 11,
            "DEC": 12,
        }

        full_date = s.split("-")[1]
        try:
            day = int(full_date[:2])
            month = monthToNum[full_date[2:5]]
        except:
            day = int(full_date[:1])
            month = monthToNum[full_date[1:4]]
        
        year = int("20" + full_date[-2:])
        exp_date = datetime.datetime(year, month, day)
        return unix_time_millis(exp_date)
    
    df["dt"] = pd.to_datetime(df["dt"])
    perpetuals["dt"] = pd.to_datetime(perpetuals["dt"])
    
    df = pd.merge_asof(df, perpetuals, on="dt",
                       tolerance=pd.Timedelta('7 minutes'),
                       direction='nearest',)
    
    df["timestamp"] = df["dt"].apply(unix_time_millis)
    df["expiration"] = df["instrument"].apply(get_normal_date)
    df = df.rename(columns = {"price": "mark_price"})
    
    
    return df


In [5]:
%%time
start = process_data(raw)

CPU times: user 50 s, sys: 20.9 s, total: 1min 10s
Wall time: 1min 31s


In [6]:
df = start.copy()
# df = df[(df["timestamp"]>=1672000000000000) & (df["timestamp"]<=1672800000000000)]
df

Unnamed: 0,dt,mark_price,instrument,type,strike_price,expiration,underlying_price,timestamp
0,2022-11-21 13:08:24.511,0.0005,ETH-25NOV22-1700-C,call,1700,1669334400000000,1133.90,1669036104511000
1,2022-11-21 13:08:24.511,0.0005,ETH-25NOV22-2400-C,call,2400,1669334400000000,1133.90,1669036104511000
2,2022-11-21 13:08:24.511,0.0020,ETH-25NOV22-700-P,put,700,1669334400000000,1133.90,1669036104511000
3,2022-11-21 13:08:24.511,0.0005,ETH-25NOV22-3800-C,call,3800,1669334400000000,1133.90,1669036104511000
4,2022-11-21 13:08:24.511,0.0005,ETH-25NOV22-3400-C,call,3400,1669334400000000,1133.90,1669036104511000
...,...,...,...,...,...,...,...,...
7498621,2023-02-24 07:13:46.716,0.0300,ETH-24FEB23-1700-P,put,1700,1677196800000000,1651.85,1677222826716000
7498622,2023-02-24 07:13:47.576,0.2975,ETH-24FEB23-1200-C,call,1200,1677196800000000,1651.85,1677222827576000
7498623,2023-02-24 07:13:51.208,0.0015,ETH-24FEB23-1650-P,put,1650,1677196800000000,1651.85,1677222831208000
7498624,2023-02-24 07:13:55.748,0.2980,ETH-24FEB23-1200-C,call,1200,1677196800000000,1651.85,1677222835748000


In [92]:
alpha_bar = 0.8

In [213]:
df

Unnamed: 0,dt,mark_price,instrument,type,strike_price,expiration,underlying_price,timestamp
0,2022-11-21 13:08:24.511,0.0005,ETH-25NOV22-1700-C,call,1700,1669334400000000,1133.90,1669036104511000
1,2022-11-21 13:08:24.511,0.0005,ETH-25NOV22-2400-C,call,2400,1669334400000000,1133.90,1669036104511000
2,2022-11-21 13:08:24.511,0.0020,ETH-25NOV22-700-P,put,700,1669334400000000,1133.90,1669036104511000
3,2022-11-21 13:08:24.511,0.0005,ETH-25NOV22-3800-C,call,3800,1669334400000000,1133.90,1669036104511000
4,2022-11-21 13:08:24.511,0.0005,ETH-25NOV22-3400-C,call,3400,1669334400000000,1133.90,1669036104511000
...,...,...,...,...,...,...,...,...
7498621,2023-02-24 07:13:46.716,0.0300,ETH-24FEB23-1700-P,put,1700,1677196800000000,1651.85,1677222826716000
7498622,2023-02-24 07:13:47.576,0.2975,ETH-24FEB23-1200-C,call,1200,1677196800000000,1651.85,1677222827576000
7498623,2023-02-24 07:13:51.208,0.0015,ETH-24FEB23-1650-P,put,1650,1677196800000000,1651.85,1677222831208000
7498624,2023-02-24 07:13:55.748,0.2980,ETH-24FEB23-1200-C,call,1200,1677196800000000,1651.85,1677222835748000


In [315]:
df.dt.max() - df.dt.min()

Timedelta('94 days 18:05:39.152000')

In [316]:
94/7

13.428571428571429

In [368]:
def get_alpha_bar(df: pd.DataFrame, timestamp: int = None):
    if timestamp:
        data = df.query(f"timestamp<={timestamp}").copy()
    else:
        data = df.copy()
    forward = (
        data[["dt", "timestamp", "underlying_price"]]
        .drop_duplicates()
        .sort_values("timestamp")
    )
    # need daily
    forward["date"] = pd.to_datetime(forward["dt"]).dt.date
    forward = forward.loc[forward.groupby("date").dt.idxmax()]
    
    forward["underlying_price_prev"] = forward["underlying_price"].shift(1)
    forward["timestamp_prev"] = forward["timestamp"].shift(1)
    forward["residual"] = (
        forward["underlying_price"] - forward["underlying_price_prev"]
    ) / (
        forward["underlying_price_prev"]
        * (
            np.sqrt((forward["timestamp"] - forward["timestamp_prev"])
            / 1e6
            / 3600
            / 24
            / 365)
        )
    )

    alpha_bar = (
        (forward["residual"] - forward["residual"].sum() / (forward.shape[0] + 1)).sum()
        ** 2
        / forward.shape[0]
#         / 100
    )
    return alpha_bar


In [369]:
get_alpha_bar(df)

0.000306475446204915

In [365]:
aa

Unnamed: 0,dt,timestamp,underlying_price,date
40710,2022-11-21 23:59:34.611,1669075174611000,1105.00,2022-11-21
104444,2022-11-22 23:59:59.921,1669161599921000,1137.05,2022-11-22
160179,2022-11-23 23:56:38.300,1669247798300000,1182.70,2022-11-23
208548,2022-11-24 23:59:56.274,1669334396274000,1202.35,2022-11-24
338832,2022-11-25 23:59:59.857,1669420799857000,1198.40,2022-11-25
...,...,...,...,...
7241580,2023-02-20 23:56:33.298,1676937393298000,1703.55,2023-02-20
7330680,2023-02-21 23:59:47.578,1677023987578000,1659.70,2023-02-21
7412472,2023-02-22 23:59:44.511,1677110384511000,1643.85,2023-02-22
7487150,2023-02-23 23:59:31.035,1677196771035000,1650.50,2023-02-23


In [344]:
forward = (df[["dt", "underlying_price"]]
        .drop_duplicates()
        .sort_values("dt"))
forward["date"] = pd.to_datetime(forward.dt).dt.date

In [353]:
forward.loc[forward.groupby("date").dt.idxmin()]

Unnamed: 0,dt,underlying_price,date
0,2022-11-21 13:08:24.511,1133.90,2022-11-21
40711,2022-11-22 00:03:54.502,1102.55,2022-11-22
104445,2022-11-23 00:00:01.672,1137.05,2022-11-23
160180,2022-11-24 00:00:48.257,1183.55,2022-11-24
208549,2022-11-25 00:05:32.435,1202.80,2022-11-25
...,...,...,...
7158285,2023-02-20 00:02:07.202,1681.55,2023-02-20
7241581,2023-02-21 00:00:09.526,1704.85,2023-02-21
7330681,2023-02-22 00:22:51.764,1661.75,2023-02-22
7412473,2023-02-23 00:03:22.651,1644.85,2023-02-23


In [330]:
forward

Unnamed: 0,timestamp,underlying_price
0,1669036104511000,1133.90
25,1669036104512000,1133.90
28,1669036104513000,1133.90
29,1669036104514000,1133.90
31,1669036104721000,1133.90
...,...,...
7498621,1677222826716000,1651.85
7498622,1677222827576000,1651.85
7498623,1677222831208000,1651.85
7498624,1677222835748000,1651.85


In [325]:
# get_alpha_bar(df)
# data["tau"] = (data.expiration - data.timestamp) / 1e6 / 3600 / 24 / 365

In [14]:
# get_implied_volatility(C = 0.0335*1397.9, K = 1400, )

# As rolling variance

In [451]:
alpha_bar = 0.1
daily = df[["dt", "timestamp", "underlying_price"]].drop_duplicates().sort_values("timestamp")
daily["date"] = pd.to_datetime(daily["dt"]).dt.date
daily = daily.loc[daily.groupby("date").dt.idxmax()]
daily["rolling_variance"] = (daily["underlying_price"].rolling(window=7).std())**2


daily["rolling_variance_next"] = daily["rolling_variance"].shift(-1)
daily["timestamp_next"] = daily["timestamp"].shift(-1)
daily["alpha"] = (daily["rolling_variance_next"] - daily["rolling_variance"])/np.sqrt(daily["rolling_variance"])
daily["k_coef"] = (alpha_bar*((daily["timestamp_next"] - daily["timestamp"])/ 1e6/ 3600/ 24/ 365) -
                 daily["rolling_variance"])/np.sqrt(daily["rolling_variance"])

daily["sigma_coef"] = np.sqrt((daily["timestamp_next"] - daily["timestamp"])/ 1e6/ 3600/ 24/ 365)# *np.random.normal(0.0, 1.0, size=len(daily))
daily = daily[~daily["alpha"].isna()]



In [452]:

lr = LinearRegression()
X = daily[["k_coef", "sigma_coef"]].values
y = daily["alpha"].values
lr.fit(X, y)

lr.coef_

array([ 2.35760789e-01, -3.17346330e+02])

In [453]:
res[["alpha", "k_coef", "sigma_coef"]].corr()

Unnamed: 0,alpha,k_coef,sigma_coef
alpha,1.0,0.230884,-0.105471
k_coef,0.230884,1.0,-0.12412
sigma_coef,-0.105471,-0.12412,1.0
