In [None]:
# Load the jupyter_black IPython extension for this kernel session.
%load_ext jupyter_black

In [None]:
import datetime
from pathlib import Path

import pandas as pd
import plotly.graph_objects as go
import pytz
from plotly.subplots import make_subplots

from utils import plot_line, plot_quantiles

In [None]:
# Trading pair under analysis; both symbols are lower-cased to build the CSV file names.
BASE, QUOTE = "JUP", "USDC"

# Directories. The trailing "/" is required because paths are built by plain
# string concatenation. Note that PROCESSED_DIR is relative to the current
# working directory ("./"), unlike the other two ("../").
DATA_DIR = "../data/"  # raw input CSV
PROCESSED_DIR = "./data/"  # derived "-analysis" CSV
LOG_DIR = "../logs/"

### Preprocessing

In [None]:
# Both file names share the lower-cased pair, e.g. "jup-usdc".
pair_slug = "-".join((BASE.lower(), QUOTE.lower()))
data_path = f"{DATA_DIR}{pair_slug}.csv"
processed_data_path = f"{PROCESSED_DIR}{pair_slug}-analysis.csv"

# Load the raw CSV for the pair.
df = pd.read_csv(data_path)

In [None]:
# Epoch milliseconds -> tz-aware US/Eastern timestamps.
# `utc=True` yields the same UTC-aware result as a separate tz_localize("UTC")
# on the first run, but also accepts an already tz-aware column, so this cell
# can be re-executed without raising "Already tz-aware" from tz_localize.
df["timestamp"] = pd.to_datetime(df["timestamp"], unit="ms", utc=True).dt.tz_convert(
    "US/Eastern"
)
df = df.sort_values(by="timestamp")

# Top-of-book derived columns.
df["midpoint"] = (df["BID1"] + df["ASK1"]) / 2
df["spread"] = df["ASK1"] - df["BID1"]
# Relative spread in basis points (fraction of midpoint * 10,000).
df["spread_bps"] = df["spread"] / df["midpoint"] * 100 * 100

df

In [None]:
# Persist the enriched frame. The output directory is created first because
# DataFrame.to_csv raises OSError when the parent directory does not exist
# (e.g. on a fresh checkout where the processed-data folder was never created).
Path(processed_data_path).parent.mkdir(parents=True, exist_ok=True)
df.to_csv(processed_data_path, index=False)

### Overview

In [None]:
# Gap between consecutive rows in milliseconds; the first row has no
# predecessor, so its NaT difference is dropped.
time_diffs = df["timestamp"].diff().dropna().dt.total_seconds().mul(1000)

# Gap between consecutive rows measured in the "slot" column.
slot_diffs = df["slot"].diff().dropna()

In [None]:
# Total time span covered by the data.
duration = df["timestamp"].max() - df["timestamp"].min()

# Derive hours/minutes from the full span. `duration.components.hours` is only
# the 0-23 remainder after whole days are removed, so a run longer than
# 24 hours would be under-reported (e.g. 1 day 2h would show as "2h").
total_minutes = int(duration.total_seconds() // 60)
hours, minutes = divmod(total_minutes, 60)

In [None]:
print(f"Tracked for {hours}h {minutes}m")

# Columns reused by several plots below.
timestamps = df["timestamp"]
spread_bps = df["spread_bps"]

# Distribution plots: update cadence (in time and in slots) and spread.
plot_quantiles(time_diffs, bins=50, name="Time between Books (ms)")
plot_quantiles(slot_diffs, bins=25, name="Slots between Books")
plot_quantiles(spread_bps, bins=25, name="Spread (bps)")

# Time series: spread, then best bid/ask.
plot_line(timestamps, [spread_bps], ["Spread (bps)"], "Time", "Spread (bps)")
plot_line(
    timestamps, [df["BID1"], df["ASK1"]], ["Bid", "Ask"], "Time", "Best Levels"
)