In [None]:
%load_ext jupyter_black

In [None]:
import datetime
import os

import pandas as pd
import plotly.graph_objects as go
import pytz
from plotly.subplots import make_subplots

In [None]:
# Directory prefix for the raw per-pair trades CSV that is read in.
DATA_DIR = "../trades/"
# Log directory prefix; not referenced by the cells visible in this notebook.
LOG_DIR = "../logs/"
# Directory prefix for the converted "-analysis" CSV that is written out.
PROCESSED_DIR = "./trades/"

# Trading pair; both symbols are lower-cased to build the CSV file names.
BASE = "JUP"
QUOTE = "USDC"

# Divisors applied to the raw "price" and "size" columns during preprocessing.
BASE_PRICE_MULT = 10000
BASE_SIZE_MULT = 100

### Utils

In [None]:
def plot_line(x_series, y_series_list, y_series_names, x_title, y_title, height=300):
    """Draw one line trace per y-series over a shared x-axis and show the figure.

    Parameters
    ----------
    x_series
        X values used for every trace.
    y_series_list
        Sequence of y-value series, one per trace.
    y_series_names
        Legend names, looked up by the same position as ``y_series_list``.
    x_title, y_title
        Axis titles.
    height
        Figure height forwarded to the layout (default 300).

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    hover_format = "%{x|%Y-%m-%d %H:%M:%S.%f}<extra>%{y}</extra>"
    # Same visual style for both axes; copied per axis below.
    axis_style = {
        "showline": True,
        "showgrid": False,
        "linecolor": "rgb(204, 204, 204)",
    }

    fig = go.Figure()
    for position, y_values in enumerate(y_series_list):
        trace = go.Scatter(
            x=x_series,
            y=y_values,
            mode="lines",
            name=y_series_names[position],
            hovertemplate=hover_format,
        )
        fig.add_trace(trace)

    layout_options = {
        "title": "",
        "xaxis_title": x_title,
        "yaxis_title": y_title,
        "xaxis": dict(axis_style),
        "yaxis": dict(axis_style),
        "plot_bgcolor": "white",
        "height": height,
        "margin": {"l": 10, "r": 20, "t": 20, "b": 20},
    }
    fig.update_layout(**layout_options)

    fig.show()

### Preprocessing

In [None]:
# Input and output file names share the lower-cased "<base>-<quote>" stem.
pair_slug = f"{BASE.lower()}-{QUOTE.lower()}"
data_path = f"{DATA_DIR}{pair_slug}.csv"
processed_data_path = f"{PROCESSED_DIR}{pair_slug}-analysis.csv"
print(data_path)

# Raw trades for the configured pair.
df = pd.read_csv(data_path)

In [None]:
# Epoch seconds -> naive datetimes -> localised as UTC -> shown in US/Eastern.
naive_timestamps = pd.to_datetime(df["timestamp"], unit="s")
utc_timestamps = naive_timestamps.dt.tz_localize("UTC")
df["timestamp"] = utc_timestamps.dt.tz_convert("US/Eastern")

# Scale the raw price and size columns down by their configured multipliers.
df["price"] = df["price"].div(BASE_PRICE_MULT)
df["size"] = df["size"].div(BASE_SIZE_MULT)


df

In [None]:
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists first (otherwise a fresh checkout fails here).
os.makedirs(os.path.dirname(processed_data_path) or ".", exist_ok=True)
df.to_csv(processed_data_path, index=False)

### Overview

In [None]:
def calculate_pnl(df):
    """Compute the running mark-to-market PnL after each trade.

    Every row is treated as one fill. A ``"buy"`` spends ``size * price`` of
    cash and adds ``size`` to inventory; any other ``side`` value does the
    opposite. After each fill the open inventory is valued at that fill's own
    price, so the PnL is ``cumulative cash flow + inventory * price``.

    Parameters
    ----------
    df : pandas.DataFrame
        Trades in execution order with ``timestamp``, ``side``, ``size`` and
        ``price`` columns.

    Returns
    -------
    pandas.Series
        One PnL value per trade, in row order, indexed by ``timestamp``.
        Entries whose PnL is NaN are dropped.
    """
    # Positive for buys, negative for every other side (treated as a sell).
    signed_size = df["size"].where(df["side"] == "buy", -df["size"])

    # skipna=False keeps running-total semantics: a missing size or price
    # makes every later value NaN instead of being silently skipped.
    inventory = signed_size.cumsum(skipna=False)
    cash_flow = (-signed_size * df["price"]).cumsum(skipna=False)

    pnl = cash_flow + inventory * df["price"]

    # Attach the timestamps positionally rather than writing by label: several
    # trades can share one timestamp, and a label-based write would overwrite
    # all of them with the last trade's value.
    pnl_series = pd.Series(pnl.to_numpy(), index=pd.Index(df["timestamp"]))
    return pnl_series.dropna()

In [None]:
# Signed trade size: buys add to inventory, every other side reduces it.
# Series.where is used instead of a row-wise df.apply(axis=1), which runs a
# Python call per row and can return a DataFrame (not a Series) on an empty frame.
df["inventory_change"] = df["size"].where(df["side"] == "buy", -df["size"])
df["cumulative_inventory"] = df["inventory_change"].cumsum()

pnl_series = calculate_pnl(df)
inventory_series = df.set_index("timestamp")["cumulative_inventory"]

# All-zero reference lines sharing each series' index, drawn as baselines.
flat_pnl_series = pd.Series(0, index=pnl_series.index)
flat_inventory_series = pd.Series(0, index=inventory_series.index)

In [None]:
trade_times = df["timestamp"]

# PnL over time against a zero reference line.
plot_line(
    x_series=trade_times,
    y_series_list=[pnl_series, flat_pnl_series],
    y_series_names=["PnL", "Break Even"],
    x_title="Time",
    y_title="PnL",
)

# Cumulative inventory over time against a zero reference line.
plot_line(
    x_series=trade_times,
    y_series_list=[inventory_series, flat_inventory_series],
    y_series_names=["Inventory", "Flat"],
    x_title="Time",
    y_title="Inventory",
)