In [1]:
import pandas as pd
import datetime as dt
data = pd.read_csv(r"bridge-transactions.csv")
MAIN = ["Ethereum", "BNB Chain", "Solana", "Optimism", "Arbitrum", "Avalanche"]
LONG_TAIL = ["Base", "Polygon", "Linea"]
ALL_CHAINS = MAIN + LONG_TAIL
data.head()

Unnamed: 0,date_time,currency_symbol,source_chain_name,destination_chain_name,amount,entity_identifier,bridge
0,2024-07-05 07:30:59+00:00,BNB,Linea,Base,61.737951,0x328d8c40d596220446eae5b5bec134a1ff247c53ea45...,
1,2024-07-05 07:40:38+00:00,BNB,Solana,Arbitrum,43.339604,0x94ecbf840c2a053e2c3fdca3796a757e48e852f2a258...,
2,2024-07-05 07:41:41+00:00,BNB,Linea,BNB Chain,309.415405,0x0fa2eabf7160e81d09cba6943429d7b0a765bc74e526...,
3,2024-07-05 07:43:17+00:00,BNB,Solana,Optimism,43.284276,0x91b62472ebc5a36ce1081fae3ea7f96467d38730b00a...,
4,2024-07-05 07:43:35+00:00,BNB,Optimism,Arbitrum,247.734805,0xc23ab0e81427f1eab2be95f90debb904c63fd45f266f...,


In [2]:
data['source_chain_name'].unique()

array(['Linea', 'Solana', 'Optimism', 'Arbitrum', 'Base', 'Ethereum',
       'Polygon', 'BNB Chain', 'Avalanche'], dtype=object)

In [3]:
data['destination_chain_name'].unique()

array(['Base', 'Arbitrum', 'BNB Chain', 'Optimism', 'Polygon', 'Solana',
       'Ethereum', 'Linea'], dtype=object)

In [4]:
data['date_time'] = pd.to_datetime(data['date_time'])
date = data['date_time'].apply(lambda x: x.date)
print((date.min(), date.max()))
data['date'] = date
data = data.sort_values('date_time')

(datetime.date(2024, 7, 5), datetime.date(2024, 8, 2))


In [5]:
data_subset = data[data['date'] == pd.to_datetime('20240708')]
date_cut = list(data_subset.set_index('date_time').groupby(pd.Grouper(freq='30min'))['currency_symbol'].count().index)
date_cut.append(date_cut[-1] + dt.timedelta(minutes=30))
data_subset['bin'] = pd.cut(data_subset['date_time'], date_cut)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_subset['bin'] = pd.cut(data_subset['date_time'], date_cut)


In [52]:
from collections import defaultdict

import networkx as nx
import matplotlib.pyplot as plt
from functools import reduce

main_discount = 0.8e-4
long_tail_discount = 2e-4
slippage = {"Ethereum": 5e-6, "BNB Chain": 7.5e-6, "Solana": 1e-5, "Optimism": 1e-5, "Arbitrum": 2e-5,
            "Base": 5e-5, "Polygon": 5e-5, "Linea": 7.5e-5, "Avalanche": 1e-5,
           }

def sim_one_day(df, main_discount, long_tail_discount, slippage):
    accounting = defaultdict(float)
    trace = defaultdict(list)

    for currency_symbol in df['currency_symbol'].unique():
        prev_grouped = None
        cur = df[df['currency_symbol']==currency_symbol]
        trace = []
        iteration = 0
        for b in cur['bin'].unique().sort_values():
            epoch = cur[cur['bin']==b].set_index(['currency_symbol', 'source_chain_name', 'destination_chain_name'])
            grouped = epoch.groupby(level=[0,1,2]).sum().xs(currency_symbol)
            accounting[f"{currency_symbol}_protocol_fee"] += grouped.sum().iloc[0] * 1e-4

            # keep trace of all order
            curr_trace = grouped.copy(deep=True)
            curr_trace["epoch"] = iteration
            trace.append(curr_trace)

            # add orders from previous epoch
            if prev_grouped is not None:
                grouped = grouped.append(prev_grouped)
                grouped = grouped.groupby(level=[0,1]).sum()

            # detect matches
            while True:
                G=nx.DiGraph()
                G.add_nodes_from(list(grouped.index.get_level_values(0)) + list(grouped.index.get_level_values(1)))
                G.add_edges_from(list(grouped.index), weight=grouped)
                cycles = list(nx.simple_cycles(G))
                if not len(cycles) > 0:
                    break
                weighted_cycles = []
                for cycle in cycles:
                    min_weight = float("inf")
                    for i in range(len(cycle)-1):
                        weight = grouped.loc[pd.IndexSlice[cycle[i], cycle[i+1]]].values
                        min_weight = min(weight, min_weight)
                    weighted_cycles.append([min_weight * len(cycle)] + cycle)
                weighted_cycles = sorted(weighted_cycles, key=lambda x: -x[0])
                greedy = weighted_cycles[0]
                for i in range(1, len(greedy)-1):
                    grouped.loc[pd.IndexSlice[greedy[i], greedy[i+1]]] -= greedy[0] / (len(greedy)-1)
                    accounting[f"{currency_symbol}_matched"] += greedy[0][0]
                    if grouped.loc[pd.IndexSlice[greedy[i], greedy[i+1]]].values < 0 - 1e-4:
                        raise ValueError("Cannot have negative orders!")
                    elif grouped.loc[pd.IndexSlice[greedy[i], greedy[i+1]]].values < 0 + 1e-4:
                        grouped.drop([pd.IndexSlice[greedy[i], greedy[i+1]]], inplace=True)

            # accounting
            trace_sum = pd.concat(trace).drop("epoch", axis=1).groupby(level=[0,1]).sum()
            diff = trace_sum - grouped.reindex(trace_sum.index).fillna(0)
            diff = diff[diff>0].dropna()
            for idx, row in diff.iterrows():
                amount = row.iloc[0]
                while True:
                    try:
                        for i in range(len(trace)):
                            if idx in trace[i].index:
                                delta = min(trace[i].loc[idx, "amount"], amount) 
                                trace[i].loc[idx, "amount"] -= delta
                                amount -= delta
                                if trace[i].loc[idx, "amount"] < 0 + 1e-4:
                                    trace[i].drop(idx, inplace=True)
                                # trick to break the while loop
                                if amount < 0 + 1e-4:
                                    raise Exception
                    except:
                        break
            trace_sum = pd.concat(trace).drop("epoch", axis=1).groupby(level=[0,1]).sum()
            diff = trace_sum - grouped.reindex(trace_sum.index).fillna(0)
            if diff.sum().iloc[0] > 1e-4:
                raise ValueError("Wrong accounting")
            trace = [t for t in trace if len(t) > 0]

            # discount this epoch
            undiscounted = grouped.copy(deep=True)
            discounted = grouped.copy(deep=True)        
            discounted.loc[pd.IndexSlice[:, MAIN], :] *= (1-main_discount)
            discounted.loc[pd.IndexSlice[:, LONG_TAIL], :] *= (1-long_tail_discount)
            discount_loss = undiscounted - discounted
            accounting[f"{currency_symbol}_discount_loss"] += discount_loss.sum().iloc[0]


            # clear orders filled by discounts
            for i in range(len(trace)):
                curr = trace[i]
                if len(curr) > 0:
                    for idx, row in curr.iterrows():
                        if row.iloc[0] > 1e-4:
                            fee = slippage[idx[0]] + slippage[idx[1]]
                            if idx[1] in MAIN:
                                discount = main_discount * (iteration - trace[i]["epoch"].iloc[0])
                            if idx[1] in LONG_TAIL:
                                discount = long_tail_discount * (iteration - trace[i]["epoch"].iloc[0])
                            
                            grouped.loc[idx] -= min(grouped.loc[idx].iloc[0], discount / fee) 

            # drop cleared orders
            grouped = grouped[grouped > 0+1e-4].dropna()
            prev_grouped = grouped
            iteration += 1
        accounting[f"{currency_symbol}_eod_remain"] = grouped.sum().iloc[0]
    return accounting


In [29]:
sim_one_day(data_subset,main_discount,long_tail_discount,slippage)

defaultdict(float,
            {'BNB_protocol_fee': 0.3286162282536421,
             'BNB_discount_loss': 0.762453483209498,
             'BNB_eod_remain': 143.9011226843943,
             'WETH_protocol_fee': 1.2194748334315078,
             'WETH_discount_loss': 2.079309638713685,
             'WETH_eod_remain': 6471.966857625832})

In [30]:
# consolidated_results = {}
# for date in data.date.unique():
#     print(date)
#     data_subset = data[data['date'] == date]
#     date_cut = list(data_subset.set_index('date_time').groupby(pd.Grouper(freq='30min'))['currency_symbol'].count().index)
#     date_cut.append(date_cut[-1] + dt.timedelta(minutes=30))
#     data_subset['bin'] = pd.cut(data_subset['date_time'], date_cut)
#     accounting = sim_one_day(data_subset,main_discount,long_tail_discount,slippage)
#     consolidated_results[date] = accounting

In [31]:
# pd.DataFrame(consolidated_results).T.to_clipboard()

In [53]:
# continuous sim
data_with_bin = []
for date in data.date.unique():
#     print(date)
    data_subset = data[data['date'] == date]
    date_cut = list(data_subset.set_index('date_time').groupby(pd.Grouper(freq='30min'))['currency_symbol'].count().index)
    date_cut.append(date_cut[-1] + dt.timedelta(minutes=30))
    data_subset['bin'] = pd.cut(data_subset['date_time'], date_cut)
    data_with_bin.append(data_subset)

data_with_bin = pd.concat(data_with_bin)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_subset['bin'] = pd.cut(data_subset['date_time'], date_cut)


In [54]:
data_with_bin['bin'].unique()

array([Interval('2024-07-05 07:30:00', '2024-07-05 08:00:00', closed='right'),
       Interval('2024-07-05 08:00:00', '2024-07-05 08:30:00', closed='right'),
       Interval('2024-07-05 08:30:00', '2024-07-05 09:00:00', closed='right'),
       ...,
       Interval('2024-08-02 01:00:00', '2024-08-02 01:30:00', closed='right'),
       Interval('2024-08-02 01:30:00', '2024-08-02 02:00:00', closed='right'),
       Interval('2024-08-02 02:00:00', '2024-08-02 02:30:00', closed='right')],
      dtype=object)

In [55]:
data_with_bin.to_clipboard()

In [56]:
def continous_sim(df, main_discount, long_tail_discount, slippage):
    accounting = defaultdict(lambda: defaultdict(float))
    trace = defaultdict(list)

    for currency_symbol in df['currency_symbol'].unique():
        prev_grouped = None
        cur = df[df['currency_symbol']==currency_symbol]
        trace = []
        iteration = 0
        for i, b in enumerate(sorted(cur['bin'].unique())):
            if i % 100 == 0:
                print(f"{i} iterations passed")
            epoch = cur[cur['bin']==b].set_index(['currency_symbol', 'source_chain_name', 'destination_chain_name'])
            date = epoch["date"].unique()[0]

            grouped = epoch.groupby(level=[0,1,2]).sum().xs(currency_symbol)
            accounting[date][f"{currency_symbol}_protocol_fee"] += grouped.sum().iloc[0] * 1e-4

            # keep trace of all order
            curr_trace = grouped.copy(deep=True)
            curr_trace["epoch"] = iteration
            trace.append(curr_trace)

            # add orders from previous epoch
            if prev_grouped is not None:
                grouped = grouped.append(prev_grouped)
                grouped = grouped.groupby(level=[0,1]).sum()

            # detect matches
            while True:
                G=nx.DiGraph()
                G.add_nodes_from(list(grouped.index.get_level_values(0)) + list(grouped.index.get_level_values(1)))
                G.add_edges_from(list(grouped.index), weight=grouped)
                cycles = list(nx.simple_cycles(G))
                if not len(cycles) > 0:
                    break
                weighted_cycles = []
                for cycle in cycles:
                    min_weight = float("inf")
                    for i in range(len(cycle)-1):
                        weight = grouped.loc[pd.IndexSlice[cycle[i], cycle[i+1]]].values
                        min_weight = min(weight, min_weight)
                    weighted_cycles.append([min_weight * len(cycle)] + cycle)
                weighted_cycles = sorted(weighted_cycles, key=lambda x: -x[0])
                greedy = weighted_cycles[0]
                for i in range(1, len(greedy)-1):
                    grouped.loc[pd.IndexSlice[greedy[i], greedy[i+1]]] -= greedy[0] / (len(greedy)-1)
                    accounting[date][f"{currency_symbol}_matched"] += greedy[0][0]
                    if grouped.loc[pd.IndexSlice[greedy[i], greedy[i+1]]].values < 0 - 1e-4:
                        raise ValueError("Cannot have negative orders!")
                    elif grouped.loc[pd.IndexSlice[greedy[i], greedy[i+1]]].values < 0 + 1e-4:
                        grouped.drop([pd.IndexSlice[greedy[i], greedy[i+1]]], inplace=True)

            # accounting
            trace_sum = pd.concat(trace).drop("epoch", axis=1).groupby(level=[0,1]).sum()
            diff = trace_sum - grouped.reindex(trace_sum.index).fillna(0)
            diff = diff[diff>0].dropna()
            for idx, row in diff.iterrows():
                amount = row.iloc[0]
                while True:
                    try:
                        for i in range(len(trace)):
                            if idx in trace[i].index:
                                delta = min(trace[i].loc[idx, "amount"], amount) 
                                trace[i].loc[idx, "amount"] -= delta
                                amount -= delta
                                if trace[i].loc[idx, "amount"] < 0 + 1e-4:
                                    trace[i].drop(idx, inplace=True)
                                # trick to break the while loop
                                if amount < 0 + 1e-4:
                                    raise Exception
                    except:
                        break
            trace_sum = pd.concat(trace).drop("epoch", axis=1).groupby(level=[0,1]).sum()
            diff = trace_sum - grouped.reindex(trace_sum.index).fillna(0)
            if diff.sum().iloc[0] > 1e-4:
                raise ValueError("Wrong accounting")
            trace = [t for t in trace if len(t) > 0]
            # discount this epoch
            undiscounted = grouped.copy(deep=True)
            discounted = grouped.copy(deep=True)        
            discounted.loc[pd.IndexSlice[:, MAIN], :] *= (1-main_discount)
            discounted.loc[pd.IndexSlice[:, LONG_TAIL], :] *= (1-long_tail_discount)
            discount_loss = undiscounted - discounted
            accounting[date][f"{currency_symbol}_discount_loss"] += discount_loss.sum().iloc[0]


            # clear orders filled by discounts
            for i in range(len(trace)):
                curr = trace[i]
                if len(curr) > 0:
                    for idx, row in curr.iterrows():
                        if row.iloc[0] > 1e-4:
                            fee = slippage[idx[0]] + slippage[idx[1]]
                            if idx[1] in MAIN:
                                discount = main_discount * (iteration - trace[i]["epoch"].iloc[0])
                            if idx[1] in LONG_TAIL:
                                discount = long_tail_discount * (iteration - trace[i]["epoch"].iloc[0])
                          
                            grouped.loc[idx] -= min(grouped.loc[idx].iloc[0], discount / fee)        

            # drop cleared orders
            grouped = grouped[grouped > 0+1e-4].dropna()
            prev_grouped = grouped
            iteration += 1
        accounting[date][f"{currency_symbol}_eod_remain"] = grouped.sum().iloc[0]
    return accounting


In [57]:
consolidated_results = continous_sim(data_with_bin, main_discount, long_tail_discount, slippage)

0 iterations passed
100 iterations passed
200 iterations passed
300 iterations passed
400 iterations passed
500 iterations passed
600 iterations passed
700 iterations passed
800 iterations passed
900 iterations passed
0 iterations passed
100 iterations passed
200 iterations passed
300 iterations passed
400 iterations passed
500 iterations passed
0 iterations passed


In [58]:
pd.DataFrame(consolidated_results).T.to_clipboard()

In [59]:
main_discount_params = [1e-4, 2.5e-4, 5e-4]
long_tail_discount_params = [5e-4, 10e-4]
all_res = defaultdict(lambda: defaultdict())
for md in main_discount_params:
    for ld in long_tail_discount_params:
        accounting = continous_sim(data_with_bin, md, ld, slippage)
        all_res[md][ld] = accounting

0 iterations passed
100 iterations passed
200 iterations passed
300 iterations passed
400 iterations passed
500 iterations passed
600 iterations passed
700 iterations passed
800 iterations passed
900 iterations passed
0 iterations passed
100 iterations passed
200 iterations passed
300 iterations passed
400 iterations passed
500 iterations passed
0 iterations passed
0 iterations passed
100 iterations passed
200 iterations passed
300 iterations passed
400 iterations passed
500 iterations passed
600 iterations passed
700 iterations passed
800 iterations passed
900 iterations passed
0 iterations passed
100 iterations passed
200 iterations passed
300 iterations passed
400 iterations passed
500 iterations passed
0 iterations passed
0 iterations passed
100 iterations passed
200 iterations passed
300 iterations passed
400 iterations passed
500 iterations passed
600 iterations passed
700 iterations passed
800 iterations passed
900 iterations passed
0 iterations passed
100 iterations passed
200 

In [60]:
one_m_five_l = pd.DataFrame(all_res[1e-4][5e-4]).T
one_m_ten_l = pd.DataFrame(all_res[1e-4][10e-4]).T
twopfive_m_five_l = pd.DataFrame(all_res[2.5e-4][5e-4]).T
twopfive_m_ten_l = pd.DataFrame(all_res[2.5e-4][10e-4]).T
five_m_five_l = pd.DataFrame(all_res[5e-4][5e-4]).T
five_m_ten_l = pd.DataFrame(all_res[5e-4][10e-4]).T

In [61]:
loss1 = one_m_five_l[['BNB_discount_loss', 'WETH_discount_loss', 'WBTC_discount_loss']].sum()
loss2 = one_m_ten_l[['BNB_discount_loss', 'WETH_discount_loss', 'WBTC_discount_loss']].sum()
loss3 = twopfive_m_five_l[['BNB_discount_loss', 'WETH_discount_loss', 'WBTC_discount_loss']].sum()
loss4 = twopfive_m_ten_l[['BNB_discount_loss', 'WETH_discount_loss', 'WBTC_discount_loss']].sum()
loss5 = five_m_five_l[['BNB_discount_loss', 'WETH_discount_loss', 'WBTC_discount_loss']].sum()
loss6 = five_m_ten_l[['BNB_discount_loss', 'WETH_discount_loss', 'WBTC_discount_loss']].sum()

loss0 = pd.DataFrame(consolidated_results).T[['BNB_discount_loss', 'WETH_discount_loss', 'WBTC_discount_loss']].sum()


In [62]:
pd.concat([loss0, loss1, loss2, loss3, loss4, loss5, loss6],axis=1).to_clipboard()

In [63]:
loss1 = one_m_five_l[['BNB_eod_remain', 'WETH_eod_remain', 'WBTC_eod_remain']].sum()
loss2 = one_m_ten_l[['BNB_eod_remain', 'WETH_eod_remain', 'WBTC_eod_remain']].sum()
loss3 = twopfive_m_five_l[['BNB_eod_remain', 'WETH_eod_remain', 'WBTC_eod_remain']].sum()
loss4 = twopfive_m_ten_l[['BNB_eod_remain', 'WETH_eod_remain', 'WBTC_eod_remain']].sum()
loss5 = five_m_five_l[['BNB_eod_remain', 'WETH_eod_remain', 'WBTC_eod_remain']].sum()
loss6 = five_m_ten_l[['BNB_eod_remain', 'WETH_eod_remain', 'WBTC_eod_remain']].sum()

loss0 = pd.DataFrame(consolidated_results).T[['BNB_eod_remain', 'WETH_eod_remain', 'WBTC_eod_remain']].sum()


In [64]:
pd.concat([loss0, loss1, loss2, loss3, loss4, loss5, loss6],axis=1).to_clipboard()