In [32]:
import pandas as pd
import numpy as np
import os
import datetime
import pprint

import math
from sympy import symbols, Eq, nsolve, sqrt

import pyarrow.parquet as pq

from ipynb.fs.full.Python_Implementation_of_UniSwap_V3 import Get_Price, Get_Tick, Get_Current_Spot_Price, Get_Total_Liquidity, Provide_Liquidity, Withdraw_Liquidity, Swap_Token0_to_Token1, Swap_Token1_to_Token0

# Directory holding the downloaded data sets. Set the ETHEREUM_DEX_DATA_DIR
# environment variable to run the notebook on another machine; the original
# location is kept as the fallback so existing setups keep working.
file_dir = os.environ.get('ETHEREUM_DEX_DATA_DIR', '/Users/zhicong/Desktop/KCL/Ethereum_DEX_Data')

# Pool under study: the name selects both the LP-actions CSV and the swap rows
pool_name = 'USDC-WETH 500 10'
# Fee rate of the pool (5 basis points), passed to the swap functions
fees = 0.0005

# Liquidity Pool

We will build a history of the liquidity pool size from the liquidity provision data and the swaps data.

## Liquidity Provision

### Load Data

These are the cases where liquidity providers mint or burn Uniswap tokens to provide liquidity to, or take liquidity out of, the pool.

Since our swap data was downloaded before the LP actions data, we want to take only the LP actions data that are in-line with the swap data.

In [33]:
# Load the LP actions (liquidity additions / withdrawals) recorded for this pool
lp_action = pd.read_csv(os.path.join(file_dir, "uniswapv3_lp_actions", pool_name + ".csv"))
lp_action.columns = [x.upper() for x in lp_action.columns.tolist()]

# Final cleaning
lp_action['BLOCK_NUMBER'] = lp_action['BLOCK_NUMBER'].astype(int)
# The sort must be stable: events sharing a block have to keep their relative
# order from the file, otherwise the reversal at the end of this cell cannot
# give a reliable within-block order (the default quicksort is not stable).
lp_action.sort_values(by = ['BLOCK_NUMBER'], ascending = False, kind = 'mergesort', inplace = True)
lp_action['AMOUNT0_ADJUSTED'] = lp_action['AMOUNT0_ADJUSTED'].astype(float)
lp_action['AMOUNT1_ADJUSTED'] = lp_action['AMOUNT1_ADJUSTED'].astype(float)
# The last five characters of the raw timestamp string are dropped before parsing
lp_action['BLOCK_TIMESTAMP'] = pd.to_datetime(lp_action['BLOCK_TIMESTAMP'].astype(str).str[:-5])

# Amounts are kept positive for both additions and withdrawals; EVENT_NAME is
# what tells the two apart.
lp_action['ACTION'] = lp_action['ACTION'].replace({'INCREASE_LIQUIDITY':'Increase Liquidity',
                                                   'DECREASE_LIQUIDITY':'Decrease Liquidity'})
lp_action.rename(columns = {'LIQUIDITY_PROVIDER':'SENDER', 'ACTION':'EVENT_NAME'}, inplace = True)

# Get lower and upper price from tick.
# The crossing is intentional: in this pool the larger tick gives the smaller
# price (e.g. ticks 191150 / 198080 give prices 4998.9 / 2499.9), so the lower
# price bound comes from TICK_UPPER and the upper price bound from TICK_LOWER.
lp_action['PRICE_LOWER'] = lp_action['TICK_UPPER'].apply(Get_Price)
lp_action['PRICE_UPPER'] = lp_action['TICK_LOWER'].apply(Get_Price)

# Reverse to make sure LPs are sorted in the same order as they are executed
lp_action = lp_action.iloc[::-1]
lp_action.reset_index(drop = True, inplace = True)

lp_action.head()

Unnamed: 0,POOL_NAME,EVENT_NAME,BLOCK_NUMBER,BLOCK_TIMESTAMP,SENDER,AMOUNT0_ADJUSTED,TOKEN0_SYMBOL,AMOUNT1_ADJUSTED,TOKEN1_SYMBOL,TICK_LOWER,TICK_UPPER,PRICE_LOWER,PRICE_UPPER
0,USDC-WETH 500 10,Increase Liquidity,12376729,2021-05-05 21:42:11,0xb2ef52180d1e5f4835f4e343251286fa84743456,2995.507735,USDC,1.0,WETH,191150,198080,2499.9135991248622,4998.918171064905
1,USDC-WETH 500 10,Decrease Liquidity,12376958,2021-05-05 22:27:41,0xb2ef52180d1e5f4835f4e343251286fa84743456,2588.187276,USDC,0.931484,WETH,191150,198080,2499.9135991248622,4998.918171064905
2,USDC-WETH 500 10,Decrease Liquidity,12376958,2021-05-05 22:27:41,0xb2ef52180d1e5f4835f4e343251286fa84743456,0.0,USDC,0.0,WETH,191150,198080,2499.9135991248622,4998.918171064905
3,USDC-WETH 500 10,Increase Liquidity,12377035,2021-05-05 22:44:11,0x35f5a1ec10deee1256351e37bc78dc80345895f8,1.0,USDC,0.000146,WETH,194990,195010,3398.182812901327,3404.9856389500487
4,USDC-WETH 500 10,Increase Liquidity,12377266,2021-05-05 23:34:39,0x9f9b987becfe15aca93ce9208cca146f252c8d46,598.469729,USDC,2.6,WETH,186730,195460,3248.661520935142,7777.22370859911


### Imbalance asset liquidity provision

In single-asset liquidity provision, a swap is incurred internally to make sure that the ratio is fitted for the price range the liquidity provider provides in. It is important to note that if the current swap price is below the lower bound $p_{a}$, we can deposit only WETH, and if the current swap price is above the upper bound $p_{b}$, we can deposit only USDC.


#### Example

A liquidity provider provides 1000 USDC and 0 WETH in this pool within ticks 201710 and 201720, and the current swap price of WETH to USDC is 1738.

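We can do the math to get the price range of the liquidity provision. Note that a higher tick gives a lower USDC-per-WETH price here, so tick 201720 yields the lower bound $p_a$ and tick 201710 yields the upper bound $p_b$:

$$p_a = \frac{1}{1.0001^{201720} \times 10^{-12}} = 1737.1995622867098$$

$$p_b = \frac{1}{1.0001^{201710} \times 10^{-12}} = 1738.9375437973$$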

In [34]:
# Convert the two ticks of the example position into prices and show them;
# the larger tick produces the lower price.
for label, tick in (("Lower price:", 201720), ("Upper price:", 201710)):
    print(label, 1/(1.0001**tick*10**-12))

Lower price: 1737.1995622867098
Upper price: 1738.9375437973


We can see that the current price is within the range of our liquidity provision. First, we calculate the liquidity parameter of this position:

$$USDC = L \sqrt{p} - L \sqrt{p_{a}} $$

$$1000 = L \sqrt{1738} - L \sqrt{1737.1995622867098}$$

$$L = \frac{1000}{\sqrt{1738} - \sqrt{1737.1995622867098}}$$

Then, we use the liquidity parameter to compute the amount of WETH needed for this provision:

$$WETH = \frac{L}{\sqrt{p}} - \frac{L}{\sqrt{p_{b}}} = \frac{104154.32933919976}{\sqrt{1738}} - \frac{104154.32933919976}{\sqrt{1738.9375437973}}$$

In [35]:
# Worked example: 1000 USDC deposited at a spot price of 1738 inside the
# price range [1737.1995622867098, 1738.9375437973].
sqrt_spot = np.sqrt(1738)

# L follows from the USDC leg: USDC = L*sqrt(p) - L*sqrt(p_a)
liquidity_parameter = 1000/(sqrt_spot - np.sqrt(1737.1995622867098))
print("Liquidity parameter:", liquidity_parameter)

# WETH leg for the same L: WETH = L/sqrt(p) - L/sqrt(p_b)
weth_amount = liquidity_parameter/sqrt_spot - liquidity_parameter/np.sqrt(1738.9375437973)
print("Amount of WETH to deposit:", weth_amount)

Liquidity parameter: 104154.32933919976
Amount of WETH to deposit: 0.6735790561046997


Therefore, for this price range, the liquidity provider needs to provide 0.6735790561046997 WETH in addition to the 1000 USDC.

## Swaps data

For each swap, we subtract the amount of token out and add the amount of token in. In our swap data, the column "AMOUNT_IN" indicates the amount of tokens put into the swap and the column "AMOUNT_OUT" indicates the amount of tokens received from the swap.

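Transaction fees in Uniswap V3 are taken from the incoming token and stored elsewhere, therefore the final amount of token entering the pool is (1-transaction_fee)*"AMOUNT_IN" while the final amount of token leaving the pool is simply "AMOUNT_OUT". Note that the cell below leaves this adjustment commented out, so "AMOUNT_IN" is kept as recorded; the fee rate is instead passed to the swap functions when the events are replayed.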

In [36]:
# Columns expected in the swaps file; only used to build an empty frame when
# the file holds no swap for this pool.
swap_columns = ['BLOCK_NUMBER','BLOCK_TIMESTAMP','TX_HASH','SENDER','TX_TO','PLATFORM','POOL_NAME','CONTRACT_ADDRESS',
                'EVENT_NAME','AMOUNT_IN','SYMBOL_IN','AMOUNT_OUT','SYMBOL_OUT']

parquet_file = pq.ParquetFile(os.path.join(file_dir, "dex_swaps", "DEXs_swaps_full.gzip"))

# Read by batch to be able to fit dataframe into memory. The filtered batches
# are collected and concatenated once: concatenating inside the loop copies the
# accumulated frame on every batch and, starting from an empty object-typed
# frame, degrades the column dtypes.
pool_batches = []
for batch in parquet_file.iter_batches(batch_size = 1000000):

    # Filter out only the pool we want
    batch_df = batch.to_pandas()
    pool_rows = batch_df[batch_df['POOL_NAME'] == pool_name]
    if not pool_rows.empty:
        pool_batches.append(pool_rows)
    del batch_df, pool_rows

if pool_batches:
    swaps = pd.concat(pool_batches, axis = 0)
else:
    swaps = pd.DataFrame(columns = swap_columns)
del pool_batches

# Final cleaning
swaps.reset_index(drop = True, inplace = True)
swaps['AMOUNT_IN'] = swaps['AMOUNT_IN'].astype(float)
swaps['AMOUNT_OUT'] = swaps['AMOUNT_OUT'].astype(float)
swaps['BLOCK_TIMESTAMP'] = pd.to_datetime(swaps['BLOCK_TIMESTAMP'])

# The pool charges a fee of 5 basis points on the incoming token and stores it
# outside the liquidity. AMOUNT_IN is deliberately NOT reduced by the fee here.

# Changing signs of addition and extraction: tokens leaving the pool are
# negative, tokens entering it are positive.
swaps['AMOUNT_OUT'] = -swaps['AMOUNT_OUT']
# Token0 is USDC: it is the outgoing leg when WETH comes in, the incoming leg otherwise
swaps['AMOUNT0_ADJUSTED'] = np.where(swaps['SYMBOL_IN'] == 'WETH', swaps['AMOUNT_OUT'], swaps['AMOUNT_IN'])
swaps['TOKEN0_SYMBOL'] = 'USDC'
# Token1 is WETH: the mirror image of the rule above
swaps['AMOUNT1_ADJUSTED'] = np.where(swaps['SYMBOL_IN'] == 'WETH', swaps['AMOUNT_IN'], swaps['AMOUNT_OUT'])
swaps['TOKEN1_SYMBOL'] = 'WETH'

swaps = swaps[['POOL_NAME','BLOCK_NUMBER','BLOCK_TIMESTAMP','SENDER','EVENT_NAME','AMOUNT0_ADJUSTED','TOKEN0_SYMBOL','AMOUNT1_ADJUSTED','TOKEN1_SYMBOL']]

# Reverse to make sure swaps are sorted in the same order as they are executed
swaps = swaps.iloc[::-1]
swaps.reset_index(drop = True, inplace = True)

swaps.head()

Unnamed: 0,POOL_NAME,BLOCK_NUMBER,BLOCK_TIMESTAMP,SENDER,EVENT_NAME,AMOUNT0_ADJUSTED,TOKEN0_SYMBOL,AMOUNT1_ADJUSTED,TOKEN1_SYMBOL
0,USDC-WETH 500 10,12376891,2021-05-05 22:15:01,0xe592427a0aece92de3edee1f18e0157c05861564,Swap,-119.744094,USDC,0.035,WETH
1,USDC-WETH 500 10,12377278,2021-05-05 23:37:55,0xe592427a0aece92de3edee1f18e0157c05861564,Swap,499.756414,USDC,-0.144241,WETH
2,USDC-WETH 500 10,12377345,2021-05-05 23:51:36,0xe592427a0aece92de3edee1f18e0157c05861564,Swap,365.0,USDC,-0.103492,WETH
3,USDC-WETH 500 10,12377364,2021-05-05 23:56:34,0xe592427a0aece92de3edee1f18e0157c05861564,Swap,-176.180828,USDC,0.05,WETH
4,USDC-WETH 500 10,12377369,2021-05-05 23:57:36,0xe592427a0aece92de3edee1f18e0157c05861564,Swap,-514.279985,USDC,0.146,WETH


### Merging

Now, we want to create a series of events that will cause changes in the liquidity pool size, sorted by block number. This will allow us to build a history of liquidity pool size by blocks.

Important note: swaps within a block are listed in their order of execution.

In [22]:
# Removing new LPs: keep only the LP actions up to the last block covered by the swap data
lp_action = lp_action[lp_action['BLOCK_NUMBER'] <= swaps['BLOCK_NUMBER'].max()]
lp_action.reset_index(drop = True, inplace = True)

# Swaps are placed first, so after the stable sort below the swaps of a block
# precede its LP actions and each group keeps its own execution order.
data = pd.concat([swaps, lp_action], axis = 0)
# Free the two large source frames. NOTE: this cell can therefore only be
# re-run after the two loading cells have been re-run.
del swaps, lp_action

# The sort must be stable; the default quicksort may shuffle events that share
# a block number and break the within-block order of execution.
data.sort_values(by = 'BLOCK_NUMBER', ascending = True, kind = 'mergesort', inplace = True)
data.rename(columns = {'AMOUNT0_ADJUSTED':'Delta USDC', 'AMOUNT1_ADJUSTED':'Delta WETH'}, inplace = True)
data.reset_index(drop = True, inplace = True)

# Drop events that move no tokens. The index is intentionally not reset again:
# later cells address events by these labels.
data = data[~((data['Delta WETH'] == 0) & (data['Delta USDC'] == 0))]

data = data[['POOL_NAME','BLOCK_NUMBER','BLOCK_TIMESTAMP','SENDER','EVENT_NAME','Delta WETH','Delta USDC','TICK_LOWER','TICK_UPPER','PRICE_LOWER','PRICE_UPPER']]

data.head()

Unnamed: 0,POOL_NAME,BLOCK_NUMBER,BLOCK_TIMESTAMP,SENDER,EVENT_NAME,Delta WETH,Delta USDC,TICK_LOWER,TICK_UPPER,PRICE_LOWER,PRICE_UPPER
0,USDC-WETH 500 10,12376729,2021-05-05 21:42:11,0xb2ef52180d1e5f4835f4e343251286fa84743456,Increase Liquidity,1.0,2995.507735,191150.0,198080.0,2499.9135991248622,4998.918171064905
1,USDC-WETH 500 10,12376891,2021-05-05 22:15:01,0xe592427a0aece92de3edee1f18e0157c05861564,Swap,0.035,-119.744094,,,,
3,USDC-WETH 500 10,12376958,2021-05-05 22:27:41,0xb2ef52180d1e5f4835f4e343251286fa84743456,Decrease Liquidity,0.931484,2588.187276,191150.0,198080.0,2499.9135991248622,4998.918171064905
4,USDC-WETH 500 10,12377035,2021-05-05 22:44:11,0x35f5a1ec10deee1256351e37bc78dc80345895f8,Increase Liquidity,0.000146,1.0,194990.0,195010.0,3398.182812901327,3404.9856389500487
5,USDC-WETH 500 10,12377266,2021-05-05 23:34:39,0x9f9b987becfe15aca93ce9208cca146f252c8d46,Increase Liquidity,2.6,598.469729,186730.0,195460.0,3248.661520935142,7777.22370859911


# The issue is that, when a block contains both swaps and liquidity provision events, we do not know which one was executed first

In [30]:
# Replay the recorded events block by block through the Uniswap V3 functions
# imported at the top of the notebook, and check every simulated swap output
# against the recorded one.
#
# Within a block the recorded order of swaps versus LP actions is unknown. An
# in-range LP action is executed as soon as the spot price implied by its token
# amounts matches the simulated spot price; otherwise it is deferred and
# retried after each subsequent event of the same block.

# Simulation state threaded through the imported functions
current_state = {}
current_spot_price = 3443.33391012612   # hard-coded initial spot price of the replay
lp_dataframe = pd.DataFrame(columns = ['address','tick_lower','tick_upper','L'])
fees_collected = {}

token0_to_trader = 0
token1_to_trader = 0

PRICE_MATCH_TOL = 1e-3    # absolute tolerance when matching an LP action to the spot price
SWAP_OUTPUT_TOL = 0.001   # relative tolerance between recorded and simulated swap output
LAST_EVENT_LABEL = 139    # only events labelled 0..139 (inclusive) are replayed


def _lp_price_matches(action, spot_price):
    """Tell whether the spot price implied by an LP action agrees with ``spot_price``.

    Element [1] of the ``Get_Current_Spot_Price`` result is taken as the implied
    price (presumably the solved spot price - confirm against that function).
    """
    implied_price = Get_Current_Spot_Price(action['WETH'], action['USDC'],
                                           Get_Price(action['tick_upper']), Get_Price(action['tick_lower']),
                                           spot_price)[1]
    return np.abs(implied_price - spot_price) < PRICE_MATCH_TOL


def _execute_lp_action(action, current_state, current_spot_price, lp_dataframe, fees_collected):
    """Apply one LP action, dispatching on the action's OWN event name.

    Returns the updated (current_state, current_spot_price, lp_dataframe, fees_collected).
    """
    if action['event'] == "Increase Liquidity":
        current_state, current_spot_price, lp_dataframe, fees_collected = Provide_Liquidity(
            current_state, current_spot_price, lp_dataframe, fees_collected,
            action['address'], action['tick_lower'], action['tick_upper'], action['WETH'], action['USDC'])
    else:
        current_state, current_spot_price, lp_dataframe = Withdraw_Liquidity(
            current_state, current_spot_price, lp_dataframe,
            action['address'], action['tick_lower'], action['tick_upper'], action['WETH'], action['USDC'])
    return current_state, current_spot_price, lp_dataframe, fees_collected


# We loop by block number, then by actions in the block, this is so that we can sort actions within a block
for block_num in data.loc[0:LAST_EVENT_LABEL,'BLOCK_NUMBER'].unique():
    print(block_num)

    temp_df = data[data['BLOCK_NUMBER'] == block_num]

    # LP actions of this block waiting for the spot price to match (first in, first out)
    pending_lp_actions = []

    # Loop over all actions in this block
    for i in range(len(temp_df)):

        # Retry deferred LP actions in arrival order and stop at the first one
        # that still does not match. Executed actions are removed from the
        # queue so they can never be applied twice.
        while pending_lp_actions and _lp_price_matches(pending_lp_actions[0], current_spot_price):
            current_state, current_spot_price, lp_dataframe, fees_collected = _execute_lp_action(
                pending_lp_actions.pop(0), current_state, current_spot_price, lp_dataframe, fees_collected)

        event_name = temp_df['EVENT_NAME'].iloc[i]

        # LP actions
        if event_name in ["Increase Liquidity","Decrease Liquidity"]:

            action = {'event': event_name,
                      'address': temp_df['SENDER'].iloc[i],
                      'tick_lower': temp_df['TICK_LOWER'].iloc[i],
                      'tick_upper': temp_df['TICK_UPPER'].iloc[i],
                      'WETH': temp_df['Delta WETH'].iloc[i],
                      'USDC': temp_df['Delta USDC'].iloc[i]}

            # Execute now when the implied price matches the spot price, or when
            # the spot price lies outside the position's price range (the larger
            # tick gives the lower price bound). Otherwise wait for the swaps of
            # this block to move the price.
            if _lp_price_matches(action, current_spot_price) or \
                not Get_Price(action['tick_upper']) < current_spot_price < Get_Price(action['tick_lower']):
                current_state, current_spot_price, lp_dataframe, fees_collected = _execute_lp_action(
                    action, current_state, current_spot_price, lp_dataframe, fees_collected)
            else:
                pending_lp_actions.append(action)
            continue

        # Swaps
        if event_name != "Swap":
            continue

        if temp_df['Delta WETH'].iloc[i] > 0:

            # WETH enters the pool, USDC leaves it
            WETH = temp_df['Delta WETH'].iloc[i]
            USDC = -temp_df['Delta USDC'].iloc[i]

            current_state, current_spot_price, fees_collected, token1_to_trader = Swap_Token0_to_Token1(current_state, current_spot_price, lp_dataframe, fees_collected, WETH, fees)

            # The simulated USDC output must agree with the recorded one
            if np.abs(USDC/token1_to_trader - 1) > SWAP_OUTPUT_TOL:
                raise TypeError("Wrong USDC value")

        elif temp_df['Delta USDC'].iloc[i] > 0:

            # USDC enters the pool, WETH leaves it
            USDC = temp_df['Delta USDC'].iloc[i]
            WETH = -temp_df['Delta WETH'].iloc[i]

            current_state, current_spot_price, fees_collected, token0_to_trader = Swap_Token1_to_Token0(current_state, current_spot_price, lp_dataframe, fees_collected, USDC, fees)

            # The simulated WETH output must agree with the recorded one
            if np.abs(WETH/token0_to_trader - 1) > SWAP_OUTPUT_TOL:
                raise TypeError("Wrong WETH value")

    # End of block: every recorded LP action did happen inside this block, so
    # whatever is still deferred is executed now rather than silently dropped.
    for action in pending_lp_actions:
        print("System Message: deferred", action['event'], "of block", block_num, "executed after the last event of the block")
        current_state, current_spot_price, lp_dataframe, fees_collected = _execute_lp_action(
            action, current_state, current_spot_price, lp_dataframe, fees_collected)


12376729
System Message: Trader sends 1.0 token0 and 2995.507735 token1 into the pool
12376891
System Message: Transfer 119.744094948619 of token1 to trader
12376958
System Message: Trader withdraws 0.93148425 token0 and 2588.187276 token1 from the pool
12377035
System Message: Trader sends 0.000145738 token0 and 1.0 token1 into the pool
12377266
System Message: Trader sends 2.599999996 token0 and 598.469729 token1 into the pool
12377278
System Message: Transfer 0.144241064576162 of token0 to trader
12377308
System Message: Trader sends 0.0 token0 and 3405.610348 token1 into the pool
12377327
System Message: Trader sends 0.151652527 token0 and 1549.65885 token1 into the pool
12377345
System Message: Transfer 0.103492428465870 of token0 to trader
12377364
System Message: Transfer 176.180828304496 of token1 to trader
12377369
System Message: Transfer 514.279985864647 of token1 to trader
12377375
System Message: Transfer 2020.72483319812 of token1 to trader
12377404
System Message: Transf

TypeError: Wrong USDC value

In [31]:
# Show every event recorded in block 12377960
block_mask = data['BLOCK_NUMBER'].eq(12377960)
data.loc[block_mask]

Unnamed: 0,POOL_NAME,BLOCK_NUMBER,BLOCK_TIMESTAMP,SENDER,EVENT_NAME,Delta WETH,Delta USDC,TICK_LOWER,TICK_UPPER,PRICE_LOWER,PRICE_UPPER
60,USDC-WETH 500 10,12377960,2021-05-06 02:10:11,0xe592427a0aece92de3edee1f18e0157c05861564,Swap,0.519157,-1802.139133,,,,
61,USDC-WETH 500 10,12377960,2021-05-06 02:10:11,0xe592427a0aece92de3edee1f18e0157c05861564,Swap,0.05,-174.152239,,,,


In [23]:
# Count the events of every block, ordered by block number.
# The column names are set explicitly ('index' = block number, 'BLOCK_NUMBER'
# = number of events) because the names produced by value_counts().reset_index()
# differ between pandas 1.x and pandas 2.x.
temp = (
    data['BLOCK_NUMBER']
    .value_counts()
    .rename_axis('index')
    .reset_index(name = 'BLOCK_NUMBER')
    .sort_values(by = 'index')
)

# First ten blocks holding more than one event
temp[temp['BLOCK_NUMBER'] > 1].head(10)

Unnamed: 0,index,BLOCK_NUMBER
1225233,12377946,2
1225193,12377960,2
1227937,12378139,2
1236172,12378630,2
1238742,12378837,2
1239153,12379003,2
1237761,12379211,2
1233307,12379404,2
1235673,12379865,2
1235387,12379878,2


With the Deltas, we can build a history for the size of the liquidity pool.

In [None]:
# Swap deltas are already signed (positive = into the pool), but LP amounts are
# stored as positive numbers for additions AND withdrawals. Withdrawals must
# therefore be negated before summing, otherwise they would increase the pool.
event_sign = np.where(data['EVENT_NAME'] == 'Decrease Liquidity', -1.0, 1.0)
signed_deltas = data[['POOL_NAME','BLOCK_NUMBER','BLOCK_TIMESTAMP']].copy()
signed_deltas['Delta USDC'] = data['Delta USDC'] * event_sign
signed_deltas['Delta WETH'] = data['Delta WETH'] * event_sign

# Add changes by each block
pool_size = signed_deltas.groupby(['POOL_NAME','BLOCK_NUMBER','BLOCK_TIMESTAMP']).agg({'Delta USDC':'sum', 'Delta WETH':'sum'}).reset_index()
del signed_deltas, event_sign

# Calculate cumulative values
pool_size['Total USDC'] = pool_size['Delta USDC'].cumsum()
pool_size['Total WETH'] = pool_size['Delta WETH'].cumsum()
pool_size

In [None]:
import matplotlib.pyplot as plt

# Cumulative USDC held by the pool over time; labelled so that the figure can
# be read on its own.
fig, ax = plt.subplots()
ax.plot(pool_size.BLOCK_TIMESTAMP, pool_size['Total USDC'], linewidth = 0.5)
ax.set_xlabel('Block timestamp')
ax.set_ylabel('Total USDC')
ax.set_title('Cumulative USDC in the liquidity pool')
plt.show()

In [None]:
# Absolute USDC amount moved by swaps on 2023-03-16 (from midnight, up to but
# excluding the following midnight)
day_start = datetime.datetime(2023,3,16)
day_end = datetime.datetime(2023,3,17)

is_swap = data['EVENT_NAME'] == 'Swap'
within_day = (data['BLOCK_TIMESTAMP'] >= day_start) & (data['BLOCK_TIMESTAMP'] < day_end)

data.loc[is_swap & within_day, 'Delta USDC'].abs().sum()