In [38]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import pyarrow.parquet as pq
import datetime as dt

import math
from decimal import Decimal, getcontext
# Use 50 significant digits for every Decimal operation performed in this notebook
getcontext().prec = 50

# Root folder holding the input data. Set the DEX_DATA_DIR environment variable to run
# the notebook on another machine; when it is unset the original local path is used.
data_dir = os.environ.get('DEX_DATA_DIR', '/Users/zhicong/Dropbox/DEXs')
# Pool identifier; the loading cells also use it as the prefix of the input file names
pool_name = 'USDC-WETH 500 10'

# Functions

In [39]:
def Get_Price(_tick_):
    """Convert a pool tick into a price, computed entirely in Decimal arithmetic.

    The value returned is ``1 / (1.0001**_tick_ * 10**-12)``, written here as
    ``10**12 / 1.0001**_tick_``. Evaluating the formula with floats and wrapping
    the result in ``Decimal`` afterwards would only expose the binary rounding
    error (about 16 correct digits); doing the arithmetic in ``Decimal`` lets the
    result follow the active decimal context precision instead.

    NOTE(review): the 10**-12 factor presumably accounts for the difference in
    token decimals between USDC and WETH, making this a USDC-per-WETH price —
    confirm against the data source.

    Parameters
    ----------
    _tick_ : int, float, numpy scalar or Decimal
        Tick index. Non-Decimal inputs are parsed from their string form.

    Returns
    -------
    Decimal
        Price corresponding to the tick, rounded to the current context precision.
    """
    # Go through str() so that numpy integers (which Decimal() rejects) are accepted
    # and floats are read at their printed value rather than their binary expansion.
    tick = _tick_ if isinstance(_tick_, Decimal) else Decimal(str(_tick_))
    return Decimal(10) ** 12 / Decimal('1.0001') ** tick

# Read and Clean Data

Liquidity provision actions

In [40]:
# Load every liquidity-provision event recorded for the pool
lp_action = pd.read_csv(os.path.join(data_dir, "uniswapv3_lp_actions", pool_name + "_LP.csv"))

# Final cleaning
lp_action['BLOCK_TIMESTAMP'] = pd.to_datetime(lp_action['BLOCK_TIMESTAMP'])

# Withdrawals are turned into negative flows: every amount column and the
# liquidity parameter get their sign flipped on 'Decrease Liquidity' rows.
is_decrease = lp_action['EVENT_NAME'] == 'Decrease Liquidity'
for signed_column in ['AMOUNT0_ADJUSTED', 'AMOUNT0_USD', 'AMOUNT1_ADJUSTED', 'AMOUNT1_USD', 'L']:
    lp_action[signed_column] = np.where(is_decrease, -lp_action[signed_column], lp_action[signed_column])

# Keep the liquidity parameter at 5 decimal places
lp_action['L'] = lp_action['L'].round(5)

lp_action.head()

Unnamed: 0,POOL_NAME,EVENT_NAME,BLOCK_NUMBER,BLOCK_TIMESTAMP,SENDER,AMOUNT0_ADJUSTED,TOKEN0_SYMBOL,AMOUNT0_USD,AMOUNT1_ADJUSTED,TOKEN1_SYMBOL,AMOUNT1_USD,TICK_LOWER,TICK_UPPER,PRICE_LOWER,PRICE_UPPER,L
0,USDC-WETH 500 10,Increase Liquidity,12376729,2021-05-05 21:42:11,0xb2ef52180d1e5f4835f4e343251286fa84743456,2995.507735,USDC,2995.292327,1.0,WETH,3437.435736,191150,198080,2499.913599,4998.918171,345.0731
1,USDC-WETH 500 10,Decrease Liquidity,12376958,2021-05-05 22:27:41,0xb2ef52180d1e5f4835f4e343251286fa84743456,-2588.187276,USDC,-2588.001158,-0.931484,WETH,-3201.917248,191150,198080,2499.913599,4998.918171,-310.56579
2,USDC-WETH 500 10,Increase Liquidity,12377035,2021-05-05 22:44:11,0x35f5a1ec10deee1256351e37bc78dc80345895f8,1.0,USDC,0.999928,0.000146,WETH,0.500964,194990,195010,3398.182813,3404.985639,25.65271
3,USDC-WETH 500 10,Increase Liquidity,12377266,2021-05-05 23:34:39,0x9f9b987becfe15aca93ce9208cca146f252c8d46,598.469729,USDC,598.426693,2.6,WETH,8937.3329,186730,195460,3248.661521,7777.223709,447.9946
4,USDC-WETH 500 10,Increase Liquidity,12377308,2021-05-05 23:44:49,0x9f9b987becfe15aca93ce9208cca146f252c8d46,3405.610348,USDC,3405.365449,0.0,WETH,0.0,194660,194990,3404.985639,3519.218937,3508.22475


Swap actions

In [41]:
# Swap events are stored as three numbered parquet parts (<pool_name>_1.gzip ... _3.gzip).
# Read every part first and concatenate once: growing a frame with pd.concat inside the
# loop (seeded with an empty frame) recopies all accumulated rows on each iteration.
swaps = pd.concat(
    [pd.read_parquet(os.path.join(data_dir, "dex_swaps", pool_name + "_" + str(i) + ".gzip"))
     for i in range(1, 4)],
    axis = 0, ignore_index = True)

# Final cleaning: enforce numeric / datetime dtypes on the columns used below
swaps = swaps.astype({'BLOCK_NUMBER': int, 'AMOUNT_IN': float, 'AMOUNT_OUT': float})
swaps['BLOCK_TIMESTAMP'] = pd.to_datetime(swaps['BLOCK_TIMESTAMP'])

# Since the pool charges a transaction fee of 5 basis points on the incoming token and
# stores it elsewhere (not in the liquidity pool), the fee could be subtracted here.
# This adjustment is currently disabled:
#swaps['AMOUNT_IN'] = swaps['AMOUNT_IN']*(1-0.0005)

# Changing signs of addition and extraction: tokens leaving the pool become negative.
# Token0 is USDC and token1 is WETH, so when WETH is the incoming token the USDC leg
# is the (negative) outgoing amount, and vice versa.
swaps['AMOUNT_OUT'] = -swaps['AMOUNT_OUT']
weth_in = swaps['SYMBOL_IN'] == 'WETH'
swaps['AMOUNT0_ADJUSTED'] = np.where(weth_in, swaps['AMOUNT_OUT'], swaps['AMOUNT_IN'])
swaps['TOKEN0_SYMBOL'] = 'USDC'
swaps['AMOUNT1_ADJUSTED'] = np.where(weth_in, swaps['AMOUNT_IN'], swaps['AMOUNT_OUT'])
swaps['TOKEN1_SYMBOL'] = 'WETH'

# Keep only the columns shared with the liquidity-action table and order by block.
# kind='mergesort' is a stable sort: swaps that share a block number keep the order in
# which they were read, which the default (non-stable) algorithm does not guarantee.
swaps = (
    swaps[['POOL_NAME','BLOCK_NUMBER','BLOCK_TIMESTAMP','SENDER','EVENT_NAME','AMOUNT0_ADJUSTED','TOKEN0_SYMBOL','AMOUNT1_ADJUSTED','TOKEN1_SYMBOL']]
    .sort_values(by = 'BLOCK_NUMBER', ascending = True, kind = 'mergesort')
    .reset_index(drop = True)
)

swaps.head()

Unnamed: 0,POOL_NAME,BLOCK_NUMBER,BLOCK_TIMESTAMP,SENDER,EVENT_NAME,AMOUNT0_ADJUSTED,TOKEN0_SYMBOL,AMOUNT1_ADJUSTED,TOKEN1_SYMBOL
0,USDC-WETH 500 10,12376891,2021-05-05 22:15:01,0xe592427a0aece92de3edee1f18e0157c05861564,Swap,-119.744094,USDC,0.035,WETH
1,USDC-WETH 500 10,12377278,2021-05-05 23:37:55,0xe592427a0aece92de3edee1f18e0157c05861564,Swap,499.756414,USDC,-0.144241,WETH
2,USDC-WETH 500 10,12377345,2021-05-05 23:51:36,0xe592427a0aece92de3edee1f18e0157c05861564,Swap,365.0,USDC,-0.103492,WETH
3,USDC-WETH 500 10,12377364,2021-05-05 23:56:34,0xe592427a0aece92de3edee1f18e0157c05861564,Swap,-176.180828,USDC,0.05,WETH
4,USDC-WETH 500 10,12377369,2021-05-05 23:57:36,0xe592427a0aece92de3edee1f18e0157c05861564,Swap,-514.279985,USDC,0.146,WETH


Merging

In [42]:
# Removing new LPs: drop liquidity events recorded after the last block that has a swap
lp_action = lp_action[lp_action['BLOCK_NUMBER'] <= swaps['BLOCK_NUMBER'].max()].reset_index(drop = True)

# Stack swaps and liquidity events into one table ordered by block.
# kind='mergesort' is a stable sort, so rows sharing a block number keep their
# concatenation order (swaps first, then liquidity actions, each in its existing order);
# the default algorithm gives no such guarantee for ties.
data = (
    pd.concat([swaps, lp_action], axis = 0)
    .sort_values(by = 'BLOCK_NUMBER', ascending = True, kind = 'mergesort')
    .rename(columns = {'AMOUNT0_ADJUSTED':'Delta USDC', 'AMOUNT1_ADJUSTED':'Delta WETH'})
    .reset_index(drop = True)
)

# Discard events that move neither token
data = data[~((data['Delta WETH'] == 0) & (data['Delta USDC'] == 0))]

# Swap rows have no tick / price range / liquidity values, so those columns are NaN for them
data = data[['POOL_NAME','BLOCK_NUMBER','BLOCK_TIMESTAMP','SENDER','EVENT_NAME','Delta WETH','Delta USDC','TICK_LOWER','TICK_UPPER','PRICE_LOWER','PRICE_UPPER','L']]

data.head()

Unnamed: 0,POOL_NAME,BLOCK_NUMBER,BLOCK_TIMESTAMP,SENDER,EVENT_NAME,Delta WETH,Delta USDC,TICK_LOWER,TICK_UPPER,PRICE_LOWER,PRICE_UPPER,L
0,USDC-WETH 500 10,12376729,2021-05-05 21:42:11,0xb2ef52180d1e5f4835f4e343251286fa84743456,Increase Liquidity,1.0,2995.507735,191150.0,198080.0,2499.913599,4998.918171,345.0731
1,USDC-WETH 500 10,12376891,2021-05-05 22:15:01,0xe592427a0aece92de3edee1f18e0157c05861564,Swap,0.035,-119.744094,,,,,
2,USDC-WETH 500 10,12376958,2021-05-05 22:27:41,0xb2ef52180d1e5f4835f4e343251286fa84743456,Decrease Liquidity,-0.931484,-2588.187276,191150.0,198080.0,2499.913599,4998.918171,-310.56579
3,USDC-WETH 500 10,12377035,2021-05-05 22:44:11,0x35f5a1ec10deee1256351e37bc78dc80345895f8,Increase Liquidity,0.000146,1.0,194990.0,195010.0,3398.182813,3404.985639,25.65271
4,USDC-WETH 500 10,12377266,2021-05-05 23:34:39,0x9f9b987becfe15aca93ce9208cca146f252c8d46,Increase Liquidity,2.6,598.469729,186730.0,195460.0,3248.661521,7777.223709,447.9946


# Descriptive Statistics

This section builds descriptive statistics of the data. One assumption we make here is that each address represents a single agent.

## Basic Descriptive

In [43]:
# Report the first and last calendar day covered by the merged event table
first_day = data['BLOCK_TIMESTAMP'].min().strftime("%d %B %Y")
last_day = data['BLOCK_TIMESTAMP'].max().strftime("%d %B %Y")
print("Data period:", first_day, "-", last_day)

Data period: 05 May 2021 - 12 May 2023


In [44]:
# Classify every row of the merged table by event type and report the counts
is_lp_event = data['EVENT_NAME'].isin(['Increase Liquidity','Decrease Liquidity'])
is_swap_event = data['EVENT_NAME'].isin(['Swap'])
print("Number of actions in our dataset:", data.shape[0])
print("Number of liquidity provision action:", int(is_lp_event.sum()))
print("Number of swap action:", int(is_swap_event.sum()))

Number of actions in our dataset: 4929727
Number of liquidity provision action: 151554
Number of swap action: 4778173


## Liquidity Providers

In [45]:
# Each distinct sender address in the liquidity-action table counts as one provider
lp_addresses = lp_action['SENDER'].unique()
print("Total Number of unique Liquidity Provider:", lp_addresses.size)

Total Number of unique Liquidity Provider: 9721


In [67]:
# Net liquidity change per provider and timestamp (long format: one row per pair)
lp_pool_concentration = (
    lp_action[['SENDER', 'BLOCK_TIMESTAMP', 'L']]
    .groupby(['SENDER', 'BLOCK_TIMESTAMP'], as_index = False)['L']
    .sum()
)

# Display the wide view: one row per timestamp, one column per provider address.
# The pivot is only shown, not stored; lp_pool_concentration stays in long format.
lp_pool_concentration.pivot(index = 'BLOCK_TIMESTAMP', columns = 'SENDER', values = 'L')

SENDER,0x00000000002108f4085d2fd429669ce8f9851241,0x000000000088e0120f9e6652cc058aec07564f69,0x0000000833d062eb26304d071ebf3e23765f1f92,0x000066415ef89192a256621aea88aadef507e907,0x0000c3caa36e2d9a8cd5269c976ede05018f0000,0x000741def5c59bead2b2f6be2d35fc4145e39e6b,0x00080000158a9930aeb508823103b0a977161c47,0x0008d343091ef8bd3efa730f6aae5a26a285c7a2,0x000ac1c7454fb12372e3fcfab2a36335db5e8e9e,0x000b85652fc321561cdf2bcadb07b61d8ab9ab8d,...,0xffe7729b1eab35aa37c4486c8fc9f576d4cc1603,0xffe8cd71956e37c838003c2cbd5cedf79c339e74,0xffeb7a444cddcbf4c9add4f8df158bc4e5ea6445,0xffec51798fef19426dfb32260a0967f9f7e6fd49,0xffedab0adacbc18c7fad6c420d47cb9a5c1152d7,0xffefdcfff613c9bbb9928f6ff44f07c7b562bfdf,0xfff33c0bde72f6472f1d185166b7cbfcc3e9e150,0xfff540e86fa4973a5fc75c8d526f1e70533fdd80,0xfffa285b4ca6ead9e537279e6b0d6d233d5e56bb,0xfffc9a3f70fbef70a486fc492ad01bc36f588ce0
BLOCK_TIMESTAMP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-05-05 21:42:11,,,,,,,,,,,...,,,,,,,,,,
2021-05-05 22:27:41,,,,,,,,,,,...,,,,,,,,,,
2021-05-05 22:44:11,,,,,,,,,,,...,,,,,,,,,,
2021-05-05 23:34:39,,,,,,,,,,,...,,,,,,,,,,
2021-05-05 23:44:49,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-05-12 13:45:11,,,,,,,,,,,...,,,,,,,,,,
2023-05-12 13:47:35,,,,,,,,,,,...,,,,,,,,,,
2023-05-12 13:51:35,,,,,,,,,,,...,,,,,,,,,,
2023-05-12 13:56:35,,,,,,,,,,,...,,,,,,,,,,


# Traders

In [None]:
# Each distinct sender address in the swap table counts as one trader
trader_addresses = swaps['SENDER'].unique()
print("Total Number of unique Traders:", trader_addresses.size)