In [None]:
import time
from datetime import datetime, timezone


def date_to_utc_timestamp(c_date):
    return int(c_date.replace(tzinfo=timezone.utc).timestamp())


def utc_timestamp_to_date(ts):
    return datetime.fromtimestamp(ts, tz=timezone.utc)


start_date = datetime(2023, 5, 1)
print(date_to_utc_timestamp(start_date))
# print(utc_timestamp_to_date(1626480000))

In [None]:
# 1. Fetch Eth Pool Liquidity Data

from gql import gql, Client
from gql.transport.requests import RequestsHTTPTransport

# 0.05% WBTC/ETH pool
LOW_POOL_ID = "0x4585fe77225b41b697c938b018e2ac67ac5a20c0"
# 0.3% WBTC/ETH pool
HIGH_POOL_ID = "0xcbcdf9626bc03e24f779434178a73a0b4bad62ed"


def get_pool_trade_data(pool_id, start_date):
    pool_query = """
    query poolDayDatas($pool_id: ID!, $start_time: Int!,){
        poolDayDatas(
            where: { pool: $pool_id, date_gt: $start_time }
        orderBy: date
        orderDirection: asc
        ) {
            date
            # in range liquidity at end of period
            liquidity
            # current price tracker at end of period
            sqrtPrice
            # price of token0 - derived from sqrtPrice
            tick
            # tvl derived in USD at end of period
            tvlUSD
            # volume in token0
            volumeToken0
            # volume in token1
            volumeToken1
            # volume in USD
            volumeUSD
            # fees in USD
            feesUSD
            # number of transactions during period
            txCount
            # opening price of token0
            open
            # high price of token0
            high
            # low price of token0
            low
            # close price of token0
            close
            }
    }
    """

    client = Client(
        transport=RequestsHTTPTransport(
            url='https://api.thegraph.com/subgraphs/name/uniswap/uniswap-v3',
            verify=True,
            retries=5,
        ))

    start_timestamp = date_to_utc_timestamp(start_date)
    variables = {"pool_id": pool_id, "start_time": start_timestamp}
    response = client.execute(gql(pool_query), variable_values=variables)
    trade_data = response["poolDayDatas"]
    trade_all_data = trade_data

    while len(trade_data) > 0:
        print(utc_timestamp_to_date(start_timestamp))
        print(len(trade_all_data))
        start_timestamp = trade_data[-1]['date']
        variables = {"pool_id": pool_id, "start_time": start_timestamp}
        response = client.execute(gql(pool_query), variable_values=variables)
        trade_data = response["poolDayDatas"]
        trade_all_data += trade_data

    return trade_all_data


# the earliest data is from 2021-05-01
start_date = datetime(2021, 5, 1)
pool_data = get_pool_trade_data(LOW_POOL_ID, start_date)

In [None]:
import pandas as pd

uniswap_df = pd.DataFrame(pool_data)
uniswap_df.head()

In [None]:
uniswap_df.to_csv("../data/wbtc_eth_0.05_uniswap_data.csv", index=False)

In [None]:
pool_data1 = get_pool_trade_data(HIGH_POOL_ID, start_date)
uniswap_df1 = pd.DataFrame(pool_data1)
uniswap_df1.to_csv("../data/wbtc_eth_0.3_uniswap_data.csv", index=False)

In [None]:
# 2. Fetch Arb Swap Period Data

from datetime import datetime
from utils import utils


start_timestamp = utils.date_to_utc_timestamp(datetime(2023, 5, 28, 0, 0))
end_timestamp = utils.date_to_utc_timestamp(datetime(2023, 5, 30, 0, 0))

In [None]:
import pandas as pd

arb_uniswap_df = pd.read_csv("../data/arb_swap_0523_0528.csv")
id_list = arb_uniswap_df["id"].tolist()
print(id_list[0])

In [None]:
# ARB graphQL
from gql import gql, Client
from gql.transport.requests import RequestsHTTPTransport


arb_query = """
query {
    swaps(skip: %d, orderBy: timestamp, orderDirection: asc,
        where: { pool: "0xc31e54c7a869b9fcbecc14363cf510d1c41fa443", timestamp_gte: %d }){
            id
            account {
                id
                positionCount
                openPositionCount
                closedPositionCount
                depositCount
                withdrawCount
                swapCount
            }
            gasLimit
            gasPrice
            amountIn
            amountInUSD
            amountOut
            amountOutUSD
            tick
            timestamp
            tokenIn {
                symbol
            }
            tokenOut {
                symbol
            }
    }
}
"""


client = Client(
    transport=RequestsHTTPTransport(
        url='https://api.thegraph.com/subgraphs/name/messari/uniswap-v3-arbitrum',
        verify=True,
        retries=5,
    ))

# # variables = { "num_skip": 0, "start_timestamp": 1684460940 }
# # response = client.execute(gql(arb_query), variable_values=variables)
response = client.execute(gql(arb_query % (0, start_timestamp)))
print(len(response["swaps"]))

In [None]:
result = response['swaps']
id_set = set(id_list)
print_set = set()
PRINT_GAP = 1000

for item in result:
    id_set.add(item["id"])

# more than 100 swap transactions happened in a second, will need to change skip count
skip = 0
last_cnt = 0
while int(result[-1]["timestamp"]) <= end_timestamp:
    if skip == 1000:
        break
    last_cnt = len(result)

    print_cnt = int(len(result) / PRINT_GAP)
    if print_cnt not in print_set:
        print_set.add(print_cnt)
        print(datetime.now(), "result cnt:", len(result), "id cnt:", len(id_set), "skip: ", skip)
        print("Current time: ", utils.utc_timestamp_to_date(int(result[-1]["timestamp"])))

    current_timestamp = int(result[-1]["timestamp"])
    query = arb_query % (skip, current_timestamp)
    try:
        response = client.execute(gql(query))

        swap_data = response["swaps"]
        if len(swap_data) == 0:
            break
        for item in swap_data:
            if int(item["timestamp"]) > end_timestamp:
                break
            elif item["id"] in id_set:
                continue
            else:
                result.append(item)
                id_set.add(item["id"])

        if last_cnt == len(result):
            skip += 100
        else:
            skip = 0
    except Exception as ex:
        print("Fetching swap data error", ex)

In [None]:
import pandas as pd
from copy import deepcopy


arb_result = deepcopy(result)
for item in arb_result:
    item["transaction"] = item["id"]
    item["trader"] = item["account"]["id"]
    item["traderPositionCount"] = item["account"]["positionCount"]
    item["traderOpenPositionCount"] = item["account"]["openPositionCount"]
    item["traderClosedPositionCount"] = item["account"]["closedPositionCount"]
    item["traderDepositCount"] = item["account"]["depositCount"]
    item["traderWithdrawCount"] = item["account"]["withdrawCount"]
    item["traderSwapCount"] = item["account"]["swapCount"]
    item["tokenIn"] = item["tokenIn"]["symbol"]
    item["tokenOut"] = item["tokenOut"]["symbol"]
    item.pop("account")

arb_uniswap_df1 = pd.DataFrame(arb_result)
df = pd.concat([arb_uniswap_df, arb_uniswap_df1])
df.head()

In [None]:
def get_amount_price(swap_item):
    if swap_item["tokenIn"] == "WETH":
        eth_amount = int(swap_item["amountIn"]) / (10 ** 18)
        usd = float(swap_item["amountInUSD"])
    else:
        eth_amount = int(swap_item["amountOut"]) / (10 ** 18)
        usd = float(swap_item["amountOutUSD"])

    if eth_amount != 0:
        return eth_amount, usd, usd / eth_amount

    return eth_amount, usd, None

In [None]:
data = df.to_dict("records")

for item in data:
    eth_amount, usd_amount, price = get_amount_price(item)
    item["ethAmount"] = eth_amount
    item["usdAmount"] = usd_amount
    item["price"] = price
    item.pop("id")

df_new = pd.DataFrame(data)
df_new.head()

In [None]:
df_new.to_csv("../data/arb_swap.csv", index=False)

In [1]:
# 3. Fetch And Concat Arb User Swap Data
import pandas as pd


df = pd.read_csv("../data/user_arb_swap_resolved.csv")
df.head()

Unnamed: 0,id,amountIn,amountInUSD,amountOut,amountOutUSD,gasLimit,gasPrice,tick,timestamp,pool,tokenIn,tokenOut
0,0x4163f1437cefc0b6c73e00c8db819ea92aa07395a1a6...,3516117741730628903,5174.514646,5170663639,5170.663639,3000000,2000000000,-203382,1676306229,0xc31e54c7a869b9fcbecc14363cf510d1c41fa443,WETH,USDC
1,0x058d12a79ee20e82379e5d0b2b1eeeaaa2988768244d...,4186892251423356044,6158.127305,6154400504,6154.400504,3000000,2000000000,-203387,1676306236,0xc31e54c7a869b9fcbecc14363cf510d1c41fa443,WETH,USDC
2,0xcd8af3e9679b087671fa11627d3f0864b3bedb46523c...,3559441400205692687,5231.842059,5228940013,5228.940013,3000000,2000000000,-203393,1676306246,0xc31e54c7a869b9fcbecc14363cf510d1c41fa443,WETH,USDC
3,0xa9ac38c9ce148d8830f73bec697d014de0ad3aa7e3a3...,8250801903,8250.801903,5613510956742205013,8245.020033,3000000,2000000000,-203396,1676306255,0xc31e54c7a869b9fcbecc14363cf510d1c41fa443,USDC,WETH
4,0xc6ea270acc5833c393000dad84ffb0baaf793238f669...,3454446749687345942,5076.537527,5073730688,5073.730688,3000000,2000000000,-203395,1676306282,0xc31e54c7a869b9fcbecc14363cf510d1c41fa443,WETH,USDC


In [3]:
last_timestamp = int(df["timestamp"].max())
print(type(last_timestamp))

<class 'int'>


In [4]:
from gql import gql, Client
from gql.transport.requests import RequestsHTTPTransport

swap_query = """
{
    swaps(skip: %d, orderBy: timestamp, orderDirection: asc,
        where: { account_: {id: "0x8cc02c2381b7c55e18dccfea917f0677a5671931"}, timestamp_gte: %d }){
        id
        amountIn
        amountInUSD
        amountOut
        amountOutUSD
        gasLimit
        gasPrice
        tick
        timestamp
        pool {
            id
        }
        tokenIn {
            symbol
        }
        tokenOut {
            symbol
        }
    }
}"""


client = Client(
    transport=RequestsHTTPTransport(
        url='https://api.thegraph.com/subgraphs/name/messari/uniswap-v3-arbitrum',
        verify=True,
        retries=5,
    ))

response = client.execute(gql(swap_query % (0, last_timestamp)))
print(response["swaps"])

[{'id': '0xf18c91bf18a01b04bdd36f9f2a86364f8e93c1974eb4480e2de26d9dea0d23dd1e000000', 'amountIn': '16491066994', 'amountInUSD': '16491.066994', 'amountOut': '8793652510660301738', 'amountOutUSD': '16482.42989320664391504365044', 'gasLimit': '6000000', 'gasPrice': '2000000000', 'tick': '-200960', 'timestamp': '1685577355', 'pool': {'id': '0xc31e54c7a869b9fcbecc14363cf510d1c41fa443'}, 'tokenIn': {'symbol': 'USDC'}, 'tokenOut': {'symbol': 'WETH'}}, {'id': '0xcbc07212c81721435423a8ac276615cbc1ab2e9cf1d115c477fac83a6d960c081a000000', 'amountIn': '32798155443', 'amountInUSD': '32798.155443', 'amountOut': '17487734589899770208', 'amountOutUSD': '32779.97486338014584619297952', 'gasLimit': '6000000', 'gasPrice': '2000000000', 'tick': '-200959', 'timestamp': '1685577641', 'pool': {'id': '0xc31e54c7a869b9fcbecc14363cf510d1c41fa443'}, 'tokenIn': {'symbol': 'USDC'}, 'tokenOut': {'symbol': 'WETH'}}, {'id': '0xf239b50d206a97cfac9e56bf265c6b2c323de40b622dadb6fa6a73ac6ccb9aa903000000', 'amountIn': '23

In [5]:
from utils import utils
from datetime import datetime

result = response['swaps']
id_set = id_set = set(df["id"].tolist())
for t in result:
    id_set.add(t["id"])

print_set = set()
PRINT_GAP = 1000

current_timestamp = int(result[-1]["timestamp"])
end_timestamp = utils.date_to_utc_timestamp(datetime(2023, 6, 7, 0, 0))

# more than 100 swap transactions happened in a second, will need to change skip count
skip = 0
last_cnt = 0
while current_timestamp <= end_timestamp:
    if skip == 1000:
        break

    print_cnt = int(len(result) / PRINT_GAP)
    if print_cnt not in print_set:
        print_set.add(print_cnt)
        print(datetime.now(), "result cnt:", len(result), "id cnt:", len(id_set), "skip: ", skip)
        print("Current time: ", utils.utc_timestamp_to_date(current_timestamp))

    last_cnt = len(result)
    query = swap_query % (skip, current_timestamp)
    try:
        response = client.execute(gql(query))

        swap_data = response["swaps"]
        if len(swap_data) == 0:
            break
        for item in swap_data:
            if int(item["timestamp"]) > end_timestamp:
                break
            elif item["id"] in id_set:
                continue
            else:
                result.append(item)
                id_set.add(item["id"])

        if last_cnt == len(result):
            skip += 100
        else:
            skip = 0

        current_timestamp = int(result[-1]["timestamp"])
    except Exception as ex:
        print("Fetching swap data error", ex)

2023-06-07 15:33:21.954100 result cnt: 100 id cnt: 62410 skip:  0
Current time:  2023-06-01 02:41:36+00:00
2023-06-07 15:34:09.798130 result cnt: 1090 id cnt: 63400 skip:  0
Current time:  2023-06-05 00:47:30+00:00
2023-06-07 15:34:57.697410 result cnt: 2080 id cnt: 64390 skip:  0
Current time:  2023-06-06 16:55:18+00:00


In [None]:
# data concat

In [7]:
import pandas as pd
from copy import deepcopy


arb_result = deepcopy(result)
for item in arb_result:
    item["tokenIn"] = item["tokenIn"]["symbol"]
    item["tokenOut"] = item["tokenOut"]["symbol"]
    item["pool"] = item["pool"]["id"]

arb_uniswap_df = pd.DataFrame(arb_result)
arb_uniswap_df.head()

Unnamed: 0,id,amountIn,amountInUSD,amountOut,amountOutUSD,gasLimit,gasPrice,tick,timestamp,pool,tokenIn,tokenOut
0,0xf18c91bf18a01b04bdd36f9f2a86364f8e93c1974eb4...,16491066994,16491.066994,8793652510660301738,16482.429893206645,6000000,2000000000,-200960,1685577355,0xc31e54c7a869b9fcbecc14363cf510d1c41fa443,USDC,WETH
1,0xcbc07212c81721435423a8ac276615cbc1ab2e9cf1d1...,32798155443,32798.155443,17487734589899770208,32779.97486338014,6000000,2000000000,-200959,1685577641,0xc31e54c7a869b9fcbecc14363cf510d1c41fa443,USDC,WETH
2,0xf239b50d206a97cfac9e56bf265c6b2c323de40b622d...,23602123802,23602.123802,12577680087838434308,23586.524756264633,6000000,2000000000,-200954,1685577793,0xc31e54c7a869b9fcbecc14363cf510d1c41fa443,USDC,WETH
3,0x0774ac668acc332a7c7602b1a8104b5f480c3125ad82...,9010101078880157749,16906.74233592732,16896605650,16896.60565,6000000,2000000000,-200951,1685577967,0xc31e54c7a869b9fcbecc14363cf510d1c41fa443,WETH,USDC
4,0xf4ad4dc41f3a4702e8a661e2c1ffc0baf528bcd1cddb...,9966794012412496598,18692.234594716552,18680307146,18680.307146,6000000,2000000000,-200956,1685577995,0xc31e54c7a869b9fcbecc14363cf510d1c41fa443,WETH,USDC


In [9]:
# judge whether need to resolve some usdt transaction
arb_uniswap_df.pool.value_counts()

0xc31e54c7a869b9fcbecc14363cf510d1c41fa443    2321
Name: pool, dtype: int64

In [10]:
df = pd.concat([df, arb_uniswap_df])
df.shape

(64632, 12)

In [11]:
df.to_csv("../data/user_arb_swap_all.csv", index=False)

In [None]:
import time

from gql import gql, Client
from gql.transport.requests import RequestsHTTPTransport


client = Client(
    transport=RequestsHTTPTransport(
        url='https://api.thegraph.com/subgraphs/name/messari/uniswap-v3-arbitrum',
        verify=True,
        retries=5,
    ))

query1 = """query get_pools($pool_id: ID!) {
    liquidityPool(id: $pool_id) {
        tick
        lastUpdateTimestamp
    }
}"""

variables = { "pool_id": "0xc31e54c7a869b9fcbecc14363cf510d1c41fa443" }
print(int(time.time()))
response1 = client.execute(gql(query1), variable_values=variables)
print(response1["liquidityPool"]["lastUpdateTimestamp"])
print(int(time.time()))

In [None]:
query2 = """
{
    liquidityPool(id: "0xc31e54c7a869b9fcbecc14363cf510d1c41fa443") {
        tick
        lastUpdateTimestamp
    }
}"""

print(int(time.time()))
response2 = client.execute(gql(query2))
print(response2["liquidityPool"]["lastUpdateTimestamp"])
print(int(time.time()))