In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Symbol whose quote CSVs are compared, and the per-exchange data folders.
symbol = 'BNBUSDT'
binance_data_dir, bitget_data_dir = 'data/binance/', 'data/bitget/'

# Each exchange stores the symbol's quotes as "<data_dir>/<symbol>.csv".
binance_file_path, bitget_file_path = (
    os.path.join(data_dir, f'{symbol}.csv')
    for data_dir in (binance_data_dir, bitget_data_dir)
)

In [3]:
# Load the raw per-exchange CSV files into DataFrames (Binance first, then Bitget).
binance_df = pd.read_csv(filepath_or_buffer=binance_file_path)
bitget_df = pd.read_csv(filepath_or_buffer=bitget_file_path)

In [4]:
# Binance: only the "b", "a" and "T" columns are needed; they are renamed to
# bid, ask and timestamp. The timestamp values are parsed as epoch
# milliseconds and then become the index.
binance_df = (
    binance_df
    .loc[:, ["b", "a", "T"]]
    .rename(columns={"b": "bid", "a": "ask", "T": "timestamp"})
    .assign(timestamp=lambda quotes: pd.to_datetime(quotes["timestamp"], unit="ms"))
    .set_index("timestamp")
)
binance_df

Unnamed: 0_level_0,bid,ask
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2025-06-19 14:17:35.847,640.93,640.94
2025-06-19 14:17:35.987,640.93,640.94
2025-06-19 14:17:36.106,640.93,640.94
2025-06-19 14:17:36.310,640.93,640.94
2025-06-19 14:17:36.430,640.94,640.95
...,...,...
2025-06-20 06:17:42.154,644.50,644.51
2025-06-20 06:17:42.349,644.50,644.51
2025-06-20 06:17:42.444,644.50,644.51
2025-06-20 06:17:42.741,644.50,644.51


In [5]:
# Bitget: only the "bidPr", "askPr" and "ts" columns are needed; they are
# renamed to bid, ask and timestamp. The timestamp values are parsed as epoch
# milliseconds and then become the index.
bitget_df = (
    bitget_df
    .loc[:, ["bidPr", "askPr", "ts"]]
    .rename(columns={"bidPr": "bid", "askPr": "ask", "ts": "timestamp"})
    .assign(timestamp=lambda quotes: pd.to_datetime(quotes["timestamp"], unit="ms"))
    .set_index("timestamp")
)
bitget_df

Unnamed: 0_level_0,bid,ask
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2025-06-19 14:17:35.770,640.99,641.00
2025-06-19 14:17:36.107,640.99,641.00
2025-06-19 14:17:36.423,640.99,641.00
2025-06-19 14:17:36.736,640.99,641.00
2025-06-19 14:17:37.063,640.99,641.00
...,...,...
2025-06-20 06:17:41.557,644.38,644.39
2025-06-20 06:17:41.934,644.38,644.39
2025-06-20 06:17:42.238,644.38,644.39
2025-06-20 06:17:42.551,644.38,644.39


In [6]:
def _clean_quote_index(quotes: pd.DataFrame) -> pd.DataFrame:
    """Return ``quotes`` with a unique, NaT-free, ascending index.

    Steps, in order: keep the first row for every duplicated index value,
    drop rows whose index is NaT, then sort by index.
    """
    quotes = quotes[~quotes.index.duplicated(keep='first')]
    quotes = quotes[quotes.index.notna()]
    # sort_index() already yields a monotonically increasing index, so no
    # follow-up is_monotonic_increasing check / second sort is required.
    return quotes.sort_index()


def _sample_every_second(quotes: pd.DataFrame) -> pd.DataFrame:
    """Resample ``quotes`` to a 1-second grid with forward fill, then drop NaN rows.

    NOTE(review): ffill() is called without a ``limit``, so the last row is
    carried forward across a gap of any length -- confirm that stale quotes
    are acceptable for the downstream comparison.
    """
    return quotes.resample("1s").ffill().dropna()


# Clean the timestamp index of both quote frames (dedupe, drop NaT, sort).
binance_df = _clean_quote_index(binance_df)
bitget_df = _clean_quote_index(bitget_df)

# Resample each exchange to 1s, forward-filling and dropping rows with NaN.
binance_1s = _sample_every_second(binance_df)
bitget_1s = _sample_every_second(bitget_df)

# Merge on the shared 1s timestamps; only seconds present in both frames are kept.
merged_1s = binance_1s.merge(
    bitget_1s,
    how="inner",
    left_index=True,
    right_index=True,
    suffixes=("_binance", "_bitget"),
)
merged_1s

Unnamed: 0_level_0,bid_binance,ask_binance,bid_bitget,ask_bitget
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-06-19 14:17:36,640.93,640.94,640.99,641.00
2025-06-19 14:17:37,640.94,640.95,640.99,641.00
2025-06-19 14:17:38,640.94,640.95,640.99,641.00
2025-06-19 14:17:39,641.03,641.04,640.99,641.00
2025-06-19 14:17:40,641.03,641.04,641.08,641.09
...,...,...,...,...
2025-06-20 06:17:38,644.50,644.51,644.38,644.39
2025-06-20 06:17:39,644.50,644.51,644.38,644.39
2025-06-20 06:17:40,644.50,644.51,644.38,644.39
2025-06-20 06:17:41,644.50,644.51,644.38,644.39
