In [1]:
import sys
import os
sys.path.append(os.path.abspath(".."))
import datetime as dt
import pandas as pd
import tetrion.commands as cmd
from Backtestingtools.ticks_and_books_helper import MarketDataAnalyzer2


In [None]:
# Order-book snapshots (depth=5) for FUT_TAIFEX_TMF:202503 on 2025-03-07.
df_book = cmd.book_printer_v2(
    "FUT_TAIFEX_TMF:202503",
    dt.date(2025, 3, 7),
    source="SRC_TAIFEX_RT_RD.ORIG_CPBQ.DEST_CPBQ",
    depth=5,
)
# Make the index a DatetimeIndex so time-based slicing and joins work downstream.
df_book.index = pd.to_datetime(df_book.index)

# Trade ticks for the same instrument and date.
df_tick = cmd.tick_printer(
    "FUT_TAIFEX_TMF:202503",
    dt.date(2025, 3, 7),
)
df_tick.index = pd.to_datetime(df_tick.index)




In [8]:
# Hand the tick and book frames loaded above to the project's analyzer helper.
analyzer = MarketDataAnalyzer2(df_tick, df_book)


In [9]:
# Run the analyzer's trade-through detection. The loop below consumes the result as an
# iterable of dict-like records keyed by start_time / end_time / direction / count / avg_diff.
events = analyzer.detect_trade_through()

# One summary line per detected event (avg_diff is printed with two decimals).
for e in events:
    print(f"{e['start_time']} ~ {e['end_time']} | {e['direction']} | {e['count']} 筆 | 平均穿價 {e['avg_diff']:.2f}")


2025-03-07 01:04:39.602784 ~ 2025-03-07 01:04:39.602784 | sell | 1 筆 | 平均穿價 1.00
2025-03-07 01:06:43.602812 ~ 2025-03-07 01:06:43.602812 | buy | 1 筆 | 平均穿價 1.00
2025-03-07 09:00:21.364940 ~ 2025-03-07 09:00:21.364940 | buy | 1 筆 | 平均穿價 1.00
2025-03-07 10:10:20.365159 ~ 2025-03-07 10:10:20.365159 | sell | 1 筆 | 平均穿價 1.00
2025-03-07 21:30:02.826733 ~ 2025-03-07 21:30:02.826733 | sell | 1 筆 | 平均穿價 1.00


In [3]:
import pandas as pd

def detect_trade_through_from_book(df_tick, df_book, merge_window_ms=1000):
    """
    Detect trade-through events by comparing each tick's price against the most
    recent book snapshot (bid_px/ask_px) at or before the tick's timestamp.

    A tick is a "buy" trade-through when px > ask_px and a "sell" trade-through
    when px < bid_px. Ticks with a null px, ticks that precede every book
    snapshot, and snapshots whose bid_px/ask_px is null or 0 are skipped.
    Consecutive trade-through ticks of the same direction whose gap to the
    previous trade-through tick is at most merge_window_ms are merged into one
    "wave".

    Args:
        df_tick: DataFrame indexed by tick timestamp, with a "px" column.
        df_book: DataFrame indexed by snapshot timestamp, with "bid_px" and
            "ask_px" columns. It does not have to be pre-sorted; an unsorted
            book is stably sorted by index before the lookup.
        merge_window_ms: Maximum gap (milliseconds) between two consecutive
            same-direction trade-through ticks for them to share a wave.

    Returns:
        List of dicts (empty when nothing is detected), each with keys:
          - start_time: Timestamp of the first tick in the wave
          - end_time:   Timestamp of the last tick in the wave
          - direction:  "buy" or "sell"
          - count:      number of ticks in that wave
          - avg_diff:   average price difference from the book
    """
    # Without these columns (or without any rows) no tick can ever qualify.
    if (
        df_tick.empty
        or df_book.empty
        or "px" not in df_tick.columns
        or "bid_px" not in df_book.columns
        or "ask_px" not in df_book.columns
    ):
        return []

    # "Latest snapshot at or before t" is only well defined on a time-ordered
    # book. A stable sort keeps the original order of same-timestamp snapshots.
    book = df_book
    if not book.index.is_monotonic_increasing:
        book = book.sort_index(kind="mergesort")

    bids = book["bid_px"].to_numpy()
    asks = book["ask_px"].to_numpy()

    # One binary search per tick instead of slicing the whole book per tick.
    # side="right" lands just past every snapshot with time <= t, so "- 1" is
    # the latest snapshot at or before t, and -1 means "no snapshot yet".
    snapshot_pos = book.index.searchsorted(df_tick.index, side="right") - 1

    # 1) Collect individual trade-through ticks as (time, direction, diff).
    events = []
    for t, px, pos in zip(df_tick.index, df_tick["px"].to_numpy(), snapshot_pos):
        if pos < 0 or pd.isna(px):
            continue

        bid = bids[pos]
        ask = asks[pos]
        # A zero or null side means the snapshot has no usable quote there.
        if pd.isna(bid) or pd.isna(ask) or bid == 0 or ask == 0:
            continue

        if px > ask:
            events.append((t, "buy", px - ask))
        elif px < bid:
            events.append((t, "sell", bid - px))

    if not events:
        return []

    # 2) Merge into waves: the gap is measured against the previous
    #    trade-through tick, not against the start of the wave.
    merged = []
    group = [events[0]]
    for curr in events[1:]:
        prev = group[-1]
        gap_ms = (curr[0] - prev[0]).total_seconds() * 1000
        if curr[1] == prev[1] and gap_ms <= merge_window_ms:
            group.append(curr)
        else:
            merged.append(group)
            group = [curr]
    merged.append(group)

    # 3) Summarize each wave.
    return [
        {
            "start_time": g[0][0],
            "end_time":   g[-1][0],
            "direction":  g[0][1],
            "count":      len(g),
            "avg_diff":   sum(e[2] for e in g) / len(g),
        }
        for g in merged
    ]


In [5]:
import pandas as pd
import numpy as np

def align_tick_book(df_tick, df_book):
    """
    Join every trade to the latest book snapshot at or before its ex_time.

    Args:
        df_tick: trades with 'ex_time', 'px' and 'sz' columns.
        df_book: book snapshots indexed by time, with 'bid_px' and 'ask_px'.

    Returns:
        DataFrame indexed by ex_time (ascending) with columns
        px, sz, bid_px, ask_px. Trades that precede every snapshot get
        NaN quotes. Neither input frame is modified.
    """
    exchange_times = pd.to_datetime(df_tick['ex_time'])

    # Work on a private copy of the trade columns, keyed by exchange time.
    trades = df_tick[['px', 'sz']].copy()
    trades.index = exchange_times
    trades = trades.sort_index()

    # merge_asof needs both sides ordered on the join key.
    quotes = df_book.sort_index()[['bid_px', 'ask_px']]

    return pd.merge_asof(
        trades,
        quotes,
        left_index=True,
        right_index=True,
        direction='backward',
    )

def flag_through_ticks(df_aligned):
    """
    Return a copy of the aligned frame with three boolean columns appended:

      buy_through  - trade price is above the ask (px > ask_px)
      sell_through - trade price is below the bid (px < bid_px)
      through      - either of the two flags is set

    Rows whose quotes are NaN compare False on both sides. The input frame
    is not modified.
    """
    above_ask = df_aligned['px'] > df_aligned['ask_px']
    below_bid = df_aligned['px'] < df_aligned['bid_px']

    return df_aligned.assign(
        buy_through=above_ask,
        sell_through=below_bid,
        through=above_ask | below_bid,
    )

def summarize_through_events(df_flagged, merge_window_ms=1000):
    """
    Collapse runs of trade-through ticks into "wave" events.

    A new wave starts whenever the direction differs from the previous
    through-tick, or the gap to the previous through-tick exceeds
    merge_window_ms (milliseconds).

    Args:
        df_flagged: time-indexed frame with boolean 'through' and
            'buy_through' columns (as produced by flag_through_ticks).
        merge_window_ms: largest gap, in ms, that still continues a wave.

    Returns:
        DataFrame with one row per wave and columns
        start_time, end_time, direction, count (empty if there are no
        through-ticks).
    """
    summary_columns = ['start_time', 'end_time', 'direction', 'count']

    hits = df_flagged.loc[df_flagged['through']].reset_index()
    if hits.empty:
        return pd.DataFrame(columns=summary_columns)

    # reset_index() turned the former index into the first column; call it
    # 'time' whatever it was named before.
    former_index_col = df_flagged.index.name or hits.columns[0]
    hits = hits.rename(columns={former_index_col: 'time'})
    hits = hits.assign(direction=np.where(hits['buy_through'], 'buy', 'sell'))

    # Milliseconds since the previous through-tick (NaN for the first row).
    elapsed_ms = hits['time'].diff().dt.total_seconds() * 1000
    side_changed = hits['direction'] != hits['direction'].shift()
    starts_wave = side_changed | (elapsed_ms > merge_window_ms)
    # The running count of wave starts labels every row with its wave.
    wave_ids = starts_wave.cumsum()

    per_wave = hits.groupby(wave_ids).agg(
        start_time=pd.NamedAgg(column='time', aggfunc='first'),
        end_time=pd.NamedAgg(column='time', aggfunc='last'),
        direction=pd.NamedAgg(column='direction', aggfunc='first'),
        count=pd.NamedAgg(column='time', aggfunc='size'),
    )
    return per_wave.reset_index(drop=True)



In [25]:
# NOTE(review): this cell needs `events` to be the DataFrame returned by
# summarize_through_events(), which is assigned in a later cell. The earlier cell that sets
# `events = analyzer.detect_trade_through()` iterates that object record-by-record as dicts,
# so running the notebook top to bottom reaches this line with the wrong kind of object.
# Run the summarize cell first, or give the two results different names.
# 1. Use start_time as the index (a DatetimeIndex)
events_by_start = events.set_index('start_time')

# 2. Use between_time to keep events whose time of day is within 21:30:00~21:30:01
late_events = events_by_start.between_time('21:30:00', '21:30:01')

print(late_events)

                                       end_time direction  count
start_time                                                      
2025-03-07 21:30:00.123 2025-03-07 21:30:01.099      sell     32


In [12]:

# ========== Example usage ==========

# 1. Align trades with the book
df_aligned = align_tick_book(df_tick, df_book)
df_aligned.tail()


Unnamed: 0_level_0,px,sz,bid_px,ask_px
ex_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-03-07 23:59:54.515,22436.0,1,22435.0,22436.0
2025-03-07 23:59:55.540,22437.0,1,22436.0,22437.0
2025-03-07 23:59:55.727,22440.0,1,22437.0,22440.0
2025-03-07 23:59:58.421,22436.0,1,22435.0,22436.0
2025-03-07 23:59:58.754,22435.0,1,22435.0,22437.0


In [None]:

# 2. Flag buy/sell trade-through ticks
df_flagged = flag_through_ticks(df_aligned)
df_flagged




Unnamed: 0_level_0,px,sz,bid_px,ask_px,buy_through,sell_through,through
ex_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2025-03-07 00:00:00.082,22653.0,1,,,False,False,False
2025-03-07 00:00:00.136,22655.0,1,,,False,False,False
2025-03-07 00:00:00.137,22655.0,6,,,False,False,False
2025-03-07 00:00:00.383,22656.0,1,,,False,False,False
2025-03-07 00:00:00.438,22656.0,1,,,False,False,False
...,...,...,...,...,...,...,...
2025-03-07 23:59:54.515,22436.0,1,22435.0,22436.0,False,False,False
2025-03-07 23:59:55.540,22437.0,1,22436.0,22437.0,False,False,False
2025-03-07 23:59:55.727,22440.0,1,22437.0,22440.0,False,False,False
2025-03-07 23:59:58.421,22436.0,1,22435.0,22436.0,False,False,False


In [None]:

# 3. Show the first 10 trade-through ticks
print(df_flagged[df_flagged['through']].head(10)[['px','bid_px','ask_px','buy_through','sell_through']])


In [None]:

# 4. Summarize through-ticks into merged wave events
# NOTE(review): this re-binds `events`, which an earlier cell assigned from
# analyzer.detect_trade_through(); after this cell `events` is the DataFrame returned by
# summarize_through_events(). A distinct name would avoid order-dependent behavior.
events = summarize_through_events(df_flagged, merge_window_ms=1000)
print(events)