In [None]:
import re
import shlex
import subprocess
from collections import defaultdict
from datetime import datetime
from pathlib import Path

def run_tf_g_and_extract_all_lines(logfile: Path) -> list:
    """Run ``less <logfile> | tf -g`` in a shell and return its output lines.

    The command has to go through a shell because it is a pipeline, so the
    path is quoted with ``shlex.quote``: spaces or shell metacharacters in the
    path can neither split the argument nor inject extra commands.

    Args:
        logfile: Path of the log file to feed into the pipeline.

    Returns:
        The captured standard output split into lines. An empty list is
        returned when the command cannot be started, exits with a non-zero
        status, or produces output that cannot be decoded as text.
    """
    cmd = f"less {shlex.quote(str(logfile))} | tf -g"
    try:
        result = subprocess.check_output(cmd, shell=True, text=True, stderr=subprocess.DEVNULL)
    except (subprocess.SubprocessError, OSError, UnicodeDecodeError):
        # Best effort by design: an unreadable log yields no lines. Only the
        # expected failure modes are caught so that KeyboardInterrupt and
        # programming errors still propagate.
        return []
    return result.splitlines()

def extract_all_quote_fill_pairs(date: str) -> dict:
    """Collect one day's quote fills from the 5F and 6F logs, keyed by contract month.

    A line counts as a quote fill when it contains both ``[FILL]`` and
    ``FUT_TAIFEX_TXF:`` and the line immediately before it contains
    ``[SEND_H_ORDER]``.

    Args:
        date: Day to load, formatted as ``YYYY-MM-DD``.

    Returns:
        Mapping of contract month (the digits following ``FUT_TAIFEX_TXF:``)
        to a list of ``(floor, timestamp, line)`` tuples sorted by timestamp,
        e.g. ``{'202508': [(floor, timestamp, line), ...], ...}``.
        ``floor`` is ``"5F"`` or ``"6F"``. ``timestamp`` is parsed from the
        first 15 characters of the line with ``%H:%M:%S.%f``, so it carries a
        time of day only and no calendar date.
    """
    compact_date = date.replace("-", "")
    base_path = Path("/nfs/datafiles.optiontraderlogs") / date.replace("-", "/")
    log_file_5f = base_path / "capital_neutrino_txf_5f" / f"output.neutrino_txf_5f.{compact_date}.log"
    log_file_6f = base_path / "capital_neutrino_txf" / f"output.neutrino_txf.{compact_date}.log"

    lines_5f = run_tf_g_and_extract_all_lines(log_file_5f)
    lines_6f = run_tf_g_and_extract_all_lines(log_file_6f)

    def parse_quote_fills(lines, floor):
        """Return (contract_month, floor, timestamp, line) for each quote fill in ``lines``."""
        results = []
        prev_line = ""
        for line in lines:
            # Advance prev_line before any early exit so that a skipped or
            # malformed line can never leave a stale predecessor behind.
            preceded_by_order = "[SEND_H_ORDER]" in prev_line
            prev_line = line

            if not preceded_by_order:
                continue
            if "[FILL]" not in line or "FUT_TAIFEX_TXF:" not in line:
                continue
            match = re.search(r"FUT_TAIFEX_TXF:(\d+)", line)
            if not match:
                continue
            try:
                timestamp = datetime.strptime(line[:15], "%H:%M:%S.%f")
            except ValueError:
                # The line does not start with an HH:MM:SS.ffffff timestamp.
                continue
            results.append((match.group(1), floor, timestamp, line))
        return results

    parsed = parse_quote_fills(lines_5f, "5F") + parse_quote_fills(lines_6f, "6F")

    by_month = defaultdict(list)
    for contract_month, floor, timestamp, line in parsed:
        by_month[contract_month].append((floor, timestamp, line))

    # Stable sort: events with equal timestamps keep the 5F-before-6F order.
    for events in by_month.values():
        events.sort(key=lambda event: event[1])

    return by_month

def group_nearby_quotes_by_month(quote_events_by_month: dict, threshold_ms: float = 0.5) -> dict:
    """Cluster each month's events that lie within ``threshold_ms`` of a cluster's first event.

    Events are consumed in the order given. An event joins the open cluster
    when its timestamp (element 1 of the event tuple) is at most
    ``threshold_ms`` milliseconds away from the cluster's first event;
    otherwise the open cluster is closed and the event starts a new one.
    Clusters holding fewer than two events are discarded.

    Args:
        quote_events_by_month: Mapping of month to a sequence of event tuples
            whose element 1 is a ``datetime``.
        threshold_ms: Maximum distance, in milliseconds, from the first event
            of a cluster.

    Returns:
        Mapping of month to the list of retained clusters (each a list of events).
    """
    clustered = {}

    for contract, events in quote_events_by_month.items():
        kept = []
        bucket = []
        for event in events:
            if bucket:
                gap_ms = abs((event[1] - bucket[0][1]).total_seconds() * 1000)
                if gap_ms <= threshold_ms:
                    bucket.append(event)
                    continue
                # Too far from the anchor: close the open cluster.
                if len(bucket) > 1:
                    kept.append(bucket)
            bucket = [event]

        # Flush whatever is still open at the end of the month's events.
        if len(bucket) > 1:
            kept.append(bucket)

        clustered[contract] = kept

    return clustered

def print_matched_quote_groups_with_diff(grouped_by_month: dict):
    """Print every matched group, per month, with the lead of its earliest event.

    For each group the events are ordered by timestamp (element 1), a line
    states which floor of the two earliest events came first and by how many
    milliseconds, and then every event of the group is listed.

    Args:
        grouped_by_month: Mapping of month to a list of groups; each group is
            a list of ``(floor, timestamp, line)`` tuples with at least two
            entries.
    """
    for contract, clusters in grouped_by_month.items():
        print(f"\n=== Matched QUOTE FILL for contract {contract} ===")
        for cluster in clusters:
            print("---")
            ordered = list(cluster)
            ordered.sort(key=lambda event: event[1])
            first, second = ordered[0], ordered[1]
            lead_ms = abs((second[1] - first[1]).total_seconds() * 1000)
            print(f"{first[0]} faster than {second[0]} by {lead_ms:.3f} ms")
            for event in ordered:
                floor, ts, line = event
                print(f"{floor} {ts.time()} {line}")


In [None]:
from datetime import date, timedelta
import pandas as pd

def summarize_faster_side(grouped_by_month: dict) -> pd.DataFrame:
    """Build one row per matched group naming the floor whose event came first.

    Args:
        grouped_by_month: Mapping of month to a list of groups; each group is
            a list of ``(floor, timestamp, line)`` tuples. Groups with fewer
            than two events are ignored.

    Returns:
        DataFrame with the columns ``月份`` (month), ``較快樓層`` (floor of the
        earliest event) and ``快了多少毫秒`` (gap in milliseconds between the
        two earliest events, rounded to 3 decimals). The columns are present
        even when there are no rows, so callers can group on them safely.
    """
    columns = ["月份", "較快樓層", "快了多少毫秒"]
    summary = []
    for month, groups in grouped_by_month.items():
        for group in groups:
            if len(group) < 2:
                continue
            sorted_group = sorted(group, key=lambda x: x[1])
            # NOTE(review): the two earliest events may belong to the same
            # floor, in which case the row compares a floor with itself.
            # Confirm whether such groups should be excluded.
            faster, slower = sorted_group[0], sorted_group[1]
            diff_ms = (slower[1] - faster[1]).total_seconds() * 1000
            summary.append({
                "月份": month,
                "較快樓層": faster[0],
                "快了多少毫秒": round(diff_ms, 3)
            })
    # Explicit columns keep the schema stable for an empty result.
    return pd.DataFrame(summary, columns=columns)
def print_quote_speed_summary(summary_df: pd.DataFrame):
    """Print, for 6F and then 5F, how often the floor was faster and its mean lead.

    Args:
        summary_df: Frame with a ``較快樓層`` column (faster floor) and a
            ``快了多少毫秒`` column (lead in milliseconds). A frame without
            these columns, such as the empty frame produced when nothing was
            matched, is reported as zero counts instead of raising ``KeyError``.
    """
    print("===== QUOTING SPEED SUMMARY =====")
    floor_col, diff_col = "較快樓層", "快了多少毫秒"

    # An empty DataFrame built from no records has no columns at all.
    if {floor_col, diff_col}.issubset(summary_df.columns):
        grouped = summary_df.groupby(floor_col)[diff_col]
    else:
        grouped = None

    for floor in ["6F", "5F"]:
        if grouped is not None and floor in grouped.groups:
            leads = grouped.get_group(floor)
            print(f"{floor} faster count: {len(leads)}")
            print(f"{floor} average faster: {leads.mean():.6f} ms")
        else:
            print(f"{floor} faster count: 0")
            print(f"{floor} average faster: N/A")

In [None]:

# Date range to scan; both endpoints are included by the loop below.
start_date = date(2025, 6, 1)
end_date = date(2025, 6, 20)
# Matched groups accumulated across all scanned days, keyed by contract month.
overall_grouped = {}

# Each calendar day is extracted and grouped on its own, so events are only
# ever matched against events from the same day.
for d in range((end_date - start_date).days + 1):
    day = start_date + timedelta(days=d)
    day_str = day.strftime("%Y-%m-%d")
    quote_events = extract_all_quote_fill_pairs(day_str)
    grouped = group_nearby_quotes_by_month(quote_events)
    # `group` is the list of matched groups for `month` on this day.
    for month, group in grouped.items():
        overall_grouped.setdefault(month, []).extend(group)


In [None]:
# Summarize the matched groups from all scanned days, then print the
# per-floor count and average lead.
summary_result = summarize_faster_side(overall_grouped)
print_quote_speed_summary(summary_result)
