# In [1]:
import ast
import ipaddress
from collections import defaultdict

import pandas as pd
from scapy.all import rdpcap, TCP, IP

# In [None]:
# === Configuration ===
pcap_path = "your_file.pcap"       # capture file to read
csv_output_path = "tcp_flows.csv"  # destination of the per-flow CSV
flow_timeout = 4.5                 # max gap between consecutive packets of a flow
                                   # (same unit as pkt.time, presumably seconds)
min_payload = 1                    # packets with a shorter TCP payload are ignored

# RFC 1918 private IPv4 ranges. Only 172.16.0.0/12 is private, not all of
# 172.0.0.0/8, so a plain "172." prefix test mislabels public hosts as local.
_PRIVATE_NETWORKS = tuple(
    ipaddress.ip_network(cidr)
    for cidr in ("10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16")
)

# Packets whose flag set is exactly one of these are skipped.
# NOTE(review): this also drops data-carrying segments flagged only "A"
# (no PSH) -- confirm that this is intended.
_SKIPPED_FLAGS = ("A", "FA", "F", "R")


def _is_private_ipv4(address):
    """Return True if *address* (dotted-quad string) is in an RFC 1918 range."""
    parsed = ipaddress.ip_address(address)
    return any(parsed in network for network in _PRIVATE_NETWORKS)


# Maps (src_ip, src_port, dst_ip, dst_port) -> list of packet samples.
flows = defaultdict(list)
packets = rdpcap(pcap_path)

for pkt in packets:
    # Require both layers: TCP carried over IPv6 has no IP layer, and
    # pkt[IP] would raise IndexError for such packets.
    if not (pkt.haslayer(IP) and pkt.haslayer(TCP)):
        continue
    ip = pkt[IP]
    tcp = pkt[TCP]

    payload_len = len(tcp.payload)
    if payload_len < min_payload:
        continue

    flags = tcp.flags
    if flags in _SKIPPED_FLAGS:
        continue

    # Group both directions of a connection under one key. The orientation
    # of the first packet seen for the connection decides which endpoint is
    # recorded as the source.
    flow_key = (ip.src, tcp.sport, ip.dst, tcp.dport)
    reverse_key = (ip.dst, tcp.dport, ip.src, tcp.sport)
    if flow_key not in flows and reverse_key in flows:
        flow_key = reverse_key

    # A packet sent from a private address is "out"; anything else is "in".
    direction = "out" if _is_private_ipv4(ip.src) else "in"

    flows[flow_key].append({
        "timestamp": pkt.time,
        "size": payload_len,
        "direction": direction,
    })

# Build one record per flow holding its packet-size time series.
flow_records = []
flow_id = 0

for endpoints, samples in flows.items():
    ordered = sorted(samples, key=lambda sample: sample["timestamp"])

    # Keep the leading run of packets whose inter-arrival gap stays within
    # flow_timeout; everything from the first larger gap onward is discarded.
    kept = []
    previous_time = ordered[0]["timestamp"]
    for sample in ordered:
        if sample["timestamp"] - previous_time > flow_timeout:
            break
        previous_time = sample["timestamp"]
        kept.append(sample)

    if not kept:
        continue

    # Inbound sizes are negated in the combined series; outbound sizes stay
    # positive. The per-direction series always hold positive sizes.
    ts_in = [s["size"] for s in kept if s["direction"] == "in"]
    ts_out = [s["size"] for s in kept if s["direction"] != "in"]
    ts_comb = [
        -s["size"] if s["direction"] == "in" else s["size"] for s in kept
    ]

    src_ip, sport, dst_ip, dport = endpoints
    # The series are stored as their Python list repr so that each one fits
    # in a single CSV cell.
    flow_records.append({
        "flow_id": flow_id,
        "src_ip": src_ip,
        "src_port": sport,
        "dst_ip": dst_ip,
        "dst_port": dport,
        "ts_in": str(ts_in),
        "ts_out": str(ts_out),
        "ts_combined": str(ts_comb),
    })
    flow_id += 1

# Export to CSV.
# The column list is pinned so that a capture yielding no flows still writes
# a CSV with a header row; without it an empty record list gives a DataFrame
# with no columns. For non-empty input the order matches the record keys,
# so the output is unchanged.
csv_columns = [
    "flow_id",
    "src_ip",
    "src_port",
    "dst_ip",
    "dst_port",
    "ts_in",
    "ts_out",
    "ts_combined",
]
df = pd.DataFrame(flow_records, columns=csv_columns)
df.to_csv(csv_output_path, index=False)
print(f"✅ 成功导出 {len(df)} 个流到 CSV: {csv_output_path}")
