In [24]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
import subprocess as subp
import os
import re
from io import StringIO
from typing import List, Dict, Any, Tuple
# import pyshark

In [53]:
pth_to_trace = '../data/devel-traces/zoom_firefox_gatherer-01-xm2ks_33048516.pcapng'
hostname = 'zoom.us'

pth_to_trace = '../data/devel-traces/cnet_firefox_gatherer-01-gxjxb_304479.pcapng'
hostname = 'www.cnet.com'

pth_to_trace = '../data/devel-traces/ebay_firefox_gatherer-01-dw2vj_11783345.pcapng'
hostname = 'www.ebay.com'

In [55]:
def get_ips_from_dns(pth_to_trace: str, hostname: str) -> List[str]:
    dns_cmd = f'tshark -r {pth_to_trace} -Y "dns.resp.name == \"{hostname}\"" -T fields ' \
        '-e frame.number ' \
        '-e frame.time_epoch ' \
        '-e dns.a  ' \
        '-e dns.resp.name  ' \
        '-E header=y -E separator=";"'
    out = subp.check_output(dns_cmd, shell=True)
    print(out)
    dns_replies = pd.read_table(StringIO(out.decode('utf8')), sep=';')
    ips = dns_replies.iloc[0].loc['dns.a'].split(',')
    return ips


def get_client_hello(pth_to_trace: str, ip: str) -> pd.DataFrame:
    client_hello_cmd = f'tshark -r {pth_to_trace} ' \
        f'-Y "tls.handshake.type == 1 && ip.dst=={ip}" ' \
        '-T fields ' \
        '-e frame.number ' \
        '-e frame.time_epoch ' \
        '-e ip.src ' \
        '-e tcp.srcport ' \
        '-e ip.dst ' \
        '-e tcp.dstport ' \
        '-E header=y -E separator=";"'
    out = subp.check_output(client_hello_cmd, shell=True)
    client_hello = pd.read_table(StringIO(out.decode('utf8')), sep=';')
    return client_hello


def four_tuple_from_client_hello(client_hello: pd.DataFrame) -> Tuple[str, int, str, int]:
    src_ip = client_hello.loc[0, 'ip.src']
    src_port = int(client_hello.loc[0, 'tcp.srcport'])
    dst_ip = client_hello.loc[0, 'ip.dst']
    dst_port = int(client_hello.loc[0, 'tcp.dstport'])
    return src_ip, src_port, dst_ip, dst_port


def get_main_flow(pth_to_trace: str, client_hello: pd.DataFrame) -> pd.DataFrame:
    src_ip, src_port, _, _ = four_tuple_from_client_hello(client_hello)
    main_flow_cmd = f'tshark -r {pth_to_trace} ' \
        f'-Y "((ip.src == {src_ip}   && '\
        f'tcp.srcport == {src_port}) || ' \
        f'(ip.dst == {src_ip}        && ' \
        f'tcp.dstport == {src_port})) &&' \
        f'tcp.len > 0" -o tcp.desegment_tcp_streams:false -T fields ' \
        '-e frame.number ' \
        '-e frame.time_epoch ' \
        '-e ip.src ' \
        '-e tcp.srcport ' \
        '-e ip.dst ' \
        '-e tcp.dstport ' \
        '-e tcp.len ' \
        '-e tcp.hdr_len ' \
        '-e tls.record.content_type ' \
        '-e tls.record.length ' \
        '-e tls.record.version ' \
        '-e tls.handshake.type ' \
        '-e ip.hdr_len ' \
        '-e ipv6.nxt ' \
        '-e ipv6.dst ' \
        '-e _ws.col.Info ' \
        '-E header=y -E separator=";"'
    out = subp.check_output(main_flow_cmd, shell=True)
    main_flow = pd.read_table(StringIO(out.decode('utf8')), sep=';')
    return main_flow


def find_tls_record_in_payload(pth_to_trace: int, frame_number: int,
                               major: int, minor: int) -> Tuple[int, int, int, int]:
    tls_cmd = f'tshark -r {pth_to_trace} -Y "frame.number == {frame_number}" ' \
        f'-x -w output.bin | grep -o -E "(14|15|16|17) ' \
        f'{major:02d} {minor:02d} [0-9a-f]{{2}} [0-9a-f]{{2}}"'
    out = subp.check_output(tls_cmd, shell=True)
    bytes = out.decode('utf8').split(' ')
    frame_size = int(bytes[-2] + bytes[-1].strip(), 16)
    record_type = int(bytes[0])
    major = int(bytes[1])
    minor = int(bytes[2])
    return record_type, frame_size, major, minor


def extract_record_info(pth_to_trace: str,frame_number: int, major: int, minor: int) -> Tuple[int, int, int, int]:
    tls_cmd = f'tshark -r {pth_to_trace} -Y "frame.number == {frame_number}" ' \
        f'-x -w output.bin | grep -o -E "(14|15|16|17) {major:02d} {minor:02d} [0-9a-f]{{2}} [0-9a-f]{{2}}"'
    out = subp.check_output(tls_cmd, shell=True)
    bytes = out.decode('utf8').split(' ')
    frame_size = int(bytes[-2] + bytes[-1].strip(), 16)
    record_type = int(bytes[0])
    major = int(bytes[1])
    minor = int(bytes[2])
    return frame_size, record_type, major, minor


def extract_main_flow(pth_to_trace: str, hostname: str) -> pd.DataFrame:
    ips = get_ips_from_dns(pth_to_trace, hostname)
    client_hello = get_client_hello(pth_to_trace, ips[0])
    main_flow = get_main_flow(pth_to_trace, client_hello)
    return main_flow

In [23]:
print(f'tshark -r {pth_to_trace} -Y "frame.number == 395" -x -w output.bin')

tshark -r ../data/devel-traces/cnet_firefox_gatherer-01-gxjxb_304479.pcapng -Y "frame.number == 395" -x -w output.bin


In [45]:
out = subp.check_output(f'tshark -r {pth_to_trace} -Y "frame.number == 395" -o tcp.desegment_tcp_streams:FALSE -x -w output.bin', shell=True).decode('utf8')

In [46]:
len('Frame (1514 bytes):'), len("0000  02 42 ac 11 00 07 02 42 8c 27 ba 23 08 00 45 00   .B.....B.'.#..E.")

(19, 72)

In [48]:
skip_to = 0

In [49]:
print(out)

0000  02 42 ac 11 00 07 02 42 8c 27 ba 23 08 00 45 00   .B.....B.'.#..E.
0010  05 dc a5 75 00 00 35 06 24 8f 97 65 72 9a ac 11   ...u..5.$..er...
0020  00 07 01 bb e3 8e 0b 1d 9d c0 88 b8 2c 4f 80 10   ............,O..
0030  00 8d 2e 79 00 00 01 01 08 0a 09 00 6d 11 9d 3b   ...y........m..;
0040  3f 9e bd 08 2b a9 2c 42 89 3c f8 be d5 67 67 92   ?...+.,B.<...gg.
0050  ae 49 d9 e8 3a 12 db 9f 33 40 ba b5 ba cc b5 ae   .I..:...3@......
0060  43 58 44 80 53 c8 c7 0f af e0 82 d7 67 43 17 03   CXD.S.......gC..
0070  03 05 73 66 a6 78 ea ef 6e 0c f2 cd a6 d2 07 a1   ..sf.x..n.......
0080  8d 94 9c 9b a0 81 49 c7 f1 89 fc f8 f8 a7 63 f5   ......I.......c.
0090  1c 51 ab d5 35 16 7d 7d 23 71 82 9c 39 eb 87 04   .Q..5.}}#q..9...
00a0  96 e5 26 6d 79 fc ea 43 d3 09 58 a7 1c 6f cc 14   ..&my..C..X..o..
00b0  bb 6d b8 14 00 4e 1d 38 2b 9f 0c a3 71 48 04 cf   .m...N.8+...qH..
00c0  76 72 05 31 de 98 42 a3 3b a8 8f 26 1b a9 1f ea   vr.1..B.;..&....
00d0  6f 2a bb cc 0b de 36 ba 51 ed c5 94 61 fc 9f 

In [50]:
len('0010  05 dc a5 75 00 00 35 06 24 8f 97 65 72 9a ac 11')

53

In [52]:
[s[6:53].strip() for s in out[skip_to:].split(os.linesep)]

['02 42 ac 11 00 07 02 42 8c 27 ba 23 08 00 45 00',
 '05 dc a5 75 00 00 35 06 24 8f 97 65 72 9a ac 11',
 '00 07 01 bb e3 8e 0b 1d 9d c0 88 b8 2c 4f 80 10',
 '00 8d 2e 79 00 00 01 01 08 0a 09 00 6d 11 9d 3b',
 '3f 9e bd 08 2b a9 2c 42 89 3c f8 be d5 67 67 92',
 'ae 49 d9 e8 3a 12 db 9f 33 40 ba b5 ba cc b5 ae',
 '43 58 44 80 53 c8 c7 0f af e0 82 d7 67 43 17 03',
 '03 05 73 66 a6 78 ea ef 6e 0c f2 cd a6 d2 07 a1',
 '8d 94 9c 9b a0 81 49 c7 f1 89 fc f8 f8 a7 63 f5',
 '1c 51 ab d5 35 16 7d 7d 23 71 82 9c 39 eb 87 04',
 '96 e5 26 6d 79 fc ea 43 d3 09 58 a7 1c 6f cc 14',
 'bb 6d b8 14 00 4e 1d 38 2b 9f 0c a3 71 48 04 cf',
 '76 72 05 31 de 98 42 a3 3b a8 8f 26 1b a9 1f ea',
 '6f 2a bb cc 0b de 36 ba 51 ed c5 94 61 fc 9f 30',
 'c4 1a 89 89 01 13 25 bf 90 e8 9c b5 89 6d b8 69',
 '5a 1f c7 4e dd 36 4c f5 0e 91 b3 80 cc ec 38 0d',
 '73 46 85 f7 bb 99 c7 16 0a 73 0b db de 96 be 06',
 '92 cc 34 7c 8b 30 e7 63 d9 fc 53 42 2a 18 20 99',
 '06 80 ed 60 ba ad ef ad a9 64 40 16 1c e9 0d e3',
 '67 7c ee 1

In [31]:
pattern = re.compile(
        "(?P<numBytes0>[0-9a-f]{4})?"              
        "(?P<anyBytes0>  ([0-9a-f]{2} )*)?"        
        "(?P<recType>14|15|16|17)"               
        "((?P<ascii1>   .{16}\n)(?P<numBytes2>[0-9a-f]{4} ))?"   
        " (?P<major>0[123])"
        "((?P<ascii2>   .{16}\n)(?P<numBytes3>[0-9a-f]{4} ))?"   
        " (?P<minor>0[123])"
        "((?P<ascii3>   .{16}\n)(?P<numBytes4>[0-9a-f]{4} ))?"   
        " (?P<length0>[0-9a-f]{2})"              
        "((?P<ascii4>   .{16}\n)(?P<numBytes5>[0-9a-f]{4} ))?"   
        " (?P<length1>[0-9a-f]{2})"
        "((?P<ascii5>   .{16}\n)(?P<numBytes6>[0-9a-f]{4} ))?"
        " (?P<handshakeType>[0-9a-f]{2})"
    )


In [36]:
s = """0090  00 00 00 00 4e 12 ea 29 41 9f 08 a7 ce 89 b3 00   ....N..)A.......
0080  d7 14 03 03 00 01 01 16 03 03 00 28 00 00 00 00   ...........(....
0090  00 00 00 00 4e 12 ea 29 41 9f 08 a7 ce 89 b3 00   ....N..)A.......
00a0  19 20 40 04 ad 65 5a 4d fa a4 72 90 69 dd a1 29   . @..eZM..r.i..)
00b0  57 8a 63 16                                       W.c."""
m = pattern.search(s)

In [39]:
m.span()

(73, 117)

In [136]:
def extract_record_info_re(pth_to_trace: str, frame_number: int, byte_offset=0) -> Dict[str, str]:
    
    # Match byte numbers.
    # Match any sequence of bytes.
    # Match TLS record type.
    # End of row. Match ASCII output, new line and byte numbers.
    # Match TLS major version.
    # End of row. Match ASCII output, new line and byte numbers.
    # Match TLS minor version.
    # End of row. Match ASCII output, new line and byte numbers.
    # Match first byte of TLS record length.
    # End of row. Match ASCII output, new line and byte numbers.
    # Match second byte of TLS record length.
    # End of row. Match ASCII output, new line and byte numbers.
    # Match handshake type. Only valid if record type is 22, i.e., 0x16
    pattern = re.compile(
        "(?P<numBytes0>[0-9a-f]{4})?"              
        "(?P<anyBytes0>  ([0-9a-f]{2} )*)?"        
        "(?P<recType>14|15|16|17)"               
        "((?P<ascii1>   .{16}\n)(?P<numBytes2>[0-9a-f]{4} ))?"   
        " (?P<major>0[123])"
        "((?P<ascii2>   .{16}\n)(?P<numBytes3>[0-9a-f]{4} ))?"   
        " (?P<minor>0[123])"
        "((?P<ascii3>   .{16}\n)(?P<numBytes4>[0-9a-f]{4} ))?"   
        " (?P<length0>[0-9a-f]{2})"              
        "((?P<ascii4>   .{16}\n)(?P<numBytes5>[0-9a-f]{4} ))?"   
        " (?P<length1>[0-9a-f]{2})"
        "((?P<ascii5>   .{16}\n)(?P<numBytes6>[0-9a-f]{4} ))?"
        " (?P<handshakeType>[0-9a-f]{2})"
    )
    num_rows = int(byte_offset / 16)
    remove_rows = num_rows * 73
    tls_cmd = f'tshark -r {pth_to_trace} -Y "frame.number == {frame_number}" -x -w output.bin'
    out = subp.check_output(tls_cmd, shell=True).decode('utf8')
    match = pattern.search(out[remove_rows:])
    
    if match is not None:
        groupdict = match.groupdict()
        num_bytes = len(groupdict['anyBytes0'].strip().split(' '))
        if num_rows * 16 + num_bytes > byte_offset + 2:
            start, stop = match.span()
            match2 = pattern.search(out[remove_rows + start:remove_rows + stop - 17])
            if match2 is not None:
                 match = match2
    else:
        print("Match is None for ", out[remove_rows:])
    #if match is not None:
    #    start, stop = match.span()
    #    print("BEFORE MATCH", out[remove_rows + start:remove_rows + stop - 17])
    #    match2 = pattern.search(out[remove_rows + start:remove_rows + stop - 17])
    #    if match2 is not None:
    #        match = match2
    return None if match is None else match.groupdict()


def frame_size_from_group_dict(groupdict: Dict[str, str]) -> int:
    return int(groupdict['length0'].strip() + groupdict['length1'].strip(), 16)


def get_frame_size(groupdict: Dict[str, str]) -> int:
    return int(groupdict['length0'].strip() + groupdict['length1'].strip(), 16)


def get_record_type(groupdict: Dict[str, str]) -> int:
    return int(groupdict['recType'], 16)


def get_handshake_type(groupdict: Dict[str, str]) -> int:
    return int(groupdict['handshakeType'], 16)


def get_major_version(groupdict: Dict[str, str]) -> int:
    return int(groupdict['major'], 16)


def get_minor_version(groupdict: Dict[str, str]) -> int:
    return int(groupdict['minor'], 16)


def get_header_start_byte_from_group_dict(groupdict: Dict[str, str], tcp_header_offset: int) -> int:
    tmp = groupdict['numBytes0'].lstrip('0')
    row_offset = 0 if len(tmp) == 0 else int(tmp, 16)
    tmp = groupdict['anyBytes0'].strip()
    col_offset = 0 if len(tmp) == 0 else len(tmp.split(' '))
    return row_offset + col_offset - tcp_header_offset

In [133]:
def add_labels(pth_to_trace: str, main_flow: pd.DataFrame) -> pd.DataFrame:
    frame_size, record, major, minor = find_tls_record_in_payload(pth_to_trace, frame_number, major, minor)
    
    
def packet_starts_with_tls_header(pth_to_trace: str, frame_number: int,
                                  major: int, minor: int) -> bool:
    first = 54
    second = 55
    third = 56
    cmd = f'tshark -r {pth_to_trace} -Y "frame.number == {frame_number} && (frame[{first}] == 14 || '\
          f'frame[{first}] == 15 || '\
          f'frame[{first}] == 16 || '\
          f'frame[{first}] == 17) && '\
          f'frame[{second}] == {major:02d} && frame[{third}] == {minor:02d}"'
    out = subp.check_output(cmd, shell=True)
    if len(out.decode('utf8')) > 0:
        return True
    else:
        return False

In [12]:
groupdict = extract_record_info_re(pth_to_trace, 158, 3, 3)
print(groupdict)
print(frame_size_from_group_dict(groupdict))
print(get_header_start_byte_from_group_dict(groupdict))

{'numBytes0': '04e0', 'anyBytes0': '  d0 3f 1f 65 2a 2e b7 1f b0 c4 4b ', 'recType': '17', 'ascii1': None, 'numBytes2': None, 'major': '03', 'ascii2': None, 'numBytes3': None, 'minor': '03', 'ascii3': None, 'numBytes4': None, 'length0': '10', 'ascii4': None, 'numBytes5': None, 'length1': '18', 'ascii5': '   .?.e*.....K.....\n', 'numBytes6': '04f0 ', 'handshakeType': 'e8'}
4120
1205


In [306]:
packet_starts_with_tls_header(pth_to_trace, 74, 3, 3)

True

In [195]:
groupdict['anyBytes0']

'  d0 3f 1f 65 2a 2e b7 1f b0 c4 4b '

In [56]:
ips = get_ips_from_dns(pth_to_trace, hostname)
ips

b'frame.number;frame.time_epoch;dns.a;dns.resp.name\n'


IndexError: single positional indexer is out-of-bounds

In [13]:
client_hello = get_client_hello(pth_to_trace, ip)
client_hello

NameError: name 'ip' is not defined

In [14]:
main_flow = get_main_flow(pth_to_trace, client_hello)
main_flow

NameError: name 'client_hello' is not defined

In [88]:
frame_number = 154
major = 3
minor = 3

In [120]:
main_flow = extract_main_flow(pth_to_trace, hostname)
main_flow

Unnamed: 0,frame.number,frame.time_epoch,ip.src,tcp.srcport,ip.dst,tcp.dstport,tcp.len,tcp.hdr_len,tls.record.content_type,tls.record.length,tls.record.version,tls.handshake.type,ip.hdr_len,ipv6.nxt,ipv6.dst,_ws.col.Info
0,14,1.618085e+09,172.17.0.2,40704,65.9.28.69,443,517,32,22,512,0x00000301,1.0,20,,,Client Hello
1,17,1.618085e+09,65.9.28.69,443,172.17.0.2,40704,1428,32,2220,122136,"0x00000303,0x00000303,0x00000303",2.0,20,,,"Server Hello, Change Cipher Spec, Application ..."
2,19,1.618085e+09,65.9.28.69,443,172.17.0.2,40704,1428,32,,,,,20,,,Continuation Data
3,21,1.618085e+09,65.9.28.69,443,172.17.0.2,40704,1428,32,,,,,20,,,Continuation Data
4,23,1.618085e+09,65.9.28.69,443,172.17.0.2,40704,1428,32,,,,,20,,,Continuation Data
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
304,662,1.618085e+09,65.9.28.69,443,172.17.0.2,40704,1428,32,,,,,20,,,Continuation Data
305,663,1.618085e+09,65.9.28.69,443,172.17.0.2,40704,1428,32,,,,,20,,,Continuation Data
306,665,1.618085e+09,65.9.28.69,443,172.17.0.2,40704,1428,32,,,,,20,,,Continuation Data
307,666,1.618085e+09,65.9.28.69,443,172.17.0.2,40704,1428,32,,,,,20,,,Continuation Data


In [9]:
extract_main_flow('../data/devel-traces/instagram_chromium_gatherer-01-4tss9_38712719.pcapng', 'help.instagram.com')

tshark -r ../data/devel-traces/instagram_chromium_gatherer-01-4tss9_38712719.pcapng -Y "dns.resp.name == "help.instagram.com"" -T fields -e frame.number -e frame.time_epoch -e dns.a  -e dns.resp.name  -E header=y -E separator=";"
-----------------


Unnamed: 0,frame.number,frame.time_epoch,ip.src,tcp.srcport,ip.dst,tcp.dstport,tcp.len,tls.record.content_type,tls.record.length,tls.record.version,_ws.col.Info
0,12,1617986000.0,172.17.0.9,51102,157.240.20.63,443,517,22.0,512.0,0x00000301,Client Hello
1,14,1617986000.0,157.240.20.63,443,172.17.0.9,51102,1380,2220.0,12211017.0,"0x00000303,0x00000303,0x00000303","Server Hello, Change Cipher Spec, Application ..."
2,16,1617986000.0,157.240.20.63,443,172.17.0.9,51102,904,,1124.0,0x00000303,Application Data
3,21,1617986000.0,172.17.0.9,51102,157.240.20.63,443,64,20.0,153.0,"0x00000303,0x00000303","Change Cipher Spec, Application Data"
4,22,1617986000.0,172.17.0.9,51102,157.240.20.63,443,92,,87.0,0x00000303,Application Data
5,23,1617986000.0,172.17.0.9,51102,157.240.20.63,443,427,,422.0,0x00000303,Application Data
6,26,1617986000.0,157.240.20.63,443,172.17.0.9,51102,172,,167.0,0x00000303,Application Data
7,28,1617986000.0,157.240.20.63,443,172.17.0.9,51102,80,,75.0,0x00000303,Application Data
8,31,1617986000.0,157.240.20.63,443,172.17.0.9,51102,31,,26.0,0x00000303,Application Data
9,33,1617986000.0,157.240.20.63,443,172.17.0.9,51102,35,,30.0,0x00000303,Application Data


In [157]:
TLS_MAP = {
    20: 'CHANGE_CIPHER_SPEC',
    21: 'ALERT',
    22: {
        1: 'CLIENT_HELLO',
        2: 'SERVER_HELLO',
        11: 'CERTIFICATE',
        12: 'SERVER_KEY_EXCHANGE',
        13: 'CERTIFICATE_REQUEST',
        14: 'SERVER_DONE',
        15: 'CERTIFICATE_VERIFY',
        16: 'CLIENT_KEY_EXCHANGE',
        20: 'FINISHED'
    },
    23: 'APPLICATION_DATA'
}


class MainFlow(object):
    """
    Represents a main flow of a trace.
    """
    def __init__(self, src_ip: str, src_port: int, dst_ip: str, dst_port: int,
                 tcp_header_length: int, ip_header_length: int):
        self.src_ip = src_ip
        self.src_port = src_port
        self.dst_ip = dst_ip
        self.dst_port = dst_port
        self.tls_records = []
        self.frames = []
        self.tcp_header_length = tcp_header_length
        self.ip_header_length = ip_header_length

    def __str__(self) -> str:
        s = f'Flow: {self.src_ip}\t{self.src_port}\t{self.dst_ip}\t{self.dst_port}'
        s += '\n\t' + '\n\t'.join([str(f) for f in self.frames]) if len(self.frames) > 0 else ''
        return s


class TlsRecord(object):
    """
    Represents a TLS record.
    """
    header_size = 5

    @classmethod
    def from_record(cls, record: 'TlsRecord') -> 'TlsRecord':
        return cls(
            length=record.length,
            content_type=record.content_type,
            minor_version=record.minor_version,
            major_version=record.major_version,
            record_number=record.record_number,
            header_start_byte=record.header_start_byte,
            handshake_type=record.handshake_type,
            direction=record.direction
        )

    def __init__(self, length: int, content_type: int, minor_version: int,
                 major_version: int, record_number: int, direction: int,
                 header_start_byte: int, handshake_type: int=None):
        self.length = length
        self.content_type = content_type
        self.minor_version = minor_version
        self.major_version = major_version
        self.record_number = record_number
        self.handshake_type = handshake_type
        self.header_start_byte = header_start_byte
        self.direction = direction

    def __str__(self) -> str:
        d = 'outbound' if self.direction < 0 else 'inbound'
        msg_type = TLS_MAP[self.content_type]
        if self.content_type == 22:
            try:
                msg_type = TLS_MAP[self.content_type][self.handshake_type]
            except Exception as e:
                msg_type = f"ENCRYPTED_HANDSHAKE_MESSAGE {self.handshake_type}"
        s = f'Record: {self.record_number}\t{msg_type:30s}\t' + \
            f'{self.major_version}\t{self.minor_version}\t{d:8s}\t{self.header_start_byte}->{self.length}'
        return s


class Frame(object):
    """
    Represents a normal L2 frame.
    """
    def __init__(self, tcp_length: int, frame_number: int, time_epoch: float, direction: int,
                 tcp_header_offset: int):
        self.tcp_length = tcp_length
        self.frame_number = frame_number
        self.time_epoch = time_epoch
        self.tls_records = []
        self.direction = direction
        self.tcp_header_offset = tcp_header_offset

    def __str__(self) -> str:
        d = 'outbound' if self.direction < 0 else 'inbound'
        s = f"Frame: {self.frame_number}\t{self.time_epoch}\t{d:8s}\t{self.tcp_length}"
        s += '\n\t\t' + '\n\t\t'.join([str(r) for r in self.tls_records]) if len(self.tls_records) > 0 else ''
        return s


def find_records(pth_to_trace: str, frame: Frame, record_number: int,
                   offset: int, records: List[TlsRecord]) -> int:
    groupdict = extract_record_info_re(pth_to_trace, frame.frame_number, byte_offset=offset)
    # print("\t\t", str(groupdict))
    if groupdict is None:
        raise ValueError(f"GroupDict is none for frame {str(frame)}")
    else:
        record = TlsRecord(
            length=frame_size_from_group_dict(groupdict),
            content_type=get_record_type(groupdict),
            major_version=get_major_version(groupdict),
            minor_version=get_minor_version(groupdict),
            direction=frame.direction,
            record_number=record_number,
            header_start_byte=get_header_start_byte_from_group_dict(groupdict, frame.tcp_header_offset),
        )
        if record.content_type == 22:
            record.handshake_type = get_handshake_type(groupdict)
        records.append(record)

        remaining_payload = frame.tcp_length - record.header_start_byte - record.header_size
        if remaining_payload <= record.length:
            return record.length - remaining_payload
        else:
            return find_records(
                pth_to_trace=pth_to_trace,
                frame=frame,
                record_number=record_number + 1,
                offset = record.header_start_byte + record.header_size + record.length + frame.tcp_header_offset,
                records=records
            )


def new_row(main_flow: pd.DataFrame, i: int) -> Dict[str, Any]:
    frame_number = int(main_flow.loc[i, 'frame.number'])
    row = {}
    row['frame_number'] = frame_number
    row['frame.time_epoch'] = main_flow.at[i, 'frame.time_epoch']
    row['ip.src'] = main_flow.at[i, 'ip.src']
    row['tcp.srcport'] = main_flow.at[i, 'tcp.srcport']
    row['ip.dst'] = main_flow.at[i, 'ip.dst']
    row['tcp.dstport'] = main_flow.at[i, 'tcp.dstport']
    row['tcp.len'] = main_flow.at[i, 'tcp.len']
    row['tls.frame_number'] = 0 if len(rows) == 0 else rows[-1]['tls.frame_number'] + 1
    row['tls.handshake.type'] = None
    row['tls.record.version.major'] = None
    row['tls.record.version.minor'] = None
    return row


def packet_part_of_previous_frame(row: Dict[str, any]) -> Dict[str, Any]:
    print("\tPacket belongs to previous TLS frame.")
    row['tls.record.version.major'] = rows[-1]['tls.record.version.major']
    row['tls.record.version.minor'] = rows[-1]['tls.record.version.minor']
    row['tls.frame_number'] = rows[-1]['tls.frame_number']
    row['tls.record.content_type'] = rows[-1]['tls.record.content_type']
    row['tls.record.length'] = None
    return row


def packet_contains_new_frame(row: Dict[str, any], frame_number: int, offset: int):
    groupdict = extract_record_info_re(
        pth_to_trace,
        frame_number,
        major,
        minor,
        offset
    )
    row['tls.frame_number'] = row['tls.frame_number'] + 1
    row['tls.record.content_type'] = int(groupdict['recType'])
    row['tls.record.length'] = frame_size_from_group_dict(groupdict)
    row['tls.handshake.typ'] = None
    frame_starts_at = get_header_start_byte_from_group_dict(groupdict)
    return row, frame_starts_at

In [168]:
# 72: 16 03 01 01 fc 01 --> Client Hello, 508
# 74: 16 03 03 00 6a 02 --> Server Hello, 106
#     16 03 03 0f 03 0b --> Certificate, 3843
# 75: Copy
# 76: Copy
#     16 03 03 01 df 16 --> Unknown Handshake, 479
# 80: Copy
#     16 03 03 01 4d 0c --> Server Key Exchange, 333
#     16 03 03 00 04 0e --> Server Done
def label_main_flow(pth_to_trace: str, main_flow: pd.DataFrame) -> MainFlow:
    flow = MainFlow(
        src_ip=main_flow.at[0, 'ip.src'],
        src_port=int(main_flow.at[0, 'tcp.srcport']),
        dst_ip=main_flow.at[0, 'ip.dst'],
        dst_port=int(main_flow.at[0, 'tcp.dstport']),
        tcp_header_length=int(main_flow.at[0, 'tcp.hdr_len']),
        ip_header_length=int(main_flow.at[0, 'ip.hdr_len'])
    )
    remaining_bytes = {flow.src_ip: 0, flow.dst_ip: 0}
    for row_idx in range(main_flow.shape[0]):
        frame_src_ip = main_flow.at[row_idx, 'ip.src']
        records = []
        frame = Frame(
            tcp_length=main_flow.at[row_idx, 'tcp.len'],
            frame_number=main_flow.at[row_idx, 'frame.number'],
            time_epoch=main_flow.at[row_idx, 'frame.time_epoch'],
            direction=-1 if main_flow.at[row_idx, 'ip.src'] == flow.src_ip else 1,
            tcp_header_offset=14 + flow.tcp_header_length + flow.ip_header_length
        )
        # print("New frame: ", str(frame))
        # print("\tremaining_bytes", remaining_bytes)
        if remaining_bytes[frame_src_ip] == 0:
            records = []
            remaining_bytes[frame_src_ip] = find_records(
                pth_to_trace=pth_to_trace,
                frame=frame,
                record_number=len(flow.tls_records),
                offset=remaining_bytes[frame_src_ip] + frame.tcp_header_offset,
                records=records
            )
            frame.tls_records.extend(records)
        else:
            record = TlsRecord.from_record(flow.tls_records[-1])
            record.direction = frame.direction
            # print("\tCopy Record: ", str(record))
            frame.tls_records.append(record)
            if remaining_bytes[frame_src_ip] < frame.tcp_length:
                records = []
                remaining_bytes[frame_src_ip] = find_records(
                    pth_to_trace=pth_to_trace,
                    frame=frame,
                    record_number=len(flow.tls_records),
                    offset=remaining_bytes[frame_src_ip] + frame.tcp_header_offset,
                    records=records
                )
                frame.tls_records.extend(records)
            else:
                remaining_bytes[frame_src_ip] -= frame.tcp_length
        print(frame.frame_number, frame_src_ip, remaining_bytes[frame_src_ip])
        flow.frames.append(frame)
        flow.tls_records.extend(records)
    return flow
    

# pth_to_trace = '../data/devel-traces/zoom_firefox_gatherer-01-xm2ks_33048516.pcapng'
# hostname = 'zoom.us'

pth_to_trace = '../data/devel-traces/ebay-kleinanzeigen_firefox_gatherer-01-2ptv6_63946056.pcapng'
hostname = 'www.ebay-kleinanzeigen.de'

pth_to_trace = '../data/devel-traces/instagram_chromium_gatherer-01-4tss9_38712719.pcapng'
hostname = 'help.instagram.com'

pth_to_trace = '../data/devel-traces/instagram_chromium_gatherer-01-4tss9_91246581.pcapng'
hostname = 'help.instagram.com'

pth_to_trace = '../data/devel-traces/smallpdf_chromium_gatherer-01-kqkdk_46263679.pcapng'
hostname = 'smallpdf.com'

main_flow = extract_main_flow(pth_to_trace, hostname)
flow = label_main_flow(pth_to_trace, main_flow)
print('\n===================================================')
print(flow)

14 172.17.0.2 0
17 65.9.28.69 4091
19 65.9.28.69 2663
21 65.9.28.69 1235
23 65.9.28.69 93
25 65.9.28.69 0
30 172.17.0.2 0
31 172.17.0.2 0
32 172.17.0.2 0
34 65.9.28.69 0
36 172.17.0.2 0
38 65.9.28.69 0
41 65.9.28.69 14978
43 65.9.28.69 13550
45 65.9.28.69 12122
47 65.9.28.69 10694
49 65.9.28.69 9266
51 65.9.28.69 7838
52 65.9.28.69 6410
54 65.9.28.69 4982
55 65.9.28.69 3554
57 65.9.28.69 2126
58 65.9.28.69 698
60 65.9.28.69 15676
61 65.9.28.69 14248
63 65.9.28.69 12820
64 65.9.28.69 11392
66 65.9.28.69 9964
67 65.9.28.69 8536
69 65.9.28.69 7108
70 65.9.28.69 5680
72 65.9.28.69 4252
73 65.9.28.69 2824
75 65.9.28.69 1396
76 65.9.28.69 16374
78 65.9.28.69 14946
79 65.9.28.69 13518
81 65.9.28.69 12090
82 65.9.28.69 10662
84 65.9.28.69 9234
85 172.17.0.2 0
86 172.17.0.2 0
87 65.9.28.69 7806
88 65.9.28.69 6378
91 65.9.28.69 4950
93 65.9.28.69 3522
95 172.17.0.2 0
96 172.17.0.2 0
97 172.17.0.2 0
98 172.17.0.2 0
99 172.17.0.2 0
100 172.17.0.2 0
101 65.9.28.69 2094
102 65.9.28.69 666
104 65.9.2

In [169]:
print("\n".join([str(r) for r in flow.tls_records]))

Record: 0	CLIENT_HELLO                  	3	1	outbound	0->512
Record: 1	SERVER_HELLO                  	3	3	inbound 	0->122
Record: 2	CHANGE_CIPHER_SPEC            	3	3	inbound 	127->1
Record: 3	APPLICATION_DATA              	3	3	inbound 	133->36
Record: 4	APPLICATION_DATA              	3	3	inbound 	174->5340
Record: 5	APPLICATION_DATA              	3	3	inbound 	1235->281
Record: 6	APPLICATION_DATA              	3	3	inbound 	93->53
Record: 7	CHANGE_CIPHER_SPEC            	3	3	outbound	0->1
Record: 8	APPLICATION_DATA              	3	3	outbound	6->53
Record: 9	APPLICATION_DATA              	3	3	outbound	0->87
Record: 10	APPLICATION_DATA              	3	3	outbound	0->358
Record: 11	APPLICATION_DATA              	3	3	inbound 	0->57
Record: 12	APPLICATION_DATA              	3	3	outbound	0->26
Record: 13	APPLICATION_DATA              	3	3	inbound 	0->26
Record: 14	APPLICATION_DATA              	3	3	inbound 	0->16401
Record: 15	APPLICATION_DATA              	3	3	inbound 	698->16401
Record: 16	A

In [None]:
remaining_record_size = 0
is_unlabeled = lambda x: pd.isna(main_flow.loc[x, 'tls.record.content_type'])
rows = []
remaining_bytes = 0

for i in range(main_flow.shape[0]):
    frame_number = int(main_flow.loc[i, 'frame.number'])
    frame_length = int(main_flow.at[i, 'tcp.len'])
    print(f"Process frame: {frame_number}")
    row = new_row(main_flow, i)
    if is_unlabeled(i):
        row['tls.record.version.major'] = rows[-1]['tls.record.version.major']
        row['tls.record.version.minor'] = rows[-1]['tls.record.version.minor']
        if remaining_bytes > 0:
            row = packet_part_of_previous_frame(row)
            rows.append(row)
            remaining_bytes -= row['tcp.len']
            
            if remaining_bytes < 0:
                frame_starts_at = 0
                tls_length = 0
                while frame_starts_at + tls_length + 5 < frame_length:
                    tmp, frame_starts_at = packet_contains_new_frame(rows[-1].copy(), frame_number, frame_starts_at + tls_length)
                    consumed = tmp['tcp.len'] - frame_starts_at
                    tls_length = tmp['tls.record.length']
                    remaining_bytes = tls_length - consumed
                    print(f"\t{frame_starts_at} + {tls_length} + 5 = {frame_starts_at+tls_length+5} < {frame_length}")
                    print(f"\tContains new frame starting at {frame_starts_at}: {tmp['tls.record.content_type']} {tmp['tls.record.length']}")
                    rows.append(tmp)
                
        elif packet_starts_with_tls_header(pth_to_trace, frame_number, major, minor):
            print(f'\tFrame {frame_number} starts with TLS Header')
            tmp, frame_starts_at = packet_contains_new_frame(row, frame_number, 0)
            tls_length = tmp['tls.record.length']
            rows.append(tmp)
            print(f"\tContains new frame starting at {frame_starts_at}: {tmp['tls.record.content_type']} {tmp['tls.record.length']}")
            while frame_starts_at + tls_length + 5 < frame_length:
                tmp, frame_starts_at = packet_contains_new_frame(rows[-1].copy(), frame_number, frame_starts_at + tls_length)
                consumed = tmp['tcp.len'] - frame_starts_at
                tls_length = tmp['tls.record.length']
                remaining_bytes = tls_length - consumed
                print(f"\t{frame_starts_at} + {tls_length} + 5 = {frame_starts_at+tls_length+5} < {frame_length}")
                print(f"\tContains new frame starting at {frame_starts_at}: {tmp['tls.record.content_type']} {tmp['tls.record.length']}")
                rows.append(tmp)
            remaining_bytes = rows[-1]['tls.record.length'] - (frame_length - (frame_starts_at + 5))
            print(f"\t{remaining_bytes} bytes remain")
        else:
            print(f"\tUnexpected contingency, remaining_bytes = {remaining_bytes}")
        #     groupdict = extract_record_info_re(pth_to_trace, main_flow.loc[i, 'frame.number'], major, minor)
        #     print("\t", groupdict)
        
    else:
        major = int(main_flow.at[i, 'tls.record.version'][-4:-2])
        minor = int(main_flow.at[i, 'tls.record.version'][-2:])
        row['tls.record.version.major'] = major
        row['tls.record.version.minor'] = minor
        row['_ws.col.Info'] = main_flow.at[i, '_ws.col.Info']
        record_types = main_flow.at[i, 'tls.record.content_type'].split(',')
        record_lengths = main_flow.at[i, 'tls.record.length'].split(',')
        tls_handshake_types = None
        assert len(record_types) == len(record_lengths)
        frame_content = 0
        for j, (t, l) in enumerate(zip(record_types, record_lengths)):
            tmp = row.copy()
            tmp['tls.frame_number'] = row['tls.frame_number'] + j
            tmp['tls.record.content_type'] = int(t)
            tmp['tls.record.length'] = int(l)
            frame_content += tmp['tls.record.length'] + 5
            if tmp['tls.record.content_type'] != 23 and not pd.isna(main_flow.at[i, 'tls.handshake.type']):
                if tls_handshake_types is None:
                    # tls_handshake_types = str(int(main_flow.at[i, 'tls.handshake.type'])).split(',')
                    tls_handshake_types = [int(main_flow.at[i, 'tls.handshake.type'])]
                tmp['tls.handshake.type'] = int(tls_handshake_types.pop()) if len(tls_handshake_types) > 0 else None
            rows.append(tmp)
        print(f"\tFrame {frame_number} has v {major}.{minor}, type {main_flow.at[i, 'tls.record.content_type']} and lengths {main_flow.loc[i, 'tls.record.length']}")
        
        # print(rows[-1], frame_content)
        if rows[-1]['tcp.len'] > frame_content:
            groupdict = extract_record_info_re(pth_to_trace, main_flow.at[i, 'frame.number'], major, minor, rows[-1]['tls.record.length'])
            tmp = rows[-1].copy()
            tmp['tls.frame_number'] = tmp['tls.frame_number'] + 1
            tmp['tls.record.content_type'] = int(groupdict['recType'])
            tmp['tls.record.length'] = frame_size_from_group_dict(groupdict)
            tmp['tls.handshake.typ'] = None
            rows.append(tmp)
            consumed = tmp['tcp.len'] - get_header_start_byte_from_group_dict(groupdict)
            remaining_bytes = tmp['tls.record.length'] - consumed
            print("\t", f"Opened new frame, size {tmp['tls.record.length']}, consumed {consumed}, remaining {remaining_bytes}")

In [343]:
pd.DataFrame.from_dict(rows)

Unnamed: 0,frame_number,frame.time_epoch,ip.src,tcp.srcport,ip.dst,tcp.dstport,tcp.len,tls.frame_number,tls.handshake.type,tls.record.version.major,tls.record.version.minor,_ws.col.Info,tls.record.content_type,tls.record.length,tls.handshake.typ
0,72,1618235000.0,172.17.0.2,48762,18.205.93.255,443,513,0,1.0,3,1,Client Hello,22,508.0,
1,74,1618235000.0,18.205.93.255,443,172.17.0.2,48762,1460,1,2.0,3,3,Server Hello,22,106.0,
2,74,1618235000.0,18.205.93.255,443,172.17.0.2,48762,1460,2,2.0,3,3,Server Hello,16,3843.0,
3,76,1618235000.0,18.205.93.255,443,172.17.0.2,48762,1460,2,,3,3,,16,,
4,78,1618235000.0,18.205.93.255,443,172.17.0.2,48762,1460,2,,3,3,,16,,
5,78,1618235000.0,18.205.93.255,443,172.17.0.2,48762,1460,3,,3,3,,16,479.0,
6,80,1618235000.0,18.205.93.255,443,172.17.0.2,48762,410,3,,3,3,,16,,
7,80,1618235000.0,18.205.93.255,443,172.17.0.2,48762,410,4,,3,3,,16,333.0,
8,80,1618235000.0,18.205.93.255,443,172.17.0.2,48762,410,5,,3,3,,16,4.0,
9,91,1618235000.0,172.17.0.2,48762,18.205.93.255,443,126,6,16.0,3,3,"Client Key Exchange, Change Cipher Spec, Encry...",22,70.0,


In [201]:
int(main_flow.loc[0, 'tls.record.version'][-4:-2])

3

In [37]:
def tmp(main_flow):
    for i, row in main_flow.iterrows():
        print(i, row['tls.record.length'])
        if i == 2:
            main_flow.at[i, 'tls.record.lenght'] = 10000
            print(main_flow.at[i, 'tls.record.lenght'] )
            
    print(main_flow.at[2, 'tls.record.lenght'] )
    return main_flow

In [38]:
mf = tmp(main_flow)

0 508
1 106
2 nan
10000.0
3 3843
4 479,333,4
5 70,1,40
6 172
7 248
8 1,40
9 64
10 33
11 33
12 nan
13 nan
14 4120
15 nan
16 nan
17 4120
18 nan
19 nan
20 4120
21 nan
22 nan
23 4120
24 nan
25 3888,33
26 352
27 nan
28 nan
29 4120
30 nan
31 nan
32 3188,33
33 172
34 33
35 37
36 nan
37 2597
10000.0


In [110]:
s = """
0000  02 42 55 b3 54 37 02 42 ac 11 00 02 08 00 45 00   .BU.T7.B......E.
0010  00 d9 da 1f 40 00 40 06 43 20 ac 11 00 02 12 cd   ....@.@.C ......
0020  5d ff be 7a 01 bb ae 3d 18 ce d8 55 d2 c0 50 18   ]..z...=...U..P.
0030  01 f5 a8 98 00 00 17 03 03 00 ac 00 00 00 00 00   ................
0040  00 00 01 8a 01 b9 f4 be 8d ef 11 07 60 d7 2a 0a   ............`.*.
0050  c2 af ee 0d af 24 b7 30 af f5 86 38 ac 77 4c 1d   .....$.0...8.wL.
0060  dc cd ba b6 bc 71 dc 59 a9 66 7a 3c a3 87 63 4c   .....q.Y.fz<..cL
0070  20 2f f3 f3 65 9f b7 85 05 63 4e 7f 20 56 15 03    /..e....cN. V..
0080  03 ff 2f 74 c0 7b ed 3a bf 0c 7a a0 b8 21 03 16   0(.t.{.:..z..!..
0090  03 03 7b e6 7e 6f 63 8b e5 ee d1 ba 4a 14 03 03   ..{.~oc.....J...
00a0  1f 3a 74 65 ed c7 b9 ad 67 50 7f 58 17 03 03 99   .:te....gP.X....
00b0  94 5c b5 aa 40 a6 03 c4 d2 42 81 1a 17 03 03 dd   .\..@....B..|.V.
00c0  b7 b3 b8 da 4b ce 25 09 c5 28 96 94 5a 26 bc 15   ....K.%..(..Z&..
00d0  14 03 03 0f 56 e2 1e 53 5c 96 d5 92 fc d8 d8 d3   Ud..V..S\.......
00e0  9d ed 40 06 6b 44 17
"""

In [170]:
pattern = re.compile(
    "(?P<numBytes0>[0-9a-f]{4})?"              # Match byte numbers.
    "(?P<anyBytes0>  ([0-9a-f]{2} )*)?"        # Match any sequence of bytes.
    "(?P<recType>14|15|16|17)"               # Match TLS record type.
    "((?P<ascii1>   .{16}\n)(?P<numBytes2>[0-9a-f]{4} ))?"   # End of row. Match ASCII output, new line and byte numbers.
    " (?P<major>03)"                       # Match TLS major version
    "((?P<ascii2>   .{16}\n)(?P<numBytes3>[0-9a-f]{4} ))?"   # End of row. Match ASCII output, new line and byte numbers.
    " (?P<minor>03)"                       # Match TLS minor version.
    "((?P<ascii3>   .{16}\n)(?P<numBytes4>[0-9a-f]{4} ))?"   # End of row. Match ASCII output, new line and byte numbers.
    " (?P<length0>[0-9a-f]{2})"              # Match first byte of TLS record length.
    "((?P<ascii4>   .{16}\n)(?P<numBytes5>[0-9a-f]{4} ))?"   # End of row. Match ASCII output, new line and byte numbers.
    " (?P<length1>[0-9a-f]{2})"              # Match second byte of TLS record length.
)

In [171]:
print(s)


0000  02 42 55 b3 54 37 02 42 ac 11 00 02 08 00 45 00   .BU.T7.B......E.
0010  00 d9 da 1f 40 00 40 06 43 20 ac 11 00 02 12 cd   ....@.@.C ......
0020  5d ff be 7a 01 bb ae 3d 18 ce d8 55 d2 c0 50 18   ]..z...=...U..P.
0030  01 f5 a8 98 00 00 17 03 03 00 ac 00 00 00 00 00   ................
0040  00 00 01 8a 01 b9 f4 be 8d ef 11 07 60 d7 2a 0a   ............`.*.
0050  c2 af ee 0d af 24 b7 30 af f5 86 38 ac 77 4c 1d   .....$.0...8.wL.
0060  dc cd ba b6 bc 71 dc 59 a9 66 7a 3c a3 87 63 4c   .....q.Y.fz<..cL
0070  20 2f f3 f3 65 9f b7 85 05 63 4e 7f 20 56 15 03    /..e....cN. V..
0080  03 ff 2f 74 c0 7b ed 3a bf 0c 7a a0 b8 21 03 16   0(.t.{.:..z..!..
0090  03 03 7b e6 7e 6f 63 8b e5 ee d1 ba 4a 14 03 03   ..{.~oc.....J...
00a0  1f 3a 74 65 ed c7 b9 ad 67 50 7f 58 17 03 03 99   .:te....gP.X....
00b0  94 5c b5 aa 40 a6 03 c4 d2 42 81 1a 17 03 03 dd   .\..@....B..|.V.
00c0  b7 b3 b8 da 4b ce 25 09 c5 28 96 94 5a 26 bc 15   ....K.%..(..Z&..
00d0  14 03 03 0f 56 e2 1e 53 5c 96 d5 92 fc d8 d8

In [134]:
pattern.search(s)

<re.Match object; span=(220, 258), match='0030  01 f5 a8 98 00 00 17 03 03 00 ac'>

In [135]:
pattern.search(s[258:])

<re.Match object; span=(254, 341), match='0070  20 2f f3 f3 65 9f b7 85 05 63 4e 7f 20 56 1>

In [136]:
pattern.search(s[599:])

<re.Match object; span=(37, 76), match='16   0(.t.{.:..z..!..\n0090  03 03 7b e6'>

In [137]:
pattern.search(s[676:])

<re.Match object; span=(27, 66), match='14 03 03   ..{.~oc.....J...\n00a0  1f 3a'>

In [138]:
pattern.search(s[742:])

<re.Match object; span=(31, 70), match='17 03 03 99   .:te....gP.X....\n00b0  94'>

In [139]:
pattern.search(s[812:])

<re.Match object; span=(34, 73), match='17 03 03 dd   .\\..@....B..|.V.\n00c0  b7'>

In [140]:
pattern.search(s[885:])

<re.Match object; span=(65, 85), match='00d0  14 03 03 0f 56'>

In [172]:
m = pattern.search(s)

In [173]:
m.groups()

('0030',
 '  01 f5 a8 98 00 00 ',
 '00 ',
 '17',
 None,
 None,
 None,
 '03',
 None,
 None,
 None,
 '03',
 None,
 None,
 None,
 '00',
 None,
 None,
 None,
 'ac')

In [174]:
m2 = pattern.search(s[812:])

In [178]:
m2.groupdict()

{'numBytes0': None,
 'anyBytes0': None,
 'recType': '17',
 'ascii1': None,
 'numBytes2': None,
 'major': '03',
 'ascii2': None,
 'numBytes3': None,
 'minor': '03',
 'ascii3': None,
 'numBytes4': None,
 'length0': 'dd',
 'ascii4': '   .\\..@....B..|.V.\n',
 'numBytes5': '00c0 ',
 'length1': 'b7'}

In [177]:
m.groupdict()

{'numBytes0': '0030',
 'anyBytes0': '  01 f5 a8 98 00 00 ',
 'recType': '17',
 'ascii1': None,
 'numBytes2': None,
 'major': '03',
 'ascii2': None,
 'numBytes3': None,
 'minor': '03',
 'ascii3': None,
 'numBytes4': None,
 'length0': '00',
 'ascii4': None,
 'numBytes5': None,
 'length1': 'ac'}

In [188]:
frame_size_from_group_dict(m.groupdict())

172

In [None]:
([0-9a-f]{4})?(  ([0-9a-f]{2} )*)?(14|15|16|17)(   .{16}\n[0-9a-f]{4} )? (03)(   .{16}\n[0-9a-f]{4} )? (03)(   .{16}\n[0-9a-f]{4} )? ([0-9a-f]{2})(   .{16}\n[0-9a-f]{4} )? ([0-9a-f]{2})

In [None]:
0000  02 42 55 b3 54 37 02 42 ac 11 00 02 08 00 45 00   .BU.T7.B......E.
0010  00 d9 da 1f 40 00 40 06 43 20 ac 11 00 02 12 cd   ....@.@.C ......
0020  5d ff be 7a 01 bb ae 3d 18 ce d8 55 d2 c0 50 18   ]..z...=...U..P.
0030  01 f5 a8 98 00 00 17 03 03 00 ac 00 00 00 00 00   ................
0040  00 00 01 8a 01 b9 f4 be 8d ef 11 07 60 d7 2a 0a   ............`.*.
0050  c2 af ee 0d af 24 b7 30 af f5 86 38 ac 77 4c 1d   .....$.0...8.wL.
0060  dc cd ba b6 bc 71 dc 59 a9 66 7a 3c a3 87 63 4c   .....q.Y.fz<..cL
0070  20 2f f3 f3 65 9f b7 85 05 63 4e 7f 20 56 15 03    /..e....cN. V..
0080  03 ff 2f 74 c0 7b ed 3a bf 0c 7a a0 b8 21 03 16   0(.t.{.:..z..!..
0090  03 03 7b e6 7e 6f 63 8b e5 ee d1 ba 4a 14 03 03   ..{.~oc.....J...
00a0  1f 3a 74 65 ed c7 b9 ad 67 50 7f 58 17 03 03 99   .:te....gP.X....
00b0  94 5c b5 aa 40 a6 03 c4 d2 42 81 1a 17 03 03 dd   .\..@....B..|.V.
00c0  b7 b3 b8 da 4b ce 25 09 c5 28 96 94 5a 26 bc 15   ....K.%..(..Z&..
00d0  14 03 03 0f 56 15 03 03 5c 96 d5 92 fc d8 d8 d3   Ud..V..S\.......
00e0  9d ed 40 06 6b 44 17    

In [None]:
zoom filter: ((ip.src == 172.17.0.2 && tcp.srcport == 48762) || (ip.dst == 172.17.0.2 && tcp.dstport == 48762)) && tcp.len > 0