In [1]:
from dataclasses import dataclass
import pyshark
import nest_asyncio
from pyshark.packet.packet import Packet
from tqdm import tqdm
import os
from datetime import date 
import pandas as pd 
import numpy as np 
from concurrent.futures import ThreadPoolExecutor
import concurrent

In [2]:
@dataclass
class PacketInfo:
    seq_number: int 
    masked_src: str 
    masked_dst: str 
    cap_index: int
    src_port: int 
    dst_port: int
    mallicious: bool # this requires a more work
    date: date

In [3]:
# Apply the nest_asyncio patch
nest_asyncio.apply()

def convert_to_ip_str(ipnum):
    a = (ipnum & (255 << 0)) >> 0
    b = (ipnum & (255 << 8)) >> 8
    c = (ipnum & (255 << 16)) >>16
    d = (ipnum & (255 << 24)) >> 24
    return f"{d}.{c}.{b}.{a}"

def check_if_mallicious(dstport_number, srcport_number, seq_num):
    dstport_number = int(dstport_number)
    srcport_number = int(srcport_number)
    dst_contains = dstport_number in [23, 2323, 23231, 5555, 7547]
    src_contains = srcport_number in [23, 2323, 23231, 5555, 7547]
    return dst_contains or src_contains

def get_n_packages(file_name, number_of_packages):
    cap = pyshark.FileCapture(file_name)
    return_array = []

    s = file_name.split("/")[-1].replace("1400.pcap", "")
    current_date = date(year=int(s[:4]), month=int(s[4:6]), day=int(s[6:]))

    progress_bar = tqdm(total=number_of_packages, unit='iB', unit_scale=True)

    # Iterate over packets and display their information
    for i, packet in enumerate(cap):
        if i >= number_of_packages: break
        packet: Packet = packet
        if ('TCP' not in packet) or ('IP' not in packet): continue
        progress_bar.update(1)
        # print(f"Seq: {packet.tcp.seq}")
        a = PacketInfo(
            seq_number=int(packet.tcp.seq),
            masked_dst=packet.ip.dst,
            masked_src=packet.ip.src,
            cap_index=i,
            date=current_date,
            dst_port=int(packet.tcp.dstport),
            src_port=int(packet.tcp.srcport),
            mallicious=check_if_mallicious(packet.tcp.dstport, packet.tcp.srcport, packet.tcp.seq)
        )
        return_array.append(a)
    return return_array

def get_n_packages_from_files(filenames, number_of_packages):
    all_lists = []
    with ThreadPoolExecutor(max_workers=len(filenames)) as executor:
        future_to_url = {executor.submit(get_n_packages, filename, number_of_packages): filename for filename in filenames}

        for future in concurrent.futures.as_completed(future_to_url):  
            name = future_to_url[future]
            try:
                all_lists += future.result()
            except Exception:
                print(f"Failed reading the packages from '{name}'")
    return all_lists

            

In [73]:
# ((3 << 24) + (35 << 16) + (87 << 8) + (153 << 0)) ==52647833

True

In [4]:
# get_n_packages_from_files()

filenames = ["./extracted/" + x for x in os.listdir("./extracted/") if not x.startswith(".")]

all_packages = list(get_n_packages_from_files(filenames, 1_000))

"Done"

  0%|          | 0.00/10.0k [00:00<?, ?iB/s]
[A

[A[A


[A[A[A



[A[A[A[A




[A[A[A[A[A






[A[A[A[A[A[A[A





[A[A[A[A[A[A







[A[A[A[A[A[A[A[A
[A


[A[A[A

  0%|          | 1.00/10.0k [00:00<29:29, 5.65iB/s]



[A[A[A[A




[A[A[A[A[A






[A[A[A[A[A[A[A







[A[A[A[A[A[A[A[A





[A[A[A[A[A[A
[A


[A[A[A

[A[A



  0%|          | 10.0/10.0k [00:00<04:01, 41.4iB/s]




[A[A[A[A[A






[A[A[A[A[A[A[A







[A[A[A[A[A[A[A[A





[A[A[A[A[A[A
[A


[A[A[A



[A[A[A[A

[A[A




  0%|          | 16.0/10.0k [00:00<03:58, 41.9iB/s]






[A[A[A[A[A[A[A







[A[A[A[A[A[A[A[A





[A[A[A[A[A[A
[A



[A[A[A[A


[A[A[A




[A[A[A[A[A






  0%|          | 21.0/10.0k [00:00<03:57, 42.0iB/s]

[A[A







[A[A[A[A[A[A[A[A





[A[A[A[A[A[A



[A[A[A[A
[A




[A[A[A[A[A


[A[A[A






  0%|          | 26.0/10.0k

In [114]:
df = pd.DataFrame(all_packages)

In [121]:
df[df.seq_number > (8 << 24)]
# df

Unnamed: 0,seq_number,masked_src,masked_dst,cap_index,src_port,dst_port,mallicious,date
2517,4294967266,17.130.185.234,163.99.126.82,4202,443,64808,False,2022-05-02
3536,4294967233,61.76.85.29,202.62.224.100,5706,29128,22,False,2022-05-02
8477,4294966913,27.67.74.167,150.80.254.126,3362,58459,443,False,2022-08-15
8825,4294963964,163.67.134.80,52.159.232.22,3907,59340,443,False,2022-08-15
11205,4294967273,203.178.5.175,54.119.182.4,7230,57557,443,False,2022-08-15
11515,4294967017,133.27.77.127,58.32.182.14,7639,22,55530,False,2022-08-15
11978,4294966614,163.67.12.55,185.249.177.123,8369,443,59062,False,2022-08-15
12490,4294967259,190.156.3.206,203.178.9.80,9168,48425,21,False,2022-08-15
13120,4294967266,202.93.223.216,52.82.39.132,294,32224,443,False,2022-07-04
13452,4294967273,163.35.115.77,69.123.187.128,840,37474,443,False,2022-07-04


In [122]:
convert_to_ip_str(4294967273)

'255.255.255.233'

In [7]:
# Apply the nest_asyncio patch
nest_asyncio.apply()

def read_pcap(file_name, listttt: list=samyang):
    cap = pyshark.FileCapture(file_name)
    
    # Iterate over packets and display their information
    for i, packet in tqdm(enumerate(cap)):
        packet: Packet = packet
        if ('TCP' not in packet) or ('IP' not in packet): continue
        # print(f"Seq: {packet.tcp.seq}")
        pio = PacketInfo(
            seq_number=packet.tcp.seq,
            masked_dst=packet.ip.dst,
            masked_src=packet.ip.src,
            cap_index=i
        )
        listttt.append(pio)

# Replace 'your_pcap_file.pcap' with the path to your pcap file
read_pcap('./test.pcap')

4868954it [12:20:46, 109.55it/s] 


KeyboardInterrupt: 

In [8]:
len(samyang)

2308956

In [9]:
import pandas as pd 

In [11]:
r = pd.DataFrame(samyang)

Task exception was never retrieved
future: <Task finished name='Task-17693' coro=<TsharkXmlParser.get_packets_from_stream() done, defined at /opt/homebrew/lib/python3.11/site-packages/pyshark/tshark/output_parser/tshark_xml.py:24> exception=KeyboardInterrupt()>
Traceback (most recent call last):
  File "/Users/bombar/Library/Python/3.11/lib/python/site-packages/IPython/core/interactiveshell.py", line 3508, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/var/folders/rn/8860wjyx3fq1pgh2qcb758n40000gn/T/ipykernel_12160/4081954385.py", line 22, in <module>
    read_pcap('./test.pcap')
  File "/var/folders/rn/8860wjyx3fq1pgh2qcb758n40000gn/T/ipykernel_12160/4081954385.py", line 9, in read_pcap
    for i, packet in tqdm(enumerate(cap)):
  File "/opt/homebrew/lib/python3.11/site-packages/tqdm/std.py", line 1182, in __iter__
    for obj in iterable:
  File "/opt/homebrew/lib/python3.11/site-packages/pyshark/capture/capture.py", line 221, in _packets_from_tshark_sync


In [12]:
r

Unnamed: 0,seq_number,masked_src,masked_dst,cap_index
0,1,157.92.66.107,8.150.193.133,1
1,1,17.122.149.3,163.173.35.131,2
2,1449,17.122.149.3,163.173.35.131,3
3,2897,17.122.149.3,163.173.35.131,5
4,4345,17.122.149.3,163.173.35.131,6
...,...,...,...,...
2308951,1,188.108.218.111,202.209.179.52,4868934
2308952,1,163.173.213.168,64.68.64.94,4868938
2308953,0,91.19.181.211,133.180.178.9,4868941
2308954,1434,133.47.141.221,112.90.37.248,4868943


In [13]:
r.to_csv("./zortzort.csv")