In [1]:
import os
import time
from glob import glob
import csv
import concurrent.futures
import pandas as pd
from scapy.layers.inet import IP, TCP, UDP, ICMP
from scapy.all import *

In [2]:
protocols = pd.read_csv('./protocol-numbers.csv')
protocols = protocols[['Decimal', 'Keyword']]
protocols.head(8)

Unnamed: 0,Decimal,Keyword
0,0,HOPOPT
1,1,ICMP
2,2,IGMP
3,3,GGP
4,4,IPv4
5,5,ST
6,6,TCP
7,7,CBT


In [3]:
input_pcap_files = glob('./Dataset/*.pcap')
output_csv_files = './csv_dataset'

if not os.path.exists(output_csv_files):
    os.makedirs(output_csv_files)

In [4]:
class PacketAnalyzer:
    def __init__(self, pcap_files):
        self.pcap_files = pcap_files

    def analyze_packet(self, packet):
        protocol = "Unknown"
        src_ip = "N/A"
        dst_ip = "N/A"
        src_port = "N/A"
        dst_port = "N/A"
        seq = 0
        ack = 0
        flags = "N/A"

        if packet.haslayer(IP):
            ip = packet[IP]
            src_ip = ip.src
            dst_ip = ip.dst

            if packet.haslayer(ICMP):
                protocol = "ICMP"
            else:
                if packet.haslayer(TCP):
                    protocol = protocols.loc[ip.proto]['Keyword'] if ip.sport != 22 else "SSH"
                    src_port = ip.sport
                    dst_port = ip.dport
                    seq = packet[TCP].seq
                    ack = packet[TCP].ack
                    flags = packet[TCP].sprintf('%flags%')
        else:
            if packet.haslayer(ARP):
                arp = packet[ARP]
                protocol = "ARP"
                src_ip = arp.psrc
                dst_ip = arp.pdst

        return {
            "timestamp": packet.time,
            "source_ip": src_ip,
            "destination_ip": dst_ip,
            "protocol": protocol,
            "source_port": src_port,
            "destination_port": dst_port,
            "length": len(packet),
            "seq": seq,
            "ack": ack,
            "flags": flags
        }

    def analyze_pcap_file(self, pcap_file):
        print(f"[INFO] Analyzing {pcap_file.split('/')[-1]} ...")
        start_time = time.time()
        results = []
        packets = rdpcap(pcap_file)
        for packet in packets:
            result = self.analyze_packet(packet)
            results.append(result)

        # Save results to CSV immediately after analysis
        filename = os.path.join(output_csv_files, os.path.basename(pcap_file).replace('.pcap', '.csv'))
        self.save_to_csv(results, filename)
        end_time = time.time()
        print(f"[INFO] Done analyzing {pcap_file.split('/')[-1]} in {round(end_time - start_time, 2)} seconds.")

    def save_to_csv(self, results, filename):
        with open(filename, mode='w', newline='') as file:
            fieldnames = results[0].keys()
            writer = csv.DictWriter(file, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(results)

    def process_pcap_files(self):
        print(f"[INFO] Processing {len(self.pcap_files)} pcap files ...")
        with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
            executor.map(self.analyze_pcap_file, self.pcap_files)

        print("[INFO] Done processing pcap files.")

In [5]:
analyzer = PacketAnalyzer(input_pcap_files)
analyzer.process_pcap_files()

[INFO] Processing 21 pcap files ...
[INFO] Analyzing netbios_ssn2.pcap ...
[INFO] Analyzing hydra_ssh2.pcap ...
[INFO] Analyzing vsftpd2.pcap ...
[INFO] Analyzing ruby_drb.pcap ...
[INFO] Done analyzing hydra_ssh2.pcap in 37.22 seconds.
[INFO] Analyzing hydra_ftp.pcap ...
[INFO] Done analyzing hydra_ftp.pcap in 8.81 seconds.
[INFO] Analyzing vsftpd.pcap ...
[INFO] Done analyzing vsftpd2.pcap in 75.6 seconds.
[INFO] Analyzing hydra_ftp2.pcap ...
[INFO] Done analyzing hydra_ftp2.pcap in 11.13 seconds.
[INFO] Analyzing smtp22.pcap ...
[INFO] Done analyzing smtp22.pcap in 2.1 seconds.
[INFO] Analyzing distcc_exec_backdoor.pcap ...
[INFO] Done analyzing distcc_exec_backdoor.pcap in 0.15 seconds.
[INFO] Analyzing unreallrcd.pcap ...
[INFO] Done analyzing ruby_drb.pcap in 97.28 seconds.
[INFO] Analyzing 0day.pcap ...
[INFO] Done analyzing netbios_ssn2.pcap in 105.27 seconds.
[INFO] Analyzing blackEnergy.pcap ...
[INFO] Done analyzing blackEnergy.pcap in 1.89 seconds.
[INFO] Analyzing ruby_drb