# HIKARI-2021 Dataset Analysis

* **Author:** Patrik Goldschmidt (igoldschmidt@fit.vut.cz)
* **Project:** Network Intrusion Datasets: A Survey, Limitations, and Recommendations
* **Date:** 2024

Although the paper describes only the year 2021, we will analyze data from 2022 as well.

In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
import os

pd.set_option('display.max_columns', None)
#pd.set_option('display.max_rows', None)

In [2]:
# Data paths - as downloaded from Zenodo and unzipped
BASE_FOLDER = '/data/disk2/hikari-2021'
FOLDER_PCAP_2021 = os.path.join(BASE_FOLDER, '2021')
FOLDER_PCAP_2022 = os.path.join(BASE_FOLDER, '2022')

CSV_2021 = os.path.join(BASE_FOLDER, 'ALLFLOWMETER_HIKARI2021.csv')
CSV_2022 = os.path.join(BASE_FOLDER, 'ALLFLOWMETER_HIKARI2022.csv')

PKL_2021 = os.path.join(BASE_FOLDER, 'ALLFLOWMETER_HIKARI2021.pkl')
PKL_2022 = os.path.join(BASE_FOLDER, 'ALLFLOWMETER_HIKARI2022.pkl')

## PKL vs. CSV Comparison

In [3]:
data2021_pkl = pd.read_pickle(PKL_2021)
data2021_csv = pd.read_csv(CSV_2021, index_col=0)

In [4]:
data2021_pkl

Unnamed: 0.1,Unnamed: 0,uid,originh,originp,responh,responp,flow_duration,fwd_pkts_tot,bwd_pkts_tot,fwd_data_pkts_tot,bwd_data_pkts_tot,fwd_pkts_per_sec,bwd_pkts_per_sec,flow_pkts_per_sec,down_up_ratio,fwd_header_size_tot,fwd_header_size_min,fwd_header_size_max,bwd_header_size_tot,bwd_header_size_min,bwd_header_size_max,flow_FIN_flag_count,flow_SYN_flag_count,flow_RST_flag_count,fwd_PSH_flag_count,bwd_PSH_flag_count,flow_ACK_flag_count,fwd_URG_flag_count,bwd_URG_flag_count,flow_CWR_flag_count,flow_ECE_flag_count,fwd_pkts_payload.min,fwd_pkts_payload.max,fwd_pkts_payload.tot,fwd_pkts_payload.avg,fwd_pkts_payload.std,bwd_pkts_payload.min,bwd_pkts_payload.max,bwd_pkts_payload.tot,bwd_pkts_payload.avg,bwd_pkts_payload.std,flow_pkts_payload.min,flow_pkts_payload.max,flow_pkts_payload.tot,flow_pkts_payload.avg,flow_pkts_payload.std,fwd_iat.min,fwd_iat.max,fwd_iat.tot,fwd_iat.avg,fwd_iat.std,bwd_iat.min,bwd_iat.max,bwd_iat.tot,bwd_iat.avg,bwd_iat.std,flow_iat.min,flow_iat.max,flow_iat.tot,flow_iat.avg,flow_iat.std,payload_bytes_per_second,fwd_subflow_pkts,bwd_subflow_pkts,fwd_subflow_bytes,bwd_subflow_bytes,fwd_bulk_bytes,bwd_bulk_bytes,fwd_bulk_packets,bwd_bulk_packets,fwd_bulk_rate,bwd_bulk_rate,active.min,active.max,active.tot,active.avg,active.std,idle.min,idle.max,idle.tot,idle.avg,idle.std,fwd_init_window_size,bwd_init_window_size,fwd_last_window_size,traffic_category,Label
0,0,Cg61Jch3vdz9DBptj,103.255.15.23,13316,128.199.242.104,443,2.207588,15,14,6,6,6.794746,6.341763,13.136509,0.933333,464,20,40,492,32,44,2,2,2,6,5,26,0,0,0,0,0.0,742.0,1826.0,121.733333,220.736581,0.0,1448.0,5025.0,358.928571,552.239840,0.0,1448.0,6851.0,236.241379,424.859275,18.119812,1.963762e+06,2.207603e+06,1.576859e+05,5.205052e+05,7.867813,2.032929e+06,2.177950e+06,1.675346e+05,5.606267e+05,7.867813,1.963762e+06,2.207603e+06,78842.963491,3.696378e+05,3103.387105,7.5,7.0,913.0,2512.5,0.0,0.0,0.0,0.0,0.0,0.0,2.207603e+06,2.207603e+06,2.207603e+06,2.207603e+06,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,29200,65160,0,Bruteforce-XML,1
1,1,CdRIlqLWdj35Y9vW9,103.255.15.23,13318,128.199.242.104,443,15.624266,15,14,6,6,0.960045,0.896042,1.856087,0.933333,488,20,44,468,32,44,2,2,2,6,5,26,0,0,0,0,0.0,745.0,1829.0,121.933333,221.339257,0.0,1448.0,5025.0,358.928571,552.239840,0.0,1448.0,6854.0,236.344828,424.987166,20.980835,1.534300e+07,1.562428e+07,1.116020e+06,4.094889e+06,20.980835,1.541144e+07,1.559517e+07,1.199628e+06,4.270148e+06,10.013580,1.534300e+07,1.562428e+07,558009.896960,2.897622e+06,438.676603,7.5,7.0,914.5,2512.5,0.0,0.0,0.0,0.0,0.0,0.0,2.883792e+04,2.524381e+05,2.812760e+05,1.406380e+05,158109.181742,1.534300e+07,1.534300e+07,1.534300e+07,1.534300e+07,0.0,29200,65160,0,Bruteforce-XML,1
2,2,CLzp9Khd0Y09Qkgrg,103.255.15.23,13320,128.199.242.104,443,12.203357,14,13,6,5,1.147225,1.065281,2.212506,0.928571,432,20,40,448,32,44,2,2,2,6,5,24,0,0,0,0,0.0,744.0,1828.0,130.571429,226.803444,0.0,2896.0,5025.0,386.538462,817.479013,0.0,2896.0,6853.0,253.814815,592.570284,36.001205,1.196814e+07,1.220338e+07,9.387216e+05,3.314032e+06,15.020370,1.203674e+07,1.217482e+07,1.014569e+06,3.471107e+06,15.020370,1.196814e+07,1.220338e+07,469360.810060,2.345336e+06,561.566789,7.0,6.5,914.0,2512.5,0.0,0.0,0.0,0.0,0.0,0.0,2.891302e+04,2.063251e+05,2.352381e+05,1.176190e+05,125449.251656,1.196814e+07,1.196814e+07,1.196814e+07,1.196814e+07,0.0,29200,65160,0,Bruteforce-XML,1
3,3,Cnf1YA4iLB4CSNWB88,103.255.15.23,13322,128.199.242.104,443,9.992448,14,13,6,5,1.401058,1.300983,2.702041,0.928571,432,20,40,436,32,44,2,2,2,6,5,24,0,0,0,0,0.0,744.0,1828.0,130.571429,226.803444,0.0,2896.0,5025.0,386.538462,817.479013,0.0,2896.0,6853.0,253.814815,592.570284,50.067902,9.759205e+06,9.992470e+06,7.686515e+05,2.701448e+06,20.980835,9.828447e+06,9.963348e+06,8.302790e+05,2.833716e+06,20.980835,9.759205e+06,9.992470e+06,384325.770231,1.912152e+06,685.817940,7.0,6.5,914.0,2512.5,0.0,0.0,0.0,0.0,0.0,0.0,2.952909e+04,2.037361e+05,2.332652e+05,1.166326e+05,123182.931318,9.759205e+06,9.759205e+06,9.759205e+06,9.759205e+06,0.0,29200,65160,0,Bruteforce-XML,1
4,4,C4ZKvv3fpO72EAOsJ6,103.255.15.23,13324,128.199.242.104,443,7.780611,14,14,6,5,1.799345,1.799345,3.598689,1.000000,432,20,40,480,32,44,2,2,2,6,5,25,0,0,0,0,0.0,744.0,1828.0,130.571429,226.803444,0.0,2896.0,5025.0,358.928571,792.173394,0.0,2896.0,6853.0,244.750000,583.468215,16.927719,7.545305e+06,7.780620e+06,5.985092e+05,2.087417e+06,15.020370,7.613719e+06,7.750841e+06,5.962185e+05,2.108534e+06,9.059906,7.545305e+06,7.780620e+06,288171.114745,1.450411e+06,880.779153,7.0,7.0,914.0,2512.5,0.0,0.0,0.0,0.0,0.0,0.0,2.855015e+04,2.067649e+05,2.353151e+05,1.176575e+05,126016.885411,7.545305e+06,7.545305e+06,7.545305e+06,7.545305e+06,0.0,29200,65160,0,Bruteforce-XML,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
555273,280838,C9b6Aa2csiogu3vVp9,103.255.15.42,138,103.255.15.255,138,0.000000,1,0,1,0,0.000000,0.000000,0.000000,0.000000,8,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,201.0,201.0,201.0,201.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,201.0,201.0,201.0,201.000000,0.000000,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000,1.0,0.0,201.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0,0,0,XMRIGCC CryptoMiner,1
555274,280839,CGDT4r4PAbp3mvaI6k,103.255.15.42,138,103.255.15.255,138,0.000000,1,0,1,0,0.000000,0.000000,0.000000,0.000000,8,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,201.0,201.0,201.0,201.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,201.0,201.0,201.0,201.000000,0.000000,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000,1.0,0.0,201.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0,0,0,XMRIGCC CryptoMiner,1
555275,280840,CJUxTk4Qd0kHliUKR9,103.255.15.42,138,103.255.15.255,138,0.000000,1,0,1,0,0.000000,0.000000,0.000000,0.000000,8,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,201.0,201.0,201.0,201.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,201.0,201.0,201.0,201.000000,0.000000,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000,1.0,0.0,201.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0,0,0,XMRIGCC CryptoMiner,1
555276,280841,CknUJi2R1iYJG3li3k,103.255.15.42,138,103.255.15.255,138,0.000000,1,0,1,0,0.000000,0.000000,0.000000,0.000000,8,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,201.0,201.0,201.0,201.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,201.0,201.0,201.0,201.000000,0.000000,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000,1.0,0.0,201.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0,0,0,XMRIGCC CryptoMiner,1


In [5]:
data2021_csv

Unnamed: 0.1,Unnamed: 0,uid,originh,originp,responh,responp,flow_duration,fwd_pkts_tot,bwd_pkts_tot,fwd_data_pkts_tot,bwd_data_pkts_tot,fwd_pkts_per_sec,bwd_pkts_per_sec,flow_pkts_per_sec,down_up_ratio,fwd_header_size_tot,fwd_header_size_min,fwd_header_size_max,bwd_header_size_tot,bwd_header_size_min,bwd_header_size_max,flow_FIN_flag_count,flow_SYN_flag_count,flow_RST_flag_count,fwd_PSH_flag_count,bwd_PSH_flag_count,flow_ACK_flag_count,fwd_URG_flag_count,bwd_URG_flag_count,flow_CWR_flag_count,flow_ECE_flag_count,fwd_pkts_payload.min,fwd_pkts_payload.max,fwd_pkts_payload.tot,fwd_pkts_payload.avg,fwd_pkts_payload.std,bwd_pkts_payload.min,bwd_pkts_payload.max,bwd_pkts_payload.tot,bwd_pkts_payload.avg,bwd_pkts_payload.std,flow_pkts_payload.min,flow_pkts_payload.max,flow_pkts_payload.tot,flow_pkts_payload.avg,flow_pkts_payload.std,fwd_iat.min,fwd_iat.max,fwd_iat.tot,fwd_iat.avg,fwd_iat.std,bwd_iat.min,bwd_iat.max,bwd_iat.tot,bwd_iat.avg,bwd_iat.std,flow_iat.min,flow_iat.max,flow_iat.tot,flow_iat.avg,flow_iat.std,payload_bytes_per_second,fwd_subflow_pkts,bwd_subflow_pkts,fwd_subflow_bytes,bwd_subflow_bytes,fwd_bulk_bytes,bwd_bulk_bytes,fwd_bulk_packets,bwd_bulk_packets,fwd_bulk_rate,bwd_bulk_rate,active.min,active.max,active.tot,active.avg,active.std,idle.min,idle.max,idle.tot,idle.avg,idle.std,fwd_init_window_size,bwd_init_window_size,fwd_last_window_size,traffic_category,Label
0,0,Cg61Jch3vdz9DBptj,103.255.15.23,13316,128.199.242.104,443,2.207588,15,14,6,6,6.794746,6.341763,13.136509,0.933333,464,20,40,492,32,44,2,2,2,6,5,26,0,0,0,0,0.0,742.0,1826.0,121.733333,220.736581,0.0,1448.0,5025.0,358.928571,552.239840,0.0,1448.0,6851.0,236.241379,424.859275,18.119812,1.963762e+06,2.207603e+06,1.576859e+05,5.205052e+05,7.867813,2.032929e+06,2.177950e+06,1.675346e+05,5.606267e+05,7.867813,1.963762e+06,2.207603e+06,78842.963491,3.696378e+05,3103.387105,7.5,7.0,913.0,2512.5,0.0,0.0,0.0,0.0,0.0,0.0,2.207603e+06,2.207603e+06,2.207603e+06,2.207603e+06,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,29200,65160,0,Bruteforce-XML,1
1,1,CdRIlqLWdj35Y9vW9,103.255.15.23,13318,128.199.242.104,443,15.624266,15,14,6,6,0.960045,0.896042,1.856087,0.933333,488,20,44,468,32,44,2,2,2,6,5,26,0,0,0,0,0.0,745.0,1829.0,121.933333,221.339257,0.0,1448.0,5025.0,358.928571,552.239840,0.0,1448.0,6854.0,236.344828,424.987166,20.980835,1.534300e+07,1.562428e+07,1.116020e+06,4.094889e+06,20.980835,1.541144e+07,1.559517e+07,1.199628e+06,4.270148e+06,10.013580,1.534300e+07,1.562428e+07,558009.896960,2.897622e+06,438.676603,7.5,7.0,914.5,2512.5,0.0,0.0,0.0,0.0,0.0,0.0,2.883792e+04,2.524381e+05,2.812760e+05,1.406380e+05,158109.181742,1.534300e+07,1.534300e+07,1.534300e+07,1.534300e+07,0.0,29200,65160,0,Bruteforce-XML,1
2,2,CLzp9Khd0Y09Qkgrg,103.255.15.23,13320,128.199.242.104,443,12.203357,14,13,6,5,1.147225,1.065281,2.212506,0.928571,432,20,40,448,32,44,2,2,2,6,5,24,0,0,0,0,0.0,744.0,1828.0,130.571429,226.803444,0.0,2896.0,5025.0,386.538462,817.479013,0.0,2896.0,6853.0,253.814815,592.570284,36.001205,1.196814e+07,1.220338e+07,9.387216e+05,3.314032e+06,15.020370,1.203674e+07,1.217482e+07,1.014569e+06,3.471107e+06,15.020370,1.196814e+07,1.220338e+07,469360.810060,2.345336e+06,561.566789,7.0,6.5,914.0,2512.5,0.0,0.0,0.0,0.0,0.0,0.0,2.891302e+04,2.063251e+05,2.352381e+05,1.176190e+05,125449.251656,1.196814e+07,1.196814e+07,1.196814e+07,1.196814e+07,0.0,29200,65160,0,Bruteforce-XML,1
3,3,Cnf1YA4iLB4CSNWB88,103.255.15.23,13322,128.199.242.104,443,9.992448,14,13,6,5,1.401058,1.300983,2.702041,0.928571,432,20,40,436,32,44,2,2,2,6,5,24,0,0,0,0,0.0,744.0,1828.0,130.571429,226.803444,0.0,2896.0,5025.0,386.538462,817.479013,0.0,2896.0,6853.0,253.814815,592.570284,50.067902,9.759205e+06,9.992470e+06,7.686515e+05,2.701448e+06,20.980835,9.828447e+06,9.963348e+06,8.302790e+05,2.833716e+06,20.980835,9.759205e+06,9.992470e+06,384325.770231,1.912152e+06,685.817940,7.0,6.5,914.0,2512.5,0.0,0.0,0.0,0.0,0.0,0.0,2.952909e+04,2.037361e+05,2.332652e+05,1.166326e+05,123182.931318,9.759205e+06,9.759205e+06,9.759205e+06,9.759205e+06,0.0,29200,65160,0,Bruteforce-XML,1
4,4,C4ZKvv3fpO72EAOsJ6,103.255.15.23,13324,128.199.242.104,443,7.780611,14,14,6,5,1.799345,1.799345,3.598689,1.000000,432,20,40,480,32,44,2,2,2,6,5,25,0,0,0,0,0.0,744.0,1828.0,130.571429,226.803444,0.0,2896.0,5025.0,358.928571,792.173394,0.0,2896.0,6853.0,244.750000,583.468215,16.927719,7.545305e+06,7.780620e+06,5.985092e+05,2.087417e+06,15.020370,7.613719e+06,7.750841e+06,5.962185e+05,2.108534e+06,9.059906,7.545305e+06,7.780620e+06,288171.114745,1.450411e+06,880.779153,7.0,7.0,914.0,2512.5,0.0,0.0,0.0,0.0,0.0,0.0,2.855015e+04,2.067649e+05,2.353151e+05,1.176575e+05,126016.885411,7.545305e+06,7.545305e+06,7.545305e+06,7.545305e+06,0.0,29200,65160,0,Bruteforce-XML,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
555273,280838,C9b6Aa2csiogu3vVp9,103.255.15.42,138,103.255.15.255,138,0.000000,1,0,1,0,0.000000,0.000000,0.000000,0.000000,8,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,201.0,201.0,201.0,201.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,201.0,201.0,201.0,201.000000,0.000000,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000,1.0,0.0,201.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0,0,0,XMRIGCC CryptoMiner,1
555274,280839,CGDT4r4PAbp3mvaI6k,103.255.15.42,138,103.255.15.255,138,0.000000,1,0,1,0,0.000000,0.000000,0.000000,0.000000,8,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,201.0,201.0,201.0,201.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,201.0,201.0,201.0,201.000000,0.000000,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000,1.0,0.0,201.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0,0,0,XMRIGCC CryptoMiner,1
555275,280840,CJUxTk4Qd0kHliUKR9,103.255.15.42,138,103.255.15.255,138,0.000000,1,0,1,0,0.000000,0.000000,0.000000,0.000000,8,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,201.0,201.0,201.0,201.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,201.0,201.0,201.0,201.000000,0.000000,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000,1.0,0.0,201.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0,0,0,XMRIGCC CryptoMiner,1
555276,280841,CknUJi2R1iYJG3li3k,103.255.15.42,138,103.255.15.255,138,0.000000,1,0,1,0,0.000000,0.000000,0.000000,0.000000,8,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,201.0,201.0,201.0,201.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,201.0,201.0,201.0,201.000000,0.000000,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000,1.0,0.0,201.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0,0,0,XMRIGCC CryptoMiner,1


In [6]:
data2021_pkl.equals(data2021_csv)

True

Ok, so CSV and PKL for 2021 are equal. Let's see 2022.

In [7]:
data2022_pkl = pd.read_pickle(PKL_2022)
data2022_csv = pd.read_csv(CSV_2022, index_col=0)

In [8]:
data2022_pkl

Unnamed: 0,uid,originh,originp,responh,responp,flow_duration,fwd_pkts_tot,bwd_pkts_tot,fwd_data_pkts_tot,bwd_data_pkts_tot,fwd_pkts_per_sec,bwd_pkts_per_sec,flow_pkts_per_sec,down_up_ratio,fwd_header_size_tot,fwd_header_size_min,fwd_header_size_max,bwd_header_size_tot,bwd_header_size_min,bwd_header_size_max,flow_FIN_flag_count,flow_SYN_flag_count,flow_RST_flag_count,fwd_PSH_flag_count,bwd_PSH_flag_count,flow_ACK_flag_count,fwd_URG_flag_count,bwd_URG_flag_count,flow_CWR_flag_count,flow_ECE_flag_count,fwd_pkts_payload.min,fwd_pkts_payload.max,fwd_pkts_payload.tot,fwd_pkts_payload.avg,fwd_pkts_payload.std,bwd_pkts_payload.min,bwd_pkts_payload.max,bwd_pkts_payload.tot,bwd_pkts_payload.avg,bwd_pkts_payload.std,flow_pkts_payload.min,flow_pkts_payload.max,flow_pkts_payload.tot,flow_pkts_payload.avg,flow_pkts_payload.std,fwd_iat.min,fwd_iat.max,fwd_iat.tot,fwd_iat.avg,fwd_iat.std,bwd_iat.min,bwd_iat.max,bwd_iat.tot,bwd_iat.avg,bwd_iat.std,flow_iat.min,flow_iat.max,flow_iat.tot,flow_iat.avg,flow_iat.std,payload_bytes_per_second,fwd_subflow_pkts,bwd_subflow_pkts,fwd_subflow_bytes,bwd_subflow_bytes,fwd_bulk_bytes,bwd_bulk_bytes,fwd_bulk_packets,bwd_bulk_packets,fwd_bulk_rate,bwd_bulk_rate,active.min,active.max,active.tot,active.avg,active.std,idle.min,idle.max,idle.tot,idle.avg,idle.std,fwd_init_window_size,bwd_init_window_size,fwd_last_window_size,bwd_last_window_size,attack_category,Label
0,Cmu9v81jToQyRF1gbk,184.0.48.168,38164,184.0.48.150,50443,0 days 00:00:00.000060,1,1,0,0,16644.063492,16644.063492,33288.126984,1.0,40,40,40,20,20,20,0,1,1,0,0,1,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.0,0.0,0.0,0.0,0.0,6.008148e+01,6.008148e+01,6.008148e+01,6.008148e+01,0.000000,0.000000e+00,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.081482,60.081482,60.081482,60.081482,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,64240,0,64240,0,Benign,0
1,CO21hl3TWkuXTOgajk,184.0.48.169,43068,184.0.48.150,50443,0 days 00:00:00.000083,1,1,0,0,12052.597701,12052.597701,24105.195402,1.0,40,40,40,20,20,20,0,1,1,0,0,1,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.0,0.0,0.0,0.0,0.0,8.296967e+01,8.296967e+01,8.296967e+01,8.296967e+01,0.000000,0.000000e+00,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,82.969666,82.969666,82.969666,82.969666,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,64240,0,64240,0,Benign,0
2,CBLJ6L19FP0MfYX7Oh,184.0.48.124,5678,255.255.255.255,5678,0 days 00:01:59.996602,3,0,3,0,0.025001,0.000000,0.025001,0.0,24,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,115.0,115.0,345.0,115.0,0.0,0.0,0.0,0.0,0.0,0.0,115.0,115.0,345.0,115.0,0.0,5.999748e+07,5.999912e+07,1.199966e+08,5.999830e+07,1156.846698,0.0,0.0,0.0,0.0,0.0,5.999748e+07,5.999912e+07,1.199966e+08,5.999830e+07,1156.846698,2.875081e+00,1.0,0.0,115.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,5.999748e+07,5.999912e+07,1.199966e+08,5.999830e+07,1156.846698,0,0,0,0,Benign,0
3,ChTG451zJ7hUYOcqje,184.0.48.124,5678,255.255.255.255,5678,0 days 00:00:59.996909,2,0,2,0,0.033335,0.000000,0.033335,0.0,16,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,115.0,115.0,230.0,115.0,0.0,0.0,0.0,0.0,0.0,0.0,115.0,115.0,230.0,115.0,0.0,5.999691e+07,5.999691e+07,5.999691e+07,5.999691e+07,0.000000,0.0,0.0,0.0,0.0,0.0,5.999691e+07,5.999691e+07,5.999691e+07,5.999691e+07,0.000000,3.833531e+00,1.0,0.0,115.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,5.999691e+07,5.999691e+07,5.999691e+07,5.999691e+07,0.000000,0,0,0,0,Benign,0
4,Cn9y6E2KVxzQbs5wjc,184.0.48.124,5678,255.255.255.255,5678,0 days 00:00:59.992130,2,0,2,0,0.033338,0.000000,0.033338,0.0,16,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,115.0,115.0,230.0,115.0,0.0,0.0,0.0,0.0,0.0,0.0,115.0,115.0,230.0,115.0,0.0,5.999213e+07,5.999213e+07,5.999213e+07,5.999213e+07,0.000000,0.0,0.0,0.0,0.0,0.0,5.999213e+07,5.999213e+07,5.999213e+07,5.999213e+07,0.000000,3.833836e+00,1.0,0.0,115.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,5.999213e+07,5.999213e+07,5.999213e+07,5.999213e+07,0.000000,0,0,0,0,Benign,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16634,Clt16PPxzrXEtpa5d,184.0.48.20,53866,184.0.48.255,1947,0 days 00:00:00.000027,2,0,2,0,73584.280702,0.000000,73584.280702,0.0,16,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,40.0,40.0,80.0,40.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,40.0,80.0,40.0,0.0,2.717972e+01,2.717972e+01,2.717972e+01,2.717972e+01,0.000000,0.0,0.0,0.0,0.0,0.0,2.717972e+01,2.717972e+01,2.717972e+01,2.717972e+01,0.000000,2.943371e+06,2.0,0.0,80.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,27.179718,27.179718,27.179718,27.179718,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0,0,0,0,XMRIGCC CryptoMiner,1
53170,Cs8RA72uHDiQa5ch2k,184.0.48.20,54318,184.0.48.255,1947,0 days 00:00:00.000027,2,0,2,0,74235.469027,0.000000,74235.469027,0.0,16,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,40.0,40.0,80.0,40.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,40.0,80.0,40.0,0.0,2.694130e+01,2.694130e+01,2.694130e+01,2.694130e+01,0.000000,0.0,0.0,0.0,0.0,0.0,2.694130e+01,2.694130e+01,2.694130e+01,2.694130e+01,0.000000,2.969419e+06,2.0,0.0,80.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,26.941299,26.941299,26.941299,26.941299,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0,0,0,0,XMRIGCC CryptoMiner,1
53529,Cy4dqo4YEq5YGxjUXa,184.0.48.20,65355,184.0.48.255,1947,0 days 00:00:00,2,0,2,0,0.000000,0.000000,0.000000,0.0,16,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,40.0,40.0,80.0,40.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,40.0,80.0,40.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,2.0,0.0,80.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0,0,0,0,XMRIGCC CryptoMiner,1
86308,CFXfNV3OTG04e0UnP4,184.0.48.20,53642,184.0.48.255,1947,0 days 00:00:00.000054,2,0,2,0,37117.734513,0.000000,37117.734513,0.0,16,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,40.0,40.0,80.0,40.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,40.0,80.0,40.0,0.0,5.388260e+01,5.388260e+01,5.388260e+01,5.388260e+01,0.000000,0.0,0.0,0.0,0.0,0.0,5.388260e+01,5.388260e+01,5.388260e+01,5.388260e+01,0.000000,1.484709e+06,2.0,0.0,80.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,53.882599,53.882599,53.882599,53.882599,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0,0,0,0,XMRIGCC CryptoMiner,1


In [9]:
data2022_csv

Unnamed: 0,uid,originh,originp,responh,responp,flow_duration,fwd_pkts_tot,bwd_pkts_tot,fwd_data_pkts_tot,bwd_data_pkts_tot,fwd_pkts_per_sec,bwd_pkts_per_sec,flow_pkts_per_sec,down_up_ratio,fwd_header_size_tot,fwd_header_size_min,fwd_header_size_max,bwd_header_size_tot,bwd_header_size_min,bwd_header_size_max,flow_FIN_flag_count,flow_SYN_flag_count,flow_RST_flag_count,fwd_PSH_flag_count,bwd_PSH_flag_count,flow_ACK_flag_count,fwd_URG_flag_count,bwd_URG_flag_count,flow_CWR_flag_count,flow_ECE_flag_count,fwd_pkts_payload.min,fwd_pkts_payload.max,fwd_pkts_payload.tot,fwd_pkts_payload.avg,fwd_pkts_payload.std,bwd_pkts_payload.min,bwd_pkts_payload.max,bwd_pkts_payload.tot,bwd_pkts_payload.avg,bwd_pkts_payload.std,flow_pkts_payload.min,flow_pkts_payload.max,flow_pkts_payload.tot,flow_pkts_payload.avg,flow_pkts_payload.std,fwd_iat.min,fwd_iat.max,fwd_iat.tot,fwd_iat.avg,fwd_iat.std,bwd_iat.min,bwd_iat.max,bwd_iat.tot,bwd_iat.avg,bwd_iat.std,flow_iat.min,flow_iat.max,flow_iat.tot,flow_iat.avg,flow_iat.std,payload_bytes_per_second,fwd_subflow_pkts,bwd_subflow_pkts,fwd_subflow_bytes,bwd_subflow_bytes,fwd_bulk_bytes,bwd_bulk_bytes,fwd_bulk_packets,bwd_bulk_packets,fwd_bulk_rate,bwd_bulk_rate,active.min,active.max,active.tot,active.avg,active.std,idle.min,idle.max,idle.tot,idle.avg,idle.std,fwd_init_window_size,bwd_init_window_size,fwd_last_window_size,bwd_last_window_size,attack_category,Label
0,Cmu9v81jToQyRF1gbk,184.0.48.168,38164,184.0.48.150,50443,0 days 00:00:00.000060,1,1,0,0,16644.063492,16644.063492,33288.126984,1.0,40,40,40,20,20,20,0,1,1,0,0,1,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.0,0.0,0.0,0.0,0.0,6.008148e+01,6.008148e+01,6.008148e+01,6.008148e+01,0.000000,0.000000e+00,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.081482,60.081482,60.081482,60.081482,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,64240,0,64240,0,Benign,0
1,CO21hl3TWkuXTOgajk,184.0.48.169,43068,184.0.48.150,50443,0 days 00:00:00.000083,1,1,0,0,12052.597701,12052.597701,24105.195402,1.0,40,40,40,20,20,20,0,1,1,0,0,1,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.0,0.0,0.0,0.0,0.0,8.296967e+01,8.296967e+01,8.296967e+01,8.296967e+01,0.000000,0.000000e+00,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,82.969666,82.969666,82.969666,82.969666,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,64240,0,64240,0,Benign,0
2,CBLJ6L19FP0MfYX7Oh,184.0.48.124,5678,255.255.255.255,5678,0 days 00:01:59.996602,3,0,3,0,0.025001,0.000000,0.025001,0.0,24,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,115.0,115.0,345.0,115.0,0.0,0.0,0.0,0.0,0.0,0.0,115.0,115.0,345.0,115.0,0.0,5.999748e+07,5.999912e+07,1.199966e+08,5.999830e+07,1156.846698,0.0,0.0,0.0,0.0,0.0,5.999748e+07,5.999912e+07,1.199966e+08,5.999830e+07,1156.846698,2.875081e+00,1.0,0.0,115.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,5.999748e+07,5.999912e+07,1.199966e+08,5.999830e+07,1156.846698,0,0,0,0,Benign,0
3,ChTG451zJ7hUYOcqje,184.0.48.124,5678,255.255.255.255,5678,0 days 00:00:59.996909,2,0,2,0,0.033335,0.000000,0.033335,0.0,16,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,115.0,115.0,230.0,115.0,0.0,0.0,0.0,0.0,0.0,0.0,115.0,115.0,230.0,115.0,0.0,5.999691e+07,5.999691e+07,5.999691e+07,5.999691e+07,0.000000,0.0,0.0,0.0,0.0,0.0,5.999691e+07,5.999691e+07,5.999691e+07,5.999691e+07,0.000000,3.833531e+00,1.0,0.0,115.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,5.999691e+07,5.999691e+07,5.999691e+07,5.999691e+07,0.000000,0,0,0,0,Benign,0
4,Cn9y6E2KVxzQbs5wjc,184.0.48.124,5678,255.255.255.255,5678,0 days 00:00:59.992130,2,0,2,0,0.033338,0.000000,0.033338,0.0,16,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,115.0,115.0,230.0,115.0,0.0,0.0,0.0,0.0,0.0,0.0,115.0,115.0,230.0,115.0,0.0,5.999213e+07,5.999213e+07,5.999213e+07,5.999213e+07,0.000000,0.0,0.0,0.0,0.0,0.0,5.999213e+07,5.999213e+07,5.999213e+07,5.999213e+07,0.000000,3.833836e+00,1.0,0.0,115.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,5.999213e+07,5.999213e+07,5.999213e+07,5.999213e+07,0.000000,0,0,0,0,Benign,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16634,Clt16PPxzrXEtpa5d,184.0.48.20,53866,184.0.48.255,1947,0 days 00:00:00.000027,2,0,2,0,73584.280702,0.000000,73584.280702,0.0,16,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,40.0,40.0,80.0,40.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,40.0,80.0,40.0,0.0,2.717972e+01,2.717972e+01,2.717972e+01,2.717972e+01,0.000000,0.0,0.0,0.0,0.0,0.0,2.717972e+01,2.717972e+01,2.717972e+01,2.717972e+01,0.000000,2.943371e+06,2.0,0.0,80.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,27.179718,27.179718,27.179718,27.179718,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0,0,0,0,XMRIGCC CryptoMiner,1
53170,Cs8RA72uHDiQa5ch2k,184.0.48.20,54318,184.0.48.255,1947,0 days 00:00:00.000027,2,0,2,0,74235.469027,0.000000,74235.469027,0.0,16,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,40.0,40.0,80.0,40.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,40.0,80.0,40.0,0.0,2.694130e+01,2.694130e+01,2.694130e+01,2.694130e+01,0.000000,0.0,0.0,0.0,0.0,0.0,2.694130e+01,2.694130e+01,2.694130e+01,2.694130e+01,0.000000,2.969419e+06,2.0,0.0,80.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,26.941299,26.941299,26.941299,26.941299,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0,0,0,0,XMRIGCC CryptoMiner,1
53529,Cy4dqo4YEq5YGxjUXa,184.0.48.20,65355,184.0.48.255,1947,0 days 00:00:00,2,0,2,0,0.000000,0.000000,0.000000,0.0,16,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,40.0,40.0,80.0,40.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,40.0,80.0,40.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,2.0,0.0,80.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0,0,0,0,XMRIGCC CryptoMiner,1
86308,CFXfNV3OTG04e0UnP4,184.0.48.20,53642,184.0.48.255,1947,0 days 00:00:00.000054,2,0,2,0,37117.734513,0.000000,37117.734513,0.0,16,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,40.0,40.0,80.0,40.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,40.0,80.0,40.0,0.0,5.388260e+01,5.388260e+01,5.388260e+01,5.388260e+01,0.000000,0.0,0.0,0.0,0.0,0.0,5.388260e+01,5.388260e+01,5.388260e+01,5.388260e+01,0.000000,1.484709e+06,2.0,0.0,80.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,53.882599,53.882599,53.882599,53.882599,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0,0,0,0,XMRIGCC CryptoMiner,1


In [10]:
data2022_pkl.equals(data2022_csv)

False

In [11]:
# If the DataFrames themselves are not equal, let's compare only the first lines
data2022_pkl.iloc[0].equals(data2022_csv.iloc[0])

False

In [12]:
# Since even the first row is different, let's see which columns differ
data2022_pkl.columns[(data2022_pkl.iloc[0] == data2022_csv.iloc[0]) == False]

Index(['Label'], dtype='object')

In [13]:
# Why are labels different?
data2022_pkl.iloc[0]['Label'], data2022_csv.iloc[0]['Label']

('0', 0)

In [14]:
data2022_pkl['Label'].value_counts()

Label
0    214904
1     13349
Name: count, dtype: int64

In [15]:
data2022_csv['Label'].value_counts()

Label
0    214904
1     13349
Name: count, dtype: int64

In [16]:
# Are the same same if we drop Label column?
data2022_csv.drop(columns=['Label']).equals(data2022_pkl.drop(columns=['Label']))

True

OK, data are the same, but pickle loading has converted the labels into ints instead of strings, causing the discrepancy. However, this has no practical consequence, so we consider the data are the same.

## 2021 Dataset Analysis

In [17]:
data2021 = data2021_csv

In [18]:
data2021.info(verbose=True, show_counts=True)

<class 'pandas.core.frame.DataFrame'>
Index: 555278 entries, 0 to 555277
Data columns (total 87 columns):
 #   Column                    Non-Null Count   Dtype  
---  ------                    --------------   -----  
 0   Unnamed: 0                555278 non-null  int64  
 1   uid                       555278 non-null  object 
 2   originh                   555278 non-null  object 
 3   originp                   555278 non-null  int64  
 4   responh                   555278 non-null  object 
 5   responp                   555278 non-null  int64  
 6   flow_duration             555278 non-null  float64
 7   fwd_pkts_tot              555278 non-null  int64  
 8   bwd_pkts_tot              555278 non-null  int64  
 9   fwd_data_pkts_tot         555278 non-null  int64  
 10  bwd_data_pkts_tot         555278 non-null  int64  
 11  fwd_pkts_per_sec          555278 non-null  float64
 12  bwd_pkts_per_sec          555278 non-null  float64
 13  flow_pkts_per_sec         555278 non-null  float6

In [19]:
len(data2021)

555278

In [20]:
data2021.head()

Unnamed: 0.1,Unnamed: 0,uid,originh,originp,responh,responp,flow_duration,fwd_pkts_tot,bwd_pkts_tot,fwd_data_pkts_tot,bwd_data_pkts_tot,fwd_pkts_per_sec,bwd_pkts_per_sec,flow_pkts_per_sec,down_up_ratio,fwd_header_size_tot,fwd_header_size_min,fwd_header_size_max,bwd_header_size_tot,bwd_header_size_min,bwd_header_size_max,flow_FIN_flag_count,flow_SYN_flag_count,flow_RST_flag_count,fwd_PSH_flag_count,bwd_PSH_flag_count,flow_ACK_flag_count,fwd_URG_flag_count,bwd_URG_flag_count,flow_CWR_flag_count,flow_ECE_flag_count,fwd_pkts_payload.min,fwd_pkts_payload.max,fwd_pkts_payload.tot,fwd_pkts_payload.avg,fwd_pkts_payload.std,bwd_pkts_payload.min,bwd_pkts_payload.max,bwd_pkts_payload.tot,bwd_pkts_payload.avg,bwd_pkts_payload.std,flow_pkts_payload.min,flow_pkts_payload.max,flow_pkts_payload.tot,flow_pkts_payload.avg,flow_pkts_payload.std,fwd_iat.min,fwd_iat.max,fwd_iat.tot,fwd_iat.avg,fwd_iat.std,bwd_iat.min,bwd_iat.max,bwd_iat.tot,bwd_iat.avg,bwd_iat.std,flow_iat.min,flow_iat.max,flow_iat.tot,flow_iat.avg,flow_iat.std,payload_bytes_per_second,fwd_subflow_pkts,bwd_subflow_pkts,fwd_subflow_bytes,bwd_subflow_bytes,fwd_bulk_bytes,bwd_bulk_bytes,fwd_bulk_packets,bwd_bulk_packets,fwd_bulk_rate,bwd_bulk_rate,active.min,active.max,active.tot,active.avg,active.std,idle.min,idle.max,idle.tot,idle.avg,idle.std,fwd_init_window_size,bwd_init_window_size,fwd_last_window_size,traffic_category,Label
0,0,Cg61Jch3vdz9DBptj,103.255.15.23,13316,128.199.242.104,443,2.207588,15,14,6,6,6.794746,6.341763,13.136509,0.933333,464,20,40,492,32,44,2,2,2,6,5,26,0,0,0,0,0.0,742.0,1826.0,121.733333,220.736581,0.0,1448.0,5025.0,358.928571,552.23984,0.0,1448.0,6851.0,236.241379,424.859275,18.119812,1963762.0,2207603.0,157685.9,520505.2,7.867813,2032929.0,2177950.0,167534.6,560626.7,7.867813,1963762.0,2207603.0,78842.963491,369637.8,3103.387105,7.5,7.0,913.0,2512.5,0.0,0.0,0.0,0.0,0.0,0.0,2207603.0,2207603.0,2207603.0,2207603.0,0.0,0.0,0.0,0.0,0.0,0.0,29200,65160,0,Bruteforce-XML,1
1,1,CdRIlqLWdj35Y9vW9,103.255.15.23,13318,128.199.242.104,443,15.624266,15,14,6,6,0.960045,0.896042,1.856087,0.933333,488,20,44,468,32,44,2,2,2,6,5,26,0,0,0,0,0.0,745.0,1829.0,121.933333,221.339257,0.0,1448.0,5025.0,358.928571,552.23984,0.0,1448.0,6854.0,236.344828,424.987166,20.980835,15343000.0,15624280.0,1116020.0,4094889.0,20.980835,15411440.0,15595170.0,1199628.0,4270148.0,10.01358,15343000.0,15624280.0,558009.89696,2897622.0,438.676603,7.5,7.0,914.5,2512.5,0.0,0.0,0.0,0.0,0.0,0.0,28837.92,252438.1,281276.0,140638.0,158109.181742,15343000.0,15343000.0,15343000.0,15343000.0,0.0,29200,65160,0,Bruteforce-XML,1
2,2,CLzp9Khd0Y09Qkgrg,103.255.15.23,13320,128.199.242.104,443,12.203357,14,13,6,5,1.147225,1.065281,2.212506,0.928571,432,20,40,448,32,44,2,2,2,6,5,24,0,0,0,0,0.0,744.0,1828.0,130.571429,226.803444,0.0,2896.0,5025.0,386.538462,817.479013,0.0,2896.0,6853.0,253.814815,592.570284,36.001205,11968140.0,12203380.0,938721.6,3314032.0,15.02037,12036740.0,12174820.0,1014569.0,3471107.0,15.02037,11968140.0,12203380.0,469360.81006,2345336.0,561.566789,7.0,6.5,914.0,2512.5,0.0,0.0,0.0,0.0,0.0,0.0,28913.02,206325.1,235238.1,117619.0,125449.251656,11968140.0,11968140.0,11968140.0,11968140.0,0.0,29200,65160,0,Bruteforce-XML,1
3,3,Cnf1YA4iLB4CSNWB88,103.255.15.23,13322,128.199.242.104,443,9.992448,14,13,6,5,1.401058,1.300983,2.702041,0.928571,432,20,40,436,32,44,2,2,2,6,5,24,0,0,0,0,0.0,744.0,1828.0,130.571429,226.803444,0.0,2896.0,5025.0,386.538462,817.479013,0.0,2896.0,6853.0,253.814815,592.570284,50.067902,9759205.0,9992470.0,768651.5,2701448.0,20.980835,9828447.0,9963348.0,830279.0,2833716.0,20.980835,9759205.0,9992470.0,384325.770231,1912152.0,685.81794,7.0,6.5,914.0,2512.5,0.0,0.0,0.0,0.0,0.0,0.0,29529.09,203736.1,233265.2,116632.6,123182.931318,9759205.0,9759205.0,9759205.0,9759205.0,0.0,29200,65160,0,Bruteforce-XML,1
4,4,C4ZKvv3fpO72EAOsJ6,103.255.15.23,13324,128.199.242.104,443,7.780611,14,14,6,5,1.799345,1.799345,3.598689,1.0,432,20,40,480,32,44,2,2,2,6,5,25,0,0,0,0,0.0,744.0,1828.0,130.571429,226.803444,0.0,2896.0,5025.0,358.928571,792.173394,0.0,2896.0,6853.0,244.75,583.468215,16.927719,7545305.0,7780620.0,598509.2,2087417.0,15.02037,7613719.0,7750841.0,596218.5,2108534.0,9.059906,7545305.0,7780620.0,288171.114745,1450411.0,880.779153,7.0,7.0,914.0,2512.5,0.0,0.0,0.0,0.0,0.0,0.0,28550.15,206764.9,235315.1,117657.5,126016.885411,7545305.0,7545305.0,7545305.0,7545305.0,0.0,29200,65160,0,Bruteforce-XML,1


In [21]:
data2021.describe()

Unnamed: 0.1,Unnamed: 0,originp,responp,flow_duration,fwd_pkts_tot,bwd_pkts_tot,fwd_data_pkts_tot,bwd_data_pkts_tot,fwd_pkts_per_sec,bwd_pkts_per_sec,flow_pkts_per_sec,down_up_ratio,fwd_header_size_tot,fwd_header_size_min,fwd_header_size_max,bwd_header_size_tot,bwd_header_size_min,bwd_header_size_max,flow_FIN_flag_count,flow_SYN_flag_count,flow_RST_flag_count,fwd_PSH_flag_count,bwd_PSH_flag_count,flow_ACK_flag_count,fwd_URG_flag_count,bwd_URG_flag_count,flow_CWR_flag_count,flow_ECE_flag_count,fwd_pkts_payload.min,fwd_pkts_payload.max,fwd_pkts_payload.tot,fwd_pkts_payload.avg,fwd_pkts_payload.std,bwd_pkts_payload.min,bwd_pkts_payload.max,bwd_pkts_payload.tot,bwd_pkts_payload.avg,bwd_pkts_payload.std,flow_pkts_payload.min,flow_pkts_payload.max,flow_pkts_payload.tot,flow_pkts_payload.avg,flow_pkts_payload.std,fwd_iat.min,fwd_iat.max,fwd_iat.tot,fwd_iat.avg,fwd_iat.std,bwd_iat.min,bwd_iat.max,bwd_iat.tot,bwd_iat.avg,bwd_iat.std,flow_iat.min,flow_iat.max,flow_iat.tot,flow_iat.avg,flow_iat.std,payload_bytes_per_second,fwd_subflow_pkts,bwd_subflow_pkts,fwd_subflow_bytes,bwd_subflow_bytes,fwd_bulk_bytes,bwd_bulk_bytes,fwd_bulk_packets,bwd_bulk_packets,fwd_bulk_rate,bwd_bulk_rate,active.min,active.max,active.tot,active.avg,active.std,idle.min,idle.max,idle.tot,idle.avg,idle.std,fwd_init_window_size,bwd_init_window_size,fwd_last_window_size,Label
count,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0,555278.0
mean,137369.392132,38673.198034,5082.161647,9.306544,18.264091,18.892578,7.956928,15.803417,1720.704202,1674.140666,3394.844869,0.887003,636.1381,19.901433,24.607501,575.428,16.54519,19.689366,0.562095,0.923988,0.503098,7.084219,10.782839,34.937831,0.001439,1.1e-05,0.002329,3.8e-05,22.69905,207.591415,1872.353,52.682038,56.488003,31.246709,1252.857824,26593.91,309.276711,363.23948,22.705447,1263.603752,28466.27,186.045581,299.247061,893204.6,5102050.0,9134269.0,1190675.0,932954.9,15284.12,4232945.0,7234529.0,345451.9,963441.8,893746.1,5202171.0,9312579.0,1080620.0,708271.3,35163.15,12.653119,13.143448,1507.875699,15924.95,18.984345,6423.852,0.030403,2.513407,1248.447,5797473.0,640550.6,880624.1,999637.5,735286.5,133588.3,3490637.0,5000630.0,8312942.0,4073219.0,799257.6,14750.725718,21005.879513,4546.87221,0.067887
std,99946.804651,18071.825216,12915.8129,101.80273,463.901484,469.078877,73.441224,467.349374,4994.579973,4548.503983,9316.856663,0.511737,17625.6,12.524893,16.577095,14950.92,12.151789,15.924656,0.825811,1.018941,0.898075,73.276271,173.178129,932.467347,0.038844,0.003287,0.057376,0.00671,33.855436,287.06048,22293.59,53.300571,89.671573,48.048751,2778.113834,400471.1,569.643855,648.563051,33.867741,2774.930843,403874.4,286.17378,524.095261,7145073.0,23159400.0,101757100.0,7273388.0,4779988.0,257071.8,22175010.0,73325960.0,1682992.0,4913081.0,7144566.0,23230610.0,101805500.0,7190908.0,3437376.0,218383.7,85.057715,89.297659,17077.706573,148434.3,872.755605,67969.26,0.551991,19.292902,97611.73,18862080.0,4315385.0,17282710.0,28451170.0,8693403.0,8289045.0,18049300.0,23265160.0,96833720.0,19159580.0,7206576.0,15003.491439,29155.406813,11069.456088,0.251552
min,0.0,21.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,52201.0,28581.0,53.0,0.000311,1.0,1.0,1.0,0.0,4.430152,3.168143,7.624355,0.866667,16.0,8.0,8.0,8.0,8.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,36.0,36.0,33.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,52.0,88.0,44.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.00679,310.1826,310.8978,306.8447,0.0,1.322224,1.0,1.0,36.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,152.8263,155.9258,155.9258,155.9258,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,121610.5,42693.5,443.0,0.026218,2.0,2.0,2.0,2.0,39.760205,43.938937,82.775849,1.0,28.0,20.0,20.0,16.0,8.0,8.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,42.0,72.0,40.0,0.0,0.0,95.0,148.0,95.0,28.991378,0.0,117.0,244.0,68.0,41.7193,16.21246,20.98083,20.98083,20.98083,0.0,4.053116,391.0065,391.0065,390.0528,0.0,15.97405,25079.01,26223.9,10016.01,13948.45,5297.83,2.0,2.0,72.0,147.0,0.0,0.0,0.0,0.0,0.0,0.0,25526.05,25879.86,25885.11,25871.04,0.0,0.0,0.0,0.0,0.0,0.0,5840.0,0.0,0.0,0.0
75%,211889.75,53120.0,443.0,0.347231,10.0,12.0,3.0,5.0,80.376058,80.237673,160.578254,1.0,328.0,32.0,40.0,368.0,32.0,40.0,1.0,2.0,1.0,3.0,5.0,20.0,0.0,0.0,0.0,0.0,36.0,517.0,836.0,83.3,132.068504,52.0,2394.0,5025.0,448.555556,775.794646,36.0,2394.0,6855.0,264.352941,626.269641,29.08707,184027.2,313759.1,34299.42,52459.26,34.09386,139133.0,250115.9,22547.64,38280.1,72.95609,187160.0,347828.1,25377.91,31917.63,16512.68,9.0,7.5,771.0,4037.0,0.0,0.0,0.0,0.0,0.0,0.0,262070.7,284153.9,285737.0,278214.9,0.0,0.0,0.0,0.0,0.0,0.0,29200.0,65160.0,402.0,0.0
max,350709.0,65535.0,65522.0,17393.0306,326292.0,326827.0,3953.0,326822.0,223696.213333,110376.421053,223696.213333,6.0,12420100.0,44.0,60.0,10458470.0,40.0,60.0,15.0,23.0,110.0,3953.0,98009.0,653118.0,4.0,1.0,4.0,2.0,1020.0,34560.0,5795999.0,4841.491139,6039.500702,1350.0,65228.0,161262200.0,19194.344828,20011.104987,1020.0,65228.0,161400600.0,10265.046192,16932.559085,98096690.0,299803800.0,17393030000.0,101064600.0,111541900.0,82009190.0,299803800.0,17392810000.0,82009190.0,127043700.0,82009190.0,299803800.0,17393030000.0,101064600.0,111541900.0,40936410.0,18729.666667,20682.666667,345222.0,53754070.0,246786.5,13937410.0,89.5,4726.0,33821130.0,382654900.0,920023700.0,10620030000.0,13130000000.0,4376667000.0,5438463000.0,299803800.0,299803800.0,17386800000.0,299803800.0,202996200.0,65535.0,65535.0,65535.0,1.0


In [22]:
data2021['Label'].value_counts()

Label
0    517582
1     37696
Name: count, dtype: int64

In [23]:
data2021['traffic_category'].value_counts()

traffic_category
Benign                 347431
Background             170151
Probing                 23388
Bruteforce               5884
Bruteforce-XML           5145
XMRIGCC CryptoMiner      3279
Name: count, dtype: int64

In [24]:
data2021['originh'].value_counts()

originh
103.255.15.23      213520
103.255.15.150     182631
103.255.15.27       57488
103.255.15.20       26862
103.255.15.67       14200
                    ...  
88.202.190.137          1
115.146.126.242         1
192.155.93.221          1
103.231.46.238          1
205.185.126.174         1
Name: count, Length: 2899, dtype: int64

In [25]:
data2021['responh'].value_counts().head(20)

responh
8.8.8.8               189666
128.199.242.104        99663
103.255.15.23          90678
255.255.255.255        36528
103.255.15.255         20913
128.199.88.81           7988
2600:1901:0:38d7::      7568
117.18.237.29           6650
34.107.221.82           5497
103.255.15.150          2990
10.255.254.228          2087
13.227.228.50           1702
13.227.228.95           1636
13.227.228.125          1557
13.227.228.53           1552
13.227.228.83           1310
13.227.228.12           1264
13.227.228.71           1262
13.227.228.6            1210
8.8.4.4                 1005
Name: count, dtype: int64

In [26]:
data2021['responp'].value_counts().head(10)

responp
53       191560
443      187508
80        32570
1947      29976
42000     28746
42001     28740
5678      18582
161       14219
137        6247
1514       2087
Name: count, dtype: int64

Flow data does not offer any timestamp information - continuity and duration analysis thus needs to be performed on the related PCAP.

### 2021 PCAP Analysis

In [27]:
os.listdir(FOLDER_PCAP_2021)

['Friday_2021-04-16_2304.pcap',
 'Saturday_2021-04-17_0357.pcap',
 'Sunday_2021-04-11_2154.pcap',
 'Sunday_2021-05-02_1659.pcap',
 'Monday_2021-04-12_0611.pcap',
 'Sunday_2021-05-02_1206.pcap']

There are only 6 PCAPs, we can capinfos each of them and them sum the packets/times manually.

In [28]:
for file in os.listdir(FOLDER_PCAP_2021):
    file_fullpath = os.path.join(FOLDER_PCAP_2021, file)

    !capinfos -a -e -u -s -c -x -M $file_fullpath

File name:           /data/disk2/hikari-2021/2021/Friday_2021-04-16_2304.pcap
Packet size limit:   inferred: 45 bytes
Number of packets:   4725390
File size:           5351944192 bytes
Capture duration:    15655.749278 seconds
First packet time:   2021-04-16 18:04:38.600181
Last packet time:    2021-04-16 22:25:34.349459
Average packet rate: 301.83 packets/sec
File name:           /data/disk2/hikari-2021/2021/Saturday_2021-04-17_0357.pcap
Number of packets:   708181
File size:           644815820 bytes
Capture duration:    10798.818274 seconds
First packet time:   2021-04-16 22:57:03.389853
Last packet time:    2021-04-17 01:57:02.208127
Average packet rate: 65.58 packets/sec
File name:           /data/disk2/hikari-2021/2021/Sunday_2021-04-11_2154.pcap
Number of packets:   622670
File size:           712250558 bytes
Capture duration:    7197.087176 seconds
First packet time:   2021-04-11 16:54:05.495732
Last packet time:    2021-04-11 18:54:02.582908
Average packet rate: 86.52 packets/

In [29]:
SCRIPT_PKTCOUNT = '/data/AAAA_SCRIPTS/countpkts.sh'
SCRIPT_TOTALDUR = '/data/AAAA_SCRIPTS/countdur.sh'

In [30]:
# Summarize all packets
!$SCRIPT_PKTCOUNT $FOLDER_PCAP_2021

Processing /data/disk2/hikari-2021/2021/Friday_2021-04-16_2304.pcap
Processing /data/disk2/hikari-2021/2021/Monday_2021-04-12_0611.pcap
Processing /data/disk2/hikari-2021/2021/Saturday_2021-04-17_0357.pcap
Processing /data/disk2/hikari-2021/2021/Sunday_2021-04-11_2154.pcap
Processing /data/disk2/hikari-2021/2021/Sunday_2021-05-02_1206.pcap
Processing /data/disk2/hikari-2021/2021/Sunday_2021-05-02_1659.pcap

TOTAL FILES : 6
TOTAL PACKETS: 13178641


In [31]:
# Return the total duration of all captures in seconds (they are non-overlapping, so summing is fine)
!$SCRIPT_TOTALDUR $FOLDER_PCAP_2021

64791.187248


# 2022 Dataset analysis

In [32]:
data2022 = data2022_pkl

In [33]:
data2022.info(verbose=True, show_counts=True)

<class 'pandas.core.frame.DataFrame'>
Index: 228253 entries, 0 to 99240
Data columns (total 87 columns):
 #   Column                    Non-Null Count   Dtype  
---  ------                    --------------   -----  
 0   uid                       228253 non-null  object 
 1   originh                   228253 non-null  object 
 2   originp                   228253 non-null  int64  
 3   responh                   228253 non-null  object 
 4   responp                   228253 non-null  int64  
 5   flow_duration             228253 non-null  object 
 6   fwd_pkts_tot              228253 non-null  int64  
 7   bwd_pkts_tot              228253 non-null  int64  
 8   fwd_data_pkts_tot         228253 non-null  int64  
 9   bwd_data_pkts_tot         228253 non-null  int64  
 10  fwd_pkts_per_sec          228253 non-null  float64
 11  bwd_pkts_per_sec          228253 non-null  float64
 12  flow_pkts_per_sec         228253 non-null  float64
 13  down_up_ratio             228253 non-null  float64

In [34]:
len(data2022)

228253

In [35]:
data2022.describe()

Unnamed: 0,originp,responp,fwd_pkts_tot,bwd_pkts_tot,fwd_data_pkts_tot,bwd_data_pkts_tot,fwd_pkts_per_sec,bwd_pkts_per_sec,flow_pkts_per_sec,down_up_ratio,fwd_header_size_tot,fwd_header_size_min,fwd_header_size_max,bwd_header_size_tot,bwd_header_size_min,bwd_header_size_max,flow_FIN_flag_count,flow_SYN_flag_count,flow_RST_flag_count,fwd_PSH_flag_count,bwd_PSH_flag_count,flow_ACK_flag_count,fwd_URG_flag_count,bwd_URG_flag_count,flow_CWR_flag_count,flow_ECE_flag_count,fwd_pkts_payload.min,fwd_pkts_payload.max,fwd_pkts_payload.tot,fwd_pkts_payload.avg,fwd_pkts_payload.std,bwd_pkts_payload.min,bwd_pkts_payload.max,bwd_pkts_payload.tot,bwd_pkts_payload.avg,bwd_pkts_payload.std,flow_pkts_payload.min,flow_pkts_payload.max,flow_pkts_payload.tot,flow_pkts_payload.avg,flow_pkts_payload.std,fwd_iat.min,fwd_iat.max,fwd_iat.tot,fwd_iat.avg,fwd_iat.std,bwd_iat.min,bwd_iat.max,bwd_iat.tot,bwd_iat.avg,bwd_iat.std,flow_iat.min,flow_iat.max,flow_iat.tot,flow_iat.avg,flow_iat.std,payload_bytes_per_second,fwd_subflow_pkts,bwd_subflow_pkts,fwd_subflow_bytes,bwd_subflow_bytes,fwd_bulk_bytes,bwd_bulk_bytes,fwd_bulk_packets,bwd_bulk_packets,fwd_bulk_rate,bwd_bulk_rate,active.min,active.max,active.tot,active.avg,active.std,idle.min,idle.max,idle.tot,idle.avg,idle.std,fwd_init_window_size,bwd_init_window_size,fwd_last_window_size,bwd_last_window_size
count,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0,228253.0
mean,36835.267633,7572.013108,7.544996,6.224299,1.500913,2.698269,4249.762,2656.37037,6906.132,0.686902,244.786776,31.047425,37.371846,201.714098,23.688539,28.931055,1.305039,1.721958,0.212321,1.257285,2.308307,12.41829,0.0,0.0,6.1e-05,3.5e-05,5.86108,160.742001,1231.376,28.373125,47.534058,0.247397,960.054277,3119.781,161.555119,296.349449,5.886078,970.176808,4351.157,95.030894,218.885283,526303.1,3023349.0,4453383.0,871495.3,849881.0,45068.11,2298123.0,2892729.0,358961.7,787707.7,524485.6,3049619.0,4607068.0,754124.5,660265.5,180077.9,6.175072,5.143211,261.33221,2469.663,34.669776,651.0029,0.045987,0.286461,18312.68,123791.6,595112.5,703932.4,791666.3,645359.5,70973.98,2606269.0,2748440.0,3815401.0,2665447.0,73894.91,52971.788279,41171.981104,14400.563611,817.609613
std,17707.592755,15979.443219,20.630049,16.22458,14.701351,10.308325,53379.21,6547.921713,54480.91,0.41093,659.838747,8.824979,9.630374,519.999423,12.860379,16.274839,1.173468,0.814854,0.492835,12.811778,7.309661,36.38758,0.0,0.0,0.009817,0.00592,26.648322,329.839819,112939.2,78.203163,94.138438,7.571228,1420.328209,54271.85,293.638668,437.031215,26.850825,1432.249781,126552.0,157.801449,331.787132,5192822.0,12434180.0,84871180.0,5466256.0,4130964.0,1559593.0,11041240.0,26259800.0,2413521.0,4286978.0,5190984.0,12392630.0,84955030.0,5344893.0,3192977.0,7108985.0,7.784241,8.075575,1186.639592,26833.86,1190.284609,33905.02,0.80899,5.088882,889942.3,3374709.0,1250759.0,1972918.0,8473676.0,1412372.0,950433.3,12153130.0,12439820.0,82627110.0,12219800.0,1200950.0,22837.106818,30975.991524,25183.061648,6979.116204
min,68.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,23638.0,443.0,1.0,1.0,0.0,0.0,1.434792,0.563013,1.962682,0.666667,40.0,32.0,40.0,20.0,20.0,20.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,93.22166,94.89059,89.8838,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,86.06911,87.97646,87.97646,87.97646,0.0,0.0,0.0,0.0,0.0,0.0,64240.0,0.0,63.0,0.0
50%,40172.0,443.0,6.0,5.0,0.0,0.0,64.75948,58.353541,123.7339,0.833333,200.0,32.0,40.0,168.0,32.0,40.0,2.0,2.0,0.0,0.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.953674,12452.13,18990.99,3668.213,2903.344,1.192093,3131.866,4486.084,1047.055,1359.641,0.953674,12604.95,20405.05,2068.186,2620.26,0.0,6.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12776.14,15522.96,15619.99,15383.96,0.0,0.0,0.0,0.0,0.0,0.0,64240.0,65160.0,312.0,64.0
75%,50846.0,3306.0,12.0,11.0,3.0,6.0,1748.72,1305.707018,3083.594,0.928571,392.0,32.0,40.0,360.0,32.0,40.0,2.0,2.0,0.0,3.0,6.0,22.0,0.0,0.0,0.0,0.0,0.0,261.0,524.0,50.0,86.753919,0.0,2816.0,4057.0,368.818182,830.134425,0.0,2816.0,4892.0,212.695652,595.787444,154.9721,532802.1,1170960.0,107397.4,55247.65,13.11302,77420.0,205658.0,21374.51,31178.94,30.04074,561700.1,1229111.0,59155.04,44346.06,1792.516,8.5,7.0,417.5,2723.0,0.0,0.0,0.0,0.0,0.0,0.0,286840.2,421173.1,442543.0,347045.9,0.0,0.0,0.0,0.0,0.0,0.0,65535.0,65160.0,19164.0,64.0
max,65534.0,65000.0,3214.0,2357.0,2514.0,1743.0,3355443.0,524288.0,3355443.0,16.555556,102856.0,44.0,60.0,75448.0,44.0,60.0,140.0,7.0,12.0,2448.0,1494.0,5195.0,0.0,0.0,2.0,1.0,976.0,33304.0,24868510.0,8132.280249,5997.207616,1348.0,65160.0,21931060.0,21135.153846,17102.398746,1348.0,65160.0,25065150.0,10370.084906,15960.117671,74396930.0,169437100.0,17942910000.0,74396930.0,67577790.0,147958800.0,169437500.0,7189996000.0,147958800.0,84717460.0,74396930.0,169437100.0,17942910000.0,74396930.0,46993210.0,739875200.0,1201.0,1750.0,233479.0,8361821.0,233216.0,10965530.0,131.0,1743.0,201405700.0,177018700.0,27143810.0,149972600.0,1806965000.0,52513360.0,72218270.0,169437100.0,169437100.0,17925320000.0,169437100.0,89378640.0,65535.0,65535.0,65535.0,65535.0


In [36]:
data2022.head()

Unnamed: 0,uid,originh,originp,responh,responp,flow_duration,fwd_pkts_tot,bwd_pkts_tot,fwd_data_pkts_tot,bwd_data_pkts_tot,fwd_pkts_per_sec,bwd_pkts_per_sec,flow_pkts_per_sec,down_up_ratio,fwd_header_size_tot,fwd_header_size_min,fwd_header_size_max,bwd_header_size_tot,bwd_header_size_min,bwd_header_size_max,flow_FIN_flag_count,flow_SYN_flag_count,flow_RST_flag_count,fwd_PSH_flag_count,bwd_PSH_flag_count,flow_ACK_flag_count,fwd_URG_flag_count,bwd_URG_flag_count,flow_CWR_flag_count,flow_ECE_flag_count,fwd_pkts_payload.min,fwd_pkts_payload.max,fwd_pkts_payload.tot,fwd_pkts_payload.avg,fwd_pkts_payload.std,bwd_pkts_payload.min,bwd_pkts_payload.max,bwd_pkts_payload.tot,bwd_pkts_payload.avg,bwd_pkts_payload.std,flow_pkts_payload.min,flow_pkts_payload.max,flow_pkts_payload.tot,flow_pkts_payload.avg,flow_pkts_payload.std,fwd_iat.min,fwd_iat.max,fwd_iat.tot,fwd_iat.avg,fwd_iat.std,bwd_iat.min,bwd_iat.max,bwd_iat.tot,bwd_iat.avg,bwd_iat.std,flow_iat.min,flow_iat.max,flow_iat.tot,flow_iat.avg,flow_iat.std,payload_bytes_per_second,fwd_subflow_pkts,bwd_subflow_pkts,fwd_subflow_bytes,bwd_subflow_bytes,fwd_bulk_bytes,bwd_bulk_bytes,fwd_bulk_packets,bwd_bulk_packets,fwd_bulk_rate,bwd_bulk_rate,active.min,active.max,active.tot,active.avg,active.std,idle.min,idle.max,idle.tot,idle.avg,idle.std,fwd_init_window_size,bwd_init_window_size,fwd_last_window_size,bwd_last_window_size,attack_category,Label
0,Cmu9v81jToQyRF1gbk,184.0.48.168,38164,184.0.48.150,50443,0 days 00:00:00.000060,1,1,0,0,16644.063492,16644.063492,33288.126984,1.0,40,40,40,20,20,20,0,1,1,0,0,1,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.08148,60.08148,60.08148,60.08148,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.081482,60.081482,60.081482,60.081482,0.0,0.0,0.0,0.0,0.0,0.0,64240,0,64240,0,Benign,0
1,CO21hl3TWkuXTOgajk,184.0.48.169,43068,184.0.48.150,50443,0 days 00:00:00.000083,1,1,0,0,12052.597701,12052.597701,24105.195402,1.0,40,40,40,20,20,20,0,1,1,0,0,1,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,82.96967,82.96967,82.96967,82.96967,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,82.969666,82.969666,82.969666,82.969666,0.0,0.0,0.0,0.0,0.0,0.0,64240,0,64240,0,Benign,0
2,CBLJ6L19FP0MfYX7Oh,184.0.48.124,5678,255.255.255.255,5678,0 days 00:01:59.996602,3,0,3,0,0.025001,0.0,0.025001,0.0,24,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,115.0,115.0,345.0,115.0,0.0,0.0,0.0,0.0,0.0,0.0,115.0,115.0,345.0,115.0,0.0,59997480.0,59999120.0,119996600.0,59998300.0,1156.846698,0.0,0.0,0.0,0.0,0.0,59997480.0,59999120.0,119996600.0,59998300.0,1156.846698,2.875081,1.0,0.0,115.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,59997480.0,59999120.0,119996600.0,59998300.0,1156.846698,0,0,0,0,Benign,0
3,ChTG451zJ7hUYOcqje,184.0.48.124,5678,255.255.255.255,5678,0 days 00:00:59.996909,2,0,2,0,0.033335,0.0,0.033335,0.0,16,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,115.0,115.0,230.0,115.0,0.0,0.0,0.0,0.0,0.0,0.0,115.0,115.0,230.0,115.0,0.0,59996910.0,59996910.0,59996910.0,59996910.0,0.0,0.0,0.0,0.0,0.0,0.0,59996910.0,59996910.0,59996910.0,59996910.0,0.0,3.833531,1.0,0.0,115.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,59996910.0,59996910.0,59996910.0,59996910.0,0.0,0,0,0,0,Benign,0
4,Cn9y6E2KVxzQbs5wjc,184.0.48.124,5678,255.255.255.255,5678,0 days 00:00:59.992130,2,0,2,0,0.033338,0.0,0.033338,0.0,16,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,115.0,115.0,230.0,115.0,0.0,0.0,0.0,0.0,0.0,0.0,115.0,115.0,230.0,115.0,0.0,59992130.0,59992130.0,59992130.0,59992130.0,0.0,0.0,0.0,0.0,0.0,0.0,59992130.0,59992130.0,59992130.0,59992130.0,0.0,3.833836,1.0,0.0,115.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,59992130.0,59992130.0,59992130.0,59992130.0,0.0,0,0,0,0,Benign,0


In [37]:
data2022['Label'].value_counts()

Label
0    214904
1     13349
Name: count, dtype: int64

In [38]:
data2022['attack_category'].value_counts()

attack_category
Benign                 214904
XMRIGCC CryptoMiner      7595
Bruteforce-XML           3650
Bruteforce               2104
Name: count, dtype: int64

So 2022 capture offers the same attacks as 2021 does.

In [39]:
# Source IP addresses
data2022['originh'].value_counts()

originh
184.0.48.169       75772
184.0.48.20         7028
203.178.143.112     5754
140.213.164.207     5087
140.213.177.193     5072
                   ...  
36.73.71.163           1
36.68.13.117           1
114.79.47.162          1
182.2.71.129           1
182.2.73.10            1
Name: count, Length: 5803, dtype: int64

In [40]:
data2022['responh'].value_counts()

responh
184.0.48.169       102164
184.0.48.171        71128
184.0.48.150        35093
255.255.255.255      8844
184.0.48.255         7595
                    ...  
116.12.46.36            1
185.199.111.133         1
180.209.98.28           1
202.201.0.160           1
103.141.154.2           1
Name: count, Length: 101, dtype: int64

In [41]:
data2022['responp'].value_counts()

responp
443      108490
3306      71133
50443     27355
1947       7811
5678       4182
          ...  
8827          1
33100         1
45406         1
169           1
22236         1
Name: count, Length: 1012, dtype: int64

Similarly to 2021 data, temporal analysis was performed through provided PCAPs.

### 2022 PCAP Analysis

In [42]:
for file in os.listdir(FOLDER_PCAP_2022):
    file_fullpath = os.path.join(FOLDER_PCAP_2022, file)

    !capinfos -a -e -u -s -c -x -M $file_fullpath

File name:           /data/disk2/hikari-2021/2022/Monday_2022-04-11_0622_BRUTEFORCE_XML_150s.pcap
Number of packets:   216832
File size:           270859896 bytes
Capture duration:    10799.543691 seconds
First packet time:   2022-04-11 01:22:38.420829
Last packet time:    2022-04-11 04:22:37.964520
Average packet rate: 20.08 packets/sec
File name:           /data/disk2/hikari-2021/2022/Tuesday_2022-04-12_1418_BRUTEFORCE_XML_169s.pcap
Number of packets:   27380760
File size:           7306065968 bytes
Capture duration:    1585.282896 seconds
First packet time:   2022-04-12 09:18:52.726368
Last packet time:    2022-04-12 09:45:18.009264
Average packet rate: 17271.84 packets/sec
File name:           /data/disk2/hikari-2021/2022/Sunday_2022-04-10_2335_BRUTEFORCE_HTTPS_150s.pcap
Number of packets:   327043
File size:           275188455 bytes
Capture duration:    17996.987748 seconds
First packet time:   2022-04-10 18:35:03.365614
Last packet time:    2022-04-10 23:35:00.353362
Average pac

In [43]:
# Summarize all packets
!$SCRIPT_PKTCOUNT $FOLDER_PCAP_2022

Processing /data/disk2/hikari-2021/2022/Monday_2022-04-11_0622_BRUTEFORCE_XML_150s.pcap
Processing /data/disk2/hikari-2021/2022/Sunday_2022-04-10_2335_BRUTEFORCE_HTTPS_150s.pcap
Processing /data/disk2/hikari-2021/2022/Tuesday_2022-04-12_0554_BRUTEFORCE_HTTPS_169s.pcap
Processing /data/disk2/hikari-2021/2022/Tuesday_2022-04-12_1418_BRUTEFORCE_XML_169s.pcap

TOTAL FILES : 4
TOTAL PACKETS: 53302884


In [44]:
# Summarize all packets
!$SCRIPT_TOTALDUR $FOLDER_PCAP_2022

37581.344777
