# InSDN Dataset Analysis

* **Author:** Patrik Goldschmidt (igoldschmidt@fit.vut.cz)
* **Project:** Network Intrusion Datasets: A Survey, Limitations, and Recommendations
* **Date:** 2024

In [1]:
import pandas as pd
import numpy as np
import os

pd.set_option('display.max_columns', None)

In [2]:
CSV_FOLDER = 'InSDN_DatasetCSV'

In [3]:
data_ovs  = pd.read_csv(os.path.join(CSV_FOLDER, 'OVS.csv'))
data_meta = pd.read_csv(os.path.join(CSV_FOLDER, 'metasploitable-2.csv'))
data_norm = pd.read_csv(os.path.join(CSV_FOLDER, 'Normal_data.csv'))

## Normal Data

In [4]:
data_norm.info(verbose=True, show_counts=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 68424 entries, 0 to 68423
Data columns (total 84 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Flow ID            68424 non-null  object 
 1   Src IP             68424 non-null  object 
 2   Src Port           68424 non-null  int64  
 3   Dst IP             68424 non-null  object 
 4   Dst Port           68424 non-null  int64  
 5   Protocol           68424 non-null  int64  
 6   Timestamp          68424 non-null  object 
 7   Flow Duration      68424 non-null  int64  
 8   Tot Fwd Pkts       68424 non-null  int64  
 9   Tot Bwd Pkts       68424 non-null  int64  
 10  TotLen Fwd Pkts    68424 non-null  float64
 11  TotLen Bwd Pkts    68424 non-null  float64
 12  Fwd Pkt Len Max    68424 non-null  int64  
 13  Fwd Pkt Len Min    68424 non-null  int64  
 14  Fwd Pkt Len Mean   68424 non-null  float64
 15  Fwd Pkt Len Std    68424 non-null  float64
 16  Bwd Pkt Len Max    684

In [5]:
len(data_norm)

68424

In [6]:
data_norm.head()

Unnamed: 0,Flow ID,Src IP,Src Port,Dst IP,Dst Port,Protocol,Timestamp,Flow Duration,Tot Fwd Pkts,Tot Bwd Pkts,TotLen Fwd Pkts,TotLen Bwd Pkts,Fwd Pkt Len Max,Fwd Pkt Len Min,Fwd Pkt Len Mean,Fwd Pkt Len Std,Bwd Pkt Len Max,Bwd Pkt Len Min,Bwd Pkt Len Mean,Bwd Pkt Len Std,Flow Byts/s,Flow Pkts/s,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Flow IAT Min,Fwd IAT Tot,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Fwd IAT Min,Bwd IAT Tot,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Bwd PSH Flags,Fwd URG Flags,Bwd URG Flags,Fwd Header Len,Bwd Header Len,Fwd Pkts/s,Bwd Pkts/s,Pkt Len Min,Pkt Len Max,Pkt Len Mean,Pkt Len Std,Pkt Len Var,FIN Flag Cnt,SYN Flag Cnt,RST Flag Cnt,PSH Flag Cnt,ACK Flag Cnt,URG Flag Cnt,CWE Flag Count,ECE Flag Cnt,Down/Up Ratio,Pkt Size Avg,Fwd Seg Size Avg,Bwd Seg Size Avg,Fwd Byts/b Avg,Fwd Pkts/b Avg,Fwd Blk Rate Avg,Bwd Byts/b Avg,Bwd Pkts/b Avg,Bwd Blk Rate Avg,Subflow Fwd Pkts,Subflow Fwd Byts,Subflow Bwd Pkts,Subflow Bwd Byts,Init Fwd Win Byts,Init Bwd Win Byts,Fwd Act Data Pkts,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,185.127.17.56-192.168.20.133-443-53648-6,185.127.17.56,443,192.168.20.133,53648,6,5/2/2020 13:58,245230,44,40,124937.0,1071.0,9100,0,2839.477273,1839.508257,517,0,26.775,109.188026,513835.9907,342.535579,2954.578313,7953.221927,64066.0,-44.0,238564.0,5548.0,10446.29576,64066.0,2.0,245230.0,6287.948718,12986.46879,79070.0,29.0,0,0,0,0,880,804,179.423398,163.11218,0,9100,1482.447059,1933.268313,3737526.0,0,1,0,0,1,0,0,0,0,1500.095238,2839.477273,26.775,0,0,0,0,0,0,44,124937,40,1071,-1,65535,41,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Normal
1,185.127.17.56-192.168.20.133-443-53650-6,192.168.20.133,53650,185.127.17.56,443,6,5/2/2020 13:58,1605449,107,149,1071.0,439537.0,517,0,10.009346,67.49668,27300,0,2949.912752,3012.589539,274445.3421,159.456949,6295.878431,56408.33052,859760.0,-102.0,1332121.0,12567.17925,83434.14155,861138.0,2.0,1603130.0,10831.95946,73926.65245,861129.0,1.0,0,0,0,0,2140,3004,66.648022,92.808928,0,27300,1714.428016,2713.465917,7362897.0,0,1,0,0,0,0,0,0,1,1721.125,10.009346,2949.912752,0,0,0,0,0,0,107,1071,149,439537,-1,64240,4,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Normal
2,192.168.20.133-192.168.20.2-35108-53-6,192.168.20.133,35108,192.168.20.2,53,6,5/2/2020 13:58,53078,5,5,66.0,758.0,66,0,13.2,29.516097,638,0,151.6,276.826299,15524.32269,188.401974,5897.555556,15184.8452,46232.0,19.0,50302.0,12575.5,22521.87727,46251.0,67.0,52962.0,13240.5,22052.04405,46258.0,405.0,0,0,0,0,100,124,94.200987,94.200987,0,638,74.909091,190.807471,36407.49,0,1,0,0,0,0,0,0,1,82.4,13.2,151.6,0,0,0,0,0,0,5,66,5,758,-1,64240,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Normal
3,192.168.20.133-192.168.20.2-35108-53-6,192.168.20.2,53,192.168.20.133,35108,6,5/2/2020 13:58,6975,1,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,286.738351,6975.0,0.0,6975.0,6975.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,20,20,143.369176,143.369176,0,0,0.0,0.0,0.0,0,0,0,0,1,0,0,0,1,0.0,0.0,0.0,0,0,0,0,0,0,1,0,1,0,-1,64239,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Normal
4,154.59.122.74-192.168.20.133-443-60900-6,192.168.20.133,60900,154.59.122.74,443,6,5/2/2020 13:58,190141,13,16,780.0,11085.0,427,0,60.0,130.042942,2596,0,692.8125,794.15735,62401.06027,152.518394,6790.75,12933.29591,38521.0,-54.0,86882.0,7240.166667,13050.84163,38805.0,1.0,190141.0,12676.06667,15949.09279,38521.0,1.0,0,0,0,0,260,344,68.370315,84.14808,0,2596,395.5,661.691706,437835.9,0,1,0,0,0,0,0,0,1,409.137931,60.0,692.8125,0,0,0,0,0,0,13,780,16,11085,-1,64240,3,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Normal


In [7]:
data_norm.describe()

Unnamed: 0,Src Port,Dst Port,Protocol,Flow Duration,Tot Fwd Pkts,Tot Bwd Pkts,TotLen Fwd Pkts,TotLen Bwd Pkts,Fwd Pkt Len Max,Fwd Pkt Len Min,Fwd Pkt Len Mean,Fwd Pkt Len Std,Bwd Pkt Len Max,Bwd Pkt Len Min,Bwd Pkt Len Mean,Bwd Pkt Len Std,Flow Byts/s,Flow Pkts/s,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Flow IAT Min,Fwd IAT Tot,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Fwd IAT Min,Bwd IAT Tot,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Bwd PSH Flags,Fwd URG Flags,Bwd URG Flags,Fwd Header Len,Bwd Header Len,Fwd Pkts/s,Bwd Pkts/s,Pkt Len Min,Pkt Len Max,Pkt Len Mean,Pkt Len Std,Pkt Len Var,FIN Flag Cnt,SYN Flag Cnt,RST Flag Cnt,PSH Flag Cnt,ACK Flag Cnt,URG Flag Cnt,CWE Flag Count,ECE Flag Cnt,Down/Up Ratio,Pkt Size Avg,Fwd Seg Size Avg,Bwd Seg Size Avg,Fwd Byts/b Avg,Fwd Pkts/b Avg,Fwd Blk Rate Avg,Bwd Byts/b Avg,Bwd Pkts/b Avg,Bwd Blk Rate Avg,Subflow Fwd Pkts,Subflow Fwd Byts,Subflow Bwd Pkts,Subflow Bwd Byts,Init Fwd Win Byts,Init Bwd Win Byts,Fwd Act Data Pkts,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min
count,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0,68424.0
mean,37361.032459,9733.738557,11.418947,13626360.0,11.807217,18.592628,3354.497,41281.16,425.863644,22.297469,152.916592,161.690114,1598.379998,25.996273,329.728639,382.544557,333100.3,10563.28,539954.3,1403683.0,5429239.0,37466.41,13077820.0,1154751.0,1889172.0,5184220.0,92595.54,12653440.0,866958.0,1669539.0,5101721.0,34933.73,0.0,0.111116,0.0,0.0,228.190927,361.339764,5133.32117,5431.571,21.184614,1829.100769,253.771799,412.080065,1133756.0,0.035441,0.197255,0.000643,0.111116,0.300962,0.0,0.0,0.0,1.633126,283.11957,152.916592,329.728639,0.0,0.0,0.0,0.0,0.0,0.0,11.807217,3354.618,18.592628,41292.23,-1.0,28244.664474,3.891266,0.0,188769.7,143402.5,398071.8,85634.84,4508770.0,604118.4,5066966.0,3975015.0
std,20305.851419,19352.308798,5.551915,33601070.0,112.712183,235.949827,156099.8,768010.8,1425.400448,67.228213,577.496895,718.456389,6227.551502,41.485833,721.792759,1039.375939,1452918.0,123722.3,1880600.0,4114642.0,14501350.0,959403.4,33339580.0,4071300.0,6005940.0,14376500.0,1526305.0,32658240.0,2914850.0,5186963.0,14225930.0,940891.8,0.0,0.314278,0.0,0.0,2548.449351,4865.50867,61687.111518,62121.26,26.733622,6317.546711,484.963508,981.80284,12295080.0,0.184892,0.39793,0.02535,0.314278,0.458679,0.0,0.0,0.0,1.21519,510.125658,577.496895,721.792759,0.0,0.0,0.0,0.0,0.0,0.0,112.712183,156057.3,235.949827,769025.0,0.0,31715.554236,57.08234,0.0,1373001.0,1105629.0,2467808.0,1027435.0,13146300.0,3486207.0,14481060.0,12463150.0
min,0.0,0.0,0.0,-154.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-262711.9,-16949.15,-154.0,0.0,-154.0,-11570.0,0.0,0.0,0.0,0.0,0.0,-154.0,-154.0,0.0,-154.0,-154.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,34213.5,53.0,6.0,3134.0,1.0,2.0,30.0,137.0,30.0,0.0,20.5,0.0,77.0,0.0,30.4,0.0,146.1983,18.8289,1106.188,1239.055,2650.0,12.0,0.0,0.0,0.0,0.0,0.0,2652.0,1432.5,0.0,2269.0,1.0,0.0,0.0,0.0,0.0,8.0,24.0,1.194663,9.970001,0.0,178.0,61.333333,47.920072,2296.333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,66.909091,20.5,30.4,0.0,0.0,0.0,0.0,0.0,0.0,1.0,30.0,2.0,137.0,-1.0,-1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,43383.0,443.0,6.0,4799.0,1.0,3.0,36.0,502.0,36.0,0.0,34.0,0.0,322.0,30.0,168.605263,156.0,74484.93,791.7656,1756.667,1963.307,4031.0,61.0,0.0,0.0,0.0,0.0,0.0,3970.0,2061.0,2101.521,3486.0,164.0,0.0,0.0,0.0,0.0,8.0,24.0,186.654225,515.0745,0.0,345.0,136.4,149.834732,22450.45,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,170.5,34.0,168.605263,0.0,0.0,0.0,0.0,0.0,0.0,1.0,36.0,3.0,502.0,-1.0,-1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,52105.25,443.0,17.0,712247.0,6.0,5.0,599.0,762.0,249.0,34.0,68.0,86.849692,438.0,34.0,235.333333,217.155091,185345.5,1240.31,64878.12,61793.06,520571.2,107.0,344682.0,30365.11,31778.76,226694.2,81.0,99583.25,17791.33,21544.63,52460.25,1224.25,0.0,0.0,0.0,0.0,120.0,124.0,319.081047,914.3554,34.0,517.0,169.4,204.045214,41634.45,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,3.0,225.0,68.0,235.333333,0.0,0.0,0.0,0.0,0.0,0.0,6.0,599.0,5.0,762.0,-1.0,64240.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,65513.0,60998.0,17.0,120000000.0,16928.0,34094.0,31600000.0,107000000.0,64239.0,3900.0,32119.5,45423.83252,64239.0,761.0,22660.72269,32119.5,86300000.0,3000000.0,119000000.0,62400000.0,119000000.0,119000000.0,120000000.0,118000000.0,84400000.0,119000000.0,118000000.0,120000000.0,65000000.0,68300000.0,119000000.0,65000000.0,0.0,1.0,0.0,0.0,360448.0,681904.0,1000000.0,2000000.0,448.0,64239.0,18354.0,31345.42239,983000000.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,7.0,21413.0,32119.5,22660.72269,0.0,0.0,0.0,0.0,0.0,0.0,16928.0,31565150.0,34094.0,107442000.0,-1.0,65535.0,11180.0,0.0,104000000.0,68300000.0,104000000.0,104000000.0,119000000.0,73000000.0,119000000.0,119000000.0


In [8]:
data_norm['Label'].value_counts()

Normal    68424
Name: Label, dtype: int64

## Metasploitable Data

In [9]:
data_meta['Label'].value_counts()

DDoS     73529
Probe    61757
DoS       1145
BFA        295
U2R         17
Name: Label, dtype: int64

In [10]:
data_meta.describe()

Unnamed: 0,Src Port,Dst Port,Protocol,Flow Duration,Tot Fwd Pkts,Tot Bwd Pkts,TotLen Fwd Pkts,TotLen Bwd Pkts,Fwd Pkt Len Max,Fwd Pkt Len Min,Fwd Pkt Len Mean,Fwd Pkt Len Std,Bwd Pkt Len Max,Bwd Pkt Len Min,Bwd Pkt Len Mean,Bwd Pkt Len Std,Flow Byts/s,Flow Pkts/s,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Flow IAT Min,Fwd IAT Tot,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Fwd IAT Min,Bwd IAT Tot,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Bwd PSH Flags,Fwd URG Flags,Bwd URG Flags,Fwd Header Len,Bwd Header Len,Fwd Pkts/s,Bwd Pkts/s,Pkt Len Min,Pkt Len Max,Pkt Len Mean,Pkt Len Std,Pkt Len Var,FIN Flag Cnt,SYN Flag Cnt,RST Flag Cnt,PSH Flag Cnt,ACK Flag Cnt,URG Flag Cnt,CWE Flag Count,ECE Flag Cnt,Down/Up Ratio,Pkt Size Avg,Fwd Seg Size Avg,Bwd Seg Size Avg,Fwd Byts/b Avg,Fwd Pkts/b Avg,Fwd Blk Rate Avg,Bwd Byts/b Avg,Bwd Pkts/b Avg,Bwd Blk Rate Avg,Subflow Fwd Pkts,Subflow Fwd Byts,Subflow Bwd Pkts,Subflow Bwd Byts,Init Fwd Win Byts,Init Bwd Win Byts,Fwd Act Data Pkts,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min
count,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0,136743.0
mean,7584.451285,11313.557747,2.796172,8394408.0,0.926951,3.091632,8.757662,8.173025,8.036229,0.001638,2.502787,4.41429,8.139744,0.054701,1.50381,3.389199,2951.329,76047.96,936225.2,2735042.0,8174313.0,1568.36,224328.8,38715.1,24439.33,60907.57,11564.1,8362434.0,1396225.0,3337970.0,8165058.0,1367.21,0.0,0.002969,0.0,0.0,30.989228,66.576629,1090.034482,74957.928026,0.000644,8.245819,1.681206,3.459486,44.291791,0.268913,0.191937,0.001514,0.002969,0.272475,0.0,0.0,0.0,0.447277,1.886816,2.502787,1.50381,0.0,0.0,0.0,0.0,0.0,0.0,0.926951,8.757662,3.091632,8.173025,-1.0,62.413652,0.298231,0.0,15119.78,4731.904,20836.35,12284.48,8143571.0,13755.85,8156480.0,8132080.0
std,15980.548414,17724.142526,2.994221,21385430.0,1.744029,1.982729,22.970945,15.063613,13.315169,0.235716,4.227463,7.338656,13.404118,1.282764,2.820743,5.690065,60289.87,103127.7,2371374.0,7005367.0,20999740.0,116792.5,4109885.0,620037.1,358164.0,888149.3,353722.6,21349750.0,3543600.0,8576423.0,21001520.0,8423.584,0.0,0.054408,0.0,0.0,56.280062,90.036957,31070.34067,88883.406997,0.168273,13.491232,2.912655,5.685419,79.12555,0.443396,0.393825,0.038878,0.054408,0.445234,0.0,0.0,0.0,0.773919,3.398579,4.227463,2.820743,0.0,0.0,0.0,0.0,0.0,0.0,1.744029,22.970945,1.982729,15.063613,0.0,387.333514,0.81267,0.0,224263.4,110032.1,281997.1,212440.2,20999770.0,315760.9,21005130.0,20999020.0
min,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04810207,1.0,0.0,1.0,-125.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014711,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,16.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,262.3166,16.0,0.0,16.0,12.0,0.0,0.0,0.0,0.0,0.0,16.0,16.0,0.0,16.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,243.412653,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,33.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60606.06,33.0,0.0,33.0,16.0,0.0,0.0,0.0,0.0,0.0,32.0,32.0,0.0,32.0,21.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60606.06061,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,80.0,33915.5,6.0,10210.0,3.0,4.0,30.0,30.0,30.0,0.0,7.5,15.0,30.0,0.0,4.285714,11.338934,0.9530518,125000.0,5966.0,807.1555,8375.0,30.0,1868.0,785.6667,863.1024,1669.0,77.0,8413.5,6824.333,1026.27,8077.0,156.0,0.0,0.0,0.0,0.0,104.0,144.0,0.047637,125000.0,0.0,30.0,5.454545,12.135598,147.272727,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,6.0,7.5,4.285714,0.0,0.0,0.0,0.0,0.0,0.0,3.0,30.0,4.0,30.0,-1.0,63.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,65518.0,58178.0,17.0,119965400.0,84.0,87.0,2460.0,2498.0,287.0,44.0,62.5,115.477704,104.0,44.0,44.0,45.337255,4666667.0,2000000.0,41600000.0,45800000.0,120000000.0,41600000.0,109000000.0,13800000.0,7625765.0,15100000.0,11400000.0,120000000.0,39700000.0,56100000.0,120000000.0,1031266.0,0.0,1.0,0.0,0.0,2696.0,2800.0,1000000.0,1000000.0,44.0,287.0,44.0,85.137812,7248.44697,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,5.0,66.0,62.5,44.0,0.0,0.0,0.0,0.0,0.0,0.0,84.0,2460.0,87.0,2498.0,-1.0,5840.0,82.0,0.0,4575740.0,3366629.0,4807377.0,4575740.0,120000000.0,9274678.0,120000000.0,120000000.0


In [11]:
data_meta['Timestamp']

0          10/1/2020 5:02
1          10/1/2020 5:02
2          10/1/2020 5:02
3          10/1/2020 1:39
4          10/1/2020 1:39
               ...       
136738    10/1/2020 20:36
136739    10/1/2020 20:36
136740     10/1/2020 4:41
136741     10/1/2020 4:41
136742     10/1/2020 4:41
Name: Timestamp, Length: 136743, dtype: object

In [12]:
data_ovs['Label'].value_counts()

DoS           52471
DDoS          48413
Probe         36372
BFA            1110
Web-Attack      192
BOTNET          164
Name: Label, dtype: int64

In [13]:
data_ovs.describe()

Unnamed: 0,Src Port,Dst Port,Protocol,Flow Duration,Tot Fwd Pkts,Tot Bwd Pkts,TotLen Fwd Pkts,TotLen Bwd Pkts,Fwd Pkt Len Max,Fwd Pkt Len Min,Fwd Pkt Len Mean,Fwd Pkt Len Std,Bwd Pkt Len Max,Bwd Pkt Len Min,Bwd Pkt Len Mean,Bwd Pkt Len Std,Flow Byts/s,Flow Pkts/s,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Flow IAT Min,Fwd IAT Tot,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Fwd IAT Min,Bwd IAT Tot,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Bwd PSH Flags,Fwd URG Flags,Bwd URG Flags,Fwd Header Len,Bwd Header Len,Fwd Pkts/s,Bwd Pkts/s,Pkt Len Min,Pkt Len Max,Pkt Len Mean,Pkt Len Std,Pkt Len Var,FIN Flag Cnt,SYN Flag Cnt,RST Flag Cnt,PSH Flag Cnt,ACK Flag Cnt,URG Flag Cnt,CWE Flag Count,ECE Flag Cnt,Down/Up Ratio,Pkt Size Avg,Fwd Seg Size Avg,Bwd Seg Size Avg,Fwd Byts/b Avg,Fwd Pkts/b Avg,Fwd Blk Rate Avg,Bwd Byts/b Avg,Bwd Pkts/b Avg,Bwd Blk Rate Avg,Subflow Fwd Pkts,Subflow Fwd Byts,Subflow Bwd Pkts,Subflow Bwd Byts,Init Fwd Win Byts,Init Bwd Win Byts,Fwd Act Data Pkts,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min
count,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0,138722.0
mean,28590.150805,4058.269092,3.911492,1705513.0,8.533751,2.95065,149.048759,292.504022,68.814644,0.026211,30.453656,41.11356,217.870792,0.021366,49.689263,89.839801,17572.731814,532892.9,222638.0,510914.5,1510970.0,25381.15,606068.5,112205.9,188488.3,431495.3,5020.109,1533497.0,411302.8,724376.0,1492555.0,37475.68,0.0,0.075777,0.0,0.028827,42.502963,67.32648,1849.774102,531043.1,0.021366,232.477365,40.564308,77.829315,35137.435389,0.058808,0.30089,0.000822,0.075777,0.262785,0.028827,0.0,0.0,0.541313,45.467341,30.453656,49.689263,0.0,0.0,0.0,0.0,0.0,0.0,8.533751,149.048759,2.95065,292.504022,-1.0,264.997477,1.360224,0.0,50271.55,1583.963,52426.07,49411.42,1420137.0,46330.44,1468712.0,1380748.0
std,23855.062703,11594.755381,2.877929,11072960.0,2445.718229,14.918974,1935.634892,741.152741,194.641622,0.962083,75.575257,101.877461,497.092119,0.877963,113.725114,203.82982,52223.440254,797947.6,1941544.0,3625125.0,10364830.0,1245281.0,6242851.0,1280338.0,2349113.0,4987373.0,137900.8,10591860.0,3226534.0,5233946.0,10431210.0,1631001.0,0.0,0.264643,0.0,0.167322,596.607226,480.067202,9464.28006,799063.2,0.877963,515.154413,87.959755,170.529302,84145.832134,0.235266,0.458646,0.028655,0.264643,0.440148,0.167322,0.0,0.0,1.028293,98.494423,75.575257,113.725114,0.0,0.0,0.0,0.0,0.0,0.0,2445.718229,1935.634892,14.918974,741.152741,0.0,3264.536634,47.433114,0.0,734199.4,77114.22,747673.5,732534.8,10249880.0,744302.3,10366890.0,10203320.0
min,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0244017,1.0,0.0,1.0,-634.0,0.0,0.0,0.0,0.0,-30.0,0.0,0.0,0.0,0.0,-634.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01177672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,477.327,2.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,391.9263,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,37831.0,80.0,6.0,2208.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1154.83,1293.0,0.0,2152.0,6.0,0.0,0.0,0.0,0.0,0.0,1781.0,1296.292,0.0,1750.5,5.0,0.0,0.0,0.0,0.0,0.0,40.0,0.0,964.5062,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,48394.0,1058.0,6.0,7661.75,2.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1000000.0,3365.256,2123.544,6744.0,1631.0,96.0,96.0,0.0,96.0,65.0,6972.75,3655.788,1720.301,6385.0,1573.0,0.0,0.0,0.0,0.0,64.0,72.0,99.072434,1000000.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,3.0,0.0,-1.0,59.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,63020.0,65389.0,17.0,119786100.0,910748.0,2039.0,77600.0,80143.0,1448.0,44.0,1201.181818,625.38812,2063.0,44.0,1068.573333,922.601648,958915.3657,2000000.0,82000000.0,57800000.0,82200000.0,82000000.0,118000000.0,41000000.0,58000000.0,82200000.0,18600000.0,120000000.0,84800000.0,57700000.0,85000000.0,84800000.0,0.0,1.0,0.0,1.0,65152.0,65264.0,1000000.0,2000000.0,44.0,2063.0,633.572519,725.954335,527009.6964,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,10.0,638.446154,1201.181818,1068.573333,0.0,0.0,0.0,0.0,0.0,0.0,910748.0,77600.0,2039.0,80143.0,-1.0,64240.0,2425.0,0.0,68100000.0,6092367.0,68100000.0,68100000.0,82200000.0,17800000.0,82200000.0,82200000.0


In [14]:
data_ovs['Timestamp']

0                 12/1/2020 1:14
1                 12/1/2020 1:14
2                 12/1/2020 1:14
3                 12/1/2020 1:14
4                 12/1/2020 1:14
                   ...          
138717    25/12/2019 05:19:55 PM
138718    25/12/2019 05:20:00 PM
138719    25/12/2019 05:20:00 PM
138720    25/12/2019 05:20:05 PM
138721    25/12/2019 05:20:05 PM
Name: Timestamp, Length: 138722, dtype: object

## Time Analysis

For the analysis purposes, merge all three together.

In [15]:
# As we noticed, timestamp format of these files is different.
# Therefore, convert the timestamps from the string format into a timestamp structure and merge afterwards
data_norm['tstamp'] = pd.to_datetime(data_norm['Timestamp'])
data_meta['tstamp'] = pd.to_datetime(data_meta['Timestamp'])
data_ovs['tstamp'] = pd.to_datetime(data_ovs['Timestamp'])

In [16]:
data_all = pd.concat([data_norm, data_meta, data_ovs])

In [17]:
len(data_all)

343889

In [18]:
# As we can see, timestamp are now in a unified datetime64[ns] format
data_all['tstamp']

0        2020-05-02 13:58:00
1        2020-05-02 13:58:00
2        2020-05-02 13:58:00
3        2020-05-02 13:58:00
4        2020-05-02 13:58:00
                 ...        
138717   2019-12-25 17:19:55
138718   2019-12-25 17:20:00
138719   2019-12-25 17:20:00
138720   2019-12-25 17:20:05
138721   2019-12-25 17:20:05
Name: tstamp, Length: 343889, dtype: datetime64[ns]

In [19]:
timestamps_sorted = data_all['tstamp'].sort_values(ascending=False)
timestamps_sorted

376      2020-12-01 01:27:00
479      2020-12-01 01:27:00
486      2020-12-01 01:27:00
485      2020-12-01 01:27:00
484      2020-12-01 01:27:00
                 ...        
138571   2019-12-25 17:16:49
138566   2019-12-25 17:16:48
138569   2019-12-25 17:16:48
138567   2019-12-25 17:16:48
138568   2019-12-25 17:16:48
Name: tstamp, Length: 343889, dtype: datetime64[ns]

In [20]:
# Compute span
span = timestamps_sorted.iloc[0] - timestamps_sorted.iloc[-1]
span

Timedelta('341 days 08:10:12')

In [21]:
# Are there gaps in the data (was the capture interrupted?)
def measure_real_capture_dur(data: pd.Series, gap_max_secs: int = 300) -> float:
    """Computes total timespan of the capture. Expects iterable containing timestamps objects sorted in a descending manner"""
    total_dur = pd.Timedelta(seconds=0)
    current_dur = pd.Timedelta(seconds=0)
    cont_durations = []
    last_tstamp = data.iloc[0]
    contiguous = True

    # Iterate through the dataframe to find out gaps
    for cur_tstamp in data:
        dur_gap = last_tstamp - cur_tstamp

        if dur_gap <= pd.Timedelta(seconds=gap_max_secs):
            total_dur += dur_gap
            current_dur += dur_gap
        else:
            cont_durations.append(current_dur)
            current_dur = pd.Timedelta(seconds=0)

            if contiguous:
                contiguous = False

        last_tstamp = cur_tstamp

    # Log the final continuous block and sort them via their length
    cont_durations.append(current_dur)
    cont_durations.sort(reverse=True)

    return total_dur, contiguous, cont_durations

In [22]:
measure_real_capture_dur(timestamps_sorted, int(span.total_seconds() * 0.01))

(Timedelta('5 days 06:39:09'),
 False,
 [Timedelta('2 days 00:56:10'),
  Timedelta('1 days 09:15:59'),
  Timedelta('0 days 19:59:00'),
  Timedelta('0 days 07:25:00'),
  Timedelta('0 days 06:07:00'),
  Timedelta('0 days 06:03:00'),
  Timedelta('0 days 04:40:00'),
  Timedelta('0 days 00:13:00')])

The capture is apparently scattered across a few hours throughout a year. The capture is discontinous.