# BOUN DDoS Dataset Analysis

* **Author:** Patrik Goldschmidt (igoldschmidt@fit.vut.cz)
* **Project:** Network Intrusion Datasets: A Survey, Limitations, and Recommendations
* **Date:** 2024

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# The dataset consists of essentially 2 DDoS files with frame-based features,
# distinguished by protocol (TCP/UDP). Both files live in the same directory, so the
# directory is kept in a single constant: pointing the notebook at another location
# only requires changing DATA_DIR.
DATA_DIR = '/data/bounddos'
DATA_TCP = f'{DATA_DIR}/BOUN_TCP_Anon.csv'
DATA_UDP = f'{DATA_DIR}/BOUN_UDP_Anon.csv'

In [3]:
# Load each capture file into its own DataFrame (default CSV parsing options).
tcp = pd.read_csv(filepath_or_buffer=DATA_TCP)
udp = pd.read_csv(filepath_or_buffer=DATA_UDP)

In [6]:
# Total number of rows across the TCP and UDP files
tcp.shape[0] + udp.shape[0]

17382940

In [4]:
# Summary of the TCP frame: row count, column names, dtypes and memory usage
tcp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9335605 entries, 0 to 9335604
Data columns (total 12 columns):
 #   Column            Dtype  
---  ------            -----  
 0   Time              float64
 1   Frame Number      int64  
 2   Frame_length      int64  
 3   Source_ip         object 
 4   Destination_IP    object 
 5   Source_Port       float64
 6   Destination_Port  float64
 7   SYN               object 
 8   ACK               object 
 9   RST               object 
 10  TTL               object 
 11  TCP_Protocol      object 
dtypes: float64(3), int64(2), object(7)
memory usage: 854.7+ MB


In [8]:
# Summary of the UDP frame: row count, column names, dtypes and memory usage
udp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8047335 entries, 0 to 8047334
Data columns (total 12 columns):
 #   Column            Dtype  
---  ------            -----  
 0   Time              float64
 1   Frame Number      int64  
 2   Frame_length      int64  
 3   Source_ip         object 
 4   Destination_IP    object 
 5   Source_Port       float64
 6   Destination_Port  float64
 7   SYN               object 
 8   ACK               object 
 9   RST               object 
 10  TTL               object 
 11  TCP_Protocol      object 
dtypes: float64(3), int64(2), object(7)
memory usage: 736.8+ MB


In [5]:
# Preview the first rows of the TCP frame
tcp.head()

Unnamed: 0,Time,Frame Number,Frame_length,Source_ip,Destination_IP,Source_Port,Destination_Port,SYN,ACK,RST,TTL,TCP_Protocol
0,0.0,1,64,10.50.197.71,10.50.192.199,49543.0,443.0,Not set,Set,Not set,127,TCP
1,2e-06,2,2978,10.50.192.199,10.50.197.71,443.0,49543.0,Not set,Set,Not set,127,TCP
2,4e-05,3,64,10.50.197.71,10.50.192.199,49543.0,443.0,Not set,Set,Not set,127,TCP
3,4.2e-05,4,4438,10.50.192.199,10.50.197.71,443.0,49543.0,Not set,Set,Not set,127,TCP
4,5.3e-05,5,64,10.50.197.71,10.50.192.199,49543.0,443.0,Not set,Set,Not set,127,TCP


In [7]:
# Get the total time of the TCP capture, taken as the Time value of the last row.
# Time is 0.0 in the first row (see the tcp.head() preview), so the last value equals
# the capture duration — assuming rows are ordered by time (TODO confirm).
tcp.iloc[-1]['Time']

482.662914

In [9]:
# Get the total time of the UDP capture, taken as the Time value of the last row.
# NOTE(review): assumes Time starts at 0 and rows are ordered by time — confirm for the UDP file.
udp.iloc[-1]['Time']

484.638749

In [10]:
# Get the total time of both captures combined: the last Time value of the TCP file
# plus the last Time value of the UDP file.
tcp.iloc[-1]['Time'] + udp.iloc[-1]['Time']

967.301663

## Search for Attacking Traffic

In [23]:
# IP address of the attacked (victim) host, taken from the dataset documentation.
# Used below to filter packets by their Destination_IP.
victim_ip = '10.50.199.86'

In [16]:
# Row counts per source address in the TCP file, most frequent first.
# Note: the column is not purely IPv4 — the output also shows a MAC-address-like value
# and an IPv6 address.
tcp['Source_ip'].value_counts()

Source_ip
10.50.197.71                1027055
10.50.211.244                455906
92.45.54.178                 394649
e8:e7:32:70:9b:c7            387919
10.50.192.199                375232
                             ...   
119.8.142.82                      1
200.75.168.203                    1
168.212.173.238                   1
165.250.129.151                   1
fe80::e48:85ff:feff:dbbb          1
Name: count, Length: 137521, dtype: int64

In [17]:
# Row counts per destination address in the TCP file, most frequent first
tcp['Destination_IP'].value_counts()

Destination_IP
10.50.197.71      754501
216.58.208.111    611887
10.50.209.134     394654
10.50.192.199     369086
10.50.211.244     354571
                   ...  
213.248.98.86          1
157.56.52.20           1
111.221.77.175         1
77.58.129.241          1
17.248.146.139         1
Name: count, Length: 14630, dtype: int64

In [27]:
# How many rows in the TCP file have the victim as their Destination_IP?
tcp['Destination_IP'].eq(victim_ip).sum()

125557

This count is in line with the dataset documentation.

In [28]:
# How many rows in the UDP file have the victim as their Destination_IP?
udp['Destination_IP'].eq(victim_ip).sum()

260646

In [29]:
# Are the timestamp and the packet ID (Frame Number) of the first packet sent to the
# victim in line with the documentation?
tcp.loc[tcp['Destination_IP'] == victim_ip]

Unnamed: 0,Time,Frame Number,Frame_length,Source_ip,Destination_IP,Source_Port,Destination_Port,SYN,ACK,RST,TTL,TCP_Protocol
1945580,80.222685,1945581,64,247.63.41.216,10.50.199.86,1984.0,80.0,Set,Not set,Not set,63,TCP
1945607,80.224990,1945608,64,56.122.27.45,10.50.199.86,1986.0,80.0,Set,Not set,Not set,63,TCP
1945622,80.226084,1945623,64,100.77.175.130,10.50.199.86,1987.0,80.0,Set,Not set,Not set,63,TCP
1945651,80.227203,1945652,64,135.63.45.32,10.50.199.86,1988.0,80.0,Set,Not set,Not set,63,TCP
1945673,80.228548,1945674,64,211.89.143.211,10.50.199.86,1989.0,80.0,Set,Not set,Not set,63,TCP
...,...,...,...,...,...,...,...,...,...,...,...,...
7885573,402.355771,7885574,64,179.211.46.142,10.50.199.86,52026.0,80.0,Set,Not set,Not set,63,TCP
7885580,402.356217,7885581,64,53.226.67.150,10.50.199.86,52027.0,80.0,Set,Not set,Not set,63,TCP
7885591,402.356663,7885592,64,14.183.36.188,10.50.199.86,52028.0,80.0,Set,Not set,Not set,63,TCP
7885596,402.357107,7885597,64,103.154.139.160,10.50.199.86,52029.0,80.0,Set,Not set,Not set,63,TCP
