In [1]:
import pandas as pd
import numpy as np
import os
import glob
import matplotlib.pyplot as plt
import time
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score, make_scorer, f1_score, roc_auc_score, roc_curve, auc, classification_report, confusion_matrix
import seaborn as sns
import warnings
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# NOTE(review): this suppresses every Python warning for the whole session,
# so deprecation/correctness warnings raised by libraries are not displayed.
warnings.filterwarnings('ignore')
# Remove pandas' display limits: every row and every column of a frame is
# rendered, so only display small slices (e.g. .head()) of large frames.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

In [2]:
# Base directory for data files: the process's current working directory
# (so the notebook must be launched from the folder that contains `data/`).
path = os.getcwd()

In [3]:
# Load the flow-level CSV found under <working dir>/data into `data`.
data = pd.read_csv(
    filepath_or_buffer=f'{path}/data/netdump_workHrs-2024-02-02_07.00.01.pcap_lycos.csv',
)

In [4]:
# Preview the first five rows of the loaded frame.
data.head(n=5)

Unnamed: 0,flow_id,src_addr,src_port,dst_addr,dst_port,ip_prot,timestamp,flow_duration,down_up_ratio,pkt_len_max,pkt_len_min,pkt_len_mean,pkt_len_var,pkt_len_std,bytes_per_s,pkt_per_s,fwd_pkt_per_s,bwd_pkt_per_s,fwd_pkt_cnt,fwd_pkt_len_tot,fwd_pkt_len_max,fwd_pkt_len_min,fwd_pkt_len_mean,fwd_pkt_len_std,fwd_pkt_hdr_len_tot,fwd_pkt_hdr_len_min,fwd_non_empty_pkt_cnt,bwd_pkt_cnt,bwd_pkt_len_tot,bwd_pkt_len_max,bwd_pkt_len_min,bwd_pkt_len_mean,bwd_pkt_len_std,bwd_pkt_hdr_len_tot,bwd_pkt_hdr_len_min,bwd_non_empty_pkt_cnt,iat_max,iat_min,iat_mean,iat_std,fwd_iat_tot,fwd_iat_max,fwd_iat_min,fwd_iat_mean,fwd_iat_std,bwd_iat_tot,bwd_iat_max,bwd_iat_min,bwd_iat_mean,bwd_iat_std,active_max,active_min,active_mean,active_std,idle_max,idle_min,idle_mean,idle_std,flag_SYN,flag_fin,flag_rst,flag_ack,flag_psh,fwd_flag_psh,bwd_flag_psh,flag_urg,fwd_flag_urg,bwd_flag_urg,flag_cwr,flag_ece,fwd_bulk_bytes_mean,fwd_bulk_pkt_mean,fwd_bulk_rate_mean,bwd_bulk_bytes_mean,bwd_bulk_pkt_mean,bwd_bulk_rate_mean,fwd_subflow_bytes_mean,fwd_subflow_pkt_mean,bwd_subflow_bytes_mean,bwd_subflow_pkt_mean,fwd_tcp_init_win_bytes,bwd_tcp_init_win_bytes,label
0,35.241.53.87-10.38.195.145-443-45614-6,35.241.53.87,443,10.38.195.145,45614,6,1706868024264707,5000049,0.0,73.0,56.0,70.166667,48.166667,6.940221,84.19917,1.199988,1.199988,0.0,6,421,73.0,56.0,70.166667,6.940221,192,32,6,0,0,0.0,0.0,0.0,0.0,0,0,0,2303447.0,374327.0,1000009.8,783876.1,5000049,2303447.0,374327.0,1000009.8,783876.1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,6,6,6,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,140.333333,2.0,0.0,0.0,274,-1,NeedLabel
1,31.13.67.32-10.38.76.94-443-54648-6,31.13.67.32,443,10.38.76.94,54648,6,1706868022900442,8576531,0.0,46.0,24.0,35.0,242.0,15.556349,8.161808,0.233195,0.233195,0.0,2,70,46.0,24.0,35.0,15.556349,64,32,2,0,0,0.0,0.0,0.0,0.0,0,0,0,8576531.0,8576531.0,8576531.0,0.0,8576531,8576531.0,8576531.0,8576531.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,2,2,2,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,35.0,1.0,0.0,0.0,273,-1,NeedLabel
2,31.13.67.32-10.38.2.184-443-43372-6,31.13.67.32,443,10.38.2.184,43372,6,1706868005817925,75680036,0.0,46.0,24.0,41.6,96.8,9.838699,2.748413,0.066068,0.066068,0.0,5,208,46.0,24.0,41.6,9.838699,160,32,5,0,0,0.0,0.0,0.0,0.0,0,0,0,37573442.0,4825207.0,18920009.0,14315710.0,75680036,37573442.0,4825207.0,18920009.0,14315710.0,0,0.0,0.0,0.0,0.0,4825207.0,4825207.0,4825207.0,0.0,22017221.0,11264166.0,16640693.5,7603558.0,0,1,0,5,5,5,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,41.6,1.0,0.0,0.0,265,-1,NeedLabel
3,190.98.126.8-10.37.13.26-443-59588-6,190.98.126.8,443,10.37.13.26,59588,6,1706868090818442,2,0.0,24.0,0.0,12.0,288.0,16.970563,12000000.0,1000000.0,1000000.0,0.0,2,24,24.0,0.0,12.0,16.970563,64,32,1,0,0,0.0,0.0,0.0,0.0,0,0,0,2.0,2.0,2.0,0.0,2,2.0,2.0,2.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,2,1,1,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,24.0,2.0,0.0,0.0,502,-1,NeedLabel
4,146.75.124.84-10.38.195.93-443-44062-6,146.75.124.84,443,10.38.195.93,44062,6,1706868096157540,48,0.0,51.0,0.0,25.0,651.0,25.514702,1562500.0,62500.0,62500.0,0.0,3,75,51.0,0.0,25.0,25.514702,96,32,2,0,0,0.0,0.0,0.0,0.0,0,0,0,46.0,2.0,24.0,31.1127,48,46.0,2.0,24.0,31.1127,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,3,2,2,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,75.0,3.0,0.0,0.0,287,-1,NeedLabel


In [5]:
# Keep only the flows sent from 10.37.7.254 to 10.37.26.145.
attack_data = data.loc[
    data['src_addr'].eq('10.37.7.254') & data['dst_addr'].eq('10.37.26.145')
]

In [6]:
# Preview the first five rows of the filtered flows.
attack_data.head(n=5)

Unnamed: 0,flow_id,src_addr,src_port,dst_addr,dst_port,ip_prot,timestamp,flow_duration,down_up_ratio,pkt_len_max,pkt_len_min,pkt_len_mean,pkt_len_var,pkt_len_std,bytes_per_s,pkt_per_s,fwd_pkt_per_s,bwd_pkt_per_s,fwd_pkt_cnt,fwd_pkt_len_tot,fwd_pkt_len_max,fwd_pkt_len_min,fwd_pkt_len_mean,fwd_pkt_len_std,fwd_pkt_hdr_len_tot,fwd_pkt_hdr_len_min,fwd_non_empty_pkt_cnt,bwd_pkt_cnt,bwd_pkt_len_tot,bwd_pkt_len_max,bwd_pkt_len_min,bwd_pkt_len_mean,bwd_pkt_len_std,bwd_pkt_hdr_len_tot,bwd_pkt_hdr_len_min,bwd_non_empty_pkt_cnt,iat_max,iat_min,iat_mean,iat_std,fwd_iat_tot,fwd_iat_max,fwd_iat_min,fwd_iat_mean,fwd_iat_std,bwd_iat_tot,bwd_iat_max,bwd_iat_min,bwd_iat_mean,bwd_iat_std,active_max,active_min,active_mean,active_std,idle_max,idle_min,idle_mean,idle_std,flag_SYN,flag_fin,flag_rst,flag_ack,flag_psh,fwd_flag_psh,bwd_flag_psh,flag_urg,fwd_flag_urg,bwd_flag_urg,flag_cwr,flag_ece,fwd_bulk_bytes_mean,fwd_bulk_pkt_mean,fwd_bulk_rate_mean,bwd_bulk_bytes_mean,bwd_bulk_pkt_mean,bwd_bulk_rate_mean,fwd_subflow_bytes_mean,fwd_subflow_pkt_mean,bwd_subflow_bytes_mean,bwd_subflow_pkt_mean,fwd_tcp_init_win_bytes,bwd_tcp_init_win_bytes,label
326,10.37.7.254-10.37.26.145-46458-5900-6,10.37.7.254,46458,10.37.26.145,5900,6,1706868301809249,342,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8771.93,8771.93,0.0,3,0,0.0,0.0,0.0,0.0,104,32,0,0,0,0.0,0.0,0.0,0.0,0,0,0,341.0,1.0,171.0,240.416306,342,341.0,1.0,171.0,240.416306,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,1,2,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,64240,-1,NeedLabel
3434,10.37.7.254-10.37.26.145-0-0-1,10.37.7.254,0,10.37.26.145,0,1,1706868301761873,1,0.0,12.0,0.0,6.0,72.0,8.485281,12000000.0,2000000.0,2000000.0,0.0,2,12,12.0,0.0,6.0,8.485281,16,8,1,0,0,0.0,0.0,0.0,0.0,0,0,0,1.0,1.0,1.0,0.0,1,1.0,1.0,1.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,2.0,0.0,0.0,-1,-1,NeedLabel
14750,10.37.7.254-10.37.26.145-0-0-1,10.37.7.254,0,10.37.26.145,0,1,1706869801333114,0,0.0,12.0,0.0,6.0,72.0,8.485281,0.0,0.0,0.0,0.0,2,12,12.0,0.0,6.0,8.485281,16,8,1,0,0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,2.0,0.0,0.0,-1,-1,NeedLabel
14752,10.37.7.254-10.37.26.145-49251-5900-6,10.37.7.254,49251,10.37.26.145,5900,6,1706873401678897,617,0.0,8.0,0.0,4.0,32.0,5.656854,12965.96,3241.491,3241.491,0.0,2,8,8.0,0.0,4.0,5.656854,20,0,1,0,0,0.0,0.0,0.0,0.0,0,0,0,617.0,617.0,617.0,0.0,617,617.0,617.0,617.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,1,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,2.0,0.0,0.0,0,-1,NeedLabel
14753,10.37.7.254-10.37.26.145-49251-80-6,10.37.7.254,49251,10.37.26.145,80,6,1706873401678948,568,0.0,8.0,0.0,4.0,32.0,5.656854,14084.51,3521.127,3521.127,0.0,2,8,8.0,0.0,4.0,5.656854,20,0,1,0,0,0.0,0.0,0.0,0.0,0,0,0,568.0,568.0,568.0,0.0,568,568.0,568.0,568.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,1,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,2.0,0.0,0.0,0,-1,NeedLabel


In [9]:
# Mark every flow from 10.37.7.254 to 10.37.26.145 with label 1.
# A single .loc[row_mask, column] assignment writes into `data` itself;
# the previous chained form (`data.label[mask] = 1`) can assign to a
# temporary copy and is a no-op under pandas Copy-on-Write.
is_attack_flow = (data['src_addr'] == '10.37.7.254') & (data['dst_addr'] == '10.37.26.145')
data.loc[is_attack_flow, 'label'] = 1

In [10]:
# Every flow that is not already labelled 1 becomes 0.
# Assigning the whole column at once avoids chained assignment
# (`data.label[mask] = 0`), which can write to a temporary copy, and leaves
# `label` as a clean integer 0/1 column instead of an object column that
# mixes ints with the former string values. Re-running this cell is safe.
data['label'] = (data['label'] == 1).astype(int)

In [11]:
# Count how many flows carry each label value.
data['label'].value_counts()

label
0    297532
1     69084
Name: count, dtype: int64