In [1]:
!pip install stable-baselines3[extra] sb3-contrib imbalanced-learn > /dev/null 2>&1

In [1]:
import pandas as pd
import gym
import random
import numpy as np
import torch as th
import matplotlib.pyplot as plt
import seaborn as sns
import os

from imblearn.over_sampling import RandomOverSampler, SMOTE


from scipy.stats import gmean

from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix

from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.results_plotter import load_results, ts2xy

## Reward factor and minority classes

In [2]:
# Load the training split only to derive per-class reward weights from its label column.
train = pd.read_csv("/notebooks/FinalDataset/top003_train_encoded.csv")
y_train = train['attack_cat']

# 'balanced' weights grow as a class gets rarer, so they double as reward scale factors.
classes = np.unique(y_train)
class_weights = compute_class_weight(class_weight='balanced',
                                     classes=classes,
                                     y=y_train)

# A class counts as "minority" when its weight exceeds the geometric mean of all weights.
# The comparison is done on the ndarray (before the list conversion) and the mask is applied
# to `classes`, so the result holds class labels rather than array positions.
minority_classes = classes[class_weights > gmean(class_weights)]

# Plain Python list, indexed by class label inside the environment.
rw_factor = class_weights.tolist()

print(rw_factor)
print(minority_classes)

# Class distribution of the training labels, in percent.
proportions = y_train.value_counts(normalize=True) * 100
print(proportions)

[6.820410868124585, 7.859488354333715, 1.1188476695203153, 0.41002350503963986, 0.7544071834341213, 0.31105872397013934, 1.3080418136180219, 12.520681265206813, 114.35555555555555]
[0 1 7 8]
5    35.720301
3    27.098717
4    14.728268
2     9.930852
6     8.494462
0     1.629097
1     1.413719
7     0.887421
8     0.097163
Name: attack_cat, dtype: float64


## Environment

In [3]:
class TrainEnv(gym.Env):
    """Gym environment that frames supervised classification as an RL task.

    Each observation is one row of ``x``; the action is the predicted class
    label. A correct prediction is rewarded, an incorrect one is penalised,
    and the environment then advances to the next row (wrapping around at the
    end of the dataset).

    Parameters
    ----------
    dataset : tuple
        ``(x, y)`` pair of indexable arrays; ``x[i]`` is the feature vector
        and ``y[i]`` the label of sample ``i``.
    minority_classes : container
        Labels for which a misclassification ends the episode (only
        observable when ``single_step_episodes`` is False).
    rw_factor : sequence or None
        Reward magnitude per class, indexed by label. ``None`` means a flat
        reward of +1 / -1.
    single_step_episodes : bool
        When True (default, matching the previous hard-coded behaviour) every
        step ends the episode. When False, an episode ends on a minority-class
        misclassification or after ``len(x)`` steps.
    """

    def __init__(self, dataset, minority_classes, rw_factor=None, single_step_episodes=True):

        super().__init__()

        self.x, self.y = dataset
        if len(self.x) == 0:
            raise ValueError("dataset must contain at least one sample")

        self.minority_classes = minority_classes
        self.rw_factor = rw_factor
        self.single_step_episodes = single_step_episodes
        self.cnt = 0
        # Start at a random row so parallel copies do not all see the same sample first.
        self.idx = random.randint(0, len(self.x) - 1)
        # Defined up front so step() cannot hit a missing attribute before the first reset().
        self.expected_action = self.y[self.idx]
        self.action_space = gym.spaces.Discrete(len(np.unique(self.y)))
        # NOTE(review): the declared bounds assume features are scaled to [-1, 1] - confirm.
        self.observation_space = gym.spaces.Box(low=-1, high=1,
                                                shape=(self.x.shape[1], ),
                                                dtype=np.float32)

    def step(self, action):
        """Score ``action`` against the current label and move to the next sample.

        Returns the classic gym 4-tuple ``(obs, reward, done, info)`` where
        ``obs`` is the *next* sample's feature vector.
        """
        correct = bool(action == self.expected_action)

        # `is None` rather than `== None`: the latter is element-wise on ndarrays.
        magnitude = 1 if self.rw_factor is None else self.rw_factor[self.expected_action]
        reward = magnitude if correct else -magnitude

        done = self.single_step_episodes
        if not correct and self.expected_action in self.minority_classes:
            done = True

        # One full pass over the dataset also terminates the episode.
        self.cnt += 1
        if self.cnt >= len(self.x):
            done = True

        # Advance the cursor, wrapping to the first row after the last one.
        self.idx = (self.idx + 1) % len(self.x)

        obs = self.seq_observation()

        return obs, reward, done, {}

    def reset(self):
        """Return the observation at the current cursor and zero the step counter.

        The dataset cursor is deliberately not rewound, so consecutive
        episodes keep walking through the data.
        """
        obs = self.seq_observation()
        self.cnt = 0
        return obs

    def seq_observation(self):
        """Return ``x[idx]`` and remember ``y[idx]`` as the expected action."""
        obs = self.x[self.idx]
        self.expected_action = self.y[self.idx]
        return obs

## Callback

In [4]:
class SaveBest(BaseCallback):
    """Checkpoint the model whenever macro-F1 on ``eval_set`` improves; stop early on a plateau.

    Parameters
    ----------
    check_freq : int
        Evaluate every ``check_freq`` callback calls (``n_calls``), not every
        ``check_freq`` timesteps.
    log_dir : str
        Directory in which the checkpoint is written.
    eval_set : tuple
        ``(x, y)`` features and true labels used for scoring.
    name : str
        Checkpoint file name inside ``log_dir``.
    verbose : int
        Forwarded to ``BaseCallback``.
    patience : int
        Number of consecutive non-improving evaluations tolerated before
        training is stopped.
    max_score : float
        Initial best score to beat.
    """

    def __init__(self, check_freq: int, log_dir: str, eval_set, name: str, verbose: int = 0, patience: int = 50, max_score=-np.inf):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.log_dir = log_dir
        self.save_path = os.path.join(log_dir, name)
        self.max_score = max_score
        self.patience = patience
        self.failures = 0
        self.x_test, self.y_test = eval_set

    def _on_step(self) -> bool:
        """Evaluate on schedule; return False only when patience is exhausted.

        Always returns an explicit bool: falling through with ``None`` on
        non-evaluation steps can be read as "stop training" by callers that
        test the result for truthiness.
        """
        if self.n_calls % self.check_freq != 0:
            return True

        y_pred, _ = self.model.predict(self.x_test, deterministic=True)
        current_score = metrics.f1_score(self.y_test, y_pred, average="macro", zero_division=0)

        # Require a minimum gain so noise-level fluctuations do not reset patience.
        if current_score - self.max_score >= 0.001:
            self.max_score = current_score
            print(f'New best F1 score at {self.num_timesteps}: {self.max_score}')
            print('----- ----- -----')
            self.model.save(self.save_path)
            self.failures = 0
            return True

        self.failures += 1
        if self.failures >= self.patience:
            print(f'Early training stop at {self.num_timesteps}')
            return False

        return True

## Parallel environment setup

In [5]:
def make_env(rank, dataset, rw_factor, minority_classes, seed=0):
    """Build a factory that creates one monitored, seeded ``TrainEnv`` for a vectorized env.

    Parameters
    ----------
    rank : int
        Index of the worker; added to ``seed`` so each worker is seeded differently.
    dataset : tuple
        ``(x, y)`` pair handed to ``TrainEnv``.
    rw_factor : sequence or None
        Per-class reward magnitudes handed to ``TrainEnv`` (previously ignored
        in favour of a hard-coded ``None``).
    minority_classes : container
        Minority class labels handed to ``TrainEnv``.
    seed : int
        Base random seed.

    Returns
    -------
    callable
        Zero-argument function that constructs the environment.
    """
    def _init():
        env = TrainEnv(dataset=dataset, rw_factor=rw_factor, minority_classes=minority_classes)
        # NOTE(review): every worker passes the same './' to Monitor - confirm the
        # workers do not end up writing to the same log file.
        env = Monitor(env, './')
        env.seed(seed + rank)
        return env
    set_random_seed(seed)
    return _init

# Number of environment copies run in parallel subprocesses.
n_parallel_env = 8

## 0.03 (`top003_*_encoded.csv` splits)

In [6]:
# Read the pre-encoded top003 train/test splits.
train = pd.read_csv("/notebooks/FinalDataset/top003_train_encoded.csv")
test = pd.read_csv("/notebooks/FinalDataset/top003_test_encoded.csv")

# Separate the feature columns from the 'attack_cat' label column.
x_train, y_train = train.drop(columns='attack_cat'), train['attack_cat']
x_test, y_test = test.drop(columns='attack_cat'), test['attack_cat']

# The combined frames are not needed once features and labels are split out.
del train, test

# One shape per line: train features, train labels, test features, test labels.
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape, sep="\n")

(123504, 22)
(123504,)
(41169, 22)
(41169,)


### Unbalanced

In [8]:
# Distinct class labels present in the training split.
np.unique(y_train.to_numpy())

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [9]:
# Train PPO on the original (imbalanced) class distribution.
env = SubprocVecEnv([make_env(i, dataset=(x_train.values, y_train.values),
                              rw_factor=None, minority_classes=minority_classes)
                     for i in range(n_parallel_env)])

try:
    env.reset()

    # NOTE(review): checkpoint selection and early stopping use the test split, so the
    # report printed below is measured on data that also guided model selection.
    callback = SaveBest(check_freq=1000, log_dir="/notebooks/Models/RL", eval_set=(x_test, y_test), name="RL_003_Unbalanced")

    model = PPO(policy='MlpPolicy',
                env=env,
                device="cuda")

    # Integer timestep budget (was the float literal 5e6).
    model.learn(total_timesteps=5_000_000,
                callback=callback)
finally:
    # Always shut down the worker subprocesses, even if training fails or is interrupted.
    env.close()

# Evaluate the best checkpoint written by the callback, not the last training state.
model = PPO.load("/notebooks/Models/RL/RL_003_Unbalanced",
                 device="cuda")
y_pred, _ = model.predict(x_test, deterministic=True)
# zero_division=0 matches the callback's scoring and avoids undefined-metric warnings.
print(classification_report(y_test, y_pred, zero_division=0))
print(confusion_matrix(y_test, y_pred))

New best F1 score at 8000: 0.031374123827568864
----- ----- -----
New best F1 score at 24000: 0.21097027599912668
----- ----- -----
New best F1 score at 40000: 0.2258962711070274
----- ----- -----
New best F1 score at 56000: 0.23650045820163149
----- ----- -----
New best F1 score at 72000: 0.24333462883132087
----- ----- -----
New best F1 score at 88000: 0.27056283946369775
----- ----- -----
New best F1 score at 104000: 0.27098165293611526
----- ----- -----
New best F1 score at 120000: 0.2866390033802586
----- ----- -----
New best F1 score at 136000: 0.30834977541480374
----- ----- -----
New best F1 score at 152000: 0.3096915739723109
----- ----- -----
New best F1 score at 168000: 0.31004517933585163
----- ----- -----
New best F1 score at 216000: 0.31015523627408
----- ----- -----
New best F1 score at 232000: 0.3121613396242529
----- ----- -----
New best F1 score at 248000: 0.31307764354979856
----- ----- -----
New best F1 score at 264000: 0.31345037133023823
----- ----- -----
New best

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Balanced

In [7]:
# Oversample every class except the majority up to the majority count, then shuffle.
# The result is stored under new names so the original x_train / y_train stay intact:
# re-running this cell, or running the "Unbalanced" cell afterwards, still sees the raw split.
oversample = SMOTE(sampling_strategy='not majority', random_state=42)
x_train_bal, y_train_bal = oversample.fit_resample(x_train, y_train)
x_train_bal, y_train_bal = shuffle(x_train_bal, y_train_bal, random_state=42)

# Class distribution after resampling, in percent.
proportions = y_train_bal.value_counts(normalize=True) * 100
print(proportions)

env = SubprocVecEnv([make_env(i, dataset=(x_train_bal.values, y_train_bal.values),
                              rw_factor=None, minority_classes=minority_classes)
                     for i in range(n_parallel_env)])

try:
    env.reset()

    # NOTE(review): checkpoint selection and early stopping use the test split, so the
    # report printed below is measured on data that also guided model selection.
    callback = SaveBest(check_freq=1000, log_dir="/notebooks/Models/RL", eval_set=(x_test, y_test), name="RL_003_Balanced")

    model = PPO(policy='MlpPolicy',
                env=env,
                device="cuda")

    # Integer timestep budget (was the float literal 5e6).
    model.learn(total_timesteps=5_000_000,
                callback=callback)
finally:
    # Always shut down the worker subprocesses, even if training fails or is interrupted.
    env.close()

# Evaluate the best checkpoint written by the callback, not the last training state.
model = PPO.load("/notebooks/Models/RL/RL_003_Balanced",
                 device="cuda")
y_pred, _ = model.predict(x_test, deterministic=True)
# zero_division=0 matches the callback's scoring and avoids undefined-metric warnings.
print(classification_report(y_test, y_pred, zero_division=0))
print(confusion_matrix(y_test, y_pred))

1    11.111111
5    11.111111
0    11.111111
6    11.111111
7    11.111111
8    11.111111
2    11.111111
4    11.111111
3    11.111111
Name: attack_cat, dtype: float64
New best F1 score at 8000: 0.031374123827568864
----- ----- -----
New best F1 score at 24000: 0.29451802355876033
----- ----- -----
New best F1 score at 168000: 0.30727814228432204
----- ----- -----
New best F1 score at 184000: 0.3095614241061741
----- ----- -----
New best F1 score at 200000: 0.3150830337027636
----- ----- -----
New best F1 score at 328000: 0.32482166083190456
----- ----- -----
New best F1 score at 368000: 0.3348677120851134
----- ----- -----
New best F1 score at 384000: 0.3388967865246352
----- ----- -----
New best F1 score at 416000: 0.3427033993931456
----- ----- -----
New best F1 score at 432000: 0.3470447487651249
----- ----- -----
New best F1 score at 448000: 0.35251886961829404
----- ----- -----
New best F1 score at 480000: 0.37068925083387233
----- ----- -----
New best F1 score at 608000: 0.38093

## 0.02 (`top002_*_encoded.csv` splits)

In [8]:
# Read the pre-encoded top002 train/test splits.
train = pd.read_csv("/notebooks/FinalDataset/top002_train_encoded.csv")
test = pd.read_csv("/notebooks/FinalDataset/top002_test_encoded.csv")

# Separate the feature columns from the 'attack_cat' label column.
x_train, y_train = train.drop(columns='attack_cat'), train['attack_cat']
x_test, y_test = test.drop(columns='attack_cat'), test['attack_cat']

# The combined frames are not needed once features and labels are split out.
del train, test

# One shape per line: train features, train labels, test features, test labels.
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape, sep="\n")

(123504, 154)
(123504,)
(41169, 154)
(41169,)


### Unbalanced

In [7]:
# Train PPO on the original (imbalanced) class distribution.
env = SubprocVecEnv([make_env(i, dataset=(x_train.values, y_train.values),
                              rw_factor=None, minority_classes=minority_classes)
                     for i in range(n_parallel_env)])

try:
    env.reset()

    # NOTE(review): checkpoint selection and early stopping use the test split, so the
    # report printed below is measured on data that also guided model selection.
    callback = SaveBest(check_freq=1000, log_dir="/notebooks/Models/RL", eval_set=(x_test, y_test), name="RL_002_Unbalanced")

    model = PPO(policy='MlpPolicy',
                env=env,
                device="cuda")

    # Integer timestep budget (was the float literal 5e6).
    model.learn(total_timesteps=5_000_000,
                callback=callback)
finally:
    # Always shut down the worker subprocesses, even if training fails or is interrupted.
    env.close()

# Evaluate the best checkpoint written by the callback, not the last training state.
model = PPO.load("/notebooks/Models/RL/RL_002_Unbalanced",
                 device="cuda")
y_pred, _ = model.predict(x_test, deterministic=True)
# zero_division=0 matches the callback's scoring and avoids undefined-metric warnings.
print(classification_report(y_test, y_pred, zero_division=0))
print(confusion_matrix(y_test, y_pred))

New best F1 score at 8000: 0.06248339818900403
----- ----- -----
New best F1 score at 24000: 0.2974124781916323
----- ----- -----
New best F1 score at 40000: 0.32567643101881283
----- ----- -----
New best F1 score at 88000: 0.34007118012323007
----- ----- -----
New best F1 score at 104000: 0.3443251316523072
----- ----- -----
New best F1 score at 168000: 0.348157188171964
----- ----- -----
New best F1 score at 216000: 0.3491721922228718
----- ----- -----
New best F1 score at 248000: 0.35049144421689316
----- ----- -----
New best F1 score at 264000: 0.35328496840549334
----- ----- -----
New best F1 score at 432000: 0.3552712253129255
----- ----- -----
New best F1 score at 512000: 0.3668564133713813
----- ----- -----
New best F1 score at 608000: 0.3691435874542734
----- ----- -----
New best F1 score at 624000: 0.3716543882367617
----- ----- -----
New best F1 score at 696000: 0.37509897663426034
----- ----- -----
New best F1 score at 712000: 0.3766170745818781
----- ----- -----
New best F

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Balanced

In [9]:
# Oversample every class except the majority up to the majority count, then shuffle.
# The result is stored under new names so the original x_train / y_train stay intact:
# re-running this cell, or running the "Unbalanced" cell afterwards, still sees the raw split.
oversample = SMOTE(sampling_strategy='not majority', random_state=42)
x_train_bal, y_train_bal = oversample.fit_resample(x_train, y_train)
x_train_bal, y_train_bal = shuffle(x_train_bal, y_train_bal, random_state=42)

# Class distribution after resampling, in percent.
proportions = y_train_bal.value_counts(normalize=True) * 100
print(proportions)

env = SubprocVecEnv([make_env(i, dataset=(x_train_bal.values, y_train_bal.values),
                              rw_factor=None, minority_classes=minority_classes)
                     for i in range(n_parallel_env)])

try:
    env.reset()

    # NOTE(review): checkpoint selection and early stopping use the test split, so the
    # report printed below is measured on data that also guided model selection.
    callback = SaveBest(check_freq=1000, log_dir="/notebooks/Models/RL", eval_set=(x_test, y_test), name="RL_002_Balanced")

    model = PPO(policy='MlpPolicy',
                env=env,
                device="cuda")

    # Integer timestep budget (was the float literal 5e6).
    model.learn(total_timesteps=5_000_000,
                callback=callback)
finally:
    # Always shut down the worker subprocesses, even if training fails or is interrupted.
    env.close()

# Evaluate the best checkpoint written by the callback, not the last training state.
model = PPO.load("/notebooks/Models/RL/RL_002_Balanced",
                 device="cuda")
y_pred, _ = model.predict(x_test, deterministic=True)
# zero_division=0 matches the callback's scoring and avoids undefined-metric warnings.
print(classification_report(y_test, y_pred, zero_division=0))
print(confusion_matrix(y_test, y_pred))

1    11.111111
5    11.111111
0    11.111111
6    11.111111
7    11.111111
8    11.111111
2    11.111111
4    11.111111
3    11.111111
Name: attack_cat, dtype: float64
New best F1 score at 8000: 0.06248339818900403
----- ----- -----
New best F1 score at 24000: 0.26851826100340587
----- ----- -----
New best F1 score at 40000: 0.38336410120911174
----- ----- -----
New best F1 score at 136000: 0.3909546283801629
----- ----- -----
New best F1 score at 232000: 0.41614240962540555
----- ----- -----
New best F1 score at 264000: 0.4235622214428831
----- ----- -----
New best F1 score at 280000: 0.42988339428888983
----- ----- -----
New best F1 score at 416000: 0.43107171019735935
----- ----- -----
New best F1 score at 808000: 0.4361675017718029
----- ----- -----
New best F1 score at 856000: 0.43731929406110776
----- ----- -----
New best F1 score at 888000: 0.43973347149279046
----- ----- -----
New best F1 score at 936000: 0.44261161139690025
----- ----- -----
New best F1 score at 1016000: 0.446

## 0.01 (`top001_*_encoded.csv` splits)

In [10]:
# Read the pre-encoded top001 train/test splits.
train = pd.read_csv("/notebooks/FinalDataset/top001_train_encoded.csv")
test = pd.read_csv("/notebooks/FinalDataset/top001_test_encoded.csv")

# Separate the feature columns from the 'attack_cat' label column.
x_train, y_train = train.drop(columns='attack_cat'), train['attack_cat']
x_test, y_test = test.drop(columns='attack_cat'), test['attack_cat']

# The combined frames are not needed once features and labels are split out.
del train, test

# One shape per line: train features, train labels, test features, test labels.
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape, sep="\n")

(123504, 168)
(123504,)
(41169, 168)
(41169,)


### Unbalanced

In [10]:
# Train PPO on the original (imbalanced) class distribution.
env = SubprocVecEnv([make_env(i, dataset=(x_train.values, y_train.values),
                              rw_factor=None, minority_classes=minority_classes)
                     for i in range(n_parallel_env)])

try:
    env.reset()

    # NOTE(review): checkpoint selection and early stopping use the test split, so the
    # report printed below is measured on data that also guided model selection.
    callback = SaveBest(check_freq=1000, log_dir="/notebooks/Models/RL", eval_set=(x_test, y_test), name="RL_001_Unbalanced")

    model = PPO(policy='MlpPolicy',
                env=env,
                device="cuda")

    # Integer timestep budget (was the float literal 5e6).
    model.learn(total_timesteps=5_000_000,
                callback=callback)
finally:
    # Always shut down the worker subprocesses, even if training fails or is interrupted.
    env.close()

# Evaluate the best checkpoint written by the callback, not the last training state.
model = PPO.load("/notebooks/Models/RL/RL_001_Unbalanced",
                 device="cuda")
y_pred, _ = model.predict(x_test, deterministic=True)
# zero_division=0 matches the callback's scoring and avoids undefined-metric warnings.
print(classification_report(y_test, y_pred, zero_division=0))
print(confusion_matrix(y_test, y_pred))

New best F1 score at 8000: 0.004977402259557516
----- ----- -----
New best F1 score at 24000: 0.269861871508707
----- ----- -----
New best F1 score at 40000: 0.27101956926538034
----- ----- -----
New best F1 score at 56000: 0.2812861378740166
----- ----- -----
New best F1 score at 72000: 0.31226619805037975
----- ----- -----
New best F1 score at 88000: 0.3316931189805285
----- ----- -----
New best F1 score at 104000: 0.33826705376956645
----- ----- -----
New best F1 score at 120000: 0.34454247451006964
----- ----- -----
New best F1 score at 136000: 0.3461584126744963
----- ----- -----
New best F1 score at 152000: 0.34876061113093715
----- ----- -----
New best F1 score at 168000: 0.3522749859012811
----- ----- -----
New best F1 score at 232000: 0.3533117340115342
----- ----- -----
New best F1 score at 312000: 0.3566733968603422
----- ----- -----
New best F1 score at 496000: 0.35785437464006264
----- ----- -----
New best F1 score at 560000: 0.3725078649172206
----- ----- -----
New best F

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Balanced

In [11]:
# Oversample every class except the majority up to the majority count, then shuffle.
# The result is stored under new names so the original x_train / y_train stay intact:
# re-running this cell, or running the "Unbalanced" cell afterwards, still sees the raw split.
oversample = SMOTE(sampling_strategy='not majority', random_state=42)
x_train_bal, y_train_bal = oversample.fit_resample(x_train, y_train)
x_train_bal, y_train_bal = shuffle(x_train_bal, y_train_bal, random_state=42)

# Class distribution after resampling, in percent.
proportions = y_train_bal.value_counts(normalize=True) * 100
print(proportions)

env = SubprocVecEnv([make_env(i, dataset=(x_train_bal.values, y_train_bal.values),
                              rw_factor=None, minority_classes=minority_classes)
                     for i in range(n_parallel_env)])

try:
    env.reset()

    # NOTE(review): checkpoint selection and early stopping use the test split, so the
    # report printed below is measured on data that also guided model selection.
    callback = SaveBest(check_freq=1000, log_dir="/notebooks/Models/RL", eval_set=(x_test, y_test), name="RL_001_Balanced")

    model = PPO(policy='MlpPolicy',
                env=env,
                device="cuda")

    # Integer timestep budget (was the float literal 5e6).
    model.learn(total_timesteps=5_000_000,
                callback=callback)
finally:
    # Always shut down the worker subprocesses, even if training fails or is interrupted.
    env.close()

# Evaluate the best checkpoint written by the callback, not the last training state.
model = PPO.load("/notebooks/Models/RL/RL_001_Balanced",
                 device="cuda")
y_pred, _ = model.predict(x_test, deterministic=True)
# zero_division=0 matches the callback's scoring and avoids undefined-metric warnings.
print(classification_report(y_test, y_pred, zero_division=0))
print(confusion_matrix(y_test, y_pred))

1    11.111111
5    11.111111
0    11.111111
6    11.111111
7    11.111111
8    11.111111
2    11.111111
4    11.111111
3    11.111111
Name: attack_cat, dtype: float64
New best F1 score at 8000: 0.004977402259557516
----- ----- -----
New best F1 score at 24000: 0.3495081034789635
----- ----- -----
New best F1 score at 40000: 0.37333170601711985
----- ----- -----
New best F1 score at 56000: 0.39712282960590695
----- ----- -----
New best F1 score at 232000: 0.40018223698967914
----- ----- -----
New best F1 score at 248000: 0.406147225554882
----- ----- -----
New best F1 score at 264000: 0.4093807975555897
----- ----- -----
New best F1 score at 280000: 0.4110802786604639
----- ----- -----
New best F1 score at 328000: 0.4188704120435577
----- ----- -----
New best F1 score at 384000: 0.43553736172351254
----- ----- -----
New best F1 score at 416000: 0.44256815680357897
----- ----- -----
New best F1 score at 432000: 0.44720356446211273
----- ----- -----
New best F1 score at 560000: 0.4520030