In [1]:
import os
import random
import timeit
import wandb

import numpy as np
import pandas as pd
from sklearn.metrics import precision_recall_fscore_support
from tqdm import tqdm

import torch
from torch import nn
from torch import optim
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision.io import read_image

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
RANDOM_SEED = 42
BATCH_SIZE = 1024
EPOCHS = 10
LEARNING_RATE = 1e-3
PATCH_SIZE = 8
HEIGHT = 32
WIDTH = 64
IN_CHANNELS = 3
NUM_HEADS = 8
DROPOUT = 0.1
ADAM_WEIGHT_DECAY = 0
ADAM_BETAS = (0.9, 0.999)
ACTIVATION="gelu"
NUM_ENCODERS = 4
EMBED_DIM = (PATCH_SIZE ** 2) * IN_CHANNELS # (8**2)*3=192
NUM_PATCHES = (HEIGHT // PATCH_SIZE) * (WIDTH // PATCH_SIZE) # 4*8=32

random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed_all(RANDOM_SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

device = "cuda" if torch.cuda.is_available() else "cpu"

In [3]:
device

'cuda'

In [4]:
class PatchEmbedding(nn.Module):
    def __init__(self, embed_dim, patch_size, num_patches, dropout, in_channels):
        super().__init__()
        self.patcher = nn.Sequential(
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=embed_dim,
                kernel_size=patch_size,
                stride=patch_size,
            ),
            nn.Flatten(2))

        self.cls_token = nn.Parameter(torch.randn(size=(1, in_channels, embed_dim)), requires_grad=True)
        self.position_embeddings = nn.Parameter(torch.randn(size=(1, num_patches+in_channels, embed_dim)), requires_grad=True)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):        
        cls_token = self.cls_token.expand(x.shape[0], -1, -1)

        x = self.patcher(x).permute(0, 2, 1)
        x = torch.cat([cls_token, x], dim=1)
        x = self.position_embeddings + x
        x = self.dropout(x)
        return x

In [5]:
model = PatchEmbedding(EMBED_DIM, PATCH_SIZE, NUM_PATCHES, DROPOUT, IN_CHANNELS).to(device)
x = torch.randn(BATCH_SIZE, IN_CHANNELS, HEIGHT, WIDTH).to(device)
print(model(x).shape)

torch.Size([1024, 35, 192])


In [6]:
class ViT(nn.Module):
    def __init__(self, num_patches, num_classes, patch_size, embed_dim, num_encoders, num_heads, dropout, activation, in_channels):
        super().__init__()
        self.embeddings_block = PatchEmbedding(embed_dim, patch_size, num_patches, dropout, in_channels)
        
        encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads, dropout=dropout, activation=activation, batch_first=True, norm_first=True)
        self.encoder_blocks = nn.TransformerEncoder(encoder_layer, num_layers=num_encoders)

        self.mlp_head = nn.Sequential(
            nn.LayerNorm(normalized_shape=embed_dim),
            nn.Linear(in_features=embed_dim, out_features=num_classes)
        )

    def forward(self, x):
        x = self.embeddings_block(x)
        x = self.encoder_blocks(x)
        x = self.mlp_head(x[:, 0, :])  # Apply MLP on the CLS token only
        return x

In [7]:
model = ViT(NUM_PATCHES, 15, PATCH_SIZE, EMBED_DIM, NUM_ENCODERS, NUM_HEADS, DROPOUT, ACTIVATION, IN_CHANNELS).to(device)
x = torch.randn(BATCH_SIZE, IN_CHANNELS, HEIGHT, WIDTH).to(device)
print(model(x).shape) # BATCH_SIZE X NUM_CLASSES

torch.Size([1024, 15])




In [8]:
model

ViT(
  (embeddings_block): PatchEmbedding(
    (patcher): Sequential(
      (0): Conv2d(3, 192, kernel_size=(8, 8), stride=(8, 8))
      (1): Flatten(start_dim=2, end_dim=-1)
    )
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder_blocks): TransformerEncoder(
    (layers): ModuleList(
      (0-3): 4 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=192, out_features=192, bias=True)
        )
        (linear1): Linear(in_features=192, out_features=2048, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=2048, out_features=192, bias=True)
        (norm1): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (mlp_head): Sequential(
    (0): LayerNorm((192,), eps

In [9]:
class CicIds2017(Dataset):
    BASE_PATH = "C:\VScode Projects\FIIT_MASTERS\DP\datasets\CIC-IDS-2017"
    MAPPING_FILE = "\cicids2017_img.csv"
    index: int
    batch_size: int
    classes_count: int
    classes_list: list
    
    def __init__(self, shuffle: bool = False):        
        self.mapping = pd.read_csv(self.BASE_PATH+self.MAPPING_FILE)
        self.mapping = pd.get_dummies(self.mapping, columns=['label'])
        
        if shuffle:
            self.mapping = self.mapping.sample(frac=1) # shuffle
            
        self.classes_list = [label.split("_")[1] for label in self.mapping.columns[1:]]
        
        self.mapping = self.mapping.to_numpy()
        
        self.classes_count = len(self.mapping[0]) - 1
        
        self.transform = transforms.Compose([transforms.ToTensor()]) 
        
    def __len__(self):
        return len(self.mapping)
    
    def __getitem__(self, idx):
        img_name = self.mapping[idx, 0]
        img_path = os.path.join(self.BASE_PATH + "\image", img_name)
        img = read_image(img_path)
        
        label = [1 if label_class is True else 0 for label_class in self.mapping[idx, 1:]]
        label = np.array(label)
        
        return img, label
    
    def translate_encoded_label(self, encoded_label):
        return self.classes_list[list(encoded_label).index(1)]

In [10]:
dataset = CicIds2017()
print(len(dataset))

763416


In [12]:
train_split = int(0.9 * len(dataset))
val_split = int(0.8 * len(dataset))
train, test = random_split(dataset, [train_split, len(dataset) - train_split])
train, val = random_split(train, [val_split, len(train) - val_split])


train_dataloader = DataLoader(train, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(val, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test, batch_size=BATCH_SIZE, shuffle=True)

In [13]:
print(len(train))
print(len(val))
print(len(test))

610732
76342
76342


In [14]:
train_features, train_labels = next(iter(train_dataloader))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")

Feature batch shape: torch.Size([1024, 3, 32, 64])
Labels batch shape: torch.Size([1024, 15])


In [15]:
val_features, val_labels = next(iter(val_dataloader))
print(f"Feature batch shape: {val_features.size()}")
print(f"Labels batch shape: {val_labels.size()}")

Feature batch shape: torch.Size([1024, 3, 32, 64])
Labels batch shape: torch.Size([1024, 15])


In [16]:
test_features, test_labels = next(iter(test_dataloader))
print(f"Feature batch shape: {test_features.size()}")
print(f"Labels batch shape: {test_labels.size()}")

Feature batch shape: torch.Size([1024, 3, 32, 64])
Labels batch shape: torch.Size([1024, 15])


In [17]:
def precision_recall_f1(predictions, labels):
    y_true = []
    y_pred = []
    for x,y in zip(predictions, labels):
        y_pred.append(x)
        y_true.append(list(y).index(1.0))
        
    p, r, f1, _ = precision_recall_fscore_support(y_true, y_pred, average="macro")
    return p, r, f1

predictions = torch.Tensor(np.array([0, 1, 0, 0, 2]))
labels = torch.Tensor(np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0], [0, 0, 1]]))
p, r, f1 = precision_recall_f1(predictions, labels)
print(f"Precision: {p}")
print(f"Recall: {r}")
print(f"F1 score: {f1}")

Precision: 0.8888888888888888
Recall: 0.8333333333333334
F1 score: 0.8222222222222223


In [18]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), betas=ADAM_BETAS, lr=LEARNING_RATE, weight_decay=ADAM_WEIGHT_DECAY)

run = wandb.init(
    project = "DP",
    config={
        "learning_rate": LEARNING_RATE,
        "architecture": "ViT",
        "dataset": "CIC-IDS-2017-payload",
        "epochs": EPOCHS,
    }
)

start = timeit.default_timer()
for epoch in tqdm(range(EPOCHS), position=0, leave=True):
    model.train()
    train_labels = []
    train_preds = []
    train_running_loss = 0
    for idx, (img, label) in enumerate(tqdm(train_dataloader, position=0, leave=True)):
        img = img.float().to(device)
        label = label.float().to(device)
        y_pred = model(img)
        y_pred_label = torch.argmax(y_pred, dim=1)

        train_labels.extend(label.cpu().detach())
        train_preds.extend(y_pred_label.cpu().detach())
        
        loss = criterion(y_pred, label)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_running_loss += loss.item()
    train_loss = train_running_loss / (idx + 1)

    model.eval()
    val_labels = []
    val_preds = []
    val_running_loss = 0
    with torch.no_grad():
        for idx, (img, label) in enumerate(tqdm(val_dataloader, position=0, leave=True)):
            img = img.float().to(device)
            label = label.float().to(device)         
            y_pred = model(img)
            y_pred_label = torch.argmax(y_pred, dim=1)
            
            val_labels.extend(label.cpu().detach())
            val_preds.extend(y_pred_label.cpu().detach())
            
            loss = criterion(y_pred, label)
            val_running_loss += loss.item()
    val_loss = val_running_loss / (idx + 1)

    print("-"*30)
    print(f"Train Loss EPOCH {epoch+1}: {train_loss:.4f}")
    print(f"Valid Loss EPOCH {epoch+1}: {val_loss:.4f}")
    train_accuracy = sum(1 for x,y in zip(train_preds, train_labels) if x == list(y).index(1.0)) / len(train_labels)
    print(f"Train Accuracy EPOCH {epoch+1}: {train_accuracy:.4f}")
    val_accuracy = sum(1 for x,y in zip(val_preds, val_labels) if x == list(y).index(1.0)) / len(val_labels)
    print(f"Valid Accuracy EPOCH {epoch+1}: {val_accuracy:.4f}")
    precision, recall, f1score = precision_recall_f1(train_preds, train_labels)
    print(f"Precision: {precision}, Recall: {recall}, F1 score: {f1score}")
    print("-"*30)
    wandb.log(
        {
            "epoch": epoch,
            "train_acc": train_accuracy,
            "train_loss": train_loss,
            "val_acc": val_accuracy,
            "val_loss": val_loss,
            "precision": precision,
            "recall": recall,
            "f1 score": f1score
        }
    )


stop = timeit.default_timer()
print(f"Training Time: {stop-start:.2f}s")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mvikioza[0m. Use [1m`wandb login --relogin`[0m to force relogin


100%|██████████| 597/597 [13:06<00:00,  1.32s/it]
100%|██████████| 75/75 [01:33<00:00,  1.25s/it]


------------------------------
Train Loss EPOCH 1: 0.7134
Valid Loss EPOCH 1: 0.6477
Train Accuracy EPOCH 1: 0.6832
Valid Accuracy EPOCH 1: 0.6986


 10%|█         | 1/10 [15:46<2:22:02, 946.98s/it]

Precision: 0.6625560979956547, Recall: 0.5371732536353484, F1 score: 0.5493838581252948
------------------------------


100%|██████████| 597/597 [04:04<00:00,  2.44it/s]
100%|██████████| 75/75 [00:23<00:00,  3.26it/s]


------------------------------
Train Loss EPOCH 2: 0.6418
Valid Loss EPOCH 2: 0.6427
Train Accuracy EPOCH 2: 0.7033
Valid Accuracy EPOCH 2: 0.7015


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
 20%|██        | 2/10 [21:23<1:18:24, 588.10s/it]

Precision: 0.7117664233791945, Recall: 0.6249089455111758, F1 score: 0.6280224995094258
------------------------------


100%|██████████| 597/597 [04:07<00:00,  2.42it/s]
100%|██████████| 75/75 [00:22<00:00,  3.34it/s]


------------------------------
Train Loss EPOCH 3: 0.6364
Valid Loss EPOCH 3: 0.6434
Train Accuracy EPOCH 3: 0.7058
Valid Accuracy EPOCH 3: 0.7005


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
 30%|███       | 3/10 [27:05<55:30, 475.75s/it]  

Precision: 0.7261522222183449, Recall: 0.6428166434063433, F1 score: 0.6449318947546414
------------------------------


100%|██████████| 597/597 [04:06<00:00,  2.42it/s]
100%|██████████| 75/75 [00:23<00:00,  3.26it/s]


------------------------------
Train Loss EPOCH 4: 0.6342
Valid Loss EPOCH 4: 0.6394
Train Accuracy EPOCH 4: 0.7064
Valid Accuracy EPOCH 4: 0.7041


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
 40%|████      | 4/10 [32:53<42:30, 425.05s/it]

Precision: 0.727863866264671, Recall: 0.6468766923753716, F1 score: 0.649321946636165
------------------------------


100%|██████████| 597/597 [04:05<00:00,  2.43it/s]
100%|██████████| 75/75 [00:22<00:00,  3.31it/s]


------------------------------
Train Loss EPOCH 5: 0.6327
Valid Loss EPOCH 5: 0.6417
Train Accuracy EPOCH 5: 0.7074
Valid Accuracy EPOCH 5: 0.7037


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
 50%|█████     | 5/10 [38:33<32:52, 394.53s/it]

Precision: 0.7292429423492636, Recall: 0.64795294597029, F1 score: 0.6497445720826618
------------------------------


100%|██████████| 597/597 [04:05<00:00,  2.43it/s]
100%|██████████| 75/75 [00:22<00:00,  3.34it/s]


------------------------------
Train Loss EPOCH 6: 0.6323
Valid Loss EPOCH 6: 0.6366
Train Accuracy EPOCH 6: 0.7070
Valid Accuracy EPOCH 6: 0.7042


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
 60%|██████    | 6/10 [44:09<24:58, 374.74s/it]

Precision: 0.7295459703397743, Recall: 0.6512261051883417, F1 score: 0.6535665186196179
------------------------------


100%|██████████| 597/597 [04:07<00:00,  2.41it/s]
100%|██████████| 75/75 [00:22<00:00,  3.28it/s]


------------------------------
Train Loss EPOCH 7: 0.6309
Valid Loss EPOCH 7: 0.6388
Train Accuracy EPOCH 7: 0.7078
Valid Accuracy EPOCH 7: 0.7034


 70%|███████   | 7/10 [49:52<18:12, 364.22s/it]

Precision: 0.7270334947535144, Recall: 0.649756794397425, F1 score: 0.6505448509005892
------------------------------


100%|██████████| 597/597 [04:05<00:00,  2.43it/s]
100%|██████████| 75/75 [00:23<00:00,  3.19it/s]


------------------------------
Train Loss EPOCH 8: 0.6324
Valid Loss EPOCH 8: 0.6374
Train Accuracy EPOCH 8: 0.7071
Valid Accuracy EPOCH 8: 0.7044


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
 80%|████████  | 8/10 [55:32<11:52, 356.45s/it]

Precision: 0.7217947474927308, Recall: 0.6460885289728053, F1 score: 0.6465427093725223
------------------------------


100%|██████████| 597/597 [04:06<00:00,  2.43it/s]
100%|██████████| 75/75 [00:22<00:00,  3.33it/s]


------------------------------
Train Loss EPOCH 9: 0.6297
Valid Loss EPOCH 9: 0.6358
Train Accuracy EPOCH 9: 0.7083
Valid Accuracy EPOCH 9: 0.7045


 90%|█████████ | 9/10 [1:01:13<05:51, 351.82s/it]

Precision: 0.7979959812244561, Recall: 0.6646352507396663, F1 score: 0.6737594205716658
------------------------------


100%|██████████| 597/597 [04:13<00:00,  2.36it/s]
100%|██████████| 75/75 [00:22<00:00,  3.33it/s]


------------------------------
Train Loss EPOCH 10: 0.6294
Valid Loss EPOCH 10: 0.6379
Train Accuracy EPOCH 10: 0.7080
Valid Accuracy EPOCH 10: 0.7005


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
100%|██████████| 10/10 [1:06:59<00:00, 401.95s/it]


Precision: 0.736079967782809, Recall: 0.6606305952272553, F1 score: 0.6631911474833295
------------------------------
Training Time: 4016.24s


In [19]:
# Save as artifact for version control.
torch.save(model.state_dict(), '../saved/model_test_cic')
artifact = wandb.Artifact('model_test_cic', type='model')
artifact.add_file('../saved/model_test_cic')
run.log_artifact(artifact)
run.finish()

0,1
epoch,▁▂▃▃▄▅▆▆▇█
f1 score,▁▅▆▇▇▇▇▆█▇
precision,▁▄▄▄▄▄▄▄█▅
recall,▁▆▇▇▇▇▇▇██
train_acc,▁▇▇▇██████
train_loss,█▂▂▁▁▁▁▁▁▁
val_acc,▁▄▃█▇█▇██▃
val_loss,█▅▆▃▄▁▃▂▁▂

0,1
epoch,9.0
f1 score,0.66319
precision,0.73608
recall,0.66063
train_acc,0.70797
train_loss,0.62941
val_acc,0.70054
val_loss,0.63791
