<a href="https://colab.research.google.com/github/zeton24/gsn_iot_anomalies_detection/blob/main/GSN_projekt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Convolutional (Conv2d)
https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html#torch.nn.Conv2d

Average Pooling https://pytorch.org/docs/stable/generated/torch.nn.AvgPool2d.html#torch.nn.AvgPool2d

Normalization
https://pytorch.org/docs/stable/generated/torch.nn.BatchNorm1d.html#torch.nn.BatchNorm1d

Spatial Dropout
https://pytorch.org/docs/stable/generated/torch.nn.Dropout2d.html#torch.nn.Dropout2d

Fully connected layer -> nn.Linear()


In [1]:
import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim
from torch.utils.data import DataLoader

In [2]:
import torch

## Dataset i Dataloader

todo
 - transformy w datasecie
 - tensory na wyjściu dataloadera

In [3]:
def to_one_hot(category, n_classes):
  """Encode a class index as a one-hot vector.

  Args:
    category: Index of the entry that should be set to 1.
    n_classes: Length of the returned vector.

  Returns:
    A 1-D tensor of ``n_classes`` zeros (torch's default dtype) with a single
    1 written at position ``category``.
  """
  one_hot = torch.zeros(n_classes)
  one_hot[category] = 1
  return one_hot

In [4]:
# Adapted from https://pytorch.org/tutorials/beginner/basics/data_tutorial.html
# and heavily modified: the tutorial reads a folder of images, here there is a
# single text file with one sample per line.

class IoTDataset(torch.utils.data.Dataset):
  """Map-style dataset over a ';'-separated text file with one sample per line.

  The file is scanned once, at construction time, to record the byte offset at
  which every non-blank row starts. After that ``len()`` is O(1) and
  ``__getitem__`` seeks directly to the requested row instead of re-reading the
  file from the top. No file handle is kept open between calls: each read opens
  and closes the file itself.

  Attributes set by ``__init__``:
    input_path: Path of the data file.
    dict_of_classes: Mapping from the class name in the last column to its index.
    n_classes: Length of the one-hot label vector.
    line_offsets: Byte offset of the start of every non-blank row.
  """

  def __init__(self, input_file):
    """Index the rows of ``input_file`` (a path to the ';'-separated data file)."""
    self.input_path = input_file
    self.dict_of_classes = {"Mirai-Ackflooding": 0, "Mirai-Hostbruteforceg": 1, "Mirai-UDP Flooding": 2, "Mirai-HTTP Flooding": 3,
                            "DoS-Synflooding": 4, "Scan Port OS": 5, "Normal": 6} # TODO: remaining classes
    self.n_classes = 16 # len(self.dict_of_classes)
    self.line_offsets = self._index_rows(input_file)

  @staticmethod
  def _index_rows(path):
    """Return the byte offset of the start of every non-blank line in ``path``."""
    offsets = []
    position = 0
    # Binary mode so that len(raw_line) is an exact byte count usable with seek().
    with open(path, "rb") as data_file:
      for raw_line in data_file:
        if raw_line.strip():
          offsets.append(position)
        position += len(raw_line)
    return offsets

  def __len__(self):
    """Number of non-blank rows found when the file was indexed."""
    return len(self.line_offsets)

  def __getitem__(self, idx):
    """Return ``(image, label)`` for row ``idx``.

    Args:
      idx: Row index; negative values count from the end.

    Returns:
      image: float32 tensor of shape (1, 64) built from the first 64 fields.
      label: one-hot tensor of length ``n_classes`` for the class named in the
        last field.

    Raises:
      IndexError: if ``idx`` is outside the indexed rows.
      KeyError: if the class name is not in ``dict_of_classes``.
    """
    n_rows = len(self.line_offsets)
    idx = int(idx)
    if idx < 0:
      idx = idx + n_rows
    if not 0 <= idx < n_rows:
      raise IndexError(f"index {idx} is out of range for a dataset with {n_rows} rows")

    with open(self.input_path, "rb") as data_file:
      data_file.seek(self.line_offsets[idx])
      line = data_file.readline().decode()

    data = line.rstrip("\r\n").split(";") # eventually switch to ","

    # NOTE(review): the ad-hoc loading cell later in this notebook reads
    # data[1:65] from the same file - confirm which slice is the right one.
    image = torch.tensor([float(value) for value in data[:64]], dtype=torch.float32)
    image = torch.reshape(image, (1,64))

    label = self.dict_of_classes[data[-1]] # -2 or -3 for the simpler cases
    label = to_one_hot(label, self.n_classes)

    return image, label

In [6]:
# File-backed dataset over the sample CSV, served one sample per batch in random order.
# NOTE(review): with batch_size=1 the BatchNorm layers that Model2D/Model3D apply to
# 1x1 feature maps get a single value per channel, which PyTorch rejects in
# training mode - confirm the batch size intended for those models.
dataset = IoTDataset("test_dataset.csv")
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

In [29]:
# Sanity check: print shape/dtype of the features and the label of the first batch only.
for train_features, train_labels in dataloader:
  print(train_features.shape, train_features.dtype)
  print(train_labels)
  break

torch.Size([1, 1, 64]) torch.float32
tensor([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])


## Elementy do wykorzystania

### Modele sieci 1D, 2D, 3D

In [7]:
class Model1D(nn.Module):
    """1-D CNN classifier for inputs of shape (batch, 1, 64).

    Four convolutional blocks (Conv1d -> ReLU -> BatchNorm1d -> AvgPool1d(2) ->
    Dropout) widen the channels 1 -> 32 -> 64 -> 128 -> 256 while every pooling
    step halves the length (64 -> 32 -> 16 -> 8 -> 4). The flattened
    256 * 4 = 1024 features are classified by a two-layer fully connected head.

    Args:
        n_classes: Number of output logits. Defaults to 16, the value that was
            previously hard-coded in the last linear layer.
    """

    def __init__(self, n_classes=16):
        super().__init__()
        self.convblock1 = self._conv_block(1, 32)
        self.convblock2 = self._conv_block(32, 64)
        self.convblock3 = self._conv_block(64, 128)
        self.convblock4 = self._conv_block(128, 256)

        # The name of this block is open to discussion.
        self.evaluator = nn.Sequential(
            nn.Flatten(),
            nn.Linear(1024, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, n_classes)
            # No activation at the end: nn.CrossEntropyLoss applies the softmax itself.
        )

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Build one Conv1d -> ReLU -> BatchNorm1d -> AvgPool1d(2) -> Dropout block."""
        return nn.Sequential(
            nn.Conv1d(in_channels=in_channels, out_channels=out_channels, kernel_size=5, padding='same'),
            nn.ReLU(inplace=True),
            nn.BatchNorm1d(out_channels),
            nn.AvgPool1d(2),
            nn.Dropout(p=0.05)
        )

    def forward(self, x):
        """Return the (batch, n_classes) logits for ``x`` of shape (batch, 1, 64)."""
        x = self.convblock1(x)
        x = self.convblock2(x)
        x = self.convblock3(x)
        x = self.convblock4(x)
        x = self.evaluator(x)
        return x

In [8]:
class Model2D(nn.Module):
    """2-D CNN classifier; each 64-value sample is viewed as a 1 x 8 x 8 image.

    Blocks 1-3 are Conv2d -> ReLU -> BatchNorm2d -> AvgPool2d(2) -> Dropout and
    shrink the image 8x8 -> 4x4 -> 2x2 -> 1x1; block 4 has no pooling. The 256
    remaining features are classified by a two-layer fully connected head.

    Args:
        n_classes: Number of output logits. Defaults to 16, the value that was
            previously hard-coded in the last linear layer.
    """

    def __init__(self, n_classes=16):
        super().__init__()
        self.convblock1 = self._conv_block(1, 32)
        self.convblock2 = self._conv_block(32, 64)
        self.convblock3 = self._conv_block(64, 128)

        # The paper applies normalization in this block as well. Its input is
        # [batch_size, 256, 1, 1]; BatchNorm2d computes its statistics per
        # channel over the batch and spatial positions, so in training mode it
        # only has a single value per channel when batch_size == 1, and PyTorch
        # raises an error in that case. With batch_size > 1 (or in eval mode,
        # where running statistics are used) the layer works.
        self.convblock4 = self._conv_block(128, 256, pool=False)

        self.evaluator = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, n_classes)
        )

    @staticmethod
    def _conv_block(in_channels, out_channels, pool=True):
        """Build Conv2d -> ReLU -> BatchNorm2d -> [AvgPool2d(2)] -> Dropout.

        The pooling layer is only included when ``pool`` is true.
        """
        layers = [
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=5, padding='same'),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(out_channels),
        ]
        if pool:
            layers.append(nn.AvgPool2d(2))
        layers.append(nn.Dropout(p=0.05))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the (batch, n_classes) logits for ``x`` holding 64 values per sample."""
        x = torch.reshape(x, (x.shape[0],1,8,8)) # this reshape can move elsewhere if a better place is found
        x = self.convblock1(x)
        x = self.convblock2(x)
        x = self.convblock3(x)
        x = self.convblock4(x)
        x = self.evaluator(x)
        return x

In [9]:
class Model3D(nn.Module):
    """3-D CNN classifier; each 64-value sample is viewed as a 1 x 4 x 4 x 4 volume.

    Blocks 1-2 are Conv3d -> ReLU -> BatchNorm3d -> AvgPool3d(2) -> Dropout and
    shrink the volume 4^3 -> 2^3 -> 1^3; blocks 3-4 have no pooling. The 256
    remaining features are classified by a two-layer fully connected head.

    Args:
        n_classes: Number of output logits. Defaults to 16, the value that was
            previously hard-coded in the last linear layer.
    """

    def __init__(self, n_classes=16):
        super().__init__()
        self.convblock1 = self._conv_block(1, 32)
        self.convblock2 = self._conv_block(32, 64)
        # From here on the volume is 1 x 1 x 1, so there is nothing left to pool.
        self.convblock3 = self._conv_block(64, 128, pool=False)
        self.convblock4 = self._conv_block(128, 256, pool=False)

        self.evaluator = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, n_classes)
        )

    @staticmethod
    def _conv_block(in_channels, out_channels, pool=True):
        """Build Conv3d -> ReLU -> BatchNorm3d -> [AvgPool3d(2)] -> Dropout.

        The pooling layer is only included when ``pool`` is true.
        """
        layers = [
            nn.Conv3d(in_channels=in_channels, out_channels=out_channels, kernel_size=5, padding='same'),
            nn.ReLU(inplace=True),
            nn.BatchNorm3d(out_channels),
        ]
        if pool:
            layers.append(nn.AvgPool3d(2))
        layers.append(nn.Dropout(p=0.05))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the (batch, n_classes) logits for ``x`` holding 64 values per sample."""
        x = torch.reshape(x, (x.shape[0],1,4,4,4))
        x = self.convblock1(x)
        x = self.convblock2(x)
        x = self.convblock3(x)
        x = self.convblock4(x)
        x = self.evaluator(x)
        return x

### Batch loader

In [10]:
# Based on the dataset class from the labs - yields consecutive batches.
# Nothing fancy, but it is only used here for tests anyway.

class Dataset:
  """Sequential batch iterator over in-memory sample and label tensors.

  Args:
    data: Indexable collection of samples; ``data[k]`` is copied into a
      64-element row of the batch.
    labels: 2-D tensor of labels, one row per sample.
    batch_size: Number of samples per yielded batch.

  Only ``len(data) // batch_size`` full batches are produced; samples left
  over at the end are not yielded.
  """

  def __init__(self, data, labels, batch_size=1):
    self.data = data
    self.labels = labels
    self.batch_size = batch_size
    self.n_batches = len(self.data) // batch_size

  def __iter__(self):
    """Yield ``(inputs, labels)`` pairs of freshly allocated float tensors."""
    for batch_idx in range(self.n_batches):
      first_sample = batch_idx * self.batch_size

      batch_inputs = torch.zeros(self.batch_size, 1, 64) # hard-coded shape
      batch_labels = torch.zeros(self.batch_size, self.labels.shape[1])

      for row in range(self.batch_size):
        sample_idx = first_sample + row
        batch_inputs[row, 0] = self.data[sample_idx]
        batch_labels[row] = self.labels[sample_idx]

      yield batch_inputs, batch_labels

  # todo - jakiś shuffle by się przydał, ale to tylko jeśli byśmy z tego korzystały

### Funkcje do treningu i testowania

In [11]:
def train(net, loader, optimizer, criterion, epochs, loss_threshold=0.1, log_every=5):
  """Train ``net`` in place and print the summed loss of selected epochs.

  Args:
    net: Module to train; switched to training mode.
    loader: Iterable of ``(inputs, labels)`` batches, iterated once per epoch.
    optimizer: Optimizer that updates the parameters of ``net``.
    criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
    epochs: Maximum number of epochs.
    loss_threshold: Training stops early once the loss summed over one epoch
      falls below this value.
    log_every: The epoch loss is printed every ``log_every`` epochs.

  Returns:
    None.
  """
  net.train()
  for epoch in range(epochs):
    epoch_loss = 0.
    n_batches = 0
    for inputs, labels in loader:

      # zero the parameter gradients
      optimizer.zero_grad()

      outputs = net(inputs)
      loss = criterion(outputs, labels)

      loss.backward()
      optimizer.step()

      # .item() yields a plain float; adding the tensor itself would keep
      # autograd history alive across the whole epoch.
      epoch_loss += loss.item()
      n_batches += 1

    # Stop early once the results are good enough.
    if epoch_loss < loss_threshold:
      print(f'[{epoch + 1}, {n_batches:5d}] eloss: {epoch_loss:.3f}')
      break

    if (epoch + 1) % log_every == 0: # periodic progress report
      print(f'[{epoch + 1}, {n_batches:5d}] eloss: {epoch_loss:.3f}')

In [12]:
def test(network, test_set, n_classes = 16):
  network.eval() # Przełącza sieć w tryb testowania, m. in. wyłącza dropouta.
  err_matrix = torch.zeros((n_classes, n_classes), dtype=int)

  with torch.no_grad():

    for inputs, labels in test_set:

      outputs = network(inputs)

      _, label = torch.max(labels, 1)
      _, predicted = torch.max(outputs, 1)

      for truth, prediction in zip(label, predicted):
        err_matrix[truth, prediction] += 1

  return err_matrix

In [13]:
def rand_and_predict(dataset_size, output_length):
  """Create random one-hot labels.

  Args:
    dataset_size: Number of label rows to generate.
    output_length: Number of classes, i.e. the width of every row.

  Returns:
    A ``(dataset_size, output_length)`` float tensor in which every row holds
    a single 1 at a column drawn with ``torch.randint``.
  """
  one_hot_labels = torch.zeros(dataset_size, output_length)
  for row in range(dataset_size):
    # One draw per row, so torch's global RNG is consumed row by row.
    hot_column = torch.randint(output_length, (1,)).item()
    one_hot_labels[row, hot_column] = 1
  return one_hot_labels


## Test na małych danych

In [14]:
# Random features and random one-hot labels (24 samples, 16 classes) for smoke tests.
inputsR = torch.rand(24,1,64)
labelsR = rand_and_predict(24,16)

In [15]:
# Parse test_dataset.csv directly into pre-allocated tensors: 24 samples of
# shape (1, 64) and 16-wide one-hot labels. Rows beyond the 24th would not fit.
# NOTE(review): features are read from data[1:65] here, while
# IoTDataset.__getitem__ reads data[:64] from the same file - confirm which
# slice is the right one.
classes = {"Mirai-Ackflooding": 0, "Mirai-Hostbruteforceg": 1, "Mirai-UDP Flooding": 2, "Mirai-HTTP Flooding": 3, "DoS-Synflooding": 4, "Scan Port OS": 5, "Normal": 6}
inputs = torch.zeros(24,1,64)
labels = torch.zeros(24,16)
with open('test_dataset.csv') as f:
  for i, line in enumerate(f):
    data = line.split(";") # eventually switch to ","
    image = torch.Tensor(list(map(float, data[1:65])))
    image = torch.reshape(image, (1,64))

    # The class name is the last field; drop the trailing newline before the lookup.
    label = data[-1]
    if label.endswith("\n"):
      label = label[:-1]
    label = classes[label]
    label = to_one_hot(label, 16)

    inputs[i] = image
    labels[i] = label

In [21]:
# In-memory batch iterator over the tensors parsed above.
# NOTE(review): the training/evaluation cells visible below use `dataloader`, not `loader`.
loader = Dataset(inputs, labels, batch_size=1)

In [17]:
# Cross-entropy on raw logits, shared by all three models.
# NOTE(review): criterion_test is not referenced by the cells visible below.
criterion = nn.CrossEntropyLoss()
criterion_test = nn.CrossEntropyLoss()

### Model 1D

In [27]:
# Fresh 1-D model and its Adam optimizer.
# NOTE(review): the notes at the end of this notebook record lr=0.0001 for Adam
# in the paper, while lr=0.001 is used here - confirm which is intended.
network1 = Model1D()
optimizer1 = optim.Adam(network1.parameters(), lr=0.001)

In [28]:
# Train the 1-D model on the file-backed dataloader for at most 100 epochs.
train(network1, dataloader, optimizer1, criterion, 100)

[5,    24] eloss: 40.877
[10,    24] eloss: 33.170
[15,    24] eloss: 31.742
[20,    24] eloss: 30.291
[25,    24] eloss: 26.671
[30,    24] eloss: 27.405
[35,    24] eloss: 25.353
[40,    24] eloss: 24.087
[45,    24] eloss: 24.806
[50,    24] eloss: 23.498
[55,    24] eloss: 21.670
[60,    24] eloss: 39.874
[65,    24] eloss: 23.592
[70,    24] eloss: 23.179
[75,    24] eloss: 19.978
[80,    24] eloss: 20.069
[85,    24] eloss: 19.272
[90,    24] eloss: 22.855
[95,    24] eloss: 17.845
[100,    24] eloss: 17.003


In [30]:
# Evaluated on the same dataloader that was used for training, so this is
# accuracy on the training data.
result1 = test(network1, dataloader)
# Correct predictions are the diagonal of the confusion matrix.
print(f"Accuracy: {result1.diagonal().sum() * 100 / result1.sum():.2f}%")
print(result1)

Accuracy: 12.50%
tensor([[3, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [4, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [4, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])


### Model 2D

In [None]:
# Fresh 2-D model and its Adam optimizer.
# NOTE(review): the notes at the end of this notebook record lr=0.0001 for Adam
# in the paper, while lr=0.001 is used here - confirm which is intended.
network2 = Model2D()
optimizer2 = optim.Adam(network2.parameters(), lr=0.001)

In [None]:
# Train the 2-D model for at most 1000 epochs.
# NOTE(review): `dataloader` is built with batch_size=1, and Model2D's last
# BatchNorm2d sees a 1x1 feature map, i.e. one value per channel, which PyTorch
# rejects in training mode. The recorded log shows 4 batches per epoch, so it
# was presumably produced with a larger batch size - confirm.
train(network2, dataloader, optimizer2, criterion, 1000)

[5,     4] eloss: 7.275
[10,     4] eloss: 7.030
[15,     4] eloss: 6.238
[20,     4] eloss: 6.616
[25,     4] eloss: 7.006
[30,     4] eloss: 6.149
[35,     4] eloss: 6.485
[40,     4] eloss: 6.458
[45,     4] eloss: 7.641
[50,     4] eloss: 6.131
[55,     4] eloss: 6.651
[60,     4] eloss: 6.188
[65,     4] eloss: 7.138
[70,     4] eloss: 6.377
[75,     4] eloss: 6.658
[80,     4] eloss: 6.430
[85,     4] eloss: 7.010
[90,     4] eloss: 6.625
[95,     4] eloss: 6.997
[100,     4] eloss: 6.651
[105,     4] eloss: 6.966
[110,     4] eloss: 6.573
[115,     4] eloss: 6.902
[120,     4] eloss: 5.474
[125,     4] eloss: 5.982
[130,     4] eloss: 5.916
[135,     4] eloss: 6.248
[140,     4] eloss: 5.447
[145,     4] eloss: 6.966
[150,     4] eloss: 6.682
[155,     4] eloss: 6.024
[160,     4] eloss: 5.989
[165,     4] eloss: 6.268
[170,     4] eloss: 5.765
[175,     4] eloss: 5.820
[180,     4] eloss: 5.712
[185,     4] eloss: 6.296
[190,     4] eloss: 5.583
[195,     4] eloss: 6.952
[200, 

In [None]:
# Evaluated on the same dataloader that was used for training, so this is
# accuracy on the training data.
result2 = test(network2, dataloader)
# Correct predictions are the diagonal of the confusion matrix.
print(f"Accuracy: {result2.diagonal().sum() * 100 / result2.sum():.2f}%")
print(result2)

Accuracy: 50.00%
tensor([[2, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 2, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [2, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])


### Model 3D

In [None]:
# Fresh 3-D model and its Adam optimizer.
# NOTE(review): the notes at the end of this notebook record lr=0.0001 for Adam
# in the paper, while lr=0.001 is used here - confirm which is intended.
network3 = Model3D()
optimizer3 = optim.Adam(network3.parameters(), lr=0.001)

In [None]:
# Train the 3-D model for at most 1000 epochs.
# NOTE(review): `dataloader` is built with batch_size=1, and Model3D applies
# BatchNorm3d to 1x1x1 feature maps, i.e. one value per channel, which PyTorch
# rejects in training mode. The recorded log shows 4 batches per epoch, so it
# was presumably produced with a larger batch size - confirm.
train(network3, dataloader, optimizer3, criterion, 1000)

[5,     4] eloss: 7.161
[10,     4] eloss: 7.325
[15,     4] eloss: 7.343
[20,     4] eloss: 6.893
[25,     4] eloss: 6.531
[30,     4] eloss: 6.352
[35,     4] eloss: 6.177
[40,     4] eloss: 7.261
[45,     4] eloss: 5.986
[50,     4] eloss: 6.563
[55,     4] eloss: 7.978
[60,     4] eloss: 7.122
[65,     4] eloss: 6.607
[70,     4] eloss: 5.824
[75,     4] eloss: 6.536
[80,     4] eloss: 6.441
[85,     4] eloss: 7.166
[90,     4] eloss: 5.965
[95,     4] eloss: 6.461
[100,     4] eloss: 6.912
[105,     4] eloss: 5.504
[110,     4] eloss: 7.765
[115,     4] eloss: 5.779
[120,     4] eloss: 6.643
[125,     4] eloss: 6.589
[130,     4] eloss: 6.201
[135,     4] eloss: 6.680
[140,     4] eloss: 6.206
[145,     4] eloss: 6.293
[150,     4] eloss: 5.939
[155,     4] eloss: 6.773
[160,     4] eloss: 5.777
[165,     4] eloss: 5.747
[170,     4] eloss: 6.199
[175,     4] eloss: 6.494
[180,     4] eloss: 6.612
[185,     4] eloss: 6.787
[190,     4] eloss: 6.832
[195,     4] eloss: 6.887
[200, 

In [None]:
# Evaluated on the same dataloader that was used for training, so this is
# accuracy on the training data.
result3 = test(network3, dataloader)
# Correct predictions are the diagonal of the confusion matrix.
print(f"Accuracy: {result3.diagonal().sum() * 100 / result3.sum():.2f}%")
print(result3)

Accuracy: 66.67%
tensor([[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 2, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])


## Notatki, żeby nie szukać ciągle w artykule, bałagan trochę, ale są

koszt - cross entropy

aktywacja lineara - relu

aktywacja po 16 neuronach - softmax

zrobili L1, L2 i dropout, ale nie wchodziłam w to dokładnie

batch - 64, 128 dawały najlepsze wyniki

eksperymentalnie sprawdzili, że 100 epok daje zbieżność

w adamie dali 0.0001 learning rate
