<a href="https://colab.research.google.com/github/zeton24/gsn_iot_anomalies_detection/blob/dataset/datasets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from pathlib import Path

import gdown
import pandas as pd

In [8]:
class IoTDataset:
    """Indexable, iterable view over a gzip-pickled pandas DataFrame.

    Each sample is a ``(features, label)`` pair: ``features`` is one row of the
    feature frame as a NumPy array, and ``label`` is either a binary flag or a
    one-hot vector, depending on ``binary_classification``.

    Args:
        file_path: Path to a gzip-compressed pickle containing a DataFrame
            with "Label" and "Cat" columns.
        binary_classification: If True, labels are taken from the "Label"
            column ("Anomaly" -> 1, "Normal" -> 0). Otherwise labels are the
            one-hot encoding of the "Cat" column.

    Attributes:
        data: The loaded frame without its last two columns.
        labels: NumPy array of labels, aligned by position with ``data``.
        counter: Position of the next sample returned by ``__next__``.
        length: Number of samples, captured at construction time.
    """

    def __init__(self, file_path: str, binary_classification=False):
        # NOTE: unpickling can execute arbitrary code; only load files that
        # come from a trusted source.
        self.data = pd.read_pickle(file_path, compression="gzip")
        if binary_classification:
            mapping_binary = {"Anomaly": 1, "Normal": 0}
            # Stored as a NumPy array (not a Series) so that __getitem__
            # indexes labels by position, exactly like `self.data.iloc`.
            # A Series would be indexed by label and could raise KeyError or
            # return the wrong row when the frame has a non-default index.
            self.labels = self.data["Label"].map(mapping_binary).to_numpy()
        else:
            self.labels = pd.get_dummies(self.data["Cat"], dtype=int).to_numpy()
        # Assumes "Label" and "Cat" are the last two columns of the frame --
        # TODO confirm against the dataset schema.
        self.data = self.data.iloc[:, :-2]
        self.counter = 0
        self.length = len(self)

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair at position ``index``."""
        features = self.data.iloc[index].values
        return features, self.labels[index]

    def __iter__(self):
        """Start a fresh pass over the dataset and return ``self``."""
        # Reset the cursor so a previous loop that exited early (e.g. via
        # `break`) does not make this loop skip the leading samples.
        self.counter = 0
        return self

    def __next__(self):
        """Return the next sample, or raise StopIteration when exhausted."""
        if self.counter >= self.length:
            self.counter = 0
            raise StopIteration
        item = self[self.counter]
        self.counter += 1
        return item

# TODO: decide whether this dataset should be wrapped in a DataLoader.

In [10]:
# Google Drive file id of the gzip-pickled MQTT dataset.
mqtt_id = "1WdhBh-HbywkLLo1mROMjB_9b7kHOum0I"
mqtt_path = "mqtt_dataset.pkl.gzip"
# The archive is roughly 358 MB, so skip the download when the file is already
# on disk; this keeps "Restart & Run All" cheap.
if not Path(mqtt_path).exists():
    gdown.download(id=mqtt_id, output=mqtt_path)
mqtt_path

Downloading...
From (original): https://drive.google.com/uc?id=1WdhBh-HbywkLLo1mROMjB_9b7kHOum0I
From (redirected): https://drive.google.com/uc?id=1WdhBh-HbywkLLo1mROMjB_9b7kHOum0I&confirm=t&uuid=6d47487f-70e6-4c28-9401-ef243ea40dd7
To: /content/mqtt_dataset.pkl.gzip
100%|██████████| 358M/358M [00:03<00:00, 90.4MB/s]


'mqtt_dataset.pkl.gzip'

In [11]:
# Load the downloaded MQTT archive with the default (one-hot) labels.
mqtt_dataset = IoTDataset("mqtt_dataset.pkl.gzip")

In [12]:
# Peek at the first sample by index instead of a for/break loop: IoTDataset is
# its own iterator, so breaking out of a loop leaves its cursor advanced.
x, y = mqtt_dataset[0]
print(x, y)

[5.80000000e+01 2.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 3.44827586e+04 5.80000000e+01
 0.00000000e+00 5.80000000e+01 5.80000000e+01 5.80000000e+01
 5.80000000e+01 0.00000000e+00 5.80000000e+01 5.80000000e+01
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 6.40000000e+01
 0.00000000e+00 3.44827586e+04 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 1.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 2.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 1.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 2.29000000e+02 0.00000000e+00 0.00000000e+00 3.20000000e+01
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00] [1 0 0 0 0]


In [2]:
# Google Drive file id of the gzip-pickled intrusion dataset.
intrusion_id = "1rHGxn7L_BgR0lva1KESP-6_H-x2tCMn0"
intrusion_path = "intrusion_dataset.pkl.gzip"
# Skip the download (about 35 MB) when the file is already on disk so the
# notebook can be re-run without fetching it again.
if not Path(intrusion_path).exists():
    gdown.download(id=intrusion_id, output=intrusion_path)
intrusion_path

Downloading...
From (original): https://drive.google.com/uc?id=1rHGxn7L_BgR0lva1KESP-6_H-x2tCMn0
From (redirected): https://drive.google.com/uc?id=1rHGxn7L_BgR0lva1KESP-6_H-x2tCMn0&confirm=t&uuid=41a9341c-5ef7-4c4e-9480-5358180d0d13
To: /content/intrusion_dataset.pkl.gzip
100%|██████████| 34.6M/34.6M [00:01<00:00, 20.9MB/s]


'intrusion_dataset.pkl.gzip'

In [9]:
# Load the intrusion archive with one-hot labels and display its first
# (features, label) pair.
intrusion = IoTDataset(file_path="intrusion_dataset.pkl.gzip")
intrusion[0]

(array([ 7.50000000e+01,  1.00000000e+00,  1.00000000e+00,  9.82000000e+02,
         1.43000000e+03,  9.82000000e+02,  9.82000000e+02,  9.82000000e+02,
         0.00000000e+00,  1.43000000e+03,  1.43000000e+03,  1.43000000e+03,
         0.00000000e+00,  3.21600000e+07,  2.66666667e+04,  7.50000000e+01,
         0.00000000e+00,  7.50000000e+01,  7.50000000e+01,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  8.00000000e+00,
         8.00000000e+00,  1.33333333e+04,  1.33333333e+04,  9.82000000e+02,
         1.43000000e+03,  1.28066667e+03,  2.58652921e+02,  6.69013333e+04,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  1.00000000e+00,  1.92100000e+03,
         9.82000000e+02,  1.43000000e+03,  0.00000000e+00,  0.00000000e+00,
         1.0