In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import time
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from sklearn import metrics
import scipy.spatial as sp
from torch.autograd import Variable
import argparse
import scipy.io

In [2]:
import pickle

In [3]:
# Command-line style configuration for the notebook. Each entry is
# (flag, keyword arguments forwarded to ``add_argument``).
_OPTION_TABLE = (
    ('--dataset', dict(default='NSL')),
    ('--beta', dict(type=float, default=0.1)),
    ("--dev", dict(help="device", default="cuda:0")),
    ("--epochs", dict(type=int, help="number of epochs for ae", default=5000)),
    ("--lr", dict(type=float, help="learning rate", default=1e-2)),
    ("--memlen", dict(type=int, help="size of memory", default=2048)),
    ("--seed", dict(type=int, help="random seed", default=0)),
)

parser = argparse.ArgumentParser()
for _flag, _options in _OPTION_TABLE:
    parser.add_argument(_flag, **_options)

# parse_known_args() is used rather than parse_args() so that unrecognised
# command-line arguments are collected in `unknown` instead of aborting.
args, unknown = parser.parse_known_args()

In [4]:
args

Namespace(dataset='NSL', beta=0.1, dev='cuda:0', epochs=5000, lr=0.01, memlen=2048, seed=0)

In [5]:
# Seed PyTorch's random number generator from the --seed option.
torch.manual_seed(args.seed)

# Placeholders for the data file and label file paths; both start unset.
nfile = lfile = None

In [6]:
#@title unused codes
# # set the data file and the label file
# if args.dataset == 'NSL':
#     nfile = 'data/nsl.txt'
#     lfile = 'data/nsllabel.txt'
# elif args.dataset == 'KDD':
#     nfile = '../data/kdd.txt'
#     lfile = '../data/kddlabel.txt'
# elif args.dataset == 'UNSW':
#     nfile = '../data/unsw.txt'
#     lfile = '../data/unswlabel.txt'
# elif args.dataset == 'DOS':
#     nfile = '../data/dos.txt'
#     lfile = '../data/doslabel.txt'
# elif args.dataset == 'XYZ':
#     nfile = '../data/xyz.txt'
#     lfile = '../data/xyzlabel.txt'
# else:
#     df = scipy.io.loadmat('../data/'+args.dataset+".mat")
#     numeric = torch.FloatTensor(df['X'])
#     labels = (df['y']).astype(float).reshape(-1)



In [7]:
# Use the device requested through --dev instead of always forcing 'cuda:0'.
# When a CUDA device is requested but PyTorch reports that CUDA is not
# available, fall back to the CPU so the rest of the notebook can still run.
device = torch.device(args.dev)
if device.type == 'cuda' and not torch.cuda.is_available():
    device = torch.device('cpu')
device

device(type='cuda', index=0)

### MemStream Model

In [8]:
class MemStream(nn.Module):
    """Single-layer autoencoder with a fixed-size memory used to score samples.

    The encoder maps ``in_dim`` features to ``2 * in_dim`` features through a
    linear layer followed by ``tanh``; the decoder is a single linear layer
    back to ``in_dim``. ``memory`` holds encoded vectors and ``mem_data`` holds
    the corresponding raw samples. ``forward`` returns the smallest L1
    distance between the encoding of the input and the rows of ``memory``.

    Args:
        in_dim: number of input features.
        params: dict with the keys ``'memory_len'`` (number of memory rows),
            ``'beta'`` (update threshold) and ``'lr'`` (Adam learning rate).
            An optional ``'device'`` entry selects the device for the model's
            tensors; when it is absent the notebook-level ``device`` is used.
    """

    def __init__(self, in_dim, params):
        super(MemStream, self).__init__()
        self.params = params
        self.in_dim = in_dim
        self.out_dim = in_dim*2
        self.memory_len = params['memory_len']

        # Optional per-model device; evaluated lazily so the notebook-level
        # `device` is only looked up when no explicit device was given.
        if 'device' in params:
            self.device = torch.device(params['device'])
        else:
            self.device = device

        self.max_thres = torch.tensor(params['beta']).to(self.device)

        # Both buffers start as random placeholders; the notebook overwrites
        # them before scoring (see train_autoencoder / initialize_memory).
        self.memory = torch.randn(self.memory_len, self.out_dim).to(self.device)
        self.mem_data = torch.randn(self.memory_len, self.in_dim).to(self.device)

        # Plain tensors (not nn.Parameters): never optimised.
        self.memory.requires_grad = False
        self.mem_data.requires_grad = False

        # NOTE(review): batch_size is taken from 'memory_len', not from
        # params['batch_size'] — confirm this is intended.
        self.batch_size = params['memory_len']

        self.num_mem_update = 0

        self.encoder = nn.Sequential(
            nn.Linear(self.in_dim, self.out_dim),
            nn.Tanh(),
        ).to(self.device)

        self.decoder = nn.Sequential(
            nn.Linear(self.out_dim, self.in_dim)
        ).to(self.device)

        self.clock = 0
        self.last_update = -1

        self.optimizer = torch.optim.Adam(self.parameters(), lr=params['lr'])
        self.loss_fn = nn.MSELoss()

        # Number of accepted memory updates; also drives the write position.
        self.count = 0

    @staticmethod
    def _standardize(x, mean, std):
        """Return the z-score of ``x``; columns whose ``std`` is 0 become 0."""
        scaled = (x - mean) / std
        # Division by a zero std yields nan/inf; replace those columns by 0.
        scaled[:, std == 0] = 0
        return scaled

    def train_autoencoder(self, data, epochs):
        """Fit the encoder/decoder to reconstruct the standardised ``data``.

        The normalisation statistics ``self.mean`` / ``self.std`` are computed
        from the current ``self.mem_data`` and stored on the instance.

        Args:
            data: tensor of samples, one row per sample.
            epochs: number of full-batch optimisation steps.
        """
        self.mean, self.std = self.mem_data.mean(0), self.mem_data.std(0)
        # The target is a constant for the optimiser, so detach it.
        target = self._standardize(data, self.mean, self.std).detach()

        # Training needs autograd even if the caller disabled it globally.
        with torch.enable_grad():
            for epoch in range(epochs):
                self.optimizer.zero_grad()
                # Small Gaussian noise on the input (denoising objective);
                # randn_like already allocates on target's device.
                noisy = target + 0.001*torch.randn_like(target)
                output = self.decoder(self.encoder(noisy))
                loss = self.loss_fn(output, target)
                loss.backward()
                self.optimizer.step()

    def update_memory(self, output_loss, encoder_output, data):
        """Store the sample in memory when its score is within the threshold.

        Args:
            output_loss: score of the sample (compared against ``beta``).
            encoder_output: encoding of the sample.
            data: the raw sample.

        Returns:
            1 if the memory was updated, 0 otherwise.
        """
        if output_loss <= self.max_thres:
            # Slots are overwritten in cyclic order of accepted updates.
            least_used_pos = self.count % self.memory_len
            # Detach so the memory never keeps an autograd graph alive.
            self.memory[least_used_pos] = encoder_output.detach()
            self.mem_data[least_used_pos] = data
            # Keep the normalisation statistics in sync with the memory.
            self.mean, self.std = self.mem_data.mean(0), self.mem_data.std(0)
            self.count += 1
            return 1
        return 0

    def initialize_memory(self, x):
        """Fill the memory with the encodings of ``x`` and remember ``x``.

        ``x`` is standardised with statistics of the current ``self.mem_data``
        (this instance's, not those of a notebook-level ``model`` variable).

        Args:
            x: tensor of samples, one row per sample.
        """
        mean, std = self.mem_data.mean(0), self.mem_data.std(0)
        new = self._standardize(x, mean, std)
        # Encode without building a graph; the result is a plain leaf tensor,
        # so this works whether or not autograd is globally enabled.
        with torch.no_grad():
            self.memory = self.encoder(new).detach()
        # Copy so later in-place memory updates cannot modify the caller's data.
        self.mem_data = x.clone()

    def forward(self, x):
        """Score ``x`` and possibly insert it into memory.

        Uses ``self.mean`` / ``self.std``, which are set by
        ``train_autoencoder`` and refreshed by ``update_memory``.

        Returns:
            Scalar tensor: the minimum L1 distance between the encoding of
            ``x`` and the rows of ``self.memory``.
        """
        new = self._standardize(x, self.mean, self.std)

        encoder_output = self.encoder(new)
        loss_values = torch.norm(self.memory - encoder_output, dim=1, p=1).min()
        self.update_memory(loss_values, encoder_output, x)
        return loss_values

In [9]:
# print(f'args.dataset: {args.dataset}')
# if args.dataset in ['KDD', 'NSL', 'UNSW', 'DOS']:
#     print(f'nifle: {nfile}; lfile: {lfile}')
#     numeric = torch.FloatTensor(np.loadtxt(nfile, delimiter = ','))
#     labels = np.loadtxt(lfile, delimiter=',')

# if args.dataset == 'KDD':
#     labels = 1 - labels

In [10]:
# numeric = torch.FloatTensor(np.loadtxt('data/nsl.txt', delimiter=','))
# numeric.shape

In [11]:
def load_pickle(filename):
    """Read `filename` in binary mode and return the unpickled object."""
    with open(filename, 'rb') as handle:
        return pickle.load(handle)

In [13]:
# Load the pickled object, convert it through .numpy() into a float tensor,
# then reshape it into a single-column matrix (one row per value).
numeric = torch.FloatTensor(load_pickle('pickle/y.pickle').numpy())
numeric = numeric.reshape(numeric.size(0), 1)
numeric.shape

2022-10-08 10:27:25.742900: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-08 10:27:25.835000: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudnn.so.8'; dlerror: libcudnn.so.8: cannot open shared object file: No such file or directory
2022-10-08 10:27:25.835015: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1850] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
2022-10-08 10:27:25.836690: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN

torch.Size([4618332, 1])

In [14]:
# Re-seed before building the model so its random initialisation is repeatable.
torch.manual_seed(args.seed)

# Memory size (the --memlen option, 2048 by default).
N = args.memlen

# Hyper-parameters handed to MemStream and to the data loader.
params = dict(
    beta=args.beta,
    memory_len=N,
    batch_size=1,
    lr=args.lr,
)

In [15]:
# The input dimension is the number of columns of `numeric`.
model = MemStream(numeric.shape[1], params)
model = model.to(device)
model

NVIDIA GeForce RTX 3090 with CUDA capability sm_86 is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities sm_37 sm_50 sm_60 sm_70.
If you want to use the NVIDIA GeForce RTX 3090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/



MemStream(
  (encoder): Sequential(
    (0): Linear(in_features=1, out_features=2, bias=True)
    (1): Tanh()
  )
  (decoder): Sequential(
    (0): Linear(in_features=2, out_features=1, bias=True)
  )
  (loss_fn): MSELoss()
)

In [16]:
# Echo the run configuration, then stream `numeric` in batches of
# params['batch_size'] samples.
print(args.dataset, args.beta, args.memlen, args.lr, args.epochs)
batch_size = params['batch_size']
data_loader = DataLoader(numeric, batch_size=batch_size)

NSL 0.1 2048 0.01 5000


In [17]:
# Warm-up set: the first N rows of the stream, moved to the target device.
# (An earlier, commented-out variant first filtered rows with `labels == 0`.)
init_data = numeric[:N].to(device)
init_data

RuntimeError: CUDA error: no kernel image is available for execution on the device
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

In [18]:
# Seed the raw-sample memory with the warm-up data; train_autoencoder derives
# its normalisation statistics from model.mem_data.
model.mem_data = init_data

# Train with autograd switched on ...
torch.set_grad_enabled(True)
model.train_autoencoder(init_data.to(device), epochs=args.epochs)

# ... then switch it off globally for memory initialisation and scoring.
torch.set_grad_enabled(False)
model.initialize_memory(init_data[:N])

RuntimeError: CUDA error: no kernel image is available for execution on the device
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

In [None]:
# Repeats the last two statements of the training cell above: disable autograd
# globally and (re)fill the model's memory from the first N warm-up samples.
torch.set_grad_enabled(False)
model.initialize_memory(Variable(init_data[:N]))

In [None]:
from tqdm import tqdm
# Score the stream batch by batch, in order. Each call to model(...) may also
# write the sample into the model's memory (see MemStream.update_memory), so
# the result depends on the iteration order.
err = []  # one score tensor per batch, in stream order
for data in tqdm(data_loader):
    output = model(data.to(device))
    err.append(output)

In [None]:
err

In [None]:
# Gather all per-batch scores into one tensor and move it to the host in a
# single transfer, instead of copying every score to the CPU individually.
scores = torch.stack(err).cpu().numpy()

# NOTE(review): `labels` is not assigned in any of the visible cells (the
# label-loading code is commented out) — confirm where it should come from
# before running this cell.
auc = metrics.roc_auc_score(labels, scores)
print("ROC-AUC", auc)