In [1]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys
sys.dont_write_bytecode = True
sys.path.append('../')


import argparse
import numpy as np
import pandas as pd
import random
from importlib import reload  
from sklearn.model_selection import GridSearchCV
from sklearn import svm
from sklearn.utils import shuffle

from loglizer.models import InvariantsMiner, PCA, IsolationForest, OneClassSVM, LogClustering, LR, SVM
from loglizer import dataloader, preprocessing
from loglizer.utils import metrics

In [2]:
# Base directory handed to dataloader.load_data as its first argument.
output_dir = "../output/bgl/"
# Backward-compatible alias: the original (misspelled) name is still
# referenced by the data-loading cell, so keep it bound to the same value.
ouput_dir = output_dir
# Second path argument given to dataloader.load_data; empty in this notebook.
middle_dir = ""
# Name of the raw log file given to dataloader.load_data.
log_file = "BGL.log"

<!-- # Produce event templates from train test dataset -->

# Split data into train and test sets

In [3]:
# load_data yields a (train, test) pair; each element is itself an (x, y) pair.
train_split, test_split = dataloader.load_data(ouput_dir, middle_dir, log_file, is_mapping=True)
x_train, y_train = train_split
x_test, y_test = test_split

  train = np.array(train).reshape(-1,1)
  test_normal = np.array(test_normal).reshape(-1,1)
  abnormal = np.array(abnormal).reshape(-1,1)


Train normal size: 13718
Train abnormal size: 1207
Total logkey(exclude 0:UNK) 1000
Test normal size: 20579
Test abnormal size: 1811
num_unk_event in test data: 0


In [4]:
# Fit the extractor on the training sequences, then reuse the fitted extractor
# to transform the test sequences.
# NOTE(review): x_train / x_test are rebound to their transformed versions, so
# re-running this cell without re-running the loading cell feeds already
# transformed data back into the extractor.
feature_extractor = preprocessing.FeatureExtractor()
x_train, x_test = (
    feature_extractor.fit_transform(x_train),
    feature_extractor.transform(x_test),
)

Train data shape: 14925-by-831

Test data shape: 22390-by-831



In [5]:
%%time
print("="*20 + " Model: PCA " + "="*20)
for th in np.arange(1):
    print("theshold", th)
    model = PCA(n_components=0.8, threshold=1, c_alpha = 1.9600)
    model.fit(x_train)
    print('Train validation:')
    precision, recall, f1, auc, aupr = model.evaluate(x_train, y_train)
    print('Test validation:')
    precision, recall, f1, auc, aupr = model.evaluate(x_test, y_test)

theshold 0
n_components: 5
Project matrix shape: 831-by-831
SPE threshold: 1

Train validation:
Confusion Matrix: TP: 1193, FP: 11880, TN: 1838, FN: 14
Precision: 9.126%, recall: 98.840%, F1-measure: 16.709%, AUC: 56.119%, AUPR: 9.114%

Total test time: 0.462 s

Test validation:
Confusion Matrix: TP: 1777, FP: 17873, TN: 2706, FN: 34
Precision: 9.043%, recall: 98.123%, F1-measure: 16.560%, AUC: 55.636%, AUPR: 9.025%

Total test time: 0.656 s

CPU times: user 31.6 s, sys: 56.7 s, total: 1min 28s
Wall time: 1.39 s


In [6]:
%%time
print("="*20 + " Model: IsolationForest " + "="*20)
model = IsolationForest(n_estimators=100, max_samples='auto', contamination='auto', random_state=19)
model.fit(x_train)
print('Train validation:')
precision, recall, f1, auc, aupr = model.evaluate(x_train, y_train)
print('Test validation:')
precision, recall, f1, auc, aupr = model.evaluate(x_test, y_test)

Train validation:
Confusion Matrix: TP: 177, FP: 0, TN: 13718, FN: 1030
Precision: 100.000%, recall: 14.665%, F1-measure: 25.578%, AUC: 57.332%, AUPR: 21.566%

Total test time: 3.022 s

Test validation:
Confusion Matrix: TP: 267, FP: 0, TN: 20579, FN: 1544
Precision: 100.000%, recall: 14.743%, F1-measure: 25.698%, AUC: 57.372%, AUPR: 21.639%

Total test time: 4.558 s

CPU times: user 5.46 s, sys: 2.27 s, total: 7.73 s
Wall time: 7.74 s


In [7]:
%%time
print("="*20 + " Model: one class SVM " + "="*20)
model = OneClassSVM(kernel='rbf')
model.fit(x_train, y_train)

print('Train validation:')
precision, recall, f1, auc, aupr = model.evaluate(x_train, y_train)
print('Test validation:')
precision, recall, f1, auc, aupr = model.evaluate(x_test, y_test)

Train validation:
Confusion Matrix: TP: 151, FP: 13718, TN: 0, FN: 1056
Precision: 1.089%, recall: 12.510%, F1-measure: 2.003%, AUC: 28.314%, AUPR: 7.317%

Total test time: 71.113 s

Test validation:
Confusion Matrix: TP: 226, FP: 20579, TN: 0, FN: 1585
Precision: 1.086%, recall: 12.479%, F1-measure: 1.999%, AUC: 28.221%, AUPR: 7.319%

Total test time: 107.138 s

CPU times: user 5min 4s, sys: 187 ms, total: 5min 4s
Wall time: 5min 4s


In [None]:
%%time
print("="*20 + " Model: LogClustering " + "="*20)
max_dist = 0.3  # the threshold to stop the clustering process
anomaly_threshold = 0.3  # the threshold for anomaly detection
model = LogClustering(max_dist=max_dist, anomaly_threshold=anomaly_threshold)
model.fit(x_train[y_train == 0, :])  # Use only normal samples for training
print('Train validation:')
precision, recall, f1, auc, aupr = model.evaluate(x_train, y_train)
print('Test validation:')
precision, recall, f1, auc, aupr = model.evaluate(x_test, y_test)
