### Use Antagonist to train a symptom detection model 

#### ToDo

- [OK] Change threshold to detect symptoms
- Use API from the plugin to get and store labels
- [OK] Use training data from a metric in addition to current data
- [OK] Remove known anomalies from training data 

#### Reproducibility

In [15]:
# Pin every random number generator the notebook relies on (Python, NumPy and
# PyTorch) and request deterministic PyTorch algorithms, so that repeated runs
# start from the same random state.
import random

import numpy as np
import torch

random.seed(0)
np.random.seed(0)
torch.manual_seed(0)
torch.use_deterministic_algorithms(True)

#### Dataset preparation

Note: the dataset needs to be downloaded using the script `download_SMD_dataset.sh` in the `scripts/antagonist-ml` folder.

In [16]:
from data_utils import SMD

In [17]:
# Open the dataset through the project's SMD helper.
# NOTE(review): this is an absolute, machine-specific Windows path; it has to be
# changed to wherever the dataset was downloaded before running elsewhere.
db = SMD(dataset_folder=r"D:\antagonist\data\ServerMachineDataset")

In [18]:
# Read the train and the test split of "Group 1". Each call returns a pair; the
# first element is indexed per service further down, and the second element
# (kept only for the test split) supplies the matching service identifier.
dataframes_train, _ = db.read_dataset(group_name="Group 1", train=True, retrieve_labels=True)
dataframes, files = db.read_dataset(group_name="Group 1", train=False, retrieve_labels=True)


In [19]:
# Work on a single service of the group, selected by position.
service_idx = 0
df = dataframes[service_idx]
service_id = files[service_idx]

# Test split: put the labels aside first, then keep every column except the
# last two and re-append the timestamp so that it is the final column.
labels = df[['label']]
df = df[df.columns[:-2].tolist() + ['timestamp']]

# Train split: only the last column is dropped before re-appending the
# timestamp as the final column.
df_train = dataframes_train[service_idx]
df_train = df_train[df_train.columns[:-1].tolist() + ['timestamp']]

In [20]:
# Fetch the interpretation labels of the selected service.
# NOTE(review): `network_incidents` is not referenced again in the visible part
# of the notebook.
network_incidents = db.get_interpretation_labels(service_id)

#### Train model utils

In [21]:
import pandas as pd
from auto_encoder import Vanilla_AE
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, f1_score

In [22]:
# Hyper-parameters read as globals by train_model() on every (re)training run.
n_inputs = df.shape[1] - 1  # number of columns of df minus the trailing timestamp
layer_sizes = [8, 4, 2]  # forwarded to the Vanilla_AE constructor
lr = 0.005  # forwarded to ae.fit
batch_size = 32  # forwarded to ae.fit
epochs = 40  # forwarded to ae.fit
validation_split = 0.2  # forwarded to ae.fit
early_stopping = True  # forwarded to ae.fit
patience = 3  # forwarded to ae.fit; presumably the early-stopping patience in epochs
Q = 0.99  # residual cut: quantile of the absolute training residuals used for the threshold

In [23]:
def train_model(df: pd.DataFrame):
    """Fit a scaler and an auto-encoder on ``df`` and derive residual thresholds.

    The last column of ``df`` (the timestamp) is ignored; all other columns are
    standardised and used as training input. The model and training settings
    are read from the notebook-level hyper-parameters.

    Parameters
    ----------
    df : pd.DataFrame
        Training data whose final column is the timestamp.

    Returns
    -------
    tuple
        ``(scaler, ae, threshold)``: the fitted ``StandardScaler``, the fitted
        ``Vanilla_AE`` and, per input column, the ``Q`` quantile of the absolute
        training residuals multiplied by 5/2.
    """
    model = Vanilla_AE(n_inputs=n_inputs, layer_sizes=layer_sizes)

    # Everything except the trailing timestamp column is a feature.
    features = df.values[:, :-1]

    # Standardise the features; the fitted scaler is returned for reuse.
    std_scaler = StandardScaler()
    features_scaled = std_scaler.fit_transform(features)

    # Train the auto-encoder on the standardised features.
    fit_options = dict(
        early_stopping=early_stopping,
        validation_split=validation_split,
        epochs=epochs,
        lr=lr,
        batch_size=batch_size,
        verbose=0,
        shuffle=True,
        patience=patience,
        delta=0.001,
    )
    model.fit(features_scaled, **fit_options)

    # Absolute reconstruction error on the training data, one column per input.
    reconstruction = model.predict(features_scaled)
    abs_residuals = pd.DataFrame(features_scaled - reconstruction).abs()

    # Per-column cut: Q quantile of the residuals, widened by a factor of 5/2.
    cut = abs_residuals.quantile(Q, axis=0) * 5 / 2

    return std_scaler, model, cut

In [24]:
def find_consecutive_true_np(arr):
    """Locate the runs of consecutive true values in every column of ``arr``.

    Parameters
    ----------
    arr : numpy.ndarray
        Two-dimensional array (rows x columns), typically boolean.

    Returns
    -------
    list[list[tuple]]
        One list per column containing ``(start, stop)`` row-index pairs, where
        ``start`` is the first row of a run and ``stop`` is one past its last
        row.
    """

    def column_runs(values):
        # Pad with False on both ends so that a run touching the first or the
        # last row still yields an opening and a closing edge.
        padded = np.r_[False, values, False]
        edges = np.flatnonzero(padded[1:] != padded[:-1])
        # Edges alternate: even positions open a run, odd positions close it.
        return list(zip(edges[0::2], edges[1::2]))

    return [column_runs(arr[:, col]) for col in range(arr.shape[1])]

def predict(df, scaler, ae, threshold, aggregate=False):
    """Flag the entries whose absolute reconstruction residual exceeds the threshold.

    Parameters
    ----------
    df : pd.DataFrame
        Data to score; its last column (the timestamp) is ignored.
    scaler :
        Fitted scaler exposing ``transform``.
    ae :
        Fitted auto-encoder exposing ``predict``.
    threshold :
        Per-column residual thresholds, as returned by ``train_model``.
    aggregate : bool, optional
        When true, collapse the result to one flag per row (true if any column
        is flagged).

    Returns
    -------
    numpy.ndarray
        Boolean array of shape (rows, columns), or (rows,) when ``aggregate``
        is true.
    """
    # Drop the trailing timestamp column and scale the remaining features.
    scaled = scaler.transform(df.values[:, :-1])

    # Absolute difference between the scaled input and its reconstruction.
    reconstruction = ae.predict(scaled)
    abs_residuals = pd.DataFrame(scaled - reconstruction).abs()

    exceeded = (abs_residuals > threshold).values

    if aggregate:
        return exceeded.any(axis=1)
    return exceeded

In [25]:
def eval_model(df, labels, scaler, ae, threshold):
    """Return the binary F1 score of the detector against ground-truth labels.

    A row counts as a predicted anomaly when at least one of its columns has an
    absolute reconstruction residual above the corresponding threshold.

    Parameters
    ----------
    df : pd.DataFrame
        Data to score; its last column (the timestamp) is ignored.
    labels : pd.DataFrame
        Ground truth with a ``label`` column, one row per row of ``df``.
    scaler, ae, threshold :
        Fitted scaler, fitted auto-encoder and per-column thresholds, as
        returned by ``train_model``.

    Returns
    -------
    float
        F1 score computed with ``average='binary'``.
    """
    # Delegate to predict() so that evaluation and inference always apply the
    # same residual/threshold rule instead of keeping two copies of it.
    is_outlier = predict(df, scaler, ae, threshold, aggregate=True).astype(int)

    return f1_score(labels['label'].values, is_outlier, average='binary')

#### Iterative process simulation

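For every simulated day, the current model first predicts the symptoms of the day that just ended; the model is then retrained on the original training data plus all test data seen so far, excluding the points labeled as anomalies.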

In [26]:
import datetime
from collections import defaultdict

In [27]:
# Bounds of the simulation: the earliest timestamp of the test data and the
# latest timestamp rounded up to a day boundary. Both are converted from
# nanoseconds to seconds and then to `datetime` objects.
# NOTE(review): `datetime.fromtimestamp` without a `tz` argument returns local
# time, so these bounds shift with the time zone of the machine running the
# notebook — confirm this is intended.
start_date = datetime.datetime.fromtimestamp(df['timestamp'].astype('int64').min()/10**9)
end_date = datetime.datetime.fromtimestamp(df['timestamp'].dt.ceil('D').astype('int64').max()/10**9)

In [28]:
# No model exists before the first retraining.
scaler, ae, threshold = None, None, None
previous_day = None

# metric index -> list of [start, end] symptom intervals, as POSIX timestamps
predictions = defaultdict(list)

for current_day in pd.date_range(start=start_date, end=end_date, freq="D"):
    # NOTE(review): this round-trip goes through `datetime.fromtimestamp`, which
    # returns local time, so the day boundary depends on the machine's time zone.
    current_day = datetime.datetime.fromtimestamp(current_day.timestamp())
    # All rows strictly before the current day boundary (compared via the ctime string).
    df_today = df.loc[df["timestamp"] < current_day.ctime()]

    if df_today.shape[0] == 0:
        # first day
        continue

    # Predict symptoms for the window [previous_day, current_day) with the model
    # trained at the end of the previous iteration
    if scaler is not None and previous_day is not None:
        df_pred = df.loc[
            (df["timestamp"] >= previous_day.ctime())
            & (df["timestamp"] < current_day.ctime())
        ]
        # One boolean column per metric: True where the residual exceeds the threshold.
        y_pred = predict(df_pred, scaler, ae, threshold, aggregate=False)

        # Turn each metric's flags into (start, stop) row ranges, stop exclusive.
        intervals = find_consecutive_true_np(y_pred)
        for metric_id, symptoms in enumerate(intervals):
            for symp in symptoms:
                # Store the timestamps of the first and the last row of the run.
                predictions[metric_id].append(
                    [
                        df_pred["timestamp"].iloc[symp[0]].timestamp(),
                        df_pred["timestamp"].iloc[symp[1] - 1].timestamp(),
                    ]
                )

    # Retrain the model on the available data removing anomalies (simulating human validation)
    # NOTE(review): the labels are sliced by position, which assumes that the rows
    # of df_today are exactly the leading rows of df (df sorted by timestamp) — confirm.
    mask_train = labels[: df_today.shape[0]]["label"] == 0
    # Original training data plus every non-anomalous test row seen so far.
    df_today = pd.concat([df_train, df_today[mask_train]], ignore_index=True)
    scaler, ae, threshold = train_model(df_today)
    previous_day = current_day

Early stopping at epoch 7


In [29]:
# Evaluation on the overall test set
f1 = eval_model(df, labels, scaler, ae, threshold)
print(f"F1 score: {round(f1,4)}")

F1 score: 0.2942


In [32]:
# Store symptoms
predictions[9]

[[1577942040.0, 1577942040.0], [1577959500.0, 1577959500.0]]