### Use Antagonist to train a symptom detection model 

#### Reproducibility

In [1]:
import random

import numpy as np
import torch

# One seed shared by every random number generator seeded in this cell.
SEED = 0

random.seed(SEED)        # Python
np.random.seed(SEED)     # Numpy
torch.manual_seed(SEED)  # Torch

# Ask torch to use deterministic algorithms.
torch.use_deterministic_algorithms(True)

#### Dataset preparation

Note: the dataset needs to be downloaded using the script `download_SMD_dataset.sh` in the `scripts/antagonist-ml` folder.

In [2]:
import os 
from utils import SMD

In [3]:
# Build the relative path with os.path.join instead of hard-coding Windows
# back-slashes, so the same notebook resolves the data folder on Linux/macOS too.
data_folder = os.path.join(os.pardir, os.pardir, os.pardir, "data")
# The dataset lives in the 'ServerMachineDataset' sub-folder of the data folder.
smd_folder = os.path.join(data_folder, 'ServerMachineDataset')
db = SMD(dataset_folder=smd_folder)

In [4]:
# Arguments common to both reads: same group, labels requested.
read_args = {"group_name": "Group 1", "retrieve_labels": True}

# Training split; the second returned value is not used here.
dataframes_train, _ = db.read_dataset(train=True, **read_args)
# Non-training split (train=False); its second returned value is kept as `files`.
dataframes, files = db.read_dataset(train=False, **read_args)

In [5]:
# Work on the first entry of the group.
service_idx = 0
df_full = dataframes[service_idx]
service_id = files[service_idx]

# Take the labels before narrowing the frame: `labels` is the one-column
# 'label' DataFrame, `df` is every column except the last two plus 'timestamp'.
labels = df_full[['label']]
feature_columns = df_full.columns[:-2].tolist()
df = df_full[feature_columns + ['timestamp']]

# Training frame: every column except the last one, plus 'timestamp'.
train_full = dataframes_train[service_idx]
train_columns = train_full.columns[:-1].tolist()
df_train = train_full[train_columns + ['timestamp']]

In [6]:
# Interpretation labels of the selected service, as returned by the dataset helper.
# NOTE(review): the name `network_incidents` is assigned again further down, when the
# model's symptoms are grouped into incidents — confirm this first value is still needed.
network_incidents = db.get_interpretation_labels(service_id)

#### ML Model Loading

In [7]:
import datetime
import pandas as pd
from ml_ad import AENetworkAnomaly

In [23]:
# Day handled by this run, and the start of the following day.
current_day = datetime.datetime(year=2020, month=1, day=11)
next_day = current_day + datetime.timedelta(hours=24)

In [24]:
# Stored models are kept in a 'models' folder next to the data; create it if missing.
models_folder = os.path.join(data_folder, 'models')
os.makedirs(name=models_folder, exist_ok=True)

In [25]:
# The model is named after the current day (year, month, day without separators).
day_tag = current_day.strftime("%Y%m%d")
model_name = f'ae_model_{day_tag}'
model_folder = os.path.join(models_folder, model_name)

In [26]:
if os.path.exists(model_folder):
    # A model was already stored under this name: load it instead of retraining.
    ml_model = AENetworkAnomaly.load(model_folder)
else:
    # Train new model; every column of `df` except 'timestamp' is a model input.
    ml_model = AENetworkAnomaly(n_inputs=df.shape[1]-1)

    # Rows recorded before the current day whose label is 0.
    # `labels` shares its index with `df`, so the two conditions are combined
    # row by row instead of slicing `labels` by position, which silently
    # assumed that the rows before `current_day` form a prefix of `df`.
    mask_train = (df["timestamp"] < current_day.ctime()) & (labels["label"] == 0)

    # Training data: the training frame plus the label-0 rows selected above.
    df_today = pd.concat([df_train, df.loc[mask_train]], ignore_index=True)

    # Train the model
    X_train = df_today.drop('timestamp',axis=1).values
    ml_model.fit(X_train)

    # Store the trained model so that the next run takes the load branch.
    ml_model.store(model_folder)

Early stopping at epoch 8


#### Use model for detection

In [27]:
# Rows of the current day: current_day <= timestamp < next_day.
from_current_day = df["timestamp"] >= current_day.ctime()
before_next_day = df["timestamp"] < next_day.ctime()
df_pred = df.loc[from_current_day & before_next_day]

# The model receives every column except 'timestamp'.
X_pred = df_pred.drop(columns='timestamp').values
y_pred = ml_model.predict(X_pred, aggregate=False)

# Let the model turn its raw output into per-metric predictions.
model_predictions = ml_model.parse_predictions(df_pred, y_pred)

#### Convert predictions into incidents and symptoms

In [28]:
# Flatten the per-metric predictions into (metric_id, start, end) tuples.
day_symptoms = [
    (metric_id, symptom[0], symptom[1])
    for metric_id, symptoms_list in model_predictions.items()
    for symptom in symptoms_list
]

# Sort by starting timestamp so that overlapping symptoms become neighbours.
day_symptoms.sort(key=lambda x: x[1])

# Aggregate overlapping symptoms coming from different metrics into incidents of
# the form [[start_timestamp, end_timestamp, [symptom1, symptom2]], ...].
# The list is always rebuilt from scratch: when no symptom was predicted it must be
# empty, not whatever an earlier cell left in `network_incidents`.
network_incidents = []
for symptom in day_symptoms:
    if network_incidents and symptom[1] <= network_incidents[-1][1]:
        # Overlaps the current incident: attach it and extend the end if needed.
        current_incident = network_incidents[-1]
        current_incident[2].append(symptom)
        current_incident[1] = max(current_incident[1], symptom[2])
    else:
        # No overlap (or first symptom): open a new incident.
        network_incidents.append([symptom[1], symptom[2], [symptom]])

#### Store into antagonist

In [29]:
import sys 
sys.path.append("..")
from antagonist_ml.service import store_network_anomalies_labels, store_network_symptom_labels

In [32]:
# Author metadata shared by every incident and symptom stored below.
author_info = {
    "author_name": model_name,
    "author_type": "algorithm",
    "author_version": 1,
}

for network_incident in network_incidents:
    # An incident is [start, end, [symptoms]].
    incident_start = network_incident[0]
    incident_end = network_incident[1]

    # Store the incident first: the returned value links its symptoms to it.
    ni_uuid = store_network_anomalies_labels(
        description="Incident",
        start=incident_start,
        end=incident_end,
        state="incident-potential",
        version=1,
        **author_info,
    )

    for symptom in network_incident[2]:
        # A symptom is (metric_id, start, end).
        symptom_tags = {
            "machine": service_id,
            "metric_id": symptom[0],
        }
        store_network_symptom_labels(
            confidence=1.0,
            description="Symptom",
            start=symptom[1],
            end=symptom[2],
            version=1,
            tags=symptom_tags,
            network_anomaly_uuid=ni_uuid,
            **author_info,
        )

{"author": {"author_type": "algorithm", "name": "ae_model_20200111", "version": 1}, "description": "Incident", "start": "2020-01-11T01:07:00", "end": "2020-01-11T01:07:00", "id": "53221f60-7ed9-4451-9d9a-98543dcb15c4", "state": "incident-potential", "version": 1}
{"confidence-score": 1.0, "description": "Symptom", "start-time": "2020-01-11T01:07:00", "end-time": "2020-01-11T01:07:00", "event-id": "6cccfcda-9871-4c61-9ad0-80b44aa45fcf", "id": "5f39e4a9-3fd5-46c2-a870-3dd924ab7213", "source-name": "ae_model_20200111_1", "source-type": "algorithm", "tags": {"machine": "machine-1-1.txt", "metric_id": 33}}
{"symptom-id": "5f39e4a9-3fd5-46c2-a870-3dd924ab7213", "incident-id": "53221f60-7ed9-4451-9d9a-98543dcb15c4"}
{"author": {"author_type": "algorithm", "name": "ae_model_20200111", "version": 1}, "description": "Incident", "start": "2020-01-11T02:40:00", "end": "2020-01-11T02:41:00", "id": "c609c298-eaee-4226-81a7-1ae609cf2561", "state": "incident-potential", "version": 1}
{"confidence-scor