### Create an Anomaly Detector object

In [1]:
import sys
sys.path.append('..')
from demo_anomaly_detector import autoencoder_detector

# Build the detector object that the rest of the notebook trains and queries.
# If a model has been pre-trained, it will be loaded automatically
# (the loading itself happens inside DemoAnomalyDetector, which is not shown here).
anomaly_detector = autoencoder_detector.DemoAnomalyDetector()

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


### Retrieve the telemetry data from InfluxDB

In [4]:
import datetime
import sys
sys.path.append('..')
import influx_db_utils as influx

# Machine whose telemetry is read here and referenced by later cells.
machine_id = 'machine-1-1'

# Query window: the 365 days that end at the moment this cell runs.
end = datetime.datetime.now()
start = end - datetime.timedelta(days=365)

# Read historical data from InfluxDB
db = influx.SMDInfluxDB()
dataframes, machines = db.read_dataset(
    machine_name=machine_id, start_date=start, end_date=end)

# Work on the first returned frame: drop its first column and place
# 'timestamp' as the last column.
first_frame = dataframes[0]  # pandas.DataFrame
selected_columns = [*first_frame.columns[1:], 'timestamp']
telemetry_df = first_frame[selected_columns]

KeyboardInterrupt: 

### Select a time window as training data

In [None]:
# Filter up to the current day to simulate the prediction on the next one.
# The simulated "current day" is 32 days after the earliest telemetry sample.
current_day = telemetry_df['timestamp'].min() + datetime.timedelta(days=32)
next_day = current_day + datetime.timedelta(days=1)
# Compare against the timestamp itself instead of its `ctime()` string:
# the string form drops sub-second precision and any timezone information,
# and forces pandas to re-parse a free-form date string on every run.
telemetry_df = telemetry_df[telemetry_df['timestamp'] < current_day]

### If the model has never been trained before, train it now!

In [None]:
def _symptom_time_to_epoch(raw_time):
    """Convert a symptom time string (e.g. 'Mon, 01 Jan 2024 00:00:00 GMT') to epoch seconds.

    `strptime` only validates the `%Z` zone name and returns a *naive*
    datetime, which `.timestamp()` would interpret as local time. When the
    string is explicitly GMT/UTC the value is therefore pinned to UTC so the
    result does not depend on the timezone of the machine running the notebook.
    """
    parsed = datetime.datetime.strptime(raw_time, '%a, %d %b %Y %H:%M:%S %Z')
    if raw_time.split()[-1].upper() in ('GMT', 'UTC'):
        parsed = parsed.replace(tzinfo=datetime.timezone.utc)
    return parsed.timestamp()


def format_symptoms(symptoms_json, start, end):
    """Select the symptoms relevant for training and convert their times to epoch seconds.

    A symptom is kept when its source type is "human", its "machine" tag equals
    the notebook-level `machine_id`, and its [start-time, end-time] interval
    overlaps the [start, end] window.

    Args:
        symptoms_json: iterable of symptom dicts carrying 'start-time' and
            'end-time' strings in '%a, %d %b %Y %H:%M:%S %Z' format, plus
            'source-type' and 'tags' entries.
        start: datetime marking the beginning of the filter window.
        end: datetime marking the end of the filter window.

    Returns:
        A list of new dicts (the input dicts are left untouched) whose
        'start-time' and 'end-time' are floats holding epoch seconds.

    Raises:
        ValueError: if a time string does not match the expected format.
    """
    # TODO: Move the filtering capabilities inside Antagonist
    source_type = "human"
    tags = {"machine": machine_id}

    # The window bounds do not change per symptom, so compute them once.
    window_start = start.timestamp()
    window_end = end.timestamp()

    symptoms = []
    for symptom in symptoms_json:
        start_time = _symptom_time_to_epoch(symptom['start-time'])
        end_time = _symptom_time_to_epoch(symptom['end-time'])

        # Two intervals overlap when each one starts before the other ends.
        # Checking only whether an endpoint falls inside the window would miss
        # a symptom that begins before the window and ends after it.
        if not (start_time <= window_end and end_time >= window_start):
            continue

        if source_type is not None and symptom.get("source-type") != source_type:
            continue

        # A symptom lacking a requested tag is treated as a non-match
        # rather than aborting the whole run with a KeyError.
        symptom_tags = symptom.get("tags") or {}
        if tags is not None and any(
            symptom_tags.get(tag) != wanted for tag, wanted in tags.items()
        ):
            continue

        # Build a copy so the caller's JSON keeps its original time strings
        # and the function can safely be run again on the same input.
        symptoms.append({**symptom, "start-time": start_time, "end-time": end_time})
    return symptoms


# Train only when the detector reports that it has no trained model yet.
if not anomaly_detector.is_trained():
    import requests
    import pandas as pd
    import numpy as np

    # Get labels from Antagonist
    ANTAGONIST_HOST = "localhost:5001"
    # Without a timeout `requests` waits forever on an unresponsive server,
    # which would freeze the notebook kernel.
    response = requests.get(
        f"http://{ANTAGONIST_HOST}/api/rest/v1/symptom", timeout=30
    )
    response.raise_for_status()
    symptoms = response.json()
    # NOTE(review): `format_symptoms` (defined above) is never applied to this
    # response, so symptoms from every machine and every source type are used
    # as labels. Confirm whether `format_symptoms(symptoms, start, end)` was
    # intended here.

    # Format labels for the training: one row per telemetry timestamp,
    # initially labelled 0.
    annotation_df = pd.DataFrame({'timestamp': telemetry_df['timestamp']})
    annotation_df['label'] = 0

    for symptom in symptoms:
        # Build each bound once and reuse it for both the duration check
        # and the row selection.
        symptom_start = pd.Timestamp(symptom['start-time'], unit="s", tz="UTC")
        symptom_end = pd.Timestamp(symptom['end-time'], unit="s", tz="UTC")

        # For now, skip anomalies bigger than 1 day
        if (symptom_end.timestamp() - symptom_start.timestamp()) > 86400:
            continue

        # Mark every sample inside the symptom interval (bounds included).
        in_symptom = (
            (annotation_df['timestamp'] >= symptom_start) &
            (annotation_df['timestamp'] <= symptom_end)
        )
        annotation_df.loc[in_symptom, 'label'] = 1

    # Train the model
    anomaly_detector.train(telemetry_df, annotation_df)

In [None]:
# Run the detector on the filtered telemetry.
# The notebook later iterates over the result and reads, for each entry,
# index 0 as an epoch timestamp and index 2 as a list of symptom records.
network_anomalies = anomaly_detector.detect(telemetry_df)

In [None]:
import time
import requests

ANTAGONIST_HOST = "localhost:5001"
# Upper bound, in seconds, for each HTTP call; without it an unresponsive
# server would block the notebook kernel indefinitely.
REQUEST_TIMEOUT = 30

group = "Group-1"


def post_to_antagonist(endpoint, payload):
    """POST `payload` as JSON to an Antagonist REST endpoint.

    Args:
        endpoint: path below `/api/rest/v1/`, e.g. "symptom".
        payload: JSON-serialisable body.

    Returns:
        The `requests.Response` of a successful call.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not answer within REQUEST_TIMEOUT.
    """
    reply = requests.post(
        f"http://{ANTAGONIST_HOST}/api/rest/v1/{endpoint}",
        json=payload,
        timeout=REQUEST_TIMEOUT,
    )
    reply.raise_for_status()
    return reply


## Send the data to Antagonist
# Each detected anomaly is read as: [0] epoch timestamp, [2] list of symptoms.
for network_anomaly in network_anomalies:

    # Create network anomaly label
    net_anomaly = {
        "annotator": {
            "name": anomaly_detector.get_model_name(),
            "annotator_type": "algorithm"
        },
        "description": f'Detected Network Anomaly on {machine_id} - {datetime.datetime.fromtimestamp(network_anomaly[0]).strftime("%Y-%m-%d at %H")}',
        "state": "incident-potential",
        "version": 1
    }
    response = post_to_antagonist("network_anomaly", net_anomaly)
    ni_uuid = response.json()

    # Create network symptoms labels and link with the network incident
    # Each symptom is read as: [0] metric index, [1] start epoch, [2] end epoch,
    # [3] concern score, [4] confidence score.
    for symptom in network_anomaly[2]:
        tags = {
            "machine": machine_id,
            # Fixed: the identifier here was corrupted ("symptoám"), which
            # raised NameError as soon as the first symptom was processed.
            "metric": db.get_metric_names()[symptom[0]],
            "group": group,
        }

        # NOTE(review): `fromtimestamp` renders local time without a UTC
        # offset, while the training cell reads symptom times as UTC —
        # confirm which timezone Antagonist expects here.
        net_sym = {
            'start-time': datetime.datetime.fromtimestamp(symptom[1]).strftime("%Y-%m-%dT%H:%M:%S"),
            'end-time': datetime.datetime.fromtimestamp(symptom[2]).strftime("%Y-%m-%dT%H:%M:%S"),
            "event-id": ni_uuid,
            "concern-score": symptom[3],
            "confidence-score": symptom[4],
            "description": "Symptom",
            "pattern": "",
            "tags": tags,
            "annotator": {
                "name": f"{anomaly_detector.get_model_name()}",
                "annotator_type": "algorithm"
            }
        }

        # Persist the Symptom
        response = post_to_antagonist("symptom", net_sym)
        symptom_uuid = response.json()

        # Link the Symptom to the network anomaly
        sym_to_net = {"symptom-id": symptom_uuid, "incident-id": ni_uuid}
        response = post_to_antagonist("network_anomaly/symptom", sym_to_net)
