### Retrieve the telemetry data from InfluxDB

In [None]:
import datetime
import sys
sys.path.append('..')
import influx_db_utils as influx

# Notebook-wide settings: the machine under analysis and the group it is tagged with.
# TOTAL_LEN_OF_DATA = 40  # Total number of days of data in the DB
# training_data_len = 20  # Number of days of data to use for training
group = "Group-1"
machine_id = 'machine-1-1'

# Query window: the 365 days that end "now".
end = datetime.datetime.now()
start = end - datetime.timedelta(days=365)

# Read the historical telemetry of the selected machine from InfluxDB.
db = influx.SMDInfluxDB()
dataframes, machines = db.read_dataset(start_date=start, end_date=end, machine_name=machine_id)

# Work on the first returned frame (a pandas.DataFrame): keep every column but
# the first one, followed by 'timestamp' as the last column.
historical_telemetry_df = dataframes[0]
historical_telemetry_df = historical_telemetry_df[[*historical_telemetry_df.columns[1:], 'timestamp']]


In [None]:
# Pick the simulated "current day": 36 days after the earliest telemetry sample.
# Data up to that day is used to simulate the prediction on the next one.
current_day = historical_telemetry_df['timestamp'].min() + datetime.timedelta(days=36)
next_day = current_day + datetime.timedelta(days=1)

In [None]:
# Start again from the raw frame returned by InfluxDB and keep only the samples
# strictly before the simulated current day.
historical_telemetry_df = dataframes[0]
# Compare against the Timestamp itself: formatting it with ctime() drops the
# sub-second part and the timezone, and relies on pandas re-parsing a string.
historical_telemetry_df = historical_telemetry_df[historical_telemetry_df['timestamp'] < current_day]

### Retrieve the anomaly labels validated by network experts (from Antagonist)

In [None]:
import requests


def _symptom_time_to_epoch(value):
    """Convert an Antagonist time string to epoch seconds (float)."""
    parsed = datetime.datetime.strptime(value, '%a, %d %b %Y %H:%M:%S %Z')
    # strptime() discards the zone matched by %Z and returns a naive datetime,
    # which timestamp() would read as local time. Pin GMT/UTC values to UTC.
    if value.rstrip().upper().endswith(("GMT", "UTC")):
        parsed = parsed.replace(tzinfo=datetime.timezone.utc)
    return parsed.timestamp()


def format_symptoms(symptoms_json, start, end, source_type="human", tags=None):
    """Filter Antagonist symptoms and convert their time bounds to epoch seconds.

    Args:
        symptoms_json: iterable of symptom dicts decoded from the Antagonist
            REST API. 'start-time' and 'end-time' are strings in the
            '%a, %d %b %Y %H:%M:%S %Z' format.
        start: datetime marking the beginning of the window of interest.
        end: datetime marking the end of the window of interest.
        source_type: keep only symptoms whose "source-type" equals this value;
            None disables the check. Defaults to "human".
        tags: mapping tag -> value that a symptom's "tags" must contain to be
            kept. None (default) filters on the notebook-level ``machine_id``
            ({"machine": machine_id}); pass {} to disable the check.

    Returns:
        A list of new dicts (the input dicts are left untouched) whose
        'start-time' and 'end-time' hold epoch seconds as floats.
    """
    # TODO: Move the filtering capabilities inside Antagonist
    if tags is None:
        tags = {"machine": machine_id}

    window_start = start.timestamp()
    window_end = end.timestamp()

    symptoms = []
    for symptom in symptoms_json:
        start_time = _symptom_time_to_epoch(symptom['start-time'])
        end_time = _symptom_time_to_epoch(symptom['end-time'])

        # Two intervals overlap when each one starts before the other ends;
        # this also catches a symptom that fully contains the window.
        if not (start_time <= window_end and end_time >= window_start):
            continue
        if source_type is not None and symptom.get("source-type") != source_type:
            continue

        # A symptom without the requested tag is a mismatch, not an error.
        symptom_tags = symptom.get("tags") or {}
        if not all(tag in symptom_tags and symptom_tags[tag] == value
                   for tag, value in tags.items()):
            continue

        symptoms.append({**symptom, "start-time": start_time, "end-time": end_time})
    return symptoms


# host:port used to build the Antagonist REST API URLs
ANTAGONIST_HOST = "localhost:5001"
# Fetch the symptoms stored in Antagonist; raise_for_status() raises on an HTTP error status
response = requests.get(f"http://{ANTAGONIST_HOST}/api/rest/v1/symptom")
response.raise_for_status()

# The decoded JSON payload is used as-is: the format_symptoms() filtering step below is disabled
# symptoms = format_symptoms(response.json(), start, end)
symptoms = response.json()


### Preparing Label Data

In [None]:
import pandas as pd
import numpy as np

# One row per telemetry sample; every sample starts out labelled as normal (0).
annotation_df = pd.DataFrame()
annotation_df['timestamp'] = historical_telemetry_df['timestamp']
annotation_df['label'] = 0

for symptom in symptoms:
    # Build both bounds once as UTC timestamps and reuse them below.
    symptom_start = pd.Timestamp(symptom['start-time'], unit="s", tz="UTC")
    symptom_end = pd.Timestamp(symptom['end-time'], unit="s", tz="UTC")

    # Symptoms lasting more than one day (86400 s) are not used as labels.
    if (symptom_end.timestamp() - symptom_start.timestamp()) > 86400:
        continue

    # Flag every sample falling inside the symptom interval (bounds included).
    annotation_df.loc[annotation_df['timestamp'].between(symptom_start, symptom_end), 'label'] = 1

In [None]:
# Display the (rows, columns) shape of the samples labelled as anomalous
annotation_df[annotation_df['label'] == 1].shape

### Load the current anomaly detector and generate the detection labels on the current data

In [None]:
import sys
sys.path.append('..')
from demo_anomaly_detector import autoencoder_detector
# Instantiate the demo detector with its default constructor
# NOTE(review): presumably this picks up the model currently "in production" - confirm in DemoAnomalyDetector
current_anomaly_detector = autoencoder_detector.DemoAnomalyDetector()
# Run detection over the filtered historical telemetry
network_anomalies_current_model = current_anomaly_detector.detect(historical_telemetry_df)

### Create an Anomaly Detector object and train it on the labels that were retrieved from Antagonist

In [None]:
import sys
sys.path.append('..')
from demo_anomaly_detector import autoencoder_detector
# Create a second detector instance and train it on the telemetry plus the labels built from Antagonist
new_anomaly_detector = autoencoder_detector.DemoAnomalyDetector()
# NOTE(review): force=True presumably forces training even if a model already exists - confirm in DemoAnomalyDetector.train
new_anomaly_detector.train(historical_telemetry_df, annotation_df, force=True)

In [None]:
# Run the newly trained detector on the same telemetry that was given to the current one
network_anomalies_new_model = new_anomaly_detector.detect(historical_telemetry_df)

### Compare the detector that was just trained with the one currently "in production"

In [None]:
import copy
import pandas as pd

def estimate_performance(network_anomalies, annotation_df):
    """Count false positives and false negatives of a detector's output.

    Each annotated sample is marked as predicted-anomalous when its timestamp
    falls inside at least one detected symptom interval (bounds included),
    and the prediction is then compared with the sample's expert label.

    Args:
        network_anomalies: iterable of detected anomalies. Element [2] of each
            anomaly is an iterable of symptoms, and elements [1] and [2] of
            each symptom are its start and end times in epoch seconds.
        annotation_df: DataFrame with a 'timestamp' column and a 'label'
            column (non-zero means anomalous). It is not modified.

    Returns:
        Tuple (false_positives, false_negatives), counted in annotated samples.
    """
    # utc=True makes the comparison below work for naive and aware columns alike.
    timestamps = pd.to_datetime(annotation_df['timestamp'], utc=True)
    actual = annotation_df['label'].fillna(0).astype(bool)
    predicted = pd.Series(False, index=annotation_df.index, dtype=bool)

    for network_anomaly in network_anomalies:
        for symptom in network_anomaly[2]:
            symptom_start_time = pd.Timestamp(symptom[1], unit="s", tz="UTC")
            symptom_end_time = pd.Timestamp(symptom[2], unit="s", tz="UTC")
            predicted |= (timestamps >= symptom_start_time) & (timestamps <= symptom_end_time)

    # Predictions are scored on the annotated samples themselves, so both
    # counts share the same unit and an empty detection list is valid input.
    false_positives = int((predicted & ~actual).sum())
    false_negatives = int((~predicted & actual).sum())
    return false_positives, false_negatives

# Score both detectors against the same annotations; each call returns (false positives, false negatives)
current_fp, current_fn = estimate_performance(network_anomalies_current_model, annotation_df)
new_fp, new_fn = estimate_performance(network_anomalies_new_model, annotation_df)

In [None]:
print(f"Current model: FP={current_fp}, FN={current_fn}")
print(f"New model: FP={new_fp}, FN={new_fn}")

# Very simple comparison strategy to select the best model: the new model wins
# only when its total number of errors (FP + FN) is strictly lower.
if new_fp + new_fn < current_fp + current_fn:
    network_anomalies = network_anomalies_new_model
    anomaly_detector = new_anomaly_detector
else:
    # If the new model is not selected, it needs to be deleted
    new_anomaly_detector.delete()
    # Keep the detections of the retained model too, so the cells that push
    # `network_anomalies` to Antagonist work on this path as well.
    network_anomalies = network_anomalies_current_model
    anomaly_detector = current_anomaly_detector

### Push the labels for the detected symptoms into Antagonist

In [None]:
def format_network_anomalies_for_antagonist(network_anomalies, tags, model_name):
    """Convert detected anomalies into the payloads pushed to Antagonist.

    Relies on the notebook-level ``db`` (metric names) and ``machine_id``
    (incident description).

    Args:
        network_anomalies: iterable of detected anomalies. Element [0] of an
            anomaly is an epoch timestamp used in the description, element [2]
            is the iterable of its symptoms. A symptom is indexed as
            [0] metric index, [1] start time, [2] end time (epoch seconds),
            [3] concern score, [4] confidence score.
        tags: base tags attached to every symptom. The dict is not modified.
        model_name: name reported as the source of the symptoms and as the
            author of the incidents.

    Returns:
        A list with one incident dict per anomaly, each carrying its symptom
        dicts under the "symptoms" key.
    """
    # NOTE(review): fromtimestamp() formats in the machine's local timezone
    # and the strings carry no offset - confirm what Antagonist expects.
    res = []
    for anomaly in network_anomalies:
        symptoms = []
        for symptom in anomaly[2]:
            # A fresh dict per symptom: sharing one dict would give every
            # symptom the metric of the last one and alter the caller's tags.
            symptom_tags = {**tags, "metric": db.get_metric_names()[symptom[0]]}
            symptom_dict = {
                'start-time': datetime.datetime.fromtimestamp(symptom[1]).strftime('%Y-%m-%dT%H:%M:%S'),
                'end-time': datetime.datetime.fromtimestamp(symptom[2]).strftime('%Y-%m-%dT%H:%M:%S'),
                "concern-score": symptom[3],
                "confidence-score": symptom[4],
                "description": "Symptom",
                "source-name": f"{model_name}",
                "source-type": "algorithm",
                "tags": symptom_tags,
                "action": "drop",
                "cause": "x",
                "reason": "x",
                "plane": "forwarding",
                "pattern": "",
            }
            symptoms.append(symptom_dict)

        anomaly_dict = {
            "author": {
                "author_type": "algorithm",
                # Use the parameter rather than the global detector, so the
                # author always matches the symptoms' "source-name".
                "name": model_name,
                "version": 1,
            },
            "description": f'Detected Network Anomaly on {machine_id} - {datetime.datetime.fromtimestamp(anomaly[0]).strftime("%Y-%m-%d at %H")}',
            "state": "incident-potential",
            "version": 1,
            "symptoms": symptoms
        }
        res.append(anomaly_dict)
    return res


# Base tags attached to every pushed symptom: the analysed machine and its group
tags = {'machine': machine_id, 'group': group}
# Build the Antagonist payloads for the detections of the selected detector
anomalies_to_store = format_network_anomalies_for_antagonist(network_anomalies, tags, anomaly_detector.get_model_name())

### Send data to Antagonist

In [None]:
### Send the data to Antagonist
for network_anomaly in anomalies_to_store:
    # The incident endpoint receives the anomaly without its symptoms; they
    # are persisted one by one below.
    symptoms = network_anomaly.pop("symptoms")
    response = requests.post(
        f"http://{ANTAGONIST_HOST}/api/rest/v1/incident", json=network_anomaly
    )
    response.raise_for_status()
    network_anomaly_id = response.json()

    # Create network symptoms labels and link with the network incident
    for symptom in symptoms:
        symptom['event-id'] = network_anomaly_id
        # Persist the Symptom
        response = requests.post(
            f"http://{ANTAGONIST_HOST}/api/rest/v1/symptom", json=symptom
        )
        response.raise_for_status()
        symptom_uuid = response.json()

        # Link the Symptom to the network anomaly: the link endpoint must
        # receive the id pair, not the symptom body again.
        sym_to_net = {"symptom-id": symptom_uuid, "incident-id": network_anomaly_id}
        response = requests.post(
            f"http://{ANTAGONIST_HOST}/api/rest/v1/incident/symptom", json=sym_to_net
        )
        response.raise_for_status()