# Results 
- Plot training/test telemetry values, predictions, smoothed errors, and predicted and actual anomalies
- A results file from the `results` dir (selected by run ID) supplies the anomalous regions to highlight; the telemetry itself is read from the `data` dir

In [1]:
# Imports

# Update paths for custom modules
import os
import sys

# Parent directory of this notebook; presumably the repository root that
# contains the `telemanom` package imported below.
sys.path.insert(0, '..')
# Directory holding the external research submodules. Set the
# RESEARCH_SUBMODULES_DIR environment variable to point elsewhere on another
# machine; the original author's location stays the default so existing
# setups behave exactly as before.
sys.path.insert(0, os.environ.get('RESEARCH_SUBMODULES_DIR',
                                  '/home/alexey/School/Research/submodules'))

import numpy as np
import os
import telemanom.helpers as helpers
from telemanom.plotting import Plotter
import pandas as pd
import plotly as py
import matplotlib.pyplot as plt
from plotly.offline import download_plotlyjs, init_notebook_mode
import cufflinks as cf
import glob
from tqdm import tqdm
import pickle
from mypkg import *

# Put cufflinks into offline mode so charts render inside the notebook
# (per the cufflinks docs; no Plotly cloud account involved).
cf.go_offline()
# Initialise Plotly's notebook integration. NOTE(review): connected=True is
# documented to load plotly.js from a CDN instead of embedding it - confirm
# this is acceptable when working without network access.
init_notebook_mode(connected=True)

# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell runs, so edits to the helper packages are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [2]:
%%javascript
// Replace the output area's scroll hook with one that always answers "no",
// whatever line count it is given. Presumably this keeps long cell outputs
// fully expanded instead of inside a scroll box - TODO confirm; the global
// `IPython.OutputArea` may not exist outside the classic Notebook front end.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

## Examine labeled_anomalies.csv

In [3]:
def examine_labels(la_file="../labeled_anomalies.csv"):
    """Display the labeled-anomalies table in truncated form.

    Parameters
    ----------
    la_file : str or path-like, optional
        CSV file to load. Defaults to ``../labeled_anomalies.csv``, the
        location this notebook originally hard-coded, so calling the
        function without arguments behaves as before.

    Notes
    -----
    The frame is rendered through the notebook's ``display`` with pandas
    limited to 4 rows and 40 columns. The limits are applied through
    ``pd.option_context`` and therefore only last for this call.
    """
    values = pd.read_csv(la_file)
    # Temporarily cap the rendered size; global pandas options are restored
    # as soon as the block exits.
    with pd.option_context('display.max_rows', 4, 'display.max_columns', 40):
        # Show the (truncated) table
        display(values)

# Toggle: set to False to skip the label overview when re-running the notebook.
if True:
    examine_labels()
    # One blank separator line before each per-spacecraft summary, MSL first.
    for spacecraft in ('MSL', 'SMAP'):
        print()
        print_anomaly_stats(spacecraft)

Unnamed: 0,chan_id,spacecraft,anomaly_sequences,class,num_values
0,P-1,SMAP,"[[2149, 2349], [4536, 4844], [3539, 3779]]","[contextual, contextual, contextual]",8505
1,S-1,SMAP,"[[5300, 5747]]",[point],7331
...,...,...,...,...,...
80,M-7,MSL,"[[940, 1040]]",[point],2156
81,F-8,MSL,"[[1950, 2486]]",[contextual],2487



MSL:
       anomaly_sequences
count          27.000000
mean            1.333333
std             0.554700
min             1.000000
25%             1.000000
50%             1.000000
75%             2.000000
max             3.000000
Total anomalies: 36

SMAP:
       anomaly_sequences
count          55.000000
mean            1.254545
std             0.584307
min             1.000000
25%             1.000000
50%             1.000000
75%             1.000000
max             3.000000
Total anomalies: 69


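## Inspect TranAD labels for P-1 (original purpose not recorded)
- Prints rows 2148–2149 of the processed SMAP `P-1` label array; index 2149 is the start of P-1's first labeled anomaly (`[2149, 2349]`), so this looks like a check that the labels line up with `labeled_anomalies.csv`.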

In [4]:
def no_idea(fn="P-1", start=2148, stop=2150,
            labels_dir="/home/alexey/School/Research/submodules/TranAD/processed/SMAP"):
    """Print a window of rows from a channel's ``<fn>_labels.npy`` array.

    All arguments default to the values that were previously hard-coded, so
    ``no_idea()`` still prints rows 2148-2149 of the SMAP ``P-1`` labels.

    Parameters
    ----------
    fn : str, optional
        Channel identifier used as the file-name prefix.
    start, stop : int, optional
        Half-open row range ``[start, stop)`` to print; every column is shown.
    labels_dir : str, optional
        Directory containing the ``*_labels.npy`` files.
    """
    fn_labels = np.load(f"{labels_dir}/{fn}_labels.npy")
    # The array is indexed on two axes, so it must be 2-D: select rows only.
    print(fn_labels[start:stop, :])

# Toggle: set to False to skip loading the label file when re-running the notebook.
if True:
    no_idea()

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1.]]


## Print Precision and Recall

In [5]:
# Toggle: set to False to skip the precision/recall report.
if True:
    # Same report for each spacecraft, each under a "<name>:" heading.
    for spacecraft in ('MSL', 'SMAP'):
        print(f"{spacecraft}:")
        print_prec_rec_sc(spacecraft)

MSL:
    Precision: 0.69
    Recall: 0.96
    F0.5: 0.74
    F1.0: 0.81
SMAP:
    Precision: 0.90
    Recall: 0.84
    F0.5: 0.89
    F1.0: 0.87


## Print SMAP and MSL streams

In [6]:

# Toggle: list the channel IDs of each spacecraft (drop a name from the tuple
# to skip that spacecraft).
if True:
    for spacecraft in ('SMAP', 'MSL'):
        chans = get_sc_from_chan_id_all(spacecraft)
        print(f"{spacecraft}: {len(chans)}")
        print(chans)

# Toggle: reverse lookup - which spacecraft does a single channel belong to?
if True:
    chan_id = "A-1"
    sc = get_sc_from_chan_id(chan_id)
    print("Channel {} is {}".format(chan_id, sc))
    

SMAP: 55
['P-1', 'S-1', 'E-1', 'E-2', 'E-3', 'E-4', 'E-5', 'E-6', 'E-7', 'E-8', 'E-9', 'E-10', 'E-11', 'E-12', 'E-13', 'A-1', 'D-1', 'P-2', 'P-3', 'D-2', 'D-3', 'D-4', 'A-2', 'A-3', 'A-4', 'G-1', 'G-2', 'D-5', 'D-6', 'D-7', 'F-1', 'P-4', 'G-3', 'T-1', 'T-2', 'D-8', 'D-9', 'F-2', 'G-4', 'T-3', 'D-11', 'D-12', 'B-1', 'G-6', 'G-7', 'P-7', 'R-1', 'A-5', 'A-6', 'A-7', 'D-13', 'P-2', 'A-8', 'A-9', 'F-3']
MSL: 27
['M-6', 'M-1', 'M-2', 'S-2', 'P-10', 'T-4', 'T-5', 'F-7', 'M-3', 'M-4', 'M-5', 'P-15', 'C-1', 'C-2', 'T-12', 'T-13', 'F-4', 'F-5', 'D-14', 'T-9', 'P-14', 'T-8', 'P-11', 'D-15', 'D-16', 'M-7', 'F-8']
Channel A-1 is SMAP


## Summary of parameters and results

In [7]:
# Run(s) to summarise. `run_ids` is read again by later cells, so changing it
# here changes what they show. Alternatives used previously:
#run_ids = ["2018-05-19_15.00.10", "yes_avg", "no_avg"]
#run_ids = ["yes_avg", "no_avg"]
run_ids = ["2018-05-19_15.00.10"]

# Toggle: print the stored run-time parameters of every selected run.
if True:
    print("Run-time parameters:")
    for run_id in run_ids:
        print(f"{run_id}:\n")
        print_run_params(run_id)
        print()

# Toggle: print TP/FP/FN counts followed by precision/recall for every run.
if True:
    print("Results summary:")
    for run_id in run_ids:
        print(run_id)
        print_run_tp_fp_fn(run_id)
        print()
        print_run_prec_rec_all(run_id)
        print()


Run-time parameters:
2018-05-19_15.00.10:

batch_size: 70
dropout: 0.3
epochs: 35
error_buffer: 100
l_s: 250
layers: [80, 80]
loss_metric: mse
lstm_batch_size: 64
min_delta: 0.0003
n_predictions: 10
optimizer: adam
p: 0.13
patience: 10
predict: False
smoothing_perc: 0.05
train: False
validation_split: 0.2
window_size: 30

Results summary:
2018-05-19_15.00.10
    True Positives: 87
    False Negatives: 18
    False Positives: 13

Total:
    Precision: 0.83
    Recall: 0.87
    F0.5: 0.84
    F1.0: 0.85



## Interactive inline Plotly charts for viewing `y_test`, `y_hat`, and `smoothed errors (e_s)`
- **Blue** highlighted regions indicate anomalous sequences detected by the system
- If available, **Red** highlighted regions indicate true anomalous regions
- Can also optionally plot training data by setting `plot_train=True`

In [8]:
# Plot channel 'P-1' with a helper that is not defined in this notebook
# (presumably provided by the `mypkg` star import). plot_params=[0, 20]
# presumably selects which telemetry parameters to draw - TODO confirm.
plot_channel_split_params('P-1', plot_params=[0, 20], plot_both=True)

Parsing tracks for spacecraft SMAP.


In [9]:
# Plot one channel for every selected run.
run_ids = ["2018-05-19_15.00.10"]
# The channel is the same for every run, so it is set once, outside the loop.
channel = 'A-1'
for run_id in run_ids:
    print(run_id)
    plotter = Plotter(run_id)
    # Disabled alternative kept from the original cell:
    #plotter.channel_result_summary(channel)
    plotter.plot_channel(channel, plot_errors=True, plot_both=False, plot_test=True)
    print()

2018-05-19_15.00.10
Train shape 25.00%: (2880, 25)
Test shape 75.00%: (8640, 25)
Spacecraft: SMAP
Channel: A-1
Normalized prediction error: 0.01
Anomaly class(es): [point]
------------------
True Positives: 1
False Positives: 0
False Negatives: 0
------------------


Unnamed: 0,run_id,chan_id,num_train_values,num_test_values,n_predicted_anoms,normalized_pred_error,anom_scores,false_positives,false_negatives,true_positives,fp_sequences,tp_sequences,num_true_anoms,scores,spacecraft,anomaly_sequences,class
15,2018-05-19_15.00.10,A-1,2620,8380,1,0.012467,[3.2014438937526872],0,0,1,[],"[(4730, 4869)]",1,[3.2014438937526872],SMAP,"[[4690, 4774]]",[point]


Predicted anomaly scores: [3.2014438937526872]
Number of values: 8640





## Print Percentage of Track as Train/Test Data

In [10]:
# Train/test split percentages; 'MSL_SMAP' appears to be the combined set of
# both spacecraft (the output reports 82 tracks = 27 MSL + 55 SMAP).
print_split_stats(sc='MSL_SMAP')

MSL_SMAP training percentages:
        Training
count  82.000000
mean   30.263684
std    11.379793
min     3.791009
25%    25.008399
50%    25.288671
75%    33.525108
max    58.333333

MSL_SMAP testing percentages:
         Testing
count  82.000000
mean   69.736316
std    11.379793
min    41.666667
25%    66.474892
50%    74.711329
75%    74.991601
max    96.208991



## Get Channel Result Information

In [11]:

# Show the stored result row(s) for one channel of the first selected run.
run_id = run_ids[0]
channel_id = 'T-5'
# Each run's results live in ../results/<run_id>.csv.
result_df = pd.read_csv(os.path.join('..', 'results', f'{run_id}.csv'))
display(result_df.loc[result_df['chan_id'] == channel_id])


Unnamed: 0,run_id,chan_id,num_train_values,num_test_values,n_predicted_anoms,normalized_pred_error,anom_scores,false_positives,false_negatives,true_positives,fp_sequences,tp_sequences,num_true_anoms,scores,spacecraft,anomaly_sequences,class
61,2018-05-19_15.00.10,T-5,2012,1958,1,0.00788,[12.319131674587176],0,0,1,[],"[(1114, 1381)]",1,[12.319131674587176],MSL,"[[1200, 1225]]",[point]


## Parse data

In [12]:
# Toggle: set to False to skip (re-)parsing the tracks.
if True:
    # SMAP first, then MSL - same order as before.
    for spacecraft in ('SMAP', 'MSL'):
        parse_tracks(spacecraft)


Parsing tracks for spacecraft SMAP.
Parsing 55 tracks with 25 parameters.


Iterating over tracks: 100%|██████████| 55/55 [00:00<00:00, 1027.62it/s]

Parsing tracks for spacecraft MSL.





Parsing 27 tracks with 55 parameters.


Iterating over tracks: 100%|██████████| 27/27 [00:00<00:00, 919.50it/s]


## Print NaN info

In [13]:
# NaN report per spacecraft, with a blank line between the two sections.
for position, spacecraft in enumerate(('MSL', 'SMAP')):
    if position:
        print()
    print(f"{spacecraft}:")
    print_nan_info(spacecraft)


MSL:
Parameters without any nans (55):
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54]
Parameters with some nans (0):
[]
Parameters with all nans (0):
[]
Byte parameters (0):
[]

SMAP:
Parameters without any nans (25):
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
Parameters with some nans (0):
[]
Parameters with all nans (0):
[]
Byte parameters (0):
[]


In [14]:
# Track-length statistics per spacecraft, then for the combined set.
# The combined 'MSL_SMAP' section now gets the same blank separator and
# heading as the other two; before, its table ran straight on from the SMAP
# output with nothing to tell the two apart.
for position, spacecraft in enumerate(('MSL', 'SMAP', 'MSL_SMAP')):
    if position:
        print()
    print(spacecraft)
    print_length_stats(spacecraft)

MSL
                  0
count     27.000000
mean    4890.592593
std     2035.411795
min     1535.000000
25%     3628.000000
50%     4422.000000
75%     6064.500000
max    10408.000000
Total data points: 132046

SMAP
                  0
count     55.000000
mean   10633.818182
std     1286.217266
min     5135.000000
25%    10308.500000
50%    11174.000000
75%    11395.000000
max    11521.000000
Total data points: 584860
                  0
count     82.000000
mean    8742.756098
std     3131.666974
min     1535.000000
25%     5706.750000
50%    10400.000000
75%    11301.750000
max    11521.000000
Total data points: 716906
