# Results 
- Plot training/test telemetry values, predictions, smoothed errors, and predicted and actual anomalies
- A specified results file from the `results` dir is used to highlight anomalous regions; the corresponding telemetry data is read from the `data` dir

In [2]:
import sys
import numpy as np
import os
sys.path.insert(0, '..')
import telemanom.helpers as helpers
from telemanom.plotting import Plotter
import pandas as pd
import plotly as py
import matplotlib.pyplot as plt
from plotly.offline import download_plotlyjs, init_notebook_mode
import cufflinks as cf
import glob
# Put cufflinks and plotly into offline/notebook mode before any plotting cell runs.
cf.go_offline()
init_notebook_mode(connected=True)

# autoreload mode 2: re-import edited modules before each cell executes.
%load_ext autoreload
%autoreload 2

In [3]:
%%javascript
// Replace OutputArea's _should_scroll hook with one that always returns false
// (presumably so long cell outputs are shown in full instead of a scroll box).
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

## Examine labeled_anomalies.csv

In [4]:
# Inspect labeled_anomalies.csv for the SMAP channels and build a label array
# (same shape as the channel's test data) that is 1 inside each labelled
# anomaly sequence and 0 elsewhere.
with pd.option_context('display.max_colwidth', 400):
    la_file = "../labeled_anomalies.csv"
    values = pd.read_csv(la_file)
    values = values[values["spacecraft"] == "SMAP"]
    print(values.columns)
    print(values.head())
    print()
    fn = "P-1"
    # anomaly_sequences is stored as text, e.g. "[[2149, 2349], [4536, 4844]]".
    indices = values[values['chan_id'] == fn]["anomaly_sequences"].values[0]
    print(indices)
    print(type(indices))
    # Strip the brackets to get a flat list: start, end, start, end, ...
    indices = indices.replace(']', '').replace('[', '').split(', ')
    print(indices)
    print(type(indices))
    # Load relative to the repository root, like the other cells of this
    # notebook, instead of a machine-specific absolute path.
    val = np.load(os.path.join('..', 'data', 'test', '{}.npy'.format(fn)))
    labels = np.zeros(val.shape)
    indices = [int(i) for i in indices]
    # Walk the flat list as (start, end) pairs.
    for start, end in zip(indices[::2], indices[1::2]):
        print(f"[{start}:{end}, :]")
        labels[start:end, :] = 1

Index(['chan_id', 'spacecraft', 'anomaly_sequences', 'class', 'num_values'], dtype='object')
  chan_id spacecraft                           anomaly_sequences  \
0     P-1       SMAP  [[2149, 2349], [4536, 4844], [3539, 3779]]   
1     S-1       SMAP                              [[5300, 5747]]   
2     E-1       SMAP                [[5000, 5030], [5610, 6086]]   
3     E-2       SMAP                              [[5598, 6995]]   
4     E-3       SMAP                              [[5094, 8306]]   

                                  class  num_values  
0  [contextual, contextual, contextual]        8505  
1                               [point]        7331  
2              [contextual, contextual]        8516  
3                               [point]        8532  
4                               [point]        8307  

[[2149, 2349], [4536, 4844], [3539, 3779]]
<class 'str'>
['2149', '2349', '4536', '4844', '3539', '3779']
<class 'list'>
[2149:2349, :]
[4536:4844, :]
[3539:3779, :]


In [5]:
# Spot-check the label file for one channel: print the two rows at
# indices 2148 and 2149.
# NOTE(review): this is a machine-specific absolute path; confirm where the
# TranAD checkout lives before running on another machine.
fn = "P-1"
labels_path = f"/home/alexey/School/Research/submodules/TranAD/processed/SMAP/{fn}_labels.npy"
fn_labels = np.load(labels_path)
boundary_rows = fn_labels[2148:2150, :]
print(boundary_rows)

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1.]]


## Print Precision and Recall

In [6]:
run_ids = ["2018-05-19_15.00.10", "yes_avg", "no_avg"]


def f_beta(precision, recall, beta):
    """Return the F-beta score for the given precision and recall.

    Returns 0.0 when the denominator is zero (precision and recall both 0)
    instead of raising ZeroDivisionError.
    """
    denominator = (beta**2) * precision + recall
    if not denominator:
        return 0.0
    return (1. + beta**2) * (precision * recall) / denominator


# Print SMAP precision / recall / F-scores for each results file.
for run_id in run_ids:
    result_df = pd.read_csv(os.path.join('..', 'results', '{}.csv'.format(run_id)))

    print(run_id)

    # Select SMAP rows by spacecraft rather than by position: `.loc[:55]` is
    # label-based and end-inclusive, so it also summed row 55, which belongs
    # to MSL.
    smap_df = result_df[result_df["spacecraft"] == "SMAP"]
    sum_df = smap_df[["true_positives", "false_negatives", "false_positives"]].sum()

    true_pos = float(sum_df['true_positives'])
    false_pos = float(sum_df['false_positives'])
    false_neg = float(sum_df['false_negatives'])

    print("  SMAP:")
    # Guard the ratios so a run with no detections reports 0 instead of crashing.
    precision = true_pos / (true_pos + false_pos) if (true_pos + false_pos) else 0.0
    recall = true_pos / (true_pos + false_neg) if (true_pos + false_neg) else 0.0
    print('    Precision: {0:.2f}'.format(precision))
    print('    Recall: {0:.2f}'.format(recall))
    print('    F0.5: {0:.2f}'.format(f_beta(precision, recall, 0.5)))
    print('    F1.0: {0:.2f}'.format(f_beta(precision, recall, 1.0)))

    print()

2018-05-19_15.00.10
  SMAP:
    Precision: 0.84
    Recall: 0.90
    F0.5: 0.85
    F1.0: 0.87

yes_avg
  SMAP:
    Precision: 0.84
    Recall: 0.90
    F0.5: 0.85
    F1.0: 0.87

no_avg
  SMAP:
    Precision: 0.92
    Recall: 0.87
    F0.5: 0.91
    F1.0: 0.90



## Print SMAP and MSL streams

In [7]:
# List the channel ids recorded for each spacecraft in one results file.
run_id = "2018-05-19_15.00.10"
results_path = os.path.join('..', 'results', '{}.csv'.format(run_id))
result_df = pd.read_csv(results_path)

is_smap = result_df["spacecraft"] == "SMAP"
SMAP_IDs = result_df.loc[is_smap, ["chan_id"]]
print(f"SMAP ({len(SMAP_IDs)}):")
print(SMAP_IDs)
print()

is_msl = result_df["spacecraft"] == "MSL"
MSL_IDs = result_df.loc[is_msl, ["chan_id"]]
print(f"MSL ({len(MSL_IDs)}):")
print(MSL_IDs)

SMAP (55):
   chan_id
0      P-1
1      S-1
2      E-1
3      E-2
4      E-3
5      E-4
6      E-5
7      E-6
8      E-7
9      E-8
10     E-9
11    E-10
12    E-11
13    E-12
14    E-13
15     A-1
16     D-1
17     P-2
18     P-3
19     D-2
20     D-3
21     D-4
22     A-2
23     A-3
24     A-4
25     G-1
26     G-2
27     D-5
28     D-6
29     D-7
30     F-1
31     P-4
32     G-3
33     T-1
34     T-2
35     D-8
36     D-9
37     F-2
38     G-4
39     T-3
40    D-11
41    D-12
42     B-1
43     G-6
44     G-7
45     P-7
46     R-1
47     A-5
48     A-6
49     A-7
50    D-13
51     P-2
52     A-8
53     A-9
54     F-3

MSL (27):
   chan_id
55     M-6
56     M-1
57     M-2
58     S-2
59    P-10
60     T-4
61     T-5
62     F-7
63     M-3
64     M-4
65     M-5
66    P-15
67     C-1
68     C-2
69    T-12
70    T-13
71     F-4
72     F-5
73    D-14
74     T-9
75    P-14
76     T-8
77    P-11
78    D-15
79    D-16
80     M-7
81     F-8


In [8]:
# Print the shape of one channel's test array.
chan_id = "S-1"
# Load relative to the repository root, like the other cells of this notebook,
# instead of a machine-specific absolute path.
val = np.load(os.path.join('..', 'data', 'test', '{}.npy'.format(chan_id)))
print(val.shape)


(7331, 25)


## Select set of results to visualize

In [50]:
# Run id(s) used by the cells below. Swap in one of the commented
# alternatives to compare several runs side by side.
#run_ids = ["2018-05-19_15.00.10", "yes_avg", "no_avg"]
#run_ids = ["yes_avg", "no_avg"]
run_ids = ["2018-05-19_15.00.10"]

In [10]:
# Echo the parameter block at the top of each selected run's params.log.
for selected_run in run_ids:
    log_path = '../data/{}/params.log'.format(selected_run)
    with open(log_path, 'r') as log_file:
        print(selected_run)
        for line_no, line in enumerate(log_file):
            # A line shorter than two characters ends the parameter block.
            if len(line) < 2:
                break
            # The first line of the log is not printed.
            if line_no == 0:
                continue
            # Drop the first three space-separated tokens
            # (presumably the logger prefix; confirm against params.log).
            fields = line.split(' ')
            print(' '.join(fields[3:]).replace('\n', ''))

2018-05-19_15.00.10
----------------
batch_size: 70
dropout: 0.3
epochs: 35
error_buffer: 100
l_s: 250
layers: [80, 80]
loss_metric: mse
lstm_batch_size: 64
min_delta: 0.0003
n_predictions: 10
optimizer: adam
p: 0.13
patience: 10
predict: False
smoothing_perc: 0.05
train: False
validation_split: 0.2
window_size: 30
----------------


## Summary of results

In [11]:
# Print the overall result summary for every selected run.
for selected_run in run_ids:
    print(selected_run)
    plotter = Plotter(selected_run)
    plotter.all_result_summary()
    print()

2018-05-19_15.00.10
True Positives: 87
False Positives: 13
False Negatives: 18

Precision: 0.87
Recall: 0.83



## Interactive inline Plotly charts for viewing `y_test`, `y_hat`, and `smoothed errors (e_s)`
- **Blue** highlighted regions indicate anomalous sequences detected by the system
- If available, **Red** highlighted regions indicate true anomalous regions
- Can also optionally plot training data by setting `plot_train=True`

In [48]:
# Plot one channel (with errors) for every selected run.
for selected_run in run_ids:
    channel = 'T-5'
    print(selected_run)
    plotter = Plotter(selected_run)
    # plotter.channel_result_summary(channel) is intentionally left disabled here.
    plotter.plot_channel(channel, plot_errors=True, plot_both=True)
    print()

2018-05-19_15.00.10
Train shape 50.60%: (2272, 55)
Test shape 49.40%: (2218, 55)
Spacecraft: MSL
Channel: T-5
Normalized prediction error: 0.01
Anomaly class(es): [point]
------------------
True Positives: 1
False Positives: 0
False Negatives: 0
------------------
Predicted anomaly scores: [12.319131674587176]
Number of values: 2218





## Print Percentage of Track as Train/Test Data

In [61]:
# Report, per channel, what share of its rows is in the train and test arrays.
# Only *.npy entries are channels; any other directory entry (e.g. a dotfile)
# would otherwise become a bogus channel id and crash np.load.
files = sorted(
    os.path.splitext(entry)[0]
    for entry in os.listdir('../data/test')
    if entry.endswith('.npy')
)

for channel_id in files:
    plot_values = {
        'test': np.load(os.path.join('..', 'data', 'test', '{}.npy'
                                        .format(channel_id))),
        'train': np.load(os.path.join('..', 'data', 'train', '{}.npy'
                                        .format(channel_id)))
    }

    # Row counts (first dimension) of each split.
    train_data = plot_values['train'].shape[0]
    test_data = plot_values['test'].shape[0]
    total_data = train_data+test_data
    print(f"ID {channel_id}:")
    print(f"    Train shape {train_data/total_data*100:>5,.2f}%:", plot_values['train'].shape)
    print(f"    Test shape  {test_data/total_data*100:>5,.2f}%:", plot_values['test'].shape)

ID A-1:
    Train shape 25.00%: (2880, 25)
    Test shape  75.00%: (8640, 25)
ID A-2:
    Train shape 25.07%: (2648, 25)
    Test shape  74.93%: (7914, 25)
ID A-3:
    Train shape 25.01%: (2736, 25)
    Test shape  74.99%: (8205, 25)
ID A-4:
    Train shape 24.98%: (2690, 25)
    Test shape  75.02%: (8080, 25)
ID A-5:
    Train shape 13.06%: (705, 25)
    Test shape  86.94%: (4693, 25)
ID A-6:
    Train shape 13.28%: (682, 25)
    Test shape  86.72%: (4453, 25)
ID A-7:
    Train shape 25.01%: (2879, 25)
    Test shape  74.99%: (8631, 25)
ID A-8:
    Train shape  8.34%: (762, 25)
    Test shape  91.66%: (8375, 25)
ID A-9:
    Train shape  8.29%: (762, 25)
    Test shape  91.71%: (8434, 25)
ID B-1:
    Train shape 23.24%: (2435, 25)
    Test shape  76.76%: (8044, 25)
ID C-1:
    Train shape 48.80%: (2158, 55)
    Test shape  51.20%: (2264, 55)
ID C-2:
    Train shape 27.14%: (764, 55)
    Test shape  72.86%: (2051, 55)
ID D-1:
    Train shape 25.08%: (2849, 25)
    Test shape  74.92%: (8

## Get Channel Result Information

In [62]:

# Show the stored result row(s) for one channel of the first selected run.
run_id = run_ids[0]
channel_id = 'T-5'
results_path = os.path.join('..', 'results', '{}.csv'.format(run_id))
result_df = pd.read_csv(results_path)
channel_rows = result_df.loc[result_df['chan_id'] == channel_id]
display(channel_rows)


Unnamed: 0,run_id,chan_id,num_train_values,num_test_values,n_predicted_anoms,normalized_pred_error,anom_scores,false_positives,false_negatives,true_positives,fp_sequences,tp_sequences,num_true_anoms,scores,spacecraft,anomaly_sequences,class
61,2018-05-19_15.00.10,T-5,2012,1958,1,0.00788,[12.319131674587176],0,0,1,[],"[(1114, 1381)]",1,[12.319131674587176],MSL,"[[1200, 1225]]",[point]
