# Results 
- Plot training/test telemetry values, predictions, smoothed errors, and predicted and actual anomalies
- A specified results file from the `results` dir is used to highlight anomalous regions and read in data from the `data` dir

In [29]:
# Imports

# Update paths for custom modules
import sys
sys.path.insert(0, '..')
sys.path.insert(0, '/home/alexey/School/Research/submodules')

import numpy as np
import os
import telemanom.helpers as helpers
from telemanom.plotting import Plotter
import pandas as pd
import plotly as py
import matplotlib.pyplot as plt
from plotly.offline import download_plotlyjs, init_notebook_mode
import cufflinks as cf
import glob
from tqdm import tqdm
import pickle
from mypkg import *

# Can remove later:
print(get_sc_from_chan_id("P-1"))
plot_channel_params('A-1', plot_both=True)

cf.go_offline()
init_notebook_mode(connected=True)

%load_ext autoreload
%autoreload 2

SMAP


<class 'plotly.graph_objs._figure.Figure'>


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
%%javascript
// Replace the output area's _should_scroll check with one that always answers
// "no": the `lines` argument is ignored and false is returned unconditionally.
// Presumably this keeps long cell outputs fully expanded instead of being
// collapsed into a scroll box -- confirm against the notebook front end in use.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

## Examine labeled_anomalies.csv

In [6]:
def examine_labels():
    """Render ``../labeled_anomalies.csv`` as a truncated table.

    The CSV is read with pandas and shown with ``display`` while the pandas
    display limits are temporarily set to 4 rows and 40 columns.
    """
    labels_df = pd.read_csv("../labeled_anomalies.csv")
    display_limits = ('display.max_rows', 4, 'display.max_columns', 40)
    with pd.option_context(*display_limits):
        display(labels_df)

if True:
    examine_labels()
    # One blank line, then the anomaly statistics, for each spacecraft in turn
    for spacecraft_name in ('MSL', 'SMAP'):
        print()
        print_anomaly_stats(spacecraft_name)

Unnamed: 0,chan_id,spacecraft,anomaly_sequences,class,num_values
0,P-1,SMAP,"[[2149, 2349], [4536, 4844], [3539, 3779]]","[contextual, contextual, contextual]",8505
1,S-1,SMAP,"[[5300, 5747]]",[point],7331
...,...,...,...,...,...
80,M-7,MSL,"[[940, 1040]]",[point],2156
81,F-8,MSL,"[[1950, 2486]]",[contextual],2487



MSL:
       anomaly_sequences
count          27.000000
mean            1.333333
std             0.554700
min             1.000000
25%             1.000000
50%             1.000000
75%             2.000000
max             3.000000
Total anomalies: 36

SMAP:
       anomaly_sequences
count          55.000000
mean            1.254545
std             0.584307
min             1.000000
25%             1.000000
50%             1.000000
75%             1.000000
max             3.000000
Total anomalies: 69


## Spot-check TranAD labels for P-1 at rows 2148–2149 (where its labeled anomaly [2149, 2349] begins)

In [7]:
def no_idea():
    """Print rows 2148 and 2149 (all columns) of the P-1 label array.

    The array is loaded from the TranAD ``processed/SMAP`` directory; the
    path is absolute and specific to the original author's machine.
    """
    fn = "P-1"
    labels_path = f"/home/alexey/School/Research/submodules/TranAD/processed/SMAP/{fn}_labels.npy"
    label_rows = np.load(labels_path)[2148:2150, :]
    print(label_rows)

if True:
    no_idea()

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1.]]


## Print Precision and Recall

In [8]:
def test():
    """Print the summed TP/FN/FP counts over the SMAP rows of one results CSV.

    Reads ``../results/2018-05-19_15.00.10.csv``, keeps the rows whose
    ``spacecraft`` column equals "SMAP", sums the three count columns, and
    prints the resulting Series followed by the false-negative total alone.
    """
    run_id = "2018-05-19_15.00.10"
    csv_path = os.path.join('..', 'results', '{}.csv'.format(run_id))
    results = pd.read_csv(csv_path)

    is_smap = results["spacecraft"] == "SMAP"
    count_columns = ["true_positives", "false_negatives", "false_positives"]
    totals = results.loc[is_smap, count_columns].sum()

    print(totals)
    print(totals['false_negatives'])

test()

true_positives     62
false_negatives     7
false_positives    12
dtype: int64
7


In [9]:
# For every run ID, print the MSL block and then the SMAP block.
# print_prec_rec_sc is defined outside this notebook (presumably in mypkg --
# TODO confirm); judging by the captured output it prints precision, recall,
# F0.5 and F1.0 for the given run and spacecraft.
# `if True:` is a manual on/off switch for this cell.
if True:
    run_ids = ["2018-05-19_15.00.10"]
    # Alternative run list, kept for switching by hand:
    #run_ids = ["2018-05-19_15.00.10", "yes_avg", "no_avg"]
    for run_id in run_ids:
        print("MSL:")
        print_prec_rec_sc(run_id, 'MSL')
        print("SMAP:")
        print_prec_rec_sc(run_id, 'SMAP')

MSL:
    Precision: 0.69
    Recall: 0.96
    F0.5: 0.74
    F1.0: 0.81
SMAP:
    Precision: 0.90
    Recall: 0.84
    F0.5: 0.89
    F1.0: 0.87


## Print SMAP and MSL streams

In [10]:

# Each `if True:` below is a manual on/off switch for one part of the cell.

# List the channels reported for SMAP, preceded by their count.
# NOTE(review): the captured output lists 'P-2' twice, which would explain
# 55 entries here versus the 54 SMAP tracks parsed later -- confirm against
# labeled_anomalies.csv.
if True:
    chans = get_sc_from_chan_id_all('SMAP')
    print(f"SMAP: {len(chans)}")
    print(chans)

# Same listing for MSL
if True:
    chans = get_sc_from_chan_id_all('MSL')
    print(f"MSL: {len(chans)}")
    print(chans)

# Reverse lookup: which spacecraft a single channel belongs to
if True:
    chan_id = "A-1"
    sc = get_sc_from_chan_id(chan_id)
    print(f"Channel {chan_id} is {sc}")
    

SMAP: 55
['P-1', 'S-1', 'E-1', 'E-2', 'E-3', 'E-4', 'E-5', 'E-6', 'E-7', 'E-8', 'E-9', 'E-10', 'E-11', 'E-12', 'E-13', 'A-1', 'D-1', 'P-2', 'P-3', 'D-2', 'D-3', 'D-4', 'A-2', 'A-3', 'A-4', 'G-1', 'G-2', 'D-5', 'D-6', 'D-7', 'F-1', 'P-4', 'G-3', 'T-1', 'T-2', 'D-8', 'D-9', 'F-2', 'G-4', 'T-3', 'D-11', 'D-12', 'B-1', 'G-6', 'G-7', 'P-7', 'R-1', 'A-5', 'A-6', 'A-7', 'D-13', 'P-2', 'A-8', 'A-9', 'F-3']
MSL: 27
['M-6', 'M-1', 'M-2', 'S-2', 'P-10', 'T-4', 'T-5', 'F-7', 'M-3', 'M-4', 'M-5', 'P-15', 'C-1', 'C-2', 'T-12', 'T-13', 'F-4', 'F-5', 'D-14', 'T-9', 'P-14', 'T-8', 'P-11', 'D-15', 'D-16', 'M-7', 'F-8']
Channel A-1 is SMAP


## Summary of parameters and results

In [6]:
# Run IDs to summarise; the commented-out lists are alternatives kept for
# switching by hand.
#run_ids = ["2018-05-19_15.00.10", "yes_avg", "no_avg"]
#run_ids = ["yes_avg", "no_avg"]
run_ids = ["2018-05-19_15.00.10"]

# Part 1: print the run-time parameters of every run.
# print_run_params is defined outside this notebook (presumably mypkg --
# TODO confirm).
if True:
    print("Run-time parameters:")
    for run_id in run_ids:
        print(f"{run_id}:\n")
        print_run_params(run_id)
        print()

# Part 2: for every run, print the TP/FN/FP totals and then the overall
# precision/recall/F-scores (as seen in the captured output). Both helpers
# are defined outside this notebook.
if True:
    print("Results summary:")
    for run_id in run_ids:
        print(run_id)
        print_run_tp_fp_fn(run_id)
        print()
        print_prec_rec_all(run_id)
        print()


Run-time parameters:
2018-05-19_15.00.10:

batch_size: 70
dropout: 0.3
epochs: 35
error_buffer: 100
l_s: 250
layers: [80, 80]
loss_metric: mse
lstm_batch_size: 64
min_delta: 0.0003
n_predictions: 10
optimizer: adam
p: 0.13
patience: 10
predict: False
smoothing_perc: 0.05
train: False
validation_split: 0.2
window_size: 30

Results summary:
2018-05-19_15.00.10
    True Positives: 87
    False Negatives: 18
    False Positives: 13

Total:
    Precision: 0.83
    Recall: 0.87
    F0.5: 0.84
    F1.0: 0.85



## Interactive inline Plotly charts for viewing `y_test`, `y_hat`, and `smoothed errors (e_s)`
- **Blue** highlighted regions indicate anomalous sequences detected by the system
- If available, **Red** highlighted regions indicate true anomalous regions
- Can also optionally plot training data by setting `plot_train=True`

In [17]:
# Plot one channel for every run ID.
run_ids = ["2018-05-19_15.00.10"]
# The channel does not depend on the run, so it is set once outside the loop.
channel = 'A-1'
# Iterate over the IDs directly, as the other cells in this notebook do,
# instead of indexing with range(len(...)).
for run_id in run_ids:
    print(run_id)
    plotter = Plotter(run_id)
    #plotter.channel_result_summary(channel)
    plotter.plot_channel(channel, plot_errors=True, plot_both=False, plot_test=True)
    print()

2018-05-19_15.00.10
Train shape 25.00%: (2880, 25)
Test shape 75.00%: (8640, 25)
Spacecraft: SMAP
Channel: A-1
Normalized prediction error: 0.01
Anomaly class(es): [point]
------------------
True Positives: 1
False Positives: 0
False Negatives: 0
------------------
Predicted anomaly scores: [3.2014438937526872]
Number of values: 8640





## Print Percentage of Track as Train/Test Data

In [None]:
# Channel IDs are the file names in ../data/test up to the first '.'
files = sorted(f.split('.')[0] for f in os.listdir('../data/test'))

for channel_id in files:
    # Load both splits of this channel, keyed by split name
    plot_values = {}
    for split in ('test', 'train'):
        split_path = os.path.join('..', 'data', split, '{}.npy'.format(channel_id))
        plot_values[split] = np.load(split_path)

    # Row counts of each split and their sum
    train_data = plot_values['train'].shape[0]
    test_data = plot_values['test'].shape[0]
    total_data = train_data + test_data

    # Report each split's share of the rows alongside the array shape
    print(f"ID {channel_id}:")
    print(f"    Train shape {train_data/total_data*100:>5,.2f}%:", plot_values['train'].shape)
    print(f"    Test shape  {test_data/total_data*100:>5,.2f}%:", plot_values['test'].shape)

ID A-1:
    Train shape 25.00%: (2880, 25)
    Test shape  75.00%: (8640, 25)
ID A-2:
    Train shape 25.07%: (2648, 25)
    Test shape  74.93%: (7914, 25)
ID A-3:
    Train shape 25.01%: (2736, 25)
    Test shape  74.99%: (8205, 25)
ID A-4:
    Train shape 24.98%: (2690, 25)
    Test shape  75.02%: (8080, 25)
ID A-5:
    Train shape 13.06%: (705, 25)
    Test shape  86.94%: (4693, 25)
ID A-6:
    Train shape 13.28%: (682, 25)
    Test shape  86.72%: (4453, 25)
ID A-7:
    Train shape 25.01%: (2879, 25)
    Test shape  74.99%: (8631, 25)
ID A-8:
    Train shape  8.34%: (762, 25)
    Test shape  91.66%: (8375, 25)
ID A-9:
    Train shape  8.29%: (762, 25)
    Test shape  91.71%: (8434, 25)
ID B-1:
    Train shape 23.24%: (2435, 25)
    Test shape  76.76%: (8044, 25)
ID C-1:
    Train shape 48.80%: (2158, 55)
    Test shape  51.20%: (2264, 55)
ID C-2:
    Train shape 27.14%: (764, 55)
    Test shape  72.86%: (2051, 55)
ID D-1:
    Train shape 25.08%: (2849, 25)
    Test shape  74.92%: (8

## Get Channel Result Information

In [None]:

# Show the results-CSV row(s) of one channel for the first configured run.
# Relies on `run_ids` having been set by an earlier cell.
run_id = run_ids[0]
channel_id = 'T-5'
results_csv = os.path.join('..', 'results', '{}.csv'.format(run_id))
result_df = pd.read_csv(results_csv)
is_channel = result_df['chan_id'] == channel_id
display(result_df.loc[is_channel])


Unnamed: 0,run_id,chan_id,num_train_values,num_test_values,n_predicted_anoms,normalized_pred_error,anom_scores,false_positives,false_negatives,true_positives,fp_sequences,tp_sequences,num_true_anoms,scores,spacecraft,anomaly_sequences,class
61,2018-05-19_15.00.10,T-5,2012,1958,1,0.00788,[12.319131674587176],0,0,1,[],"[(1114, 1381)]",1,[12.319131674587176],MSL,"[[1200, 1225]]",[point]


In [None]:
# Get all of the useful information

# Settings read by parse_tracks_func and by the later cells that load its
# pickles.
# Results CSV (../results/<run_id>.csv) used to map channels to a spacecraft
run_id = "2018-05-19_15.00.10"
# Switch: parse_tracks_func is only called when this is True
parse_tracks = True
# Directory that receives the Markdown reports.
# NOTE(review): absolute, machine-specific path -- consider deriving it from
# the repository root so the notebook runs elsewhere.
parse_save_dir = '/home/alexey/School/Research/submodules/telemanom/EDA'
# Sub-directory that receives the pickled lists
parse_sub_dir = os.path.join(parse_save_dir, 'data')

# NOTE: T-10 is neither SMAP nor MSL
def _ensure_dir(path):
    """Create ``path`` (and any missing parents) and report which case applied."""
    if not os.path.exists(path):
        os.makedirs(path, exist_ok=True)
        print(f"The folder '{path}' has been created.")
    else:
        print(f"The folder '{path}' already exists.")


def parse_tracks_func(sc):
    """Summarise the training tracks of one spacecraft and persist the results.

    Reads the notebook-level settings ``run_id``, ``parse_save_dir`` and
    ``parse_sub_dir``.

    For every channel that the results CSV assigns to ``sc`` this records the
    training array's shape, its number of parameters (columns), its dtype and
    the number of NaN values in each parameter. First-seen shapes, parameter
    counts and dtypes, a per-parameter NaN description and the channel's
    spacecraft are written as Markdown tables to ``parse_save_dir``; the
    complete lists are pickled to ``parse_sub_dir``.

    Args:
        sc: Spacecraft name as it appears in the ``spacecraft`` column of the
            results CSV (the notebook uses 'SMAP' and 'MSL').

    Raises:
        Exception: if the train and test directories do not hold the same
            channel files, if a track's parameter count differs from the
            first track's, or if a track's dtype is not ``np.float64``.
    """
    print(f"Parsing tracks for spacecraft {sc}.")

    # Channel IDs are the file names up to the first '.'
    test_files = sorted(f.split('.')[0] for f in os.listdir('../data/test'))
    train_files = sorted(f.split('.')[0] for f in os.listdir('../data/train'))

    # Sanity check: same file names
    # Corollary: same number of training and testing tracks
    if test_files != train_files:
        raise Exception("Error: training and testing files aren't 1-to-1.")

    # Channels that the results file attributes to this spacecraft
    result_df = pd.read_csv(os.path.join('..', 'results', '{}.csv'.format(run_id)))
    sc_chan_ids = set(result_df.loc[result_df["spacecraft"] == sc, "chan_id"])

    # Keep only this spacecraft's channels (order stays sorted)
    train_files = [chan_id for chan_id in train_files if chan_id in sc_chan_ids]

    # Only the training arrays are analysed below, so the test arrays are not
    # loaded.
    train_data = [
        np.load(os.path.join('..', 'data', 'train', '{}.npy'.format(channel_id)))
        for channel_id in train_files
    ]

    # Constants (n_params populated at first hit)
    n_params = None
    n_tracks = len(train_data)

    # Values we're obtaining
    shapes = []  # Shape of every track
    params = []  # Number of parameters of every track
    types = []  # dtype of the track, repeated once per parameter
    num_nans = [[] for _ in range(n_tracks)]  # NaN count per parameter per track
    spacecraft = {}  # Mapping from channel ID to spacecraft

    _ensure_dir(parse_save_dir)
    _ensure_dir(parse_sub_dir)

    # All report files live in one `with` so they are closed even when one of
    # the sanity checks below raises.
    with open(os.path.join(parse_save_dir, f"shapes_{sc}.md"), 'w') as shapes_file, \
            open(os.path.join(parse_save_dir, f"params_{sc}.md"), 'w') as params_file, \
            open(os.path.join(parse_save_dir, f"types_{sc}.md"), 'w') as types_file, \
            open(os.path.join(parse_save_dir, f"nans_{sc}.md"), 'w') as nans_file, \
            open(os.path.join(parse_save_dir, f"spacecraft_{sc}.md"), 'w') as spacecraft_file:

        # Write header per file
        shapes_file.write("track | unique shape\n")
        shapes_file.write("--- | ---\n")

        params_file.write("track | unique number of parameters\n")
        params_file.write("--- | ---\n")

        types_file.write("track | parameter | unique type\n")
        types_file.write("--- | --- | ---\n")

        nans_file.write("track | parameter | description\n")
        nans_file.write("--- | --- | ---\n")

        spacecraft_file.write("track | spacecraft\n")
        spacecraft_file.write("--- | ---\n")

        # Iterate over tracks to obtain data
        for i in tqdm(range(n_tracks), desc="Iterating over tracks"):
            chan_id = train_files[i]
            track = train_data[i]

            # Record which spacecraft the channel belongs to
            spacecraft[chan_id] = sc
            spacecraft_file.write(f"{chan_id:<4} | {sc}\n")

            # Shape: only first occurrences go to the report, keyed by channel
            # ID like every other report.
            shape = track.shape
            if shape not in shapes:
                shapes_file.write(f"{chan_id:<4} | {shape}\n")
            shapes.append(shape)

            # Number of parameters
            n_track_params = shape[1]
            if n_params is None:
                n_params = n_track_params
                print(f"Parsing {n_tracks} tracks with {n_params} parameters")
            if n_track_params not in params:
                params_file.write(f"{chan_id:<4} | {n_track_params}\n")
            params.append(n_track_params)

            # Sanity check: each subsequent track should have the same number of parameters
            if n_params != n_track_params:
                raise Exception(f"ERROR: track {i} ({chan_id}) has {n_track_params} parameters but {n_params} was expected")

            # The dtype belongs to the whole array, so it is the same for
            # every parameter of this track.
            p_type = track.dtype

            for j in range(n_track_params):
                if p_type not in types:
                    types_file.write(f"{chan_id:<4} | {j:<4} | {p_type}\n")
                types.append(p_type)

                if p_type != np.float64:
                    raise Exception(f"Parameter type for track {i} ({chan_id}) is not np.float64, but is {p_type}.")

                # Count NaN and non-NaN values of this parameter
                column = track[:, j]
                nan_num = int(np.isnan(column).sum())
                num_num = column.shape[0] - nan_num

                if nan_num == 0:
                    desc = "no nan"
                elif num_num == 0:
                    desc = "all nan"
                else:
                    desc = f"{nan_num:>6}/{nan_num+num_num:<6} nan"
                nans_file.write(f"{chan_id:<4} | {j:<4} | {desc}\n")
                num_nans[i].append(nan_num)

    # Write counter variables as pickle files
    pickled = (
        (f'shapes_{sc}.pkl', shapes),
        (f'params_{sc}.pkl', params),
        (f'types_{sc}.pkl', types),
        (f'num_nans_{sc}.pkl', num_nans),
        (f'spacecraft_{sc}.pkl', spacecraft),
    )
    for file_name, payload in pickled:
        with open(os.path.join(parse_sub_dir, file_name), 'wb') as pkl_file:
            pickle.dump(payload, pkl_file)

if parse_tracks:
    parse_tracks_func('SMAP')
    print()
    parse_tracks_func('MSL')

# Sanity checks (exception raised if not)
# 1) Each parameter has the same type for each parameter across tracks
# 2) Each track has the same number of parameters
# 1+2) Therefore, each track has the same types and number of parameters
# 3) Each parameter has the same time for each time step
# 4) Each track has a unique track_id

# Notes:
# - There are two types of values: float64 and bytes
# - Each track may have a different number of time series values


Parsing tracks for spacecraft SMAP.
The folder '/home/alexey/School/Research/submodules/telemanom/EDA' already exists.
The folder '/home/alexey/School/Research/submodules/telemanom/EDA/data' already exists.


Iterating over tracks: 100%|██████████| 54/54 [00:00<00:00, 22194.26it/s]

Parsing 54 tracks with 25 parameters

Parsing tracks for spacecraft MSL.





The folder '/home/alexey/School/Research/submodules/telemanom/EDA' already exists.
The folder '/home/alexey/School/Research/submodules/telemanom/EDA/data' already exists.


Iterating over tracks: 100%|██████████| 27/27 [00:00<00:00, 13268.45it/s]

Parsing 27 tracks with 55 parameters





## Print nan info

In [None]:
def print_nan_info(sc):
    """Group the parameters of one spacecraft by how many NaNs they contain.

    Loads the ``num_nans``, ``shapes`` and ``types`` pickles for ``sc`` from
    ``parse_sub_dir`` (files written by ``parse_tracks_func`` in this
    notebook), sums the NaN counts of each parameter over all tracks, and
    prints four index lists: parameters with no NaNs, with some NaNs, with
    only NaNs, and parameters whose recorded type is not ``np.float64``.

    Raises:
        Exception: if the four groups do not add up to 55 parameters for
            'MSL' or 25 parameters for 'SMAP'.
    """
    print(parse_sub_dir)

    def _load_array(file_name):
        # Unpickle one saved list and return it as an ndarray
        with open(os.path.join(parse_sub_dir, file_name), 'rb') as fh:
            return np.asarray(pickle.load(fh))

    num_nans = _load_array(f'num_nans_{sc}.pkl')  # NaN counts per parameter per track
    shapes = _load_array(f'shapes_{sc}.pkl')  # Track shapes
    types = _load_array(f'types_{sc}.pkl')  # Recorded dtypes

    # Column sums give the NaN count of each parameter over all tracks
    nans_per_parameter = np.sum(num_nans, axis=0)
    # Total number of time steps over all tracks
    total_values = np.sum(shapes[:, 0])

    no_nans, some_nans, all_nans, byte_params = [], [], [], []

    for idx in range(nans_per_parameter.shape[0]):
        # Anything that is not float64 goes into the byte group
        if types[idx] != np.float64:
            byte_params.append(idx)
            continue

        nan_count = nans_per_parameter[idx]
        if nan_count == 0:
            no_nans.append(idx)
        elif nan_count == total_values:
            all_nans.append(idx)
        else:
            some_nans.append(idx)

    print(f"Parameters without any nans ({len(no_nans)}):")
    print(no_nans)
    print(f"Parameters with some nans ({len(some_nans)}):")
    print(some_nans)
    print(f"Parameters with all nans ({len(all_nans)}):")
    print(all_nans)
    print(f"Byte parameters ({len(byte_params)}):")
    print(byte_params)

    # Every parameter must have landed in exactly one group
    total_params = sum(len(group) for group in (no_nans, some_nans, all_nans, byte_params))
    if sc == 'MSL' and total_params != 55:
        raise Exception(f"Error: parameter groups don't add up to 55 for MSL, but add up to {total_params}")
    if sc == 'SMAP' and total_params != 25:
        raise Exception(f"Error: parameter groups don't add up to 25 for SMAP, but add up to {total_params}")

print("MSL:")
print_nan_info('MSL')
print()
print("SMAP:")
print_nan_info('SMAP')


MSL:
/home/alexey/School/Research/submodules/telemanom/EDA/data
Parameters without any nans (55):
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54]
Parameters with some nans (0):
[]
Parameters with all nans (0):
[]
Byte parameters (0):
[]

SMAP:
/home/alexey/School/Research/submodules/telemanom/EDA/data
Parameters without any nans (25):
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
Parameters with some nans (0):
[]
Parameters with all nans (0):
[]
Byte parameters (0):
[]


In [None]:

# Switch: the summaries below are only printed when this is True
print_track_data = True

## Print track data
def print_track_data_fn(sc):
    """Print summary statistics of the track lengths of one spacecraft.

    Loads the pickled track shapes for ``sc`` from ``parse_sub_dir`` and
    prints ``describe()`` of their first dimension followed by its sum.
    """
    pkl_path = os.path.join(parse_sub_dir, f'shapes_{sc}.pkl')
    with open(pkl_path, 'rb') as fh:
        shapes = np.asarray(pickle.load(fh))  # Track shapes

    # First column of the shapes array: number of rows in each track
    track_lengths = shapes[:, 0]
    print(pd.DataFrame(track_lengths).describe())
    print(f"Total data points: {np.sum(shapes[:,0])}")

if print_track_data:
    print("MSL")
    print_track_data_fn('MSL')
    print()
    print("SMAP")
    print_track_data_fn('SMAP')

MSL
                 0
count    27.000000
mean   2159.888889
std    1009.267112
min     439.000000
25%    1508.000000
50%    2158.000000
75%    2554.500000
max    4308.000000
Total data points: 58317

SMAP
                 0
count    54.000000
mean   2555.629630
std     656.221784
min     312.000000
25%    2596.000000
50%    2851.000000
75%    2880.000000
max    2881.000000
Total data points: 138004
