In [3]:
import sys
import os
import time
import multiprocessing
import pprint 

from datetime import datetime, timedelta
import numpy as np
import matplotlib.pyplot as plt

sys.path.insert(0, '..')
from Packages.NoveltyDetection.setup.noveltyDetectionConfig import CONFIG
from NNNoveltyDetectionAnalysis import NNNoveltyDetectionAnalysis
from Functions.telegrambot import Bot

# Number of worker processes used by the parallel SP-index computation in a
# later cell: one per CPU core reported by the OS.
num_processes = multiprocessing.cpu_count()

# Telegram bot wrapper; later cells call my_bot.sendMessage(...) to report
# progress and to send the generated figures.
my_bot = Bot("lisa_thebot")

# Environment variables
data_path = CONFIG['OUTPUTDATAPATH']
# NOTE(review): the results path is read from the 'PACKAGE_NAME' entry --
# confirm this is the intended key. Neither path is used in the visible cells.
results_path = CONFIG['PACKAGE_NAME']

# Baseline configuration handed to NNNoveltyDetectionAnalysis below. The
# grid-search cell deep-copies this dict and overrides only
# HyperParameters.batch_size for each candidate.
# NOTE(review): the saved output of the next cell shows 'DevelopmentMode': True
# and 'n_epochs': 200, which differ from the values here -- the stored outputs
# look stale; re-run the notebook from a fresh kernel before trusting them.
training_params = {
    "Technique": "NeuralNetwork",
    "DevelopmentMode": False,
    "DevelopmentEvents": 400,
    "NoveltyDetection": True,
    "InputDataConfig": {
        "database": "4classes",
        "n_pts_fft": 1024,
        "decimation_rate": 3,
        "spectrum_bins_left": 400,
        "n_windows": 1,
        "balance_data": True
    },
    # NOTE(review): "momentum" and "nesterov" are listed next to Adam's
    # beta/epsilon settings -- presumably ignored for Adam; confirm how the
    # optimizer is built from this dict.
    "OptmizerAlgorithm": {
        "name": "Adam",
        "parameters": {
            "learning_rate": 0.001,
            "beta_1": 0.90,
            "beta_2": 0.999,
            "epsilon": 1e-08,
            "learning_decay": 1e-6,
            "momentum": 0.3,
            "nesterov": True
        }
    },
    "HyperParameters": {
        "n_folds": 4,
        "n_epochs": 300,
        "n_inits": 2,
        "batch_size": 256,
        "kernel_initializer": "uniform",
        "hidden_activation_function": "tanh", #"relu",
        "classifier_output_activation_function": "softmax",
        # "norm" selects the scaler in the analysis cells:
        # "mapstd" -> StandardScaler, "mapstd_rob" -> RobustScaler,
        # "mapminmax" -> MinMaxScaler.
        "norm": "mapstd",
        "metrics": ["accuracy"],
        "loss": "mean_squared_error",
        "dropout": False,
        "dropout_parameter": 0.0,
        "regularization": None,
        "regularization_parameter": 0.0
    },
    "callbacks": {
        "EarlyStopping": {
            "patience": 30,
            "monitor": "val_loss"
        }
    }
}
# Build the analysis object for this configuration. No existing hash is loaded
# (model_hash="", load_hash=False) and the data set is read (load_data=True).
# NOTE(review): the captured output shows hash-named output folders being
# created -- presumably the hash is derived from `parameters`; confirm before
# reordering or renaming any key above.
analysis = NNNoveltyDetectionAnalysis(parameters=training_params, model_hash="", load_hash=False, load_data=True, verbose=True)
# Unpack the three values returned by getData(); the later cells index
# all_data with masks built from all_trgt.
all_data, all_trgt, all_trgt_sparse = analysis.getData()

# Convenience aliases. The later cells index these by novelty class
# (e.g. trn_data[inovelty]).
trn_data = analysis.trn_data
trn_trgt = analysis.trn_trgt
trn_trgt_sparse = analysis.trn_trgt_sparse

# Model handlers, indexed by novelty class in the analysis cells.
models = analysis.models

Creating /home/vinicius.mello/Workspace/SonarAnalysis/Results/NoveltyDetection/NeuralNetwork/outputs/276498e1ea8f6cd0aee6dd08f8ddaae347d9913e0d27d5280df7829ac57bddeb
Creating /home/vinicius.mello/Workspace/SonarAnalysis/Results/NoveltyDetection/NeuralNetwork/outputs/276498e1ea8f6cd0aee6dd08f8ddaae347d9913e0d27d5280df7829ac57bddeb/AnalysisFiles
Creating /home/vinicius.mello/Workspace/SonarAnalysis/Results/NoveltyDetection/NeuralNetwork/outputs/276498e1ea8f6cd0aee6dd08f8ddaae347d9913e0d27d5280df7829ac57bddeb/Pictures
Saving /home/vinicius.mello/Workspace/SonarAnalysis/Results/NoveltyDetection/NeuralNetwork/outputs/276498e1ea8f6cd0aee6dd08f8ddaae347d9913e0d27d5280df7829ac57bddeb/parameters.json
[+] Time to read data file: 1.102820634841919 seconds
Qtd event of A is 12939
Qtd event of B is 29352
Qtd event of C is 11510
Qtd event of D is 23760

Biggest class is B with 29352 events
Total of events in the dataset is 77561
Balacing data...
DataHandler Class: CreateEventsForClass
Original Size:

In [3]:
# Show which model this session is bound to (its hash and results folder),
# then pretty-print the full parameter set currently held by the analysis.
pp = pprint.PrettyPrinter(indent=1)
for session_detail in (analysis.model_hash, analysis.getBaseResultsPath()):
    print(session_detail)
pp.pprint(analysis.parameters)

de98c7f6f7697195e32a85b733b4c5b263d4419c5fb84c3dd3bb259eb106d88c
/home/vinicius.mello/Workspace/SonarAnalysis/Results/NoveltyDetection/NeuralNetwork/outputs/de98c7f6f7697195e32a85b733b4c5b263d4419c5fb84c3dd3bb259eb106d88c
{'DevelopmentEvents': 400,
 'DevelopmentMode': True,
 'HyperParameters': {'batch_size': 256,
                     'classifier_output_activation_function': 'softmax',
                     'dropout': False,
                     'dropout_parameter': 0.0,
                     'hidden_activation_function': 'tanh',
                     'kernel_initializer': 'uniform',
                     'loss': 'mean_squared_error',
                     'metrics': ['accuracy'],
                     'n_epochs': 200,
                     'n_folds': 4,
                     'n_inits': 2,
                     'norm': 'mapstd',
                     'regularization': None,
                     'regularization_parameter': 0.0},
 'InputDataConfig': {'balance_data': True,
                     'data

### Models Training

In [None]:
import copy

# Grid search over the mini-batch size.
# One independent copy of the baseline parameters is made per candidate, and
# candidate k gets batch_size = 2 ** (max_batch_exponent - k), i.e.
# 1024, 512, ..., 2 for the defaults below.
num_values = 10
max_batch_exponent = 10
if num_values > max_batch_exponent + 1:
    # A negative exponent would produce a fractional batch size.
    raise ValueError("num_values must not exceed max_batch_exponent + 1")

gridSearch = [copy.deepcopy(training_params) for _ in range(num_values)]
for exponent_offset, params in enumerate(gridSearch):
    params['HyperParameters']['batch_size'] = 2 ** (max_batch_exponent - exponent_offset)
    print("batch_size: {}".format(params['HyperParameters']['batch_size']))
    # Train one model per class, each time treating that class as the novelty.
    for inovelty in range(len(analysis.class_labels)):
        startTime = time.time()
        analysis.setParameters(params)
        analysis.train(layer=1,
                       inovelty=inovelty,
                       trainingType="neuronSweep", #foldSweep, neuronSweep, normal
                       hidden_neurons=[50],
                       neurons_variation_step=5,
                       numThreads=8,
                       model_hash=analysis.model_hash,
                       verbose=False)

        duration = str(timedelta(seconds=float(time.time() - startTime)))
        # Single-argument print(...) behaves the same on Python 2 and Python 3.
        print("The training of the model for novelty class {0} took {1} to be performed\n".format(analysis.class_labels[inovelty], duration))


### Performance Analysis

In [None]:
# Imported here so this cell does not depend on a later cell having been run
# first (the notebook must survive Restart & Run All).
from sklearn import preprocessing

# Topology evaluated here; matches the grid-search training call
# (layer=1, hidden_neurons=[50]).
layer = 1
hidden_neurons = [50]

# Normalization name (HyperParameters.norm) -> scaler class.
scaler_by_norm = {
    "mapstd": preprocessing.StandardScaler,
    "mapstd_rob": preprocessing.RobustScaler,
    "mapminmax": preprocessing.MinMaxScaler,
}

best_sp = 0
best_param = None
best_topology = None

# 'topology<i>' -> {'parameters': ..., 'sp_index': <most recent value>,
#                   'sp_index_by_fold': {(inovelty, ifold): value}}
# 'sp_index' keeps its original meaning (last value computed for that
# topology); 'sp_index_by_fold' keeps every value so results from earlier
# novelty classes and folds are no longer overwritten.
gridSearch_results = {}

for inovelty in range(len(analysis.class_labels)):
    print("Novelty Class: {}".format(analysis.class_labels[inovelty]))
    for ifold in range(analysis.n_folds):
        print("Fold: {}".format(ifold))
        for iparams, params in enumerate(gridSearch):
            analysis.setParameters(params)
            train_id, test_id = analysis.CVO[inovelty][ifold]

            # Normalize known classes: fit the scaler on the training split of
            # the known (non-novelty) events only.
            norm = analysis.parameters["HyperParameters"]["norm"]
            if norm not in scaler_by_norm:
                # Fail loudly instead of reusing a scaler from a previous
                # iteration (or hitting a NameError on the first one).
                raise ValueError("Unsupported normalization: {}".format(norm))
            scaler = scaler_by_norm[norm]().fit(all_data[all_trgt!=inovelty][train_id,:])

            known_data = scaler.transform(analysis.trn_data[inovelty][test_id,:])
            known_target = analysis.trn_trgt[inovelty][test_id]

            #Import model
            print("Loading model...")
            classifier = models[inovelty].get_model(data  = analysis.trn_data[inovelty],
                                                    trgt  = analysis.trn_trgt[inovelty],
                                                    hidden_neurons = hidden_neurons[:layer],
                                                    layer = layer,
                                                    ifold = ifold
                                                   )

            print("Classifier loaded with success.")

            #Get model output
            output = classifier.predict(known_data)
            #Get SP
            # NOTE(review): `sp_index` is not defined or imported in any
            # visible cell -- confirm where it comes from before running.
            sp_index_value = sp_index(known_target, np.argmax(output, axis=1))

            topology = 'topology{}'.format(iparams)
            topology_entry = gridSearch_results.setdefault(topology, {'sp_index_by_fold': {}})
            topology_entry['parameters'] = params
            topology_entry['sp_index'] = sp_index_value
            topology_entry['sp_index_by_fold'][(inovelty, ifold)] = sp_index_value

            # The best candidate is the one with the single highest SP over
            # all (novelty class, fold) evaluations.
            if sp_index_value > best_sp:
                best_sp = sp_index_value
                best_param = params
                best_topology = topology

print("Analysis has fished!")
# Best-effort notification: a bot failure must not discard the results above
# (same handling as the figure upload in the plotting cell).
try:
    my_bot.sendMessage("GridSearch Analysis has finished. Best topology was {}".format(best_topology))
except Exception as e:
    print("Error when sending the message to the bot. Error: {}".format(str(e)))

In [None]:
%matplotlib inline 

from sklearn import metrics
from sklearn import preprocessing
from sklearn.externals import joblib
import matplotlib.pyplot as plt

# Choose layer
layer = 1

# Choose neurons topology
hidden_neurons = [50]

# Neuron counts to evaluate for the chosen layer: 1, then every `step` up to
# hidden_neurons[layer-1] (1, 5, 10, ..., 50 with the defaults).
# list(range(...)) is required on Python 3, where range() is not a list and
# cannot be concatenated to one; it is a no-op copy on Python 2.
step = 5
neurons_mat = [1] + list(range(step,hidden_neurons[layer-1]+step,step))
neurons_mat = neurons_mat[:len(neurons_mat)-layer+2]

# Cache file for the SP indexes computed below (one file per layer).
analysis_name = 'sp_index_%i_layer'%(layer)
analysis_file = os.path.join(analysis.getBaseResultsPath(), "AnalysisFiles", analysis_name + ".jbl")

verbose = True

# Plot parameters
plt.rcParams['font.weight'] = 'bold'
plt.rcParams['xtick.labelsize'] = 14
plt.rcParams['ytick.labelsize'] = 14
plt.rcParams['legend.numpoints'] = 1
plt.rcParams['legend.handlelength'] = 3
plt.rcParams['legend.borderpad'] = 0.3
plt.rcParams['legend.fontsize'] = 14
m_colors = ['b', 'r', 'g', 'y']
figsize = (10,5)


results = {}
# SP index per (novelty class, fold, neuron count).
spIndex = np.zeros([len(analysis.class_labels), analysis.parameters["HyperParameters"]["n_folds"], len(neurons_mat)])

# Compute the SP index for every (novelty class, fold, neuron count) unless a
# cached result already exists on disk, in which case it is loaded instead.
if not os.path.exists(analysis_file):
    for inovelty in range(len(analysis.class_labels)):
        folds = range(len(analysis.CVO[inovelty]))
        for ifold in folds:
            # NOTE(review): class_eff_mat, known_sp_mat, class_eff and known_sp
            # are allocated here but not read anywhere in the visible cells.
            class_eff_mat = np.zeros([analysis.parameters["HyperParameters"]["n_folds"],len(np.unique(all_trgt))])
            known_sp_mat = np.zeros([analysis.parameters["HyperParameters"]["n_folds"]])

            # One efficiency slot per known class (all classes but the novelty).
            buff = np.zeros([len(np.unique(all_trgt))-1])
            class_eff = np.zeros([len(np.unique(all_trgt))], dtype=object)
            known_sp = np.zeros([len(np.unique(all_trgt))], dtype=object)

            def getSP(ineuron):
                """Return ``(ineuron, sp_index)`` for the current novelty class and fold.

                Reads ``inovelty`` and ``ifold`` from the enclosing loops. The
                model with ``ineuron`` neurons in the last hidden layer is
                evaluated on the test split of the known classes.

                An event counts as correctly classified when the arg-max output
                is its own class and that output exceeds ``thr_value``. The SP
                index is ``sqrt(mean(eff) * geometric_mean(eff))`` over the
                per-class efficiencies.

                Raises:
                    ValueError: if HyperParameters.norm is not a supported name.
                """
                train_id, test_id = analysis.CVO[inovelty][ifold]

                # normalize known classes (scaler fitted on the training split)
                if analysis.parameters["HyperParameters"]["norm"] == "mapstd":
                    scaler = preprocessing.StandardScaler().fit(all_data[all_trgt!=inovelty][train_id,:])
                elif analysis.parameters["HyperParameters"]["norm"] == "mapstd_rob":
                    scaler = preprocessing.RobustScaler().fit(all_data[all_trgt!=inovelty][train_id,:])
                elif analysis.parameters["HyperParameters"]["norm"] == "mapminmax":
                    scaler = preprocessing.MinMaxScaler().fit(all_data[all_trgt!=inovelty][train_id,:])
                else:
                    # Previously this fell through and failed later with an
                    # unbound `scaler`.
                    raise ValueError("Unsupported normalization: {}".format(
                        analysis.parameters["HyperParameters"]["norm"]))

                known_data = scaler.transform(analysis.trn_data[inovelty][test_id,:])
                known_trgt = analysis.trn_trgt[inovelty][test_id]
                classifier = models[inovelty].get_model(data  = analysis.trn_data[inovelty],
                                                        trgt  = analysis.trn_trgt[inovelty],
                                                        hidden_neurons = hidden_neurons[:layer-1]+[ineuron],
                                                        layer = layer,
                                                        ifold = ifold
                                                       )

                output = classifier.predict(known_data)

                thr_value = 0.2
                for iclass, class_id in enumerate(np.unique(all_trgt)):
                    if iclass == inovelty:
                        continue
                    # Index of this class among the known classes: classes
                    # after the novelty are shifted down by one.
                    known_index = iclass-(iclass>inovelty)
                    output_of_class_events = output[known_trgt==known_index,:]
                    correct_class_output = np.argmax(output_of_class_events,axis=1)==known_index
                    output_above_thr = output_of_class_events[correct_class_output,known_index]>thr_value
                    class_eff = float(sum(output_above_thr))/float(len(output_of_class_events))
                    buff[known_index] = class_eff

                sp_index = (np.sqrt(np.mean(buff,axis=0)*np.power(np.prod(buff),1./float(len(buff)))))

                return ineuron, sp_index

            # Start Parallel processing
            # The pool is created only after getSP has been (re)defined for the
            # current inovelty/ifold.
            # NOTE(review): presumably this relies on the fork start method so
            # the workers see the current globals -- confirm on non-Linux hosts.
            p = multiprocessing.Pool(processes=num_processes)
            try:
                if verbose:
                    print('[*] Calculating SP Index ...')
                results = p.map(getSP, neurons_mat)
            finally:
                # Always release the worker processes, even when a worker raises.
                p.close()
                p.join()

            # Each result carries its neuron count, so place it by value.
            for neuron_count, sp_value in results:
                spIndex[inovelty, ifold, neurons_mat.index(neuron_count)] = sp_value

    joblib.dump([neurons_mat,spIndex],analysis_file,compress=9)
else:
    [neurons_mat, spIndex] = joblib.load(analysis_file)

    
# One figure per novelty class: mean SP index over the folds versus the number
# of neurons, with a +/- one standard deviation band. Each figure is saved to
# the pictures folder and then sent to the Telegram bot (best effort).
for inovelty in range(len(analysis.class_labels)):
    # Plot results
    # plt.subplots returns (figure, axes); unpack both instead of storing the
    # tuple and fetching the axes again with plt.subplot.
    fig, ax = plt.subplots(figsize=figsize)

    # Statistics across folds (axis 0 of the per-class slice).
    mean_sp = np.mean(spIndex[inovelty,:,:], axis=0)
    error_sp = np.std(spIndex[inovelty,:,:], axis=0)

    ax.plot(neurons_mat, mean_sp, color='b', alpha=0.7, linewidth=2.5, label='SP Index Test Data')

    ax.fill_between(neurons_mat, mean_sp+error_sp, mean_sp-error_sp, facecolor='blue', alpha=0.3)

    ax.set_title('SP Index x Neurons (Class {} as novelty)'.format(analysis.class_labels[inovelty]),
                                  fontsize=14, fontweight='bold')
    ax.set_ylabel('SP Index', fontsize=22)
    ax.set_xlabel('Neurons', fontsize=22)
    ax.grid()
    ax.legend()
    fig.tight_layout()
    #Save the figure
    neurons_str = models[inovelty].get_neurons_str(trn_data[inovelty],hidden_neurons=hidden_neurons)
    file_name = os.path.join(analysis.pictures_output_folder, analysis_name+"_{}_novelty_{}_neurons.png".format(inovelty,neurons_str))
    fig.savefig(file_name, format="png")
    # The figure is left open on purpose so the inline backend still renders
    # it at the end of the cell.
    try:
        my_bot.sendMessage(imgPath=file_name)
    except Exception as e:
        print("Error when sending the image to the bot. Error: {}".format(str(e)))