In [1]:
import sys
import os
import time
import multiprocessing
import pprint 

from datetime import datetime, timedelta
import numpy as np
import matplotlib.pyplot as plt

sys.path.insert(0, '..')
from Packages.NoveltyDetection.setup.noveltyDetectionConfig import CONFIG
from NNNoveltyDetectionAnalysis import NNNoveltyDetectionAnalysis
from Functions.telegrambot import Bot

# Notebook setup: build the training configuration, instantiate the novelty
# detection analysis with it and keep shortcuts to the data and models that the
# following cells use.

# Number of CPUs reported by the OS (not referenced by the cells visible here).
num_processes = multiprocessing.cpu_count()

# Telegram bot handle; a later cell uses it to send the saved figure.
my_bot = Bot("lisa_thebot")

# Environment variables
data_path = CONFIG['OUTPUTDATAPATH']
results_path = CONFIG['PACKAGE_NAME']

# Full configuration handed to NNNoveltyDetectionAnalysis.
# NOTE(review): the analysis reports a model hash for this run (printed by the
# next cell) — presumably derived from this dict, so edit values with care; confirm.
training_params = {
    "Technique": "NeuralNetwork",
    "DevelopmentMode": False,
    "DevelopmentEvents": 1600,
    "NoveltyDetection": True,
    # How the input data set is selected/preprocessed.
    "InputDataConfig": {
        "database": "4classes",
        "n_pts_fft": 1024,
        "decimation_rate": 3,
        "spectrum_bins_left": 400,
        "n_windows": 1,
        "balance_data": True
    },
    # NOTE(review): "momentum" and "nesterov" are listed next to the Adam
    # settings — presumably only read for other optimizers; confirm.
    "OptmizerAlgorithm": {
        "name": "Adam",
        "parameters": {
            "learning_rate": 0.001,
            "beta_1": 0.90,
            "beta_2": 0.999,
            "epsilon": 1e-08,
            "learning_decay": 1e-6,
            "momentum": 0.3,
            "nesterov": True
        }
    },
    # Cross-validation and network training settings.
    "HyperParameters": {
        "n_folds": 4,
        "n_epochs": 300,
        "n_inits": 2,
        "batch_size": 128,
        "kernel_initializer": "uniform",
        "hidden_activation_function": "tanh", #"relu",
        "classifier_output_activation_function": "softmax",
        "norm": "mapstd",
        "metrics": ["accuracy"],
        "loss": "mean_squared_error",
        "dropout": False,
        "dropout_parameter": 0.0,
        "regularization": None,
        "regularization_parameter": 0.0
    },
    "callbacks": {
        "EarlyStopping": {
            "patience": 50,
            "monitor": "val_loss"
        }
    }
}
# Build the analysis from the parameters above (no pre-existing hash is loaded)
# and ask it to load the data.
analysis = NNNoveltyDetectionAnalysis(parameters=training_params, model_hash="", load_hash=False, load_data=True, verbose=True)
all_data, all_trgt, all_trgt_sparse = analysis.getData()

# Per-novelty-class training sets; later cells index them as [inovelty].
trn_data = analysis.trn_data
trn_trgt = analysis.trn_trgt
trn_trgt_sparse = analysis.trn_trgt_sparse

# Model wrappers; later cells index them as [inovelty].
models = analysis.models

Using TensorFlow backend.


[+] Time to read data file: 1.0206046104431152 seconds
Qtd event of A is 12939
Qtd event of B is 29352
Qtd event of C is 11510
Qtd event of D is 23760

Biggest class is B with 29352 events
Total of events in the dataset is 77561
Balacing data...
DataHandler Class: CreateEventsForClass
Original Size: (12939, 400)
DataHandler Class: CreateEventsForClass
Original Size: (29352, 400)
DataHandler Class: CreateEventsForClass
Original Size: (11510, 400)
DataHandler Class: CreateEventsForClass
Original Size: (23760, 400)
Reading from /home/vinicius.mello/Workspace/LPS/SonarAnalysis/Results/NoveltyDetection/4_folds_cross_validation_balanced_data.jbl
Reading from /home/vinicius.mello/Workspace/LPS/SonarAnalysis/Results/NoveltyDetection/4_folds_cross_validation_balanced_data.jbl


In [2]:
# Report which run this notebook is bound to: the model hash, the folder that
# holds its results and the full parameter set held by the analysis object.
pp = pprint.PrettyPrinter(indent=1)

run_hash = analysis.model_hash
print(run_hash)

run_results_dir = analysis.getBaseResultsPath()
print(run_results_dir)

pp.pprint(analysis.parameters)

d7175ec280d30121856e19c075f00e5c48b862c9397484062d6c2612256f8198
/home/vinicius.mello/Workspace/LPS/SonarAnalysis/Results/NoveltyDetection/NeuralNetwork/outputs/d7175ec280d30121856e19c075f00e5c48b862c9397484062d6c2612256f8198
{'DevelopmentEvents': 1600,
 'DevelopmentMode': False,
 'HyperParameters': {'batch_size': 128,
                     'classifier_output_activation_function': 'softmax',
                     'dropout': False,
                     'dropout_parameter': 0.0,
                     'hidden_activation_function': 'tanh',
                     'kernel_initializer': 'uniform',
                     'loss': 'mean_squared_error',
                     'metrics': ['accuracy'],
                     'n_epochs': 300,
                     'n_folds': 4,
                     'n_inits': 2,
                     'norm': 'mapstd',
                     'regularization': None,
                     'regularization_parameter': 0.0},
 'InputDataConfig': {'balance_data': True,
                    

#### Perform the training of the model

In [14]:
# Train once per class label, each time passing that class index to
# analysis.train as the novelty, and report how long each run took.
n_novelty_classes = len(analysis.class_labels)

for inovelty in range(n_novelty_classes):
    train_started_at = time.time()

    train_options = dict(
        layer=1,
        inovelty=inovelty,
        trainingType="neuronSweep",  # options noted by the author: foldSweep, neuronSweep, normal
        hidden_neurons=[100],
        neurons_variation_step=10,
        numThreads=4,
        model_hash=analysis.model_hash,
    )
    analysis.train(**train_options)

    # Wall-clock duration rendered as H:MM:SS.ffffff
    elapsed_seconds = float(time.time() - train_started_at)
    duration = str(timedelta(seconds=elapsed_seconds))
    novelty_label = analysis.class_labels[inovelty]
    print("The training of the model for novelty class {0} took {1} to be performed\n".format(novelty_label, duration))

python neuralnetwork_train.py --layer 1 --novelty 0 --threads 4 --type neuronSweep --hiddenNeurons 100 --neuronsVariationStep 10 --modelhash d7175ec280d30121856e19c075f00e5c48b862c9397484062d6c2612256f8198
The training of the model for novelty class A took 0:00:19.331133 to be performed

python neuralnetwork_train.py --layer 1 --novelty 1 --threads 4 --type neuronSweep --hiddenNeurons 100 --neuronsVariationStep 10 --modelhash d7175ec280d30121856e19c075f00e5c48b862c9397484062d6c2612256f8198
The training of the model for novelty class B took 0:00:19.625044 to be performed

python neuralnetwork_train.py --layer 1 --novelty 2 --threads 4 --type neuronSweep --hiddenNeurons 100 --neuronsVariationStep 10 --modelhash d7175ec280d30121856e19c075f00e5c48b862c9397484062d6c2612256f8198
The training of the model for novelty class C took 0:00:16.716187 to be performed

python neuralnetwork_train.py --layer 1 --novelty 3 --threads 4 --type neuronSweep --hiddenNeurons 100 --neuronsVariationStep 10 --mo

### Figures-of-Merit analysis for a threshold sweep at the output layer

In [17]:
# Thresolds variation x Figures of Merit
%matplotlib inline 

from sklearn import metrics
from sklearn import preprocessing
from sklearn.externals import joblib
from Functions.StatisticalAnalysis import KLDiv, EstPDF
from Functions import FunctionsDataVisualization
import matplotlib.pyplot as plt

# Choose layer 
layer = 3

# Choose neurons topology
neurons_mat = [1] + list(range(10,110,10))
neurons_mat = [1]

for ineuron in neurons_mat:
    hidden_neurons = [400,300,200]
    neurons_str = models[0].get_neurons_str(trn_data[0],hidden_neurons=hidden_neurons)
    analysis_name = 'figures_of_merit_{}_layer_{}_neurons'.format(layer,neurons_str)
    analysis_file = os.path.join(analysis.getBaseResultsPath(), "AnalysisFiles", analysis_name + ".jbl")    

    verbose = True

    # Plot parameters
    plt.rcParams['font.weight'] = 'bold'
    plt.rcParams['xtick.labelsize'] = 14
    plt.rcParams['ytick.labelsize'] = 14
    plt.rcParams['legend.numpoints'] = 1
    plt.rcParams['legend.handlelength'] = 3
    plt.rcParams['legend.borderpad'] = 0.3
    plt.rcParams['legend.fontsize'] = 14
    m_colors = ['b', 'r', 'g', 'y']
    figsize = (20,15)


    if not os.path.exists(analysis_file):
        thr_mat = np.round(np.arange(0.0,1.05,0.05),3)
        thr_mat[thr_mat>-0.1] = abs(thr_mat[thr_mat>-0.1])
        n_folds = analysis.parameters["HyperParameters"]["n_folds"]
        class_eff_mat = np.zeros([n_folds,len(np.unique(all_trgt)),len(np.unique(all_trgt)),len(thr_mat)])
        novelty_eff_mat = np.zeros([n_folds,len(np.unique(all_trgt)),len(thr_mat)])
        known_acc_mat = np.zeros([n_folds,len(np.unique(all_trgt)),len(thr_mat)])
        known_sp_mat = np.zeros([n_folds,len(np.unique(all_trgt)),len(thr_mat)])
        known_trig_mat = np.zeros([n_folds,len(np.unique(all_trgt)),len(thr_mat)])

        for inovelty, novelty_class in enumerate(np.unique(analysis.all_trgt)):
            for ifold in range(len(analysis.CVO[inovelty])):
                train_id, test_id = analysis.CVO[inovelty][ifold]

                print('Novelty class: %01.0f - Topology: %s - fold %i'%(novelty_class,
                                                                        models[inovelty].get_neurons_str(data=trn_data[inovelty], hidden_neurons=hidden_neurons)+'x'+str(trn_trgt_sparse[inovelty].shape[1]),
                                                                        ifold))
                classifier = models[inovelty].get_model(data=analysis.trn_data[inovelty], trgt=analysis.trn_trgt[inovelty],
                                                        hidden_neurons=hidden_neurons, layer=layer, ifold=ifold)

                # normalize known classes
                if analysis.parameters["HyperParameters"]["norm"] == "mapstd":
                    scaler = preprocessing.StandardScaler().fit(all_data[all_trgt!=inovelty][train_id,:])
                elif analysis.parameters["HyperParameters"]["norm"] == "mapstd_rob":
                    scaler = preprocessing.RobustScaler().fit(all_data[all_trgt!=inovelty][train_id,:])
                elif analysis.parameters["HyperParameters"]["norm"] == "mapminmax":
                    scaler = preprocessing.MinMaxScaler().fit(all_data[all_trgt!=inovelty][train_id,:])

                known_data = scaler.transform(analysis.trn_data[inovelty][test_id,:])
                known_target = analysis.trn_trgt[inovelty][test_id]

                novelty_data = scaler.transform(all_data[all_trgt==inovelty])

                known_output = classifier.predict(known_data)
                novelty_output = classifier.predict(novelty_data)

                for ithr,thr_value in enumerate(thr_mat): 
                    buff = np.zeros([len(np.unique(analysis.all_trgt))-1])
                    for iclass, class_id in enumerate(np.unique(analysis.all_trgt)):
                        if iclass == inovelty:
                            continue
                        output_of_class_events = known_output[known_target==iclass-(iclass>inovelty),:]
                        correct_class_output = np.argmax(output_of_class_events,axis=1)==iclass-(iclass>inovelty)
                        output_above_thr = output_of_class_events[correct_class_output,iclass-(iclass>inovelty)]>thr_value
                        class_eff_mat[ifold, inovelty, iclass, ithr] = float(sum(output_above_thr))/float(len(output_of_class_events))
                        buff[iclass-(iclass>inovelty)] = class_eff_mat[ifold, inovelty, iclass, ithr]

                    novelty_eff_mat[ifold, inovelty, ithr] = float(sum(1-(novelty_output>thr_value).any(axis=1)))/float(len(novelty_output))
                    known_acc_mat[ifold, inovelty, ithr] = np.mean(buff,axis=0)
                    known_sp_mat[ifold, inovelty, ithr]= (np.sqrt(np.mean(buff,axis=0)*np.power(np.prod(buff),1./float(len(buff)))))
                    known_trig_mat[ifold, inovelty, ithr]=float(sum(np.max(known_output,axis=1)>thr_value))/float(len(known_output))

    #             class_eff_mat, novelty_eff_mat, known_acc_mat, known_sp_mat, known_trig_mat = analysis.get_figures_of_merit(known_output=output,
    #                                                                                                                         known_target=known_trgt, 
    #                                                                                                                         novelty_output=novelty_output,
    #                                                                                                                         thr_mat=thr_mat, 
    #                                                                                                                         inovelty=inovelty,
    #                                                                                                                         ifold=ifold)
        joblib.dump([class_eff_mat, novelty_eff_mat, known_acc_mat, known_sp_mat, known_trig_mat, thr_mat],
                    analysis_file,compress=9)
    else:
        print('file exists')
        [class_eff_mat, novelty_eff_mat, known_acc_mat, known_sp_mat, known_trig_mat, thr_mat] = joblib.load(analysis_file) 

    # plot analysis
    import matplotlib.pyplot as plt
    %matplotlib inline  

    fig = plt.subplots(figsize=figsize)

    for inovelty, novelty_class in enumerate(np.unique(all_trgt)):
        ax = plt.subplot(2,2,inovelty+1)
        for iclass, m_class in enumerate(np.unique(all_trgt)):
            if novelty_class == m_class:
                #a = 0
                ax.errorbar(thr_mat,np.mean(novelty_eff_mat[:,int(novelty_class),:],axis=0),
                            np.std(novelty_eff_mat[:,int(novelty_class),:],axis=0),fmt='o-',
                            color='k',alpha=0.7,linewidth=3,
                            label='Det. Novidade')
                ax.errorbar(thr_mat,np.mean(known_acc_mat[:,int(novelty_class),:],axis=0),
                            np.std(known_acc_mat[:,int(novelty_class),:],axis=0),fmt='o--',
                            color='m',alpha=0.7,linewidth=3,
                            label='Acurácia')
                ax.errorbar(thr_mat,np.mean(known_sp_mat[:,int(novelty_class),:],axis=0),
                            np.std(known_sp_mat[:,int(novelty_class),:],axis=0),fmt='o:',
                            color='c',alpha=0.7,linewidth=3,
                            label='Índice SP')
                ax.errorbar(thr_mat,np.mean(known_trig_mat[:,int(novelty_class),:],axis=0),
                            np.std(known_trig_mat[:,int(novelty_class),:],axis=0),fmt='o-.',
                            color='k',alpha=0.7,linewidth=3,
                            label='Trigger')
            else:
                ax.errorbar(thr_mat,np.mean(class_eff_mat[:,int(novelty_class),int(m_class),:],axis=0),
                            np.std(class_eff_mat[:,int(novelty_class),int(m_class),:],axis=0),fmt='o-',
                            color=m_colors[int(m_class)],alpha=0.7,linewidth=3,
                           label='Eficiência Classe {}'.format(analysis.getClassLabels()[iclass]))
        ax.set_xticks(thr_mat)
        ax.set_xticklabels(thr_mat,rotation=45, fontsize=18)
        ax.set_title('MLP - Topologia {} - Classe {} como Novidade'.format(neurons_str, analysis.getClassLabels()[inovelty]),fontsize=18,weight='bold')
        ax.set_xlim([np.min(thr_mat), np.max(thr_mat)])

        ax.set_ylim([0.0, 1.3])
        y_ticks = np.arange(0.0,1.3,0.1)
        ax.set_yticks(y_ticks)
        y_tick_labels = 100*y_ticks[y_ticks<=1.0]
        y_tick_labels = y_tick_labels.astype(int)
        ax.set_yticklabels(y_tick_labels,fontsize=18)

        ax.grid()

        if inovelty > 1:
            ax.set_xlabel('Limiar',fontsize=18,weight='bold')
        if inovelty == 0 or inovelty == 2:
            ax.set_ylabel('Figuras de Mérito (%)',fontsize=18,weight='bold')

        handles, labels = ax.get_legend_handles_labels()
        # sort both labels and handles by labels
        labels, handles = zip(*sorted(zip(labels, handles), key=lambda t: t[0]))
        ax.legend(handles, labels, ncol=3, loc='upper center')

        plt.tight_layout()
        rect = [0.1, 0.2, 0.3, 0.4]
        ax1 = FunctionsDataVisualization.add_subplot_axes(ax,rect)
        a = thr_mat>=0.8
        b = thr_mat<=1.0
        selected_thr = a & b
        
        ax1.errorbar(thr_mat[selected_thr],np.mean(novelty_eff_mat[:,int(novelty_class),:],axis=0)[selected_thr],
                    np.std(novelty_eff_mat[:,int(novelty_class),:],axis=0)[selected_thr],fmt='o-',
                    color='k',alpha=0.7,linewidth=3,
                    label='Det. Novidade')
        ax1.errorbar(thr_mat[selected_thr],np.mean(known_sp_mat[:,int(novelty_class),:],axis=0)[selected_thr],
                    np.std(known_sp_mat[:,int(novelty_class),:],axis=0)[selected_thr],fmt='o:',
                    color='c',alpha=0.7,linewidth=3,
                    label='Índice SP')
        ax1.errorbar(thr_mat[selected_thr],np.mean(known_trig_mat[:,int(novelty_class),:],axis=0)[selected_thr],
                    np.std(known_trig_mat[:,int(novelty_class),:],axis=0)[selected_thr],fmt='o-.',
                    color='k',alpha=0.7,linewidth=3,
                    label='Trigger')
        
        ax1.set_xticks(thr_mat[selected_thr])
        ax1.set_xticklabels(thr_mat[selected_thr],rotation=45, fontsize=22)

        ax1.set_ylim([0.0, 0.9])
        y_ticks = np.arange(0.0,1.1,0.1)
        y_ticks = np.round(y_ticks,2)
        ax1.set_yticks(y_ticks)
        ax1.set_yticklabels(100.*ax1.get_yticks(), fontsize=22)

        ax1.grid()

     #Save the figure
    file_name = os.path.join(analysis.pictures_output_folder, analysis_name+"_{}_novelty_{}_neurons.png".format(inovelty,neurons_str))
    plt.savefig(file_name, format="png")
    try: 
        my_bot.sendMessage(imgPath=file_name)
    except Exception as e:
        print("Error when sending the image to the bot. Error: {}".format(str(e)))

Novelty class: 0 - Topology: 400x400x300x200x3 - fold 0
Neural Network - Layer: 3 - Topology: 400x400x300x200 - Fold 1 of 4 Folds -  Init 1 of 2 Inits
Train on 66042 samples, validate on 22014 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
10368/66042 [===>..........................] - ETA: 2s - loss: 0.0110 - acc: 0.9805

KeyboardInterrupt: 