In [1]:
import os
import sys
import pprint

base_dir = 'C:\Documents\Thesis_ssd\MasterThesis'
os.chdir(base_dir)

from Classes.DataProcessing.LoadData import LoadData
from Classes.DataProcessing.BaselineHelperFunctions import BaselineHelperFunctions
from Classes.DataProcessing.DataHandler import DataHandler
from Classes.DataProcessing.DataGenerator import DataGenerator
from Classes.Modeling.Models import Models
from Classes.Modeling.RandomGridSearch import RandomGridSearch
from Classes.Modeling.CustomCallback import CustomCallback
from Classes.Scaling.ScalerFitter import ScalerFitter
from Classes.Scaling.MinMaxScalerFitter import MinMaxScalerFitter
from Classes.Scaling.StandardScalerFitter import StandardScalerFitter
from Classes.Modeling.GridSearchResultProcessor import GridSearchResultProcessor
import json

helper = BaselineHelperFunctions()
%load_ext tensorboard

In [2]:
num_classes = 2
loadData = LoadData(num_classes, isBalanced = True)
shuffle = False
full_ds, train_ds, val_ds, test_ds = loadData.getDatasets(shuffle = shuffle)
handler = DataHandler()
pp = pprint.PrettyPrinter(indent=4)
helper = BaselineHelperFunctions()

In [3]:
hyper_grid = {
        "batch_size" : [8, 16, 32, 64, 128, 256, 512],
        "epochs" : [1],
        "learning_rate" : [0.1, 0.01, 0.001, 0.0001, 0.00001],
        "optimizer" : ["adam", "rmsprop", "sgd"]
    }
model_grid = {
    "activation" : ["relu", "sigmoid", "softmax", "tanh"],
    "dropout_rate" : [0.5, 0.4, 0.3, 0.2, 0.1, 0.01, 0],
    "filters" : [11, 13, 15, 17, 19, 21, 23, 25],
    "kernel_size" : [3, 5, 7, 9, 11, 13],
    "l1_r" : [0.3, 0.2, 0.1, 0.01, 0.001, 0.0001],
    "l2_r" : [0.3, 0.2, 0.1, 0.01, 0.001, 0.0001],
    "output_layer_activation" : ["softmax", "sigmoid"],
    "padding" : ["same"],
    "start_neurons" : [8,16, 32, 64, 128, 256, 512, 1024]
}
"""
{'activation': 'softmax', 
 'dropout_rate': 0.1, 
 'filters': 25, 
 'kernel_size': 9, 
 'l1_r': 0.01, 
 'l2_r': 0.0001, 
 'output_layer_activation': 'softmax', 
 'padding': 'same', 
 'start_neurons': 32}
"""


model_nr = 8

test_mode = True
use_scaler = False
use_noise_augmentor = False
detrend = False
use_minmax = False
use_highpass = False
highpass_freq = 0.1

n_picks = 2

use_tensorboard = False
use_liveplots = False
use_custom_callback = False
use_early_stopping = True
start_from_scratch = False

In [4]:
randomGridSearch = RandomGridSearch(train_ds, val_ds, test_ds, model_nr, test_mode, detrend,
                                    use_scaler, use_noise_augmentor, use_minmax, use_highpass, n_picks, 
                                    hyper_grid = hyper_grid, model_grid = model_grid, 
                                    num_classes = num_classes, use_tensorboard = use_tensorboard,
                                    use_liveplots = use_liveplots, use_custom_callback = use_custom_callback,
                                    use_early_stopping = use_early_stopping, highpass_freq = highpass_freq,
                                    start_from_scratch = start_from_scratch)

In [5]:
results, highest_test_accuracy_index, highest_train_accuracy_index, highest_test_precision_index, highest_test_recall_index= randomGridSearch.fit()

Parameters stored before fit
  batch_size epochs learning_rate optimizer activation dropout_rate filters  \
0         16      1         1e-05   rmsprop       relu          0.5      21   
1         64      1           0.1   rmsprop       tanh            0      17   
2          8      1         0.001      adam       relu            0      19   
3         16      1           0.1   rmsprop       tanh          0.3      17   
4        128      1          0.01   rmsprop    sigmoid          0.1      11   

  kernel_size   l1_r    l2_r  ... padding start_neurons    train_loss  \
0          11    0.3     0.2  ...    same          1024  6.391216e+06   
1           3    0.3  0.0001  ...    same           512  2.968888e+05   
2          13  0.001     0.3  ...    same           512  3.610073e+07   
3           3    0.1   0.001  ...    same           256  3.232169e+04   
4          13    0.1    0.01  ...    same            32           NaN   

   train_accuracy  train_precision  train_recall      val

{'train_loss': 613.35595703125, 'train_accuracy': 0.4912109375, 'train_precision': 0.4819159209728241, 'train_recall': 0.4814453125} {'val_loss': 613.3571166992188, 'val_accuracy': 0.48046875, 'val_precision': 0.47826087474823, 'val_recall': 0.47265625}
----------------------------------------------------LOSS----------------------------------------------------------
Min val loss: 447.4765625, at index: 4
Min training loss: 447.5163269042969, at index: 4
----------------------------------------------------ACCURACY------------------------------------------------------
Highest val accuracy: 0.5401041507720947, at index: 4
Highest training accuracy: 0.569852948189, at index: 0
----------------------------------------------------PRECISION-----------------------------------------------------
Highest val precision: 0.5401041507720947, at index: 4
Highest training precision: 0.554744541645, at index: 2
-----------------------------------------------------RECALL---------------------------------

In [6]:
results_df = randomGridSearch.results_df.copy()

In [7]:
results_df[results_df.columns[0:13]]

Unnamed: 0,batch_size,epochs,learning_rate,optimizer,activation,dropout_rate,filters,kernel_size,l1_r,l2_r,output_layer_activation,padding,start_neurons
0,16,1,1e-05,rmsprop,relu,0.5,21,11,0.3,0.2,sigmoid,same,1024
1,64,1,0.1,rmsprop,tanh,0.0,17,3,0.3,0.0001,sigmoid,same,512
2,8,1,0.001,adam,relu,0.0,19,13,0.001,0.3,softmax,same,512
3,16,1,0.1,rmsprop,tanh,0.3,17,3,0.1,0.001,softmax,same,256
4,128,1,0.01,rmsprop,sigmoid,0.1,11,13,0.1,0.01,softmax,same,32
5,256,1,1e-05,adam,tanh,0.4,23,7,0.1,0.0001,sigmoid,same,16


In [8]:
results_df[results_df.columns[13:]]

Unnamed: 0,train_loss,train_accuracy,train_precision,train_recall,val_loss,val_accuracy,val_precision,val_recall
0,6391216.0,0.569853,0.493274,0.101103,26173630.0,0.515625,0.4,0.09375
1,296888.8,0.499081,0.49863,0.501838,296888.9,0.463542,0.466321,0.46875
2,36100730.0,0.554745,0.554745,0.554745,32717930.0,0.526042,0.526042,0.526042
3,32321.69,0.499081,0.499081,0.499081,32322.24,0.463542,0.463542,0.463542
4,447.5163,0.507812,0.507812,0.507812,447.4766,0.540104,0.540104,0.540104
5,613.356,0.491211,0.481916,0.481445,613.3571,0.480469,0.478261,0.472656


In [14]:
''# Remove dependence on num_classes and model nr
def fit_from_index(self, results_df, index):
    values = list(results_df.iloc[index][1:14])
    keys = list(results_df.columns[1:14])
    params = {keys[i]: values[i] for i in range(len(keys))}
    
    build_model_args = self.helper.generate_build_model_args(self.model_nr, params['batch_size'], 
                                                             params['dropout_rate'], params['activation'], 
                                                             params['output_layer_activation'],params['l2_r'], 
                                                             params['l1_r'], params['start_neurons'],
                                                             params['filters'], params['kernel_size'],
                                                             params['padding'], self.num_classes)
    # Build model using args generated above
    model = Models(**build_model_args).model
    
    # Generate generator args using picks.
    gen_args = self.helper.generate_gen_args(params['batch_size'], self.test, self.detrend, 
                                             use_scaler = self.use_scaler, scaler = self.scaler, 
                                             use_noise_augmentor = self.use_noise_augmentor, 
                                             augmentor = self.augmentor, num_classes = self.num_classes)
    
    # Initiate generators using the args
    train_gen = self.data_gen.data_generator(self.train_ds, **gen_args)
    val_gen = self.data_gen.data_generator(self.val_ds, **gen_args)
    test_gen = self.data_gen.data_generator(self.test_ds, **gen_args)
    
    # Generate compiler args using picks
    opt = self.getOptimizer(params['optimizer'], params['learning_rate'])
    model_compile_args = self.helper.generate_model_compile_args(params['opt'], self.num_classes)
    # Compile model using generated args
    model.compile(**model_compile_args)
    
    # Generate fit args using picks.
    fit_args = self.helper.generate_fit_args(train_ds, self.val_ds, params['batch_size'], self.test, 
                                             params['epoch'], test_gen, use_tensorboard = self.use_tensorboard, 
                                             use_liveplots = self.use_liveplots, 
                                             use_custom_callback = self.use_custom_callback,
                                             use_early_stopping = self.use_early_stopping)
    # Fit the model using the generated args
    model_fit = model.fit(train_gen, **fit_args)
    
    helper.plot_confusion_matrix(model_fit, test_gen, self.test_ds, params['batch_size'], self.num_classes)
    
    # Evaluate the fitted model on the test set
    loss, accuracy, precision, recall = model.evaluate_generator(generator=test_gen,
                                                               steps=self.helper.get_steps_per_epoch(self.test_ds, 
                                                                                                     params['batch_size'], 
                                                                                                     False))
    
    pp = pprint.PrettyPrinter(indent=4)
    print(f'Test loss: {loss}')
    print(f'Test accuracy: {accuracy}')
    print(f'Test precision: {precision}')
    print(f'Test recall: {recall}')
    return model_fit
    

In [12]:
fit_from_index(results_df, 0)

{'epochs': 1, 'learning_rate': 1e-05, 'optimizer': 'rmsprop', 'activation': 'relu', 'dropout_rate': 0.5, 'filters': 21, 'kernel_size': 11, 'l1_r': 0.3, 'l2_r': 0.2, 'output_layer_activation': 'sigmoid', 'padding': 'same', 'start_neurons': 1024, 'train_loss': 6391216.0}


In [None]:
build_model_args = self.helper.generate_build_model_args(self.model_nr, batch_size, dropout_rate, 
                                                                     activation, output_layer_activation,
                                                                     l2_r, l1_r, start_neurons, filters, kernel_size, 
                                                                     padding, self.num_classes)

In [None]:
randomGridSearch.find_best_performers(loaded_result)

In [None]:
def find_best_performers(results_df):
    train_loss_index = results_df.columns.get_loc('train_loss')
    metrics = results_df[results_df.columns[train_loss_index:]]
    min_loss = {'train_loss' : min(metrics['train_loss']), 'val_loss' : min(metrics['val_loss']), 
                'train_index' : metrics_df[metrics_df['train_loss'] == min(metrics_df['train_loss'])].index[0], 
                'val_index' : metrics_df[metrics_df['val_loss'] == min(metrics_df['val_loss'])].index[0]}
    
    max_accuracy = {'train_accuracy' : max(metrics['train_accuracy']), 'val_accuracy' : max(metrics['val_accuracy']), 
                    'train_index' : metrics_df[metrics_df['train_accuracy'] == max(metrics_df['train_accuracy'])].index[0], 
                    'val_index' : metrics_df[metrics_df['val_accuracy'] == max(metrics_df['val_accuracy'])].index[0]}
    
    max_precision = {'train_precision' : max(metrics['train_precision']), 'val_precision' : max(metrics['val_precision']), 
                     'train_index' : metrics_df[metrics_df['train_precision'] == max(metrics_df['train_precision'])].index[0], 
                     'val_index' : metrics_df[metrics_df['val_precision'] == max(metrics_df['val_precision'])].index[0]}
    
    max_recall = {'train_recall' : max(metrics['train_recall']), 'val_recall' : max(metrics['train_recall']), 
                  'train_index' : metrics_df[metrics_df['train_recall'] == max(metrics_df['train_recall'])].index[0], 
                  'val_index' : metrics_df[metrics_df['val_recall'] == max(metrics_df['val_recall'])].index[0]}
    
    return min_loss, max_accuracy, max_precision, max_recall
    

In [None]:
find_best_performers(results_df)

In [None]:
results_df

In [None]:
print(keys)
print(keys2)

In [None]:
keys[0]

In [None]:
import numpy as np
import pandas as pd
hyper_keys = list(hyper_grid.keys())
model_keys = list(model_grid.keys())
metrics_train_keys = ["train_loss", "train_accuracy", "train_presicion", "train_recall"]
metrics_val_keys = ["val_loss", "val_accuracy", "val_presicion", "val_recall"]
header = np.concatenate((hyper_keys, model_keys, metrics_train_keys, metrics_val_keys))
info_table = pd.DataFrame(np.array([i for i in range(len(header))]).reshape(1, 21) ,columns = header)

In [None]:
info_table

In [None]:
testing = pd.DataFrame([[0, np.nan]], columns = ["a", "b"])

In [None]:
to_add = pd.DataFrame(np.array([0 ,1]).reshape(1,2), columns = ["a", "b"])
print(to_add)
testing = testing.append(to_add)
testing

In [None]:
nan_columns = testing.columns[testing.isnull().any()].tolist()

In [None]:
temp_df

In [None]:
nan_columns = temp_df.columns[temp_df.isna().any()].tolist()

In [None]:
nan_cols = [i for i in temp_df.columns if pd.isnull(temp_df[i].values)]

In [None]:
for column in temp_df.columns:
    print(temp_df[column].values)

In [None]:
temp_df['val_loss'].dtype