In [1]:
import os
import sys
import pprint

base_dir = 'C:\Documents\Thesis_ssd\MasterThesis'
os.chdir(base_dir)

from Classes.DataProcessing.LoadData import LoadData
from Classes.DataProcessing.BaselineHelperFunctions import BaselineHelperFunctions
from Classes.DataProcessing.DataHandler import DataHandler
from Classes.DataProcessing.DataGenerator import DataGenerator
from Classes.Modeling.Models import Models
from Classes.Modeling.RandomGridSearch import RandomGridSearch
from Classes.Modeling.CustomCallback import CustomCallback
from Classes.Scaling.ScalerFitter import ScalerFitter
from Classes.Scaling.MinMaxScalerFitter import MinMaxScalerFitter
from Classes.Scaling.StandardScalerFitter import StandardScalerFitter
import json

helper = BaselineHelperFunctions()
%load_ext tensorboard

In [2]:
num_classes = 2
loadData = LoadData(num_classes, isBalanced = True)
shuffle = False
full_ds, train_ds, val_ds, test_ds = loadData.getDatasets(shuffle = shuffle)
handler = DataHandler()
pp = pprint.PrettyPrinter(indent=4)
helper = BaselineHelperFunctions()

In [3]:
hyper_grid = {
        "batch_size" : [8, 16, 32, 64, 128, 256, 512],
        "epochs" : [1],
        "learning_rate" : [0.1, 0.01, 0.001, 0.0001, 0.00001],
        "optimizer" : ["adam", "rmsprop", "sgd"]
    }
model_grid = {
    "activation" : ["relu", "sigmoid", "softmax", "tanh"],
    "dropout_rate" : [0.5, 0.4, 0.3, 0.2, 0.1, 0.01, 0],
    "filters" : [11, 13, 15, 17, 19, 21, 23, 25],
    "kernel_size" : [3, 5, 7, 9, 11, 13],
    "l1_r" : [0.3, 0.2, 0.1, 0.01, 0.001, 0.0001],
    "l2_r" : [0.3, 0.2, 0.1, 0.01, 0.001, 0.0001],
    "output_layer_activation" : ["softmax", "sigmoid"],
    "padding" : ["same"],
    "start_neurons" : [8,16, 32, 64, 128, 256, 512, 1024]
}
"""
{'activation': 'softmax', 
 'dropout_rate': 0.1, 
 'filters': 25, 
 'kernel_size': 9, 
 'l1_r': 0.01, 
 'l2_r': 0.0001, 
 'output_layer_activation': 'softmax', 
 'padding': 'same', 
 'start_neurons': 32}
"""


model_nr = 5

test_mode = True
use_scaler = False
use_noise_augmentor = False
detrend = False
use_minmax = False
use_highpass = False
highpass_freq = 0.1

n_picks = 2

use_tensorboard = False
use_liveplots = False
use_custom_callback = False
use_early_stopping = True

In [4]:
randomGridSearch = RandomGridSearch(train_ds, val_ds, test_ds, model_nr, test_mode, detrend,
                                    use_scaler, use_noise_augmentor, use_minmax, use_highpass, n_picks, 
                                    hyper_grid = hyper_grid, model_grid = model_grid, 
                                    num_classes = num_classes, use_tensorboard = use_tensorboard,
                                    use_liveplots = use_liveplots, use_custom_callback = use_custom_callback,
                                    use_early_stopping = use_early_stopping, highpass_freq = highpass_freq)

In [5]:
results, highest_test_accuracy_index, highest_train_accuracy_index, highest_test_precision_index, highest_test_recall_index= randomGridSearch.fit()

{'activation': 'softmax', 'dropout_rate': 0.01, 'filters': 23, 'kernel_size': 9, 'l1_r': 0.001, 'l2_r': 0.0001, 'output_layer_activation': 'sigmoid', 'padding': 'same', 'start_neurons': 64}
sigmoid
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (128, 3, 23)              1242230   
_________________________________________________________________
batch_normalization (BatchNo (128, 3, 23)              92        
_________________________________________________________________
flatten (Flatten)            (128, 69)                 0         
_________________________________________________________________
dense (Dense)                (128, 2)                  140       
Total params: 1,242,462
Trainable params: 1,242,416
Non-trainable params: 46
_________________________________________________________________
Starting: 
{'batch_size': 128, 'epochs': 1, '

In [6]:
results_df = randomGridSearch.results_df.copy()

In [34]:
results_df

Unnamed: 0,batch_size,epochs,learning_rate,optimizer,activation,dropout_rate,filters,kernel_size,l1_r,l2_r,...,padding,start_neurons,train_loss,train_accuracy,train_precision,train_recall,val_loss,val_accuracy,val_precision,val_recall
0,128,1,0.0001,adam,softmax,0.01,23,9,0.001,0.0001,...,same,64,6.777656,0.558594,0.552301,0.515625,6.800071,0.453646,0.451407,0.442708
1,256,1,0.001,adam,relu,0.5,15,9,0.001,0.0001,...,same,32,8.747196,0.538086,0.538086,0.538086,830.445435,0.472656,0.472656,0.472656


In [30]:
train_loss = results_df.columns.get_loc('train_loss')
metrics_df = results_df[results_df.columns[train_loss:]]


In [36]:
min(metrics_df['train_loss'])

6.777656078338623

In [42]:
metrics_df[metrics_df['train_loss'] == min(metrics_df['train_loss'])].index[0]

0

In [28]:
max_values

0      6.800071
1    830.445435
dtype: float64

In [43]:
def find_best_performers(results_df):
    train_loss_index = results_df.columns.get_loc('train_loss')
    metrics = results_df[results_df.columns[train_loss_index:]]
    min_loss = {'train_loss' : min(metrics['train_loss']), 'val_loss' : min(metrics['val_loss']), 
                'train_index' : metrics_df[metrics_df['train_loss'] == min(metrics_df['train_loss'])].index[0], 
                'val_index' : metrics_df[metrics_df['val_loss'] == min(metrics_df['val_loss'])].index[0]}
    
    max_accuracy = {'train_accuracy' : max(metrics['train_accuracy']), 'val_accuracy' : max(metrics['val_accuracy']), 
                    'train_index' : metrics_df[metrics_df['train_accuracy'] == max(metrics_df['train_accuracy'])].index[0], 
                    'val_index' : metrics_df[metrics_df['val_accuracy'] == max(metrics_df['val_accuracy'])].index[0]}
    
    max_precision = {'train_precision' : max(metrics['train_precision']), 'val_precision' : max(metrics['val_precision']), 
                     'train_index' : metrics_df[metrics_df['train_precision'] == max(metrics_df['train_precision'])].index[0], 
                     'val_index' : metrics_df[metrics_df['val_precision'] == max(metrics_df['val_precision'])].index[0]}
    
    max_recall = {'train_recall' : max(metrics['train_recall']), 'val_recall' : max(metrics['train_recall']), 
                  'train_index' : metrics_df[metrics_df['train_recall'] == max(metrics_df['train_recall'])].index[0], 
                  'val_index' : metrics_df[metrics_df['val_recall'] == max(metrics_df['val_recall'])].index[0]}
    
    return min_loss, max_accuracy, max_precision, max_recall
    

In [44]:
find_best_performers(results_df)

NameError: name 'etrics_df' is not defined

In [10]:
ex = {"A" : 1,
      "B" : 2}
print(ex["A"])

keys = list(hyper_grid.keys())
keys2 = list(model_grid.keys())

1


In [11]:
print(keys)
print(keys2)

['batch_size', 'epochs', 'learning_rate', 'optimizer']
['activation', 'dropout_rate', 'filters', 'kernel_size', 'l1_r', 'l2_r', 'output_layer_activation', 'padding', 'start_neurons']


In [12]:
keys[0]

'batch_size'

In [13]:
import numpy as np
import pandas as pd
hyper_keys = list(hyper_grid.keys())
model_keys = list(model_grid.keys())
metrics_train_keys = ["train_loss", "train_accuracy", "train_presicion", "train_recall"]
metrics_val_keys = ["val_loss", "val_accuracy", "val_presicion", "val_recall"]
header = np.concatenate((hyper_keys, model_keys, metrics_train_keys, metrics_val_keys))
info_table = pd.DataFrame(np.array([i for i in range(len(header))]).reshape(1, 21) ,columns = header)

In [14]:
info_table

Unnamed: 0,batch_size,epochs,learning_rate,optimizer,activation,dropout_rate,filters,kernel_size,l1_r,l2_r,...,padding,start_neurons,train_loss,train_accuracy,train_presicion,train_recall,val_loss,val_accuracy,val_presicion,val_recall
0,0,1,2,3,4,5,6,7,8,9,...,11,12,13,14,15,16,17,18,19,20


In [15]:
testing = pd.DataFrame([[0, np.nan]], columns = ["a", "b"])

In [16]:
to_add = pd.DataFrame(np.array([0 ,1]).reshape(1,2), columns = ["a", "b"])
print(to_add)
testing = testing.append(to_add)
testing

   a  b
0  0  1


Unnamed: 0,a,b
0,0,
0,0,1.0


In [17]:
nan_columns = testing.columns[testing.isnull().any()].tolist()

In [18]:
temp_df

NameError: name 'temp_df' is not defined

In [None]:
nan_columns = temp_df.columns[temp_df.isna().any()].tolist()

In [None]:
nan_cols = [i for i in temp_df.columns if pd.isnull(temp_df[i].values)]

In [None]:
for column in temp_df.columns:
    print(temp_df[column].values)

In [None]:
temp_df['val_loss'].dtype