In [1]:
import os
import sys
import pprint

# Project root on the author's machine. Written as a raw string so the Windows
# backslashes stay literal: '\D', '\T' and '\M' are not valid escape sequences
# and newer Python versions emit a warning for them in a normal string.
base_dir = r'C:\Documents\Thesis_ssd\MasterThesis'
# Change the working directory before the `Classes.*` imports that follow
# (presumably so the project-local package is importable — confirm).
os.chdir(base_dir)

from Classes.DataProcessing.LoadData import LoadData
from Classes.DataProcessing.BaselineHelperFunctions import BaselineHelperFunctions
from Classes.DataProcessing.DataHandler import DataHandler
from Classes.DataProcessing.DataGenerator import DataGenerator
from Classes.Modeling.Models import Models
from Classes.Modeling.RandomGridSearch import RandomGridSearch
from Classes.Modeling.CustomCallback import CustomCallback
from Classes.Scaling.ScalerFitter import ScalerFitter
from Classes.Scaling.MinMaxScalerFitter import MinMaxScalerFitter
from Classes.Scaling.StandardScalerFitter import StandardScalerFitter
from Classes.Modeling.GridSearchResultProcessor import GridSearchResultProcessor
import json

# Instantiate the project helper (the same constructor is called again in cell In [2]).
helper = BaselineHelperFunctions()
# IPython magic: load the `tensorboard` notebook extension.
%load_ext tensorboard

In [2]:
# --- Data loading -----------------------------------------------------------
num_classes = 2
loadData = LoadData(num_classes, isBalanced=True)

# The shuffle flag is forwarded unchanged to the loader.
shuffle = False
(full_ds, train_ds, val_ds, test_ds) = loadData.getDatasets(shuffle=shuffle)

# --- Utility objects --------------------------------------------------------
handler = DataHandler()
pp = pprint.PrettyPrinter(indent=4)
helper = BaselineHelperFunctions()

In [3]:
# Search space for the training hyper-parameters.
hyper_grid = {
    "batch_size": [2 ** exponent for exponent in range(3, 10)],  # 8, 16, ..., 512
    "epochs": [1],
    "learning_rate": [0.1, 0.01, 0.001, 0.0001, 0.00001],
    "optimizer": ["adam", "rmsprop", "sgd"],
}

# Search space for the model architecture parameters.
model_grid = {
    "activation": ["relu", "sigmoid", "softmax", "tanh"],
    "dropout_rate": [0.5, 0.4, 0.3, 0.2, 0.1, 0.01, 0],
    "filters": list(range(11, 26, 2)),  # odd values 11 ... 25
    "kernel_size": list(range(3, 14, 2)),  # odd values 3 ... 13
    "l1_r": [0.3, 0.2, 0.1, 0.01, 0.001, 0.0001],
    "l2_r": [0.3, 0.2, 0.1, 0.01, 0.001, 0.0001],
    "output_layer_activation": ["softmax", "sigmoid"],
    "padding": ["same"],
    "start_neurons": [2 ** exponent for exponent in range(3, 11)],  # 8, 16, ..., 1024
}

# Bare string kept as a note; it is never assigned or used.
# NOTE(review): looks like a previously selected model configuration — confirm.
"""
{'activation': 'softmax', 
 'dropout_rate': 0.1, 
 'filters': 25, 
 'kernel_size': 9, 
 'l1_r': 0.01, 
 'l2_r': 0.0001, 
 'output_layer_activation': 'softmax', 
 'padding': 'same', 
 'start_neurons': 32}
"""

# --- Run settings -----------------------------------------------------------
model_nr = 8
n_picks = 2
test_mode = True
start_from_scratch = False

# --- Preprocessing switches -------------------------------------------------
detrend = False
use_scaler = False
use_minmax = False
use_noise_augmentor = False
use_highpass = False
highpass_freq = 0.1

# --- Callback / monitoring switches -----------------------------------------
use_early_stopping = True
use_tensorboard = False
use_liveplots = False
use_custom_callback = False

In [4]:
# Build the random grid search. The first eleven arguments are positional and
# therefore keep their original order; everything else is passed by keyword.
randomGridSearch = RandomGridSearch(
    train_ds,
    val_ds,
    test_ds,
    model_nr,
    test_mode,
    detrend,
    use_scaler,
    use_noise_augmentor,
    use_minmax,
    use_highpass,
    n_picks,
    hyper_grid=hyper_grid,
    model_grid=model_grid,
    num_classes=num_classes,
    use_tensorboard=use_tensorboard,
    use_liveplots=use_liveplots,
    use_custom_callback=use_custom_callback,
    use_early_stopping=use_early_stopping,
    highpass_freq=highpass_freq,
    start_from_scratch=start_from_scratch,
)

In [5]:
# Run the search; fit() returns five values, unpacked here in order.
(
    results,
    highest_test_accuracy_index,
    highest_train_accuracy_index,
    highest_test_precision_index,
    highest_test_recall_index,
) = randomGridSearch.fit()

{'activation': 'relu', 'dropout_rate': 0, 'filters': 19, 'kernel_size': 13, 'l1_r': 0.001, 'l2_r': 0.3, 'output_layer_activation': 'softmax', 'padding': 'same', 'start_neurons': 512}
------------------------
Index(['batch_size', 'epochs', 'learning_rate', 'optimizer', 'activation',
       'dropout_rate', 'filters', 'kernel_size', 'l1_r', 'l2_r',
       'output_layer_activation', 'padding', 'start_neurons', 'train_loss',
       'train_accuracy', 'train_precision', 'train_recall', 'val_loss',
       'val_accuracy', 'val_precision', 'val_recall'],
      dtype='object')
Parameters stored before fit
  batch_size epochs learning_rate optimizer activation dropout_rate filters  \
0         16      1         1e-05   rmsprop       relu          0.5      21   
1         64      1           0.1   rmsprop       tanh            0      17   
2          8      1         0.001      adam       relu            0      19   

  kernel_size   l1_r    l2_r  ... padding start_neurons    train_loss  \
0       

{'train_loss': 32321.69140625, 'train_accuracy': 0.4990808963775635, 'train_precision': 0.4990808963775635, 'train_recall': 0.4990808963775635} {'val_loss': 32322.2421875, 'val_accuracy': 0.4635416567325592, 'val_precision': 0.4635416567325592, 'val_recall': 0.4635416567325592}
Unfinished columns: ['train_loss', 'train_accuracy', 'train_precision', 'train_recall', 'val_loss', 'val_accuracy', 'val_precision', 'val_recall']
----------------------------------------------------LOSS----------------------------------------------------------
Min val loss: 32322.2421875, at index: 3
Min training loss: 32321.69140625, at index: 3
----------------------------------------------------ACCURACY------------------------------------------------------
Highest val accuracy: 0.5260416865348816, at index: 2
Highest training accuracy: 0.569852948189, at index: 0
----------------------------------------------------PRECISION-----------------------------------------------------
Highest val precision: 0.5260416

In [6]:
# Take a copy of the results frame stored on the search object.
results_df = randomGridSearch.results_df.copy()

In [7]:
# Show the first 13 columns; in the output below these are the hyper-parameter
# and model-parameter settings of each run.
results_df[results_df.columns[0:13]]

Unnamed: 0,batch_size,epochs,learning_rate,optimizer,activation,dropout_rate,filters,kernel_size,l1_r,l2_r,output_layer_activation,padding,start_neurons
0,16,1,1e-05,rmsprop,relu,0.5,21,11,0.3,0.2,sigmoid,same,1024
1,64,1,0.1,rmsprop,tanh,0.0,17,3,0.3,0.0001,sigmoid,same,512
2,8,1,0.001,adam,relu,0.0,19,13,0.001,0.3,softmax,same,512
3,16,1,0.1,rmsprop,tanh,0.3,17,3,0.1,0.001,softmax,same,256


In [8]:
# Show the remaining columns (index 13 onwards); in the output below these are
# the train_* and val_* metrics of each run.
results_df[results_df.columns[13:]]

Unnamed: 0,train_loss,train_accuracy,train_precision,train_recall,val_loss,val_accuracy,val_precision,val_recall
0,6391216.0,0.569853,0.493274,0.101103,26173630.0,0.515625,0.4,0.09375
1,296888.8,0.499081,0.49863,0.501838,296888.9,0.463542,0.466321,0.46875
2,36100730.0,0.554745,0.554745,0.554745,32717930.0,0.526042,0.526042,0.526042
3,32321.69,0.499081,0.499081,0.499081,32322.24,0.463542,0.463542,0.463542


In [9]:
# Load a previously stored grid-search result file by name.
# NOTE(review): the literal 2 is presumably the number of classes (the path in
# the error output below contains "2_classes") — consider passing num_classes; confirm.
result_file_name = 'results_8_sscale_noiseAug_earlyS.csv'
df = GridSearchResultProcessor().get_results_df_by_name(result_file_name, 2)



FileNotFoundError: [Errno 2] File C:\Documents/Thesis_ssd/MasterThesis/GridSearchResults/2_classes/results_8_sscale_noiseAug_earlyS.csv does not exist: 'C:\\Documents/Thesis_ssd/MasterThesis/GridSearchResults/2_classes/results_8_sscale_noiseAug_earlyS.csv'

In [None]:
# Display the first row of `df` (the frame loaded by name in the previous cell).
df.iloc[0]

In [None]:
def fit_from_index(results_df, index):
    """Print the settings stored in one row of a results frame.

    Columns at positions 1 through 13 (position 0 is skipped) of row ``index``
    are paired with their column names and the resulting dict is printed.
    Nothing is returned.
    """
    selected_row = results_df.iloc[index]
    column_names = list(results_df.columns[1:14])
    row_values = list(selected_row[1:14])
    res = dict(zip(column_names, row_values))
    print(res)
    

In [None]:
# Print the stored settings of the first row of `df`.
fit_from_index(df, 0)

In [None]:
# NOTE(review): `loaded_result` is not defined in any visible cell, so this
# fails on a fresh kernel — confirm which frame was meant.
randomGridSearch.find_best_performers(loaded_result)

In [None]:
def find_best_performers(results_df):
    """Locate the best value of every metric in a grid-search results frame.

    Only the columns from ``train_loss`` through the last column are inspected.

    Args:
        results_df: DataFrame containing ``train_loss``, ``train_accuracy``,
            ``train_precision``, ``train_recall`` and the matching ``val_*``
            columns, all located at or after the ``train_loss`` column.

    Returns:
        Tuple ``(min_loss, max_accuracy, max_precision, max_recall)``. Each
        element is a dict with the keys ``train_<metric>`` and ``val_<metric>``
        (the best values) plus ``train_index`` and ``val_index`` (index label
        of the first row holding that best value).

    Raises:
        KeyError: if a required column is missing.
        ValueError: if the frame has no rows.
    """
    first_metric_pos = results_df.columns.get_loc('train_loss')
    metrics = results_df[results_df.columns[first_metric_pos:]]

    def _best(metric, minimize=False):
        # Summarise one metric for both its training and validation column.
        train_col = 'train_' + metric
        val_col = 'val_' + metric
        train_series = metrics[train_col]
        val_series = metrics[val_col]
        if minimize:
            return {train_col: train_series.min(),
                    val_col: val_series.min(),
                    'train_index': train_series.idxmin(),
                    'val_index': val_series.idxmin()}
        return {train_col: train_series.max(),
                val_col: val_series.max(),
                'train_index': train_series.idxmax(),
                'val_index': val_series.idxmax()}

    # Loss is minimised; every other metric is maximised.
    min_loss = _best('loss', minimize=True)
    max_accuracy = _best('accuracy')
    max_precision = _best('precision')
    max_recall = _best('recall')

    return min_loss, max_accuracy, max_precision, max_recall
    

In [None]:
# Summarise the best loss / accuracy / precision / recall of the current results.
find_best_performers(results_df)

In [None]:
# Display the full results frame.
results_df

In [None]:
# NOTE(review): `keys` and `keys2` are not defined at notebook level in any
# visible cell (`keys` only exists as a local inside fit_from_index) — this
# looks like leftover scratch work; confirm before relying on it.
print(keys)
print(keys2)

In [None]:
# NOTE(review): `keys` is not defined at notebook level in any visible cell.
keys[0]

In [None]:
import numpy as np
import pandas as pd
# Build a one-row table whose columns are every grid parameter followed by the
# train/validation metric names; each cell holds the position of its column.
hyper_keys = list(hyper_grid.keys())
model_keys = list(model_grid.keys())
# Spelling matches the metric columns of the results frame shown earlier
# ('train_precision' / 'val_precision').
metrics_train_keys = ["train_loss", "train_accuracy", "train_precision", "train_recall"]
metrics_val_keys = ["val_loss", "val_accuracy", "val_precision", "val_recall"]
header = np.concatenate((hyper_keys, model_keys, metrics_train_keys, metrics_val_keys))
# Size the single row from the header itself so the cell keeps working when a
# grid gains or loses a key (the width was previously hard-coded to 21).
info_table = pd.DataFrame(np.array(range(len(header))).reshape(1, len(header)), columns=header)

In [None]:
# Display the one-row table built in the previous cell.
info_table

In [None]:
# Scratch frame with a single row whose "b" value is NaN.
testing = pd.DataFrame([[0, np.nan]], columns = ["a", "b"])

In [None]:
# Row to add to the scratch frame.
to_add = pd.DataFrame(np.array([0, 1]).reshape(1, 2), columns=["a", "b"])
print(to_add)
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and, like append, keeps the original index labels.
# Note: re-running this cell adds another row each time.
testing = pd.concat([testing, to_add])
testing

In [None]:
# Names of the columns in `testing` that contain at least one missing value.
nan_columns = testing.columns[testing.isnull().any()].tolist()

In [None]:
# NOTE(review): `temp_df` is not defined in any visible cell — confirm its origin.
temp_df

In [None]:
# Names of the columns in `temp_df` that contain at least one missing value.
# This overwrites the `nan_columns` list computed from `testing` above.
nan_columns = temp_df.columns[temp_df.isna().any()].tolist()

In [None]:
# Columns of `temp_df` holding at least one missing value. `.any()` reduces the
# per-row mask to a single bool; without it the condition raises ValueError
# (ambiguous truth value) as soon as the frame has more than one row.
nan_cols = [col for col in temp_df.columns if pd.isnull(temp_df[col].values).any()]

In [None]:
# Print the raw value array of every column in `temp_df`, one column per line.
for column in temp_df.columns:
    print(temp_df[column].values)

In [None]:
# Inspect the dtype of the 'val_loss' column.
temp_df['val_loss'].dtype