In [1]:
import numpy as np
import os
import sys
import pandas as pd

import sklearn as sk
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

import pylab as pl
import h5py

# Configure TensorFlow's environment BEFORE importing it: the C++ log level
# and the visible CUDA devices are read when the native runtime starts up,
# so setting them after the import can be too late to take effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # hide INFO and WARNING messages from the C++ backend
os.environ['CUDA_VISIBLE_DEVICES'] = "0"  # expose only the first GPU to this process

import tensorflow as tf

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      # Allocate GPU memory on demand instead of reserving all of it up front.
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    print(e)


from keras.utils import np_utils
from keras.utils.vis_utils import plot_model
from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint
from sklearn.model_selection import ParameterGrid
import re
from sklearn.metrics import confusion_matrix


# Raw string: '\T' and '\M' are not valid escape sequences, so a plain string
# literal only works by accident and triggers an invalid-escape warning.
# The resulting path value is unchanged.
classes_dir = r'D:\Thesis_ssd\MasterThesis3.0'
# Switch to the project root; presumably this is what lets the project-local
# `Classes` package imports that follow resolve.
os.chdir(classes_dir)
from Classes.DataProcessing.LoadData import LoadData
from Classes.DataProcessing.HelperFunctions import HelperFunctions
from Classes.DataProcessing.DataHandler import DataHandler
from Classes.DataProcessing.TimeAugmentor import TimeAugmentor
from Classes.Modeling.DynamicModels import DynamicModels
from Classes.Modeling.StaticModels import StaticModels
from Classes.Modeling.NarrowSearchRam import NarrowSearchRam
from Classes.Modeling.CustomCallback import CustomCallback
from Classes.Modeling.ResultFitter import ResultFitter
from Classes.Scaling.ScalerFitter import ScalerFitter
from Classes.Scaling.MinMaxScalerFitter import MinMaxScalerFitter
from Classes.Scaling.StandardScalerFitter import StandardScalerFitter
import json
#from Classes import Tf_shutup
#Tf_shutup.Tf_shutup()

from livelossplot import PlotLossesKeras



from matplotlib.colors import ListedColormap

# Use large square figures by default for every plot in this notebook.
plt.rcParams.update({"figure.figsize": (15, 15)})
# Shared helper instance from the project's HelperFunctions class.
helper = HelperFunctions()

import sys
# True when the 'google.colab' module has been loaded into this interpreter.
ISCOLAB = 'google.colab' in sys.modules

import random
import pprint

# Raw string: '\T' and '\M' are not valid escape sequences, so a plain string
# literal only works by accident and triggers an invalid-escape warning.
# The resulting path value is unchanged.
base_dir = r'D:\Thesis_ssd\MasterThesis3.0'
# Make the project root the working directory for the rest of the notebook.
os.chdir(base_dir)

1 Physical GPUs, 1 Logical GPUs


Using TensorFlow backend.


In [2]:
# Keyword arguments forwarded verbatim to LoadData; 'seed' is reused for the
# time augmentor below.
load_args = dict(
    earth_explo_only=False,
    noise_earth_only=False,
    noise_not_noise=True,
    downsample=True,
    upsample=True,
    frac_diff=0.3,
    seed=1,
    subsample_size=0.4,
    balance_non_train_set=True,
    use_true_test_set=False,
)
loadData = LoadData(**load_args)
full_ds, train_ds, val_ds, test_ds = loadData.get_datasets()
noise_ds = loadData.noise_ds
handler = DataHandler(loadData)

# In the earthquake/explosion-only setting the augmentor is additionally given
# the noise dataset; otherwise it is built on the full dataset alone.
if not load_args['earth_explo_only']:
    timeAug = TimeAugmentor(handler, full_ds, seed = load_args['seed'])
else:
    full_and_noise_ds = np.concatenate((full_ds, noise_ds))
    timeAug = TimeAugmentor(handler, full_and_noise_ds, seed = load_args['seed'])

Mapping redundancy: [--------------------------------------->] 100 %

In [3]:
# Printing data stats:
print(len(train_ds), len(val_ds), len(test_ds))
# One header per dataset, followed by the distribution line that
# get_class_distribution_from_ds prints itself (see the cell output).
for _title, _dataset in (
    ("All data:", full_ds),
    ("Train set:", train_ds),
    ("Validation set:", val_ds),
    ("Test set:", test_ds),
):
    print(_title)
    # After the loop, classes/counts hold the values for the test set.
    classes, counts = handler.get_class_distribution_from_ds(_dataset)
print(f"Nr noise samples {len(loadData.noise_ds)}")


33854 6771 4514
All data:
Total: 45139, earthquake: 15036, explosion: 14787, noise: 15316
Train set:
Total: 33854, earthquake: 11291, explosion: 11127, noise: 11436
Validation set:
Total: 6771, earthquake: 2230, explosion: 2210, noise: 2331
Test set:
Total: 4514, earthquake: 1515, explosion: 1450, noise: 1549
Nr noise samples 11436


In [4]:
# Single-valued grid; presumably the baseline configuration that the narrow
# search starts from (TODO confirm against NarrowSearchRam).
main_grid = dict(
    num_layers=[2],
    batch_size=[256],
    epochs=[40],
    learning_rate=[0.01],
    optimizer=["sgd"],
    start_neurons=[4],
    decay_sequence=[[1, 2, 4, 6, 8, 10]],
    dropout_rate=[0.3],
    filters=[17],
    kernel_size=[5],
    padding=["same"],
    use_layerwise_dropout_batchnorm=[True],
    l2_r=[0.001],
    l1_r=[0.0001],
    activation=["tanh"],
    output_layer_activation=["sigmoid"],
)

# Candidate values for the training hyperparameters.
# NOTE(review): batch size 1028 may be a typo for 1024 - confirm.
hyper_grid = dict(
    num_layers=[3, 4, 5, 6],
    batch_size=[128, 512, 1028],
    epochs=[40],
    learning_rate=[0.1, 0.05, 0.025, 0.005],
    optimizer=["sgd"],
)

# Candidate values for the model architecture and regularisation.
model_grid = dict(
    start_neurons=[2, 4, 6, 7],
    use_layerwise_dropout_batchnorm=[False, True],
    decay_sequence=[
        [1, 2, 4, 4, 2, 1],
        [1, 4, 8, 8, 4, 1],
        [1, 0.5, 0.25, 0.25, 0.5, 1],
        [1, 1, 1, 1, 1, 1],
    ],
    dropout_rate=[0.3, 0.25, 0.2, 0.15, 0.1],
    filters=[11],
    kernel_size=[5],
    padding=["same"],
    l2_r=[0.005, 0.001, 0.0005, 0.0001, 0],
    l1_r=[0.0005, 0.0001, 0.00005],
    activation=["tanh"],
    output_layer_activation=["sigmoid"],
)


# --- Model selection ---
model_nr = "LSTM"
is_lstm = True
num_channels = 3

# --- Preprocessing / augmentation switches ---
use_scaler = True
use_minmax = False
use_time_augmentor = True
use_noise_augmentor = True
# The search class reports detrend/highpass as not implemented (see the
# message printed when it is constructed).
detrend = False
use_highpass = False
highpass_freq = 0.1

# --- Training-loop options ---
start_from_scratch = True
use_early_stopping = True
use_custom_callback = False
use_liveplots = True
use_tensorboard = False

# Build the search object. The leading arguments are positional and must stay
# in exactly this order; the remaining options are passed by keyword.
narrowSearch = NarrowSearchRam(
    loadData,
    train_ds,
    val_ds,
    test_ds,
    model_nr,
    detrend,
    use_scaler,
    use_time_augmentor,
    use_noise_augmentor,
    use_minmax,
    use_highpass,
    main_grid,
    hyper_grid,
    model_grid,
    use_tensorboard=use_tensorboard,
    use_liveplots=use_liveplots,
    use_custom_callback=use_custom_callback,
    use_early_stopping=use_early_stopping,
    highpass_freq=highpass_freq,
    start_from_scratch=start_from_scratch,
    is_lstm=is_lstm,
    num_channels=num_channels,
)

Detrend and highpass filters are not implemented in this class yet.


In [5]:
def clear_tensorboard_dir(path=None):
    """Delete every entry inside the TensorBoard log directory.

    Args:
        path: Directory to empty. Defaults to
            ``{base_dir}/Tensorboard_dir/fit`` (``base_dir`` is the notebook's
            module-level project root).

    The directory itself is kept. A missing directory is treated as already
    clean instead of raising, and plain files or symlinks are removed as well
    (``shutil.rmtree`` alone fails on anything that is not a real directory).
    """
    import os
    import shutil
    if path is None:
        path = f"{base_dir}/Tensorboard_dir/fit"
    if not os.path.isdir(path):
        # Nothing has been logged yet, so there is nothing to clear.
        return
    files = os.listdir(path)
    print(files)
    for f in files:
        entry = os.path.join(path, f)
        if os.path.isdir(entry) and not os.path.islink(entry):
            shutil.rmtree(entry)
        else:
            # Stray files and symlinks cannot be removed with rmtree.
            os.remove(entry)
if use_tensorboard:
    clear_tensorboard_dir()
    %tensorboard --logdir tensorboard_dir/fit

In [None]:
# Run the search and unpack the five values returned by fit(). This is the
# long-running step of the notebook (the captured output reports roughly
# 600 seconds for fitting the time augmentor alone).
# NOTE(review): the names suggest a results table plus the best loss /
# accuracy / precision / recall found - confirm against NarrowSearchRam.fit.
results_df, min_loss, max_accuracy, max_precision, max_recall = narrowSearch.fit()

Fitting time augmentor: [>                   ] 0 %Fitting time augmentor: [>                   ] 0 %Fitting time augmentor: [>                   ] 0 %Fitting time augmentor: [>                   ] 0 %Fitting time augmentor: [>                   ] 0 %Fitting time augmentor: [>                   ] 0 %Fitting time augmentor: [>                   ] 0 %Fitting time augmentor: [>                   ] 0 %Fitting time augmentor: [>                   ] 0 %Fitting time augmentor: [>                   ] 0 %Fitting time augmentor: [>                   ] 0 %Fitting time augmentor: [>                   ] 0 %Fitting time augmentor: [>                   ] 0 %Fitting time augmentor: [>                   ] 0 %Fitting time augmentor: [>                   ] 0 %Fitting time augmentor: [>                   ] 0 %Fitting time augmentor: [>                   ] 0 %Fitting time augmentor: [>                   ] 0 %Fitting time augmentor: [>                   ] 0 %Fitting time augmentor: [>     

Fit process completed after 606.5321826934814 seconds. Total datapoints fitted: 45139.
Average time per datapoint: 0.013436987587086145
[{'activation': 'tanh', [------------------> ] 99 %%
  'batch_size': 256,
  'decay_sequence': [1, 2, 4, 6, 8, 10],
  'dropout_rate': 0.3,
  'epochs': 40,
  'filters': 17,
  'kernel_size': 5,
  'l1_r': 0.0001,
  'l2_r': 0.001,
  'learning_rate': 0.1,
  'num_channels': 3,
  'num_layers': 2,
  'optimizer': 'sgd',
  'output_layer_activation': 'sigmoid',
  'padding': 'same',
  'start_neurons': 4,
  'use_layerwise_dropout_batchnorm': True},
 {'activation': 'tanh',
  'batch_size': 256,
  'decay_sequence': [1, 2, 4, 6, 8, 10],
  'dropout_rate': 0.3,
  'epochs': 40,
  'filters': 17,
  'kernel_size': 5,
  'l1_r': 0.0001,
  'l2_r': 0.001,
  'learning_rate': 0.05,
  'num_channels': 3,
  'num_layers': 2,
  'optimizer': 'sgd',
  'output_layer_activation': 'sigmoid',
  'padding': 'same',
  'start_neurons': 4,
  'use_layerwise_dropout_batchnorm': True},
 {'activation'

  'start_neurons': 4,
  'use_layerwise_dropout_batchnorm': True},
 {'activation': 'tanh',
  'batch_size': 256,
  'decay_sequence': [1, 2, 4, 6, 8, 10],
  'dropout_rate': 0.3,
  'epochs': 40,
  'filters': 17,
  'kernel_size': 5,
  'l1_r': 0.0001,
  'l2_r': 0.0001,
  'learning_rate': 0.01,
  'num_channels': 3,
  'num_layers': 2,
  'optimizer': 'sgd',
  'output_layer_activation': 'sigmoid',
  'padding': 'same',
  'start_neurons': 4,
  'use_layerwise_dropout_batchnorm': True},
 {'activation': 'tanh',
  'batch_size': 256,
  'decay_sequence': [1, 2, 4, 6, 8, 10],
  'dropout_rate': 0.3,
  'epochs': 40,
  'filters': 17,
  'kernel_size': 5,
  'l1_r': 0.0001,
  'l2_r': 0,
  'learning_rate': 0.01,
  'num_channels': 3,
  'num_layers': 2,
  'optimizer': 'sgd',
  'output_layer_activation': 'sigmoid',
  'padding': 'same',
  'start_neurons': 4,
  'use_layerwise_dropout_batchnorm': True},
 {'activation': 'tanh',
  'batch_size': 256,
  'decay_sequence': [1, 2, 4, 6, 8, 10],
  'dropout_rate': 0.3,
  'epo

  temp_df = pd.DataFrame(np.array(picks).reshape(1,len(results_df.columns)), columns = results_df.columns)


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 6000, 4)           128       
_________________________________________________________________
dropout (Dropout)            (None, 6000, 4)           0         
_________________________________________________________________
batch_normalization (BatchNo (None, 6000, 4)           16        
_________________________________________________________________
lstm_1 (LSTM)                (None, 6000, 2)           56        
_________________________________________________________________
dropout_1 (Dropout)          (None, 6000, 2)           0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 6000, 2)           8         
_________________________________________________________________
flatten (Flatten)            (None, 12000)             0