In [1]:
import numpy as np
import pandas as pd

import sklearn as sk
import matplotlib.pyplot as plt
from obspy import Stream, Trace, UTCDateTime
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

import pylab as pl
from keras.callbacks import ModelCheckpoint

import keras

from keras.layers import Activation, Conv1D, Dense, Dropout, Flatten, MaxPooling3D, BatchNormalization, InputLayer, LSTM
from keras.layers import Dropout
from keras.layers.advanced_activations import LeakyReLU
from keras.losses import categorical_crossentropy
from keras.models import Sequential
from keras.utils import Sequence
from keras.optimizers import Adam
from tensorflow.keras import regularizers
from keras.utils import np_utils
from keras.utils.vis_utils import plot_model
from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint
from sklearn.model_selection import ParameterGrid
import re
from sklearn.metrics import confusion_matrix

import os
import sys
# Raw string literal: the Windows path contains backslashes (\D, \T, \M) that
# would otherwise be parsed as (invalid) escape sequences and trigger a warning.
classes_dir = r'C:\Documents\Thesis_ssd\MasterThesis-2.0'
# Switch the working directory before the `Classes.*` imports that follow;
# presumably the project-local package is resolved relative to this folder.
os.chdir(classes_dir)
from Classes.DataProcessing.LoadData import LoadData
from Classes.DataProcessing.BaselineHelperFunctions import BaselineHelperFunctions
from Classes.DataProcessing.DataHandler import DataHandler
from Classes.DataProcessing.DataGenerator import DataGenerator
from Classes.DataProcessing.NoiseAugmentor import NoiseAugmentor
from Classes.Modeling.Models import Models
from Classes.Modeling.RandomGridSearch import RandomGridSearch
from Classes.Modeling.CustomCallback import CustomCallback
from Classes.Scaling.ScalerFitter import ScalerFitter
from Classes.Scaling.MinMaxScalerFitter import MinMaxScalerFitter
from Classes.Scaling.StandardScalerFitter import StandardScalerFitter
# Project-local helper, invoked once at import time.
# NOTE(review): presumably silences TensorFlow log output — confirm in Classes/Tf_shutup.
from Classes import Tf_shutup
Tf_shutup.Tf_shutup()

from livelossplot import PlotLossesKeras

import tensorflow as tf

from matplotlib.colors import ListedColormap
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Default size applied to every figure created after this point.
plt.rcParams["figure.figsize"]= (15,15)
# Shared helper instance; later cells use it for step counts, loss ranking and plotting.
helper = BaselineHelperFunctions()

import sys
# True when the `google.colab` module is already loaded in this interpreter.
ISCOLAB = 'google.colab' in sys.modules

import random
import pprint

# Raw string literal so the backslashes of the Windows path are taken verbatim
# instead of being parsed as (invalid) escape sequences.
base_dir = r'C:\Documents\Thesis_ssd\MasterThesis-2.0'
# All later cells run with the project root as the working directory.
os.chdir(base_dir)

ModuleNotFoundError: No module named 'Classes.DataProcesssing'

In [None]:
# Two target classes; the loader is asked for a balanced dataset.
num_classes = 2
shuffle = True

loadData = LoadData(num_classes=num_classes, isBalanced=True)

# Full dataset plus its train / validation / test partitions.
full_ds, train_ds, val_ds, test_ds = loadData.getDatasets(shuffle=shuffle)

# Generator factory reused by the model-building and training cells.
data_gen = DataGenerator()

In [None]:
"""
{'model_nr': 5, 'index': 10}
{'batch_size': 32, 'epochs': 35, 'learning_rate': 1e-05, 'optimizer': 'rmsprop'}
{'activation': 'relu', 'dropout_rate': 0.4, 'filters': 21, 'kernel_size': 7, 'l1_r': 0.001, 'l2_r': 0.2, 
'output_layer_activation': 'sigmoid', 'padding': 'same', 'start_neurons': 16}

"""
# ---------------- Model picker ----------------
model_nr = 5

# -------------- Training settings --------------
batch_size = 32
epochs = 100
learning_rate = 1e-5
# Alternative optimizer kept for reference:
# opt = tf.keras.optimizers.SGD(learning_rate=learning_rate, clipnorm=1.0, clipvalue=0.5)
opt = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)

# ------------ Architecture settings ------------
activation = 'relu'
output_layer_activation = 'sigmoid'
padding = 'same'
start_neurons = 16
filters = 21
kernel_size = 7

# ------------- Regularisation ------------------
dropout_rate = 0.4
l1_r = 0.001
l2_r = 0.2

# ---------------- Preprocessing ----------------
test = False
detrend = False
use_scaler = True
use_noise_augmentor = True
use_highpass = False
highpass_freq = 0.2

In [None]:
# Read the trace dimensions from the training set; the model input shape is
# assembled from them together with the batch size.
num_ds, channels, timesteps = data_gen.get_trace_shape_no_cast(train_ds)
input_shape = (batch_size, channels, timesteps)

# Arguments forwarded to the project-local model factory. Every value comes
# from the hyperparameter cell so that editing that cell is enough to change
# the architecture (previously 'padding' was hard-coded to 'same' here and
# silently ignored the `padding` hyperparameter).
build_model_args = {'model_nr' : model_nr,
                    'input_shape' : input_shape,
                    'num_classes' : num_classes,
                    'dropout_rate' : dropout_rate,
                    'activation' : activation,
                    'output_layer_activation' : output_layer_activation,
                    'l2_r' : l2_r,
                    'l1_r' : l1_r,
                    'full_regularizer' : True,
                    'start_neurons' : start_neurons,
                    'filters' : filters,
                    'kernel_size' : kernel_size,
                    'padding' : padding}
model = Models(**build_model_args).model

# Compile settings. Precision/Recall are created with their default
# arguments (the explicit all-None keyword arguments were equivalent).
model_args = {'loss' : "binary_crossentropy",
              'optimizer' : opt,
              'metrics' : ["accuracy", "MSE",
                           tf.keras.metrics.Precision(),
                           tf.keras.metrics.Recall()]}
model.compile(**model_args)

In [None]:
# Fit the scaler on the training set only.
scaler = StandardScalerFitter(train_ds).fit_scaler(test = test, detrend = detrend)

# The noise augmentor is optional; it is built from the training set and
# receives the fitted scaler.
aug = None
if use_noise_augmentor:
    aug = NoiseAugmentor(train_ds, use_scaler, scaler)

# Keyword arguments shared by the train / validation / test generators.
# (The original cell was missing the comma after 'use_noise_augmentor',
# which made the whole cell a SyntaxError.)
gen_args = {
    'batch_size' : batch_size,
    'test' : test,
    'detrend' : detrend,
    'use_scaler' : use_scaler,
    'scaler' : scaler,
    'use_noise_augmentor' : use_noise_augmentor,
    'augmentor' : aug,
    'num_classes' : num_classes,
    'use_highpass' : use_highpass,
    'highpass_freq' : highpass_freq
}

train_gen = data_gen.data_generator(train_ds, **gen_args)
val_gen = data_gen.data_generator(val_ds, **gen_args)
test_gen = data_gen.data_generator(test_ds, **gen_args)

# Later cells read `custom_callback.full_training_logs`, so the callback has
# to take part in training; it was previously created but never passed to fit.
# NOTE(review): assumes CustomCallback is a Keras callback — confirm.
custom_callback = CustomCallback(data_gen)

args = {'steps_per_epoch' : helper.get_steps_per_epoch(train_ds, batch_size, test),
        'epochs' : epochs,
        'validation_data' : val_gen,
        'validation_steps' : helper.get_steps_per_epoch(val_ds, batch_size, test),
        'verbose' : 1,
        'use_multiprocessing' : False,
        'workers' : 1,
        'callbacks' : [PlotLossesKeras(), custom_callback]
}

model_fit = model.fit(train_gen, **args)

In [None]:
# Logs accumulated by the custom callback during training.
# NOTE(review): presumably only populated when `custom_callback` was passed to
# model.fit through `callbacks` — confirm before relying on the contents.
full_logs = custom_callback.full_training_logs

In [None]:
# Rank training samples by loss using the helper implementation.
# NOTE(review): the notebook-local function of the same name (defined in a later
# cell) takes (full_logs, train_ds, n), whereas this call passes
# (train_ds, 100, full_logs) — confirm the helper's parameter order.
helper.get_n_points_with_highest_training_loss(train_ds, 100, full_logs)

In [None]:
def get_n_points_with_highest_training_loss(full_logs, train_ds, n):
    """Return the ``n`` training samples with the highest average batch loss.

    Each sample is charged the loss of every logged batch it appeared in; its
    score is the mean of those batch losses.

    Args:
        full_logs: Iterable of per-batch dicts. Each dict must provide
            ``'loss'`` (a number) and ``'batch_samples'`` (a sequence whose
            items have the sample path at index 0).
        train_ds: Sequence of ``(path, label)`` rows, e.g. a 2-D numpy array
            or a plain list of pairs.
        n: Maximum number of samples to return.

    Returns:
        A list of ``(path, label, average_loss)`` tuples sorted by descending
        average loss, truncated to ``n`` entries. Samples that never appear in
        ``full_logs`` are left out.

    Raises:
        KeyError: If a logged sample path is not present in ``train_ds``.
    """
    # Per-path accumulator, keyed by the sample path.
    stats = {}
    for path, label in train_ds:
        stats[path] = {'label': label, 'loss': 0, 'occurances': 0}

    # Charge every sample in a batch with that batch's loss.
    for batch in full_logs:
        batch_loss = batch['loss']
        for sample in batch['batch_samples']:
            entry = stats[sample[0]]
            entry['loss'] += batch_loss
            entry['occurances'] += 1

    # Walk the rows in dataset order so ties keep that order (the sort is stable).
    ranked = []
    for path, _ in train_ds:
        entry = stats[path]
        if entry['occurances'] == 0:
            # Never seen during training: no average to report.
            continue
        average_loss = entry['loss'] / entry['occurances']
        ranked.append((path, entry['label'], average_loss))

    ranked.sort(key=lambda item: item[2], reverse=True)
    return ranked[:n]
        
#get_n_points_with_highest_training_loss(full_logs, train_ds, 100)
    

In [None]:
# `Model.evaluate` accepts generators directly; the deprecated
# `evaluate_generator` wrapper is no longer needed. `verbose=0` keeps the
# silent behaviour that `evaluate_generator` had by default.
model.evaluate(
    test_gen,
    steps=helper.get_steps_per_epoch(test_ds, batch_size, test),
    verbose=0,
)

In [None]:
# NOTE(review): `y_true_categorical` and `predictions` are not defined in any
# visible cell, and the 1234-row cut-off is unexplained — confirm where they
# come from before running this cell on a fresh kernel.
true_class_idx = y_true_categorical.argmax(axis=1)
predicted_class_idx = predictions[0:1234].argmax(axis=1)
print(confusion_matrix(true_class_idx, predicted_class_idx))

In [None]:
# Plot the confusion matrix via the project helper, using the test generator and test set.
helper.plot_confusion_matrix(model, test_gen, test_ds, batch_size, num_classes)

In [None]:
# `Model.predict` accepts generators directly; the deprecated
# `predict_generator` wrapper is no longer needed. Two batches are drawn from
# the validation generator; `verbose=0` matches the wrapper's old default.
model.predict(val_gen, steps=2, verbose=0)

In [None]:
# NOTE(review): an earlier cell calls helper.plot_confusion_matrix with five
# arguments (model, test_gen, test_ds, batch_size, num_classes); this
# two-argument call may target an older signature — confirm or remove.
helper.plot_confusion_matrix(test_ds, batch_size)