In [1]:
import numpy as np
import pandas as pd
import h5py
import gc

import sklearn as sk

from sklearn.model_selection import ParameterGrid




import tensorflow as tf
from tensorflow.keras import mixed_precision
from tensorflow.keras.utils import GeneratorEnqueuer

import os
base_dir = '/media/tord/T7/Thesis_ssd/MasterThesis3'
os.chdir(base_dir)

from Classes.Modeling.DynamicModels import DynamicModels
from Classes.Modeling.StaticModels import StaticModels
from Classes.DataProcessing.LoadData import LoadData
from Classes.DataProcessing.HelperFunctions import HelperFunctions
from Classes.DataProcessing.DataHandler import DataHandler
from Classes.DataProcessing.RamLoader import RamLoader
from Classes.Modeling.GridSearchResultProcessor import GridSearchResultProcessor
from Classes.DataProcessing.ts_RamGenerator import data_generator
from Classes.Modeling.TrainSingleModel import TrainSingleModel



import sys


import random
import pprint
import re
import json


INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: GeForce RTX 3090, compute capability 8.6


In [2]:
load_args = {
    'earth_explo_only' : True,
    'noise_earth_only' : False,
    'noise_not_noise' : False,
    'downsample' : True,
    'upsample' : True,
    'frac_diff' : 1,
    'seed' : 1,
    'subsample_size' : 0.01,
    'balance_non_train_set' : True,
    'use_true_test_set' : False,
    'even_balance' : True
}
loadData = LoadData(**load_args)
train_ds, val_ds, test_ds = loadData.get_datasets()
noise_ds = loadData.noise_ds
handler = DataHandler(loadData)
helper = HelperFunctions()

2 2
2 2
2 2
Mapping train redundancy: : [>                                       ] 0 %Mapping train redundancy: : [>                                       ] 0 %Mapping train redundancy: : [>                                       ] 0 %Mapping train redundancy: : [>                                       ] 0 %Mapping train redundancy: : [>                                       ] 0 %Mapping train redundancy: : [>                                       ] 0 %Mapping train redundancy: : [>                                       ] 0 %Mapping train redundancy: : [>                                       ] 1 %Mapping train redundancy: : [>                                       ] 1 %Mapping train redundancy: : [>                                       ] 1 %Mapping train redundancy: : [>                                       ] 1 %Mapping train redundancy: : [>                                       ] 1 %Mapping train redundancy: : [>                                       ] 1 %Mapping train



Loaded explosion and earthquake dataset:
Evenly balanced among classes in the train set.
As well as non train sets.
Distribution (Label: (counts, proportion)) of
Train ds:
earthquake: (808, 0.4915)  |  explosion: (836, 0.5085)  
Val ds:
earthquake: (126, 0.5122)  |  explosion: (120, 0.4878)  
Test ds:
earthquake: (80, 0.4878)  |  explosion: (84, 0.5122)  


In [48]:
from itertools import chain
class NarrowOpt(GridSearchResultProcessor):
    def __init__(self, loadData, model_type, scaler_name, use_time_augmentor, use_noise_augmentor,
                filter_name, static_grid, search_grid, use_tensorboard = False, 
                use_liveplots = False, use_custom_callback = False, use_early_stopping = False, band_min = 2.0,
                band_max = 4.0, highpass_freq = 1, start_from_scratch = True, use_reduced_lr = False, num_channels = 3):
        self.loadData = loadData
        self.model_type = model_type
        
        
        self.num_classes = len(set(self.loadData.label_dict.values()))
        self.scaler_name = scaler_name
        self.use_noise_augmentor = use_noise_augmentor
        self.use_time_augmentor = use_time_augmentor
        self.filter_name = filter_name

        self.static_grid = static_grid
        self.search_grid = search_grid
        
        self.use_tensorboard = use_tensorboard
        self.use_liveplots = use_liveplots
        self.use_custom_callback = use_custom_callback
        self.use_early_stopping = use_early_stopping
        self.use_reduced_lr = use_reduced_lr
        
        self.highpass_freq = highpass_freq
        self.band_min = band_min
        self.band_max = band_max
        self.start_from_scratch = start_from_scratch
        self.num_channels = num_channels

        
        self.helper = HelperFunctions()
        self.handler = DataHandler(self.loadData)

    def fit(self):
        pp = pprint.PrettyPrinter(indent=4)
        # Creating grid:
        self.p = self.create_search_space(self.static_grid, self.search_grid)
        pp.pprint(self.p)
        if self.start_from_scratch:
            print("================================================================================================================================================")
            print("================================ YOU WILL BE PROMPTED AFTER DATA HAS BEEN LOADED TO CONFIRM CLEARING OF DATASET ================================")
            print("================================================================================================================================================")
        # Preprocessing and loading all data to RAM:
        self.ramLoader = RamLoader(self.loadData, 
                              self.handler, 
                              use_time_augmentor = self.use_time_augmentor, 
                              use_noise_augmentor = self.use_noise_augmentor, 
                              scaler_name = self.scaler_name,
                              filter_name = self.filter_name, 
                              band_min = self.band_min,
                              band_max = self.band_max,
                              highpass_freq = self.highpass_freq, 
                              load_test_set = False)
        self.x_train, self.y_train, self.x_val, self.y_val, self.noiseAug = self.ramLoader.load_to_ram()

        # Create name of results file, get initiated results df, either brand new or continue old.
        self.results_file_name = self.get_results_file_name()
        print(self.results_file_name)
        self.results_df = self.initiate_results_df_opti(self.results_file_name, self.num_classes, self.start_from_scratch, self.p[0])
        print(self.results_df)
        for i in range(len(self.p)):
            gc.collect()
            tf.keras.backend.clear_session()
            tf.compat.v1.reset_default_graph()
            tf.config.optimizer.set_jit(True)
            mixed_precision.set_global_policy('mixed_float16')
            try:
                trainSingleModel = TrainSingleModel(self.x_train, self.y_train, self.x_val, self.y_val,
                                                    None, None, self.noiseAug, self.helper, self.loadData,
                                                    self.model_type, self.num_channels, self.use_tensorboard,
                                                    self.use_liveplots, self.use_custom_callback, 
                                                    self.use_early_stopping, self.use_reduced_lr, self.ramLoader,
                                                    log_data = self.log_data, results_df = self.results_df,
                                                    results_file_name = self.results_file_name, index = i,
                                                    start_from_scratch = False)
                # Add try catch clauses here
                model, self.results_df = trainSingleModel.run(16, 15, evaluate_train = False, evaluate_val = False, evaluate_test = False, meier_load = False, **self.p[i])
                del model
            except Exception as e:
                print(str(e))
                continue
            finally:
                gc.collect()
                    
                tf.keras.backend.clear_session()
                tf.compat.v1.reset_default_graph()
                continue
        
    def create_search_space(self, static_grid, search_grid):
        pp = pprint.PrettyPrinter(indent=4)
        key_list = list(static_grid.keys())
        search_list = []
        for key in key_list:
            if len(search_grid[key]) > 1:
                one_model = static_grid.copy()
                one_model[key] = search_grid[key]
                #print(one_model)
                key_grid = list(ParameterGrid(one_model))
                search_list.append(key_grid)
            else:
                continue
        search_list = list(chain.from_iterable(search_list))
        pprint.pprint(search_list)
        return search_list
    

In [49]:
static_grid = {"as": [1],
               "bs": ["hey"],
               "cs": ["tits"]
              }

search_grid = {"as" : [1,2,3],
               "bs" : ["there", "mister"],
               "cs" : ["mcgee", "big boy"]
              }


narrowOpt = NarrowOpt(loadData,None,None,None,None,None,static_grid,search_grid)

In [50]:
narrowOpt.fit()

[{'as': 1, 'bs': 'hey', 'cs': 'tits'},
 {'as': 2, 'bs': 'hey', 'cs': 'tits'},
 {'as': 3, 'bs': 'hey', 'cs': 'tits'},
 {'as': 1, 'bs': 'there', 'cs': 'tits'},
 {'as': 1, 'bs': 'mister', 'cs': 'tits'},
 {'as': 1, 'bs': 'hey', 'cs': 'mcgee'},
 {'as': 1, 'bs': 'hey', 'cs': 'big boy'}]
[   {'as': 1, 'bs': 'hey', 'cs': 'tits'},
    {'as': 2, 'bs': 'hey', 'cs': 'tits'},
    {'as': 3, 'bs': 'hey', 'cs': 'tits'},
    {'as': 1, 'bs': 'there', 'cs': 'tits'},
    {'as': 1, 'bs': 'mister', 'cs': 'tits'},
    {'as': 1, 'bs': 'hey', 'cs': 'mcgee'},
    {'as': 1, 'bs': 'hey', 'cs': 'big boy'}]
