In [1]:
import numpy as np
import pandas as pd
import h5py
import gc

import sklearn as sk

from sklearn.model_selection import ParameterGrid




import tensorflow as tf
from tensorflow.keras import mixed_precision
from tensorflow.keras.utils import GeneratorEnqueuer

import os
base_dir = '/media/tord/T7/Thesis_ssd/MasterThesis3'
os.chdir(base_dir)

from Classes.Modeling.DynamicModels import DynamicModels
from Classes.Modeling.StaticModels import StaticModels
from Classes.DataProcessing.LoadData import LoadData
from Classes.DataProcessing.HelperFunctions import HelperFunctions
from Classes.DataProcessing.DataHandler import DataHandler
from Classes.DataProcessing.RamLoader import RamLoader
from Classes.Modeling.GridSearchResultProcessor import GridSearchResultProcessor
from Classes.DataProcessing.ts_RamGenerator import data_generator


import sys


import random
import pprint
import re
import json


INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: GeForce RTX 3090, compute capability 8.6


In [2]:
class TrainSingleModel(GridSearchResultProcessor):
    
    def __init__(self, x_train, y_train, x_val, y_val, x_test, y_test, noiseAug, helper, loadData, 
                 model_type, num_channels, use_tensorboard, use_liveplots, use_custom_callback,
                 use_early_stopping, use_reduced_lr, log_data = True, results_df = None, 
                 results_file_name = None, index = None):
        self.x_train = x_train
        self.y_train = y_train
        self.x_val = x_val
        self.y_val = y_val
        self.x_test = x_test
        self.y_test = y_test
        
        self.num_channels = num_channels
        
        self.noiseAug = noiseAug
        self.helper = helper
        self.loadData = loadData
        
        self.use_tensorboard = use_tensorboard
        self.use_liveplots = use_liveplots
        self.use_custom_callback = use_custom_callback
        self.use_early_stopping = use_early_stopping
        self.use_reduced_lr = use_reduced_lr
        
        self.num_classes = len(set(self.loadData.label_dict.values()))
        
        self.model_type = model_type
        self.results_df = results_df
        self.log_data = log_data
        
        self.index = index
        
    def create_and_compile_model(self, **p):
        gc.collect()
        tf.keras.backend.clear_session()
        tf.config.optimizer.set_jit(True)
        mixed_precision.set_global_policy('mixed_float16')
        
        epoch = p["epochs"]
        batch_size = p["batch_size"]
        opt = self.helper.get_optimizer(p["optimizer"], p["learning_rate"])
        
        p = self.helper.handle_hyperparams(self.num_classes, **p)
        
        if self.index != None:
            model_info = {"model_type" : self.model_type, "index" : self.index}
        else:
            model_info = {"model_type" : self.model_type}
        current_picks = [model_info, p]
        pp = pprint.PrettyPrinter(indent=4)
        pp.pprint(current_picks)
        if self.log_data and self.results_df != None and self.results_file_name != None:
            self.results_df = self.store_params_before_fit_opti(p, self.results_df, self.results_file_name)
        
        _,_,timesteps = self.x_train.shape
        input_shape = (timesteps, self.num_channels)
        
        model = DynamicModels(self.model_type, self.num_classes, input_shape, **p).model
        model_compile_args = self.helper.generate_model_compile_args(opt, self.num_classes)
        model.compile(**model_compile_args)
        return model
        
        
    
    
    
    def create_enqueuer(self, X, y, batch_size, noiseAug, num_channels):
        enq = GeneratorEnqueuer(data_generator(X, y, batch_size, noiseAug, num_channels = num_channels, is_lstm  = True), 
                                use_multiprocessing = False)
        return enq
        
    def fit_model(self, model, workers, max_queue_size, **p):
        train_enq = self.create_enqueuer(self.x_train, self.y_train, p["batch_size"], self.noiseAug, self.num_channels)
        val_enq = self.create_enqueuer(self.x_val, self.y_val, p["batch_size"], self.noiseAug, self.num_channels)
        train_enq.start(workers = workers, max_queue_size = max_queue_size)
        val_enq.start(workers = workers, max_queue_size = max_queue_size)
        train_gen = train_enq.get()
        val_gen = val_enq.get()
        
        fit_args = self.helper.generate_fit_args(self.loadData.train, self.loadData.val, self.loadData,
                                                 p["batch_size"], p["epochs"], val_gen,
                                                 use_tensorboard = self.use_tensorboard, 
                                                 use_liveplots = self.use_liveplots, 
                                                 use_custom_callback = self.use_custom_callback,
                                                 use_early_stopping = self.use_early_stopping,
                                                 use_reduced_lr = self.use_reduced_lr)
        try:
            print(f"Utilizes {self.helper.get_steps_per_epoch(self.loadData.val, p['batch_size'])*p['batch_size']}/{len(self.loadData.val)} validation points")
            print(f"Utilizes {self.helper.get_steps_per_epoch(self.loadData.train, p['batch_size'])*p['batch_size']}/{len(self.loadData.train)} training points")
            print("---------------------------------------------------------------------------------")

            # Fit the model using the generated args
            model.fit(train_gen, **fit_args)
            train_enq.stop()
            val_enq.stop()
            del train_gen, val_gen, train_enq, val_enq

        except Exception as e:
            print(str(e))
            print("Something went wrong.")
        return model
    
    def run(self, workers, max_queue_size, **p, evaluate_train = False, evaluate_val = False, evaluate_test = False):
        model = self.create_and_compile_model(**p)
        model = self.fit_model(model, 16, 15, **p)
        self.model = model
        if evaluate_train:
            self.helper.evaluate_model(model, self.x_train, self.y_train, self.loadData.label_dict, self.num_channels, self.noiseAug)
        if evaluate_val:
            self.helper.evaluate_model(model, self.x_val, self.y_val, self.loadData.label_dict, self.num_channels, self.noiseAug)
        if evaluate_test:
            self.helper.evaluate_model(model, self.x_test, self.y_test, self.loadData.label_dict, self.num_channels, self.noiseAug)
        
        return model

        
            
        
       

SyntaxError: invalid syntax (<ipython-input-2-25b33e2560aa>, line 104)

In [None]:
load_args = {
    'earth_explo_only' : True,
    'noise_earth_only' : False,
    'noise_not_noise' : False,
    'downsample' : True,
    'upsample' : True,
    'frac_diff' : 1,
    'seed' : 1,
    'subsample_size' : 0.05,
    'balance_non_train_set' : True,
    'use_true_test_set' : False,
    'even_balance' : True
}
loadData = LoadData(**load_args)
train_ds, val_ds, test_ds = loadData.get_datasets()
noise_ds = loadData.noise_ds
handler = DataHandler(loadData)
helper = HelperFunctions()

In [None]:
use_time_augmentor = True
use_noise_augmentor = True
scaler_name = "minmax"
filter_name = None
band_min = 2
band_max = 4
highpass_freq = 5

use_tensorboard = True
use_liveplots = False
use_custom_callback = True
use_early_stopping = True
start_from_scratch = False
use_reduced_lr = True
log_data = False

load_test_set = True


ramLoader = RamLoader(loadData, 
                      handler, 
                      use_time_augmentor = use_time_augmentor, 
                      use_noise_augmentor = use_noise_augmentor, 
                      scaler_name = scaler_name,
                      filter_name = filter_name, 
                      band_min = band_min,
                      band_max = band_max,
                      highpass_freq = highpass_freq, 
                      load_test_set = load_test_set)
x_train, y_train, x_val, y_val, x_test, y_test, noiseAug = ramLoader.load_to_ram()


In [None]:
model_type = "CNN_grow"
num_channels = 3

singleModel = TrainSingleModel(x_train, y_train, x_val, y_val, x_test, y_test, 
                               noiseAug, helper, loadData, 
                               model_type, 
                               num_channels, 
                               use_tensorboard, 
                               use_liveplots, 
                               use_custom_callback,
                               use_early_stopping, 
                               use_reduced_lr, 
                               log_data = log_data, 
                               results_df = None, 
                               results_file_name = None, 
                               index = None)

workers = 16
max_queue_size = 15


hyper_params = {
    "num_layers" : 2,
    "batch_size" : 64,
    "epochs" : 40,
    "learning_rate" : 0.001,
    "optimizer" : "sgd",
    "num_filters" : 68,
    "filter_size" : 42,
    "cnn_activation" : "relu",
    "dense_activation" : "relu",
    "padding" : "same",
    "use_layerwise_dropout_batchnorm" : True,
    "growth_sequence" : [1,2, 4],
    "dropout_rate" : 0.01,
    "l2_r" : 0.01,
    "l1_r" : 0.01,
    "first_dense_units" : 286,
    "output_layer_activation" : "sigmoid"
}
model = singleModel.run(workers = workers, max_queue_size = max_queue_size, **hyper_params)

In [None]:
from sklearn.metrics import classification_report
from tensorflow.keras.utils import OrderedEnqueuer

def evaluate_generator(model, x_test, y_test, batch_size, label_dict, num_channels, noiseAug, helper):
    pp = pprint.PrettyPrinter(indent=4)
    steps = helper.get_steps_per_epoch(x_test, batch_size)
    test_enq = OrderedEnqueuer(data_generator(x_test, y_test, batch_size, noiseAug, num_channels = num_channels, is_lstm  = True), use_multiprocessing = False)
    test_enq.start(workers = 1, max_queue_size = 15)
    test_gen = test_enq.get()
    predictions = predict_generator(model, test_gen, steps, label_dict)
    predictions = np.rint(predictions)
    model.evaluate(x = test_gen, steps = steps)
    y_test = np.rint(y_test)
    y_test = y_test[0:len(predictions)]
    num_classes = len(set(label_dict.values()))
    predictions = np.reshape(predictions,(predictions.shape[0]))
    y_test = np.reshape(y_test, (y_test.shape[0]))
    assert predictions.shape == y_test.shape
    print(y_test.shape)
    print(predictions.shape)
    conf = tf.math.confusion_matrix(y_test, predictions, num_classes=num_classes)
    class_report = classification_report(y_test, predictions, target_names = helper.handle_non_noise_dict(label_dict))
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(conf)
    
    print(class_report)
    return conf, class_report
    
    
    
    
    
def predict_generator(model, gen, steps, label_dict):
    predictions = model.predict(x = gen, steps = steps)
    return predictions

def round_predictions(predictions):
    predictions = np.rint(predictions)
    return predictions

def evaluate_model(model, x_test, y_test, label_dict, num_channels, noiseAug, helper, plot_confusion_matrix = True):
    pp = pprint.PrettyPrinter(indent = 4)
    if noiseAug != None:
        x_test = noiseAug.batch_augment_noise(x_test, 0, noiseAug.noise_std/10)
    x_test[:][:,:num_channels]
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[2], x_test.shape[1]))
    model.evaluate(x = x_test, y = y_test)
    predictions = model.predict(x_test)
    predictions = np.rint(predictions)
    y_test = np.rint(y_test)
    y_test = y_test[:len(predictions)]
    predictions = np.reshape(predictions, (predictions.shape[0]))
    y_test = np.reshape(y_test, (y_test.shape[0]))
    num_classes = len(set(label_dict.values()))
    assert predictions.shape == y_test.shape
    print(y_test.shape)
    print(predictions.shape)
    conf = tf.math.confusion_matrix(y_test, predictions, num_classes=num_classes)
    class_report = classification_report(y_test, predictions, target_names = helper.handle_non_noise_dict(label_dict))
    if plot_confusion_matrix:
        helper.plot_confusion_matrix(conf, helper.handle_non_noise_dict(label_dict))
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(conf)
    print(class_report)
    return conf, class_report, predictions, y_test
    

In [None]:
_, _, predictions, y_test= evaluate_model(model, x_val, y_val, loadData.label_dict, num_channels, noiseAug, helper)

In [None]:
helper.evaluate_generator(model, x_test, y_test, hyper_params["batch_size"], loadData.label_dict, num_channels, noiseAug, plot_confusion_matrix = True)

In [None]:
predictions[0:20]

In [None]:
y_test[0:20]