In [6]:
import numpy as np
import os
import sys
import pandas as pd


import seaborn as sns

import pylab as pl
import h5py

# These environment variables have to be in place BEFORE TensorFlow is imported:
# TF_CPP_MIN_LOG_LEVEL is read when TensorFlow's native runtime is loaded, and
# CUDA_VISIBLE_DEVICES has to be set before CUDA enumerates devices. Setting them
# after the import (as was done previously) is too late for the log level.
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
os.environ['CUDA_VISIBLE_DEVICES']="0"

import tensorflow as tf

# Enable on-demand GPU memory allocation for every GPU TensorFlow can see.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    print(e)

# Project bootstrap: switch into the project directory, then import the
# project-local modules and create the shared helper object.
# NOTE(review): base_dir is a hard-coded absolute path on one machine; the
# notebook will fail at os.chdir on any host where it does not exist.
base_dir = '/media/tord/T7/Thesis_ssd/MasterThesis3/'
os.chdir(base_dir)

# Imported only after the chdir above — presumably so that GlobalUtils and the
# Classes package resolve relative to the working directory; keep this order.
from GlobalUtils import GlobalUtils
utils = GlobalUtils()
# Re-anchor the working directory on the base_dir that GlobalUtils reports.
os.chdir(utils.base_dir)
from Classes.DataProcessing.HelperFunctions import HelperFunctions
from Classes.DataProcessing.DataHandler import DataHandler
from Classes.DataProcessing.LoadData import LoadData
from Classes.DataProcessing.RamLoader import RamLoader


helper = HelperFunctions()

# sys is already imported at the top of this cell; this repeat is harmless.
import sys
# True when the google.colab module has been loaded into this interpreter.
ISCOLAB = 'google.colab' in sys.modules

import random
import pprint


1 Physical GPUs, 1 Logical GPUs


In [2]:
import numpy as np
import random

import os
import sys


class RamGenerator(DataHandler):

    """
    Endless mini-batch generator over traces and labels that are already held in memory.

    After further inspection in RamGeneratorDevelop.ipynb, this class behaves as expected.
    """
    
    def __init__(self, loadData, handler, noiseAug = None):
        """
        Parameters
        ----------
        loadData : object forwarded to DataHandler.__init__; the number of distinct
            values in its label_dict is stored as self.num_classes.
        handler : stored as self.handler (not used by the methods of this class).
        noiseAug : optional augmentor. When it is not None, every batch is passed
            through preprocess_data before being yielded.
        """
        super().__init__(loadData)
        self.handler = handler
        self.num_classes = len(set(loadData.label_dict.values()))
        self.noiseAug = noiseAug

    @staticmethod
    def _padding_indices(num_samples, count):
        """Return `count` sample indices used to top up a short final batch."""
        if count <= num_samples:
            # random.randint includes both endpoints, so the largest start still
            # leaves room for `count` consecutive samples.
            start = random.randint(0, num_samples - count)
            return np.arange(start, start + count)
        # More padding requested than there are samples (batch_size much larger
        # than the dataset): wrap around instead of failing on an empty range.
        start = random.randint(0, num_samples - 1)
        return (start + np.arange(count)) % num_samples
        
    def data_generator(self, traces, labels, batch_size):
        """
        Creates a generator object which yields two arrays. One array for waveforms, and one array for labels

        The generator never terminates: after the last batch it starts again at
        offset 0. Every batch holds exactly batch_size events. When the remaining
        events cannot fill the last batch, it is topped up with a randomly placed
        run of consecutive events from the same data, so the batch shape stays
        constant. Padded batches keep the dtype and trailing dimensions of the
        inputs.

        Parameters
        ----------
        traces : indexable along axis 0, one entry per event.
        labels : indexable along axis 0, same length as traces.
        batch_size : positive int, number of events per yielded batch.

        Raises
        ------
        ValueError
            If batch_size is smaller than 1 or labels is empty. Because this is a
            generator, the error surfaces on the first next() call.
        """
        # Number of samples 
        num_samples = len(labels)
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer, got {}".format(batch_size))
        if num_samples == 0:
            # Without this guard the while-loop below would spin forever without yielding.
            raise ValueError("Cannot generate batches from an empty dataset.")
        while True:
            # Loop which goes from 0 to num_samples, jumping n number for each loop, where n is equal to batch_size
            for offset in range(0, num_samples, batch_size):
                # How many events are missing to fill this batch (<= 0 for a full batch).
                overflow = offset + batch_size - num_samples
                if overflow > 0:
                    # Last iteration of the pass: keep the leftover events and pad
                    # the rest, so the batch shape remains the same.
                    pad = self._padding_indices(num_samples, overflow)
                    batch_traces = np.concatenate((traces[offset:num_samples],
                                                   np.take(traces, pad, axis=0)))
                    batch_labels = np.concatenate((labels[offset:num_samples],
                                                   np.take(labels, pad, axis=0)))
                # Regular trucking along here
                else:
                    batch_traces = traces[offset:offset + batch_size]
                    batch_labels = labels[offset:offset + batch_size]
                
                # Adds a little noise to each event, as a regulatory measure
                # NOTE(review): on the full-batch path batch_traces is a slice of
                # `traces`; if batch_augment_noise modifies its input in place the
                # stored data would change between epochs — confirm.
                if self.noiseAug is not None:
                    batch_traces = self.preprocess_data(batch_traces)
                
                yield batch_traces, batch_labels
                
    def preprocess_data(self, traces):
        """
        Return the result of noiseAug.batch_augment_noise for the given batch.

        The call passes 0 and a tenth of noiseAug.noise_std as the second and
        third arguments — presumably the mean and standard deviation of the
        added noise; verify against the augmentor's implementation.
        """
        return self.noiseAug.batch_augment_noise(traces, 0, self.noiseAug.noise_std/10)

In [3]:
# Keyword options handed to LoadData unchanged (see LoadData for their meaning).
load_args = dict(
    earth_explo_only=True,
    noise_earth_only=False,
    noise_not_noise=False,
    downsample=True,
    upsample=True,
    frac_diff=1,
    seed=1,
    subsample_size=0.1,
    balance_non_train_set=False,
    use_true_test_set=False,
    even_balance=False,
)
loadData = LoadData(**load_args)

# get_datasets() returns four datasets, unpacked here in the order given.
full_ds, train_ds, val_ds, test_ds = loadData.get_datasets()
noise_ds = loadData.noise_ds

# Handler built on the same LoadData instance.
handler = DataHandler(loadData)

Mapping redundancy: [>                                       ] 0 %Mapping redundancy: [>                                       ] 0 %Mapping redundancy: [>                                       ] 0 %Mapping redundancy: [>                                       ] 0 %Mapping redundancy: [>                                       ] 0 %Mapping redundancy: [>                                       ] 0 %Mapping redundancy: [>                                       ] 1 %Mapping redundancy: [>                                       ] 1 %Mapping redundancy: [>                                       ] 1 %Mapping redundancy: [>                                       ] 1 %Mapping redundancy: [>                                       ] 1 %Mapping redundancy: [>                                       ] 1 %Mapping redundancy: [>                                       ] 1 %Mapping redundancy: [>                                       ] 2 %Mapping redundancy: [>                                       ]

Mapping redundancy: [--------->                              ] 27 %Mapping redundancy: [--------->                              ] 27 %Mapping redundancy: [--------->                              ] 27 %Mapping redundancy: [---------->                             ] 27 %Mapping redundancy: [---------->                             ] 27 %Mapping redundancy: [---------->                             ] 27 %Mapping redundancy: [---------->                             ] 28 %Mapping redundancy: [---------->                             ] 28 %Mapping redundancy: [---------->                             ] 28 %Mapping redundancy: [---------->                             ] 28 %Mapping redundancy: [---------->                             ] 28 %Mapping redundancy: [---------->                             ] 28 %Mapping redundancy: [---------->                             ] 29 %Mapping redundancy: [---------->                             ] 29 %Mapping redundancy: [---------->                

Mapping redundancy: [-------------------->                   ] 54 %Mapping redundancy: [-------------------->                   ] 54 %Mapping redundancy: [-------------------->                   ] 54 %Mapping redundancy: [-------------------->                   ] 54 %Mapping redundancy: [-------------------->                   ] 54 %Mapping redundancy: [-------------------->                   ] 54 %Mapping redundancy: [-------------------->                   ] 54 %Mapping redundancy: [--------------------->                  ] 55 %Mapping redundancy: [--------------------->                  ] 55 %Mapping redundancy: [--------------------->                  ] 55 %Mapping redundancy: [--------------------->                  ] 55 %Mapping redundancy: [--------------------->                  ] 55 %Mapping redundancy: [--------------------->                  ] 55 %Mapping redundancy: [--------------------->                  ] 56 %Mapping redundancy: [--------------------->     

Mapping redundancy: [-------------------------------->       ] 82 %Mapping redundancy: [-------------------------------->       ] 82 %Mapping redundancy: [-------------------------------->       ] 82 %Mapping redundancy: [-------------------------------->       ] 83 %Mapping redundancy: [-------------------------------->       ] 83 %Mapping redundancy: [-------------------------------->       ] 83 %Mapping redundancy: [-------------------------------->       ] 83 %Mapping redundancy: [-------------------------------->       ] 83 %Mapping redundancy: [-------------------------------->       ] 83 %Mapping redundancy: [-------------------------------->       ] 83 %Mapping redundancy: [-------------------------------->       ] 84 %Mapping redundancy: [-------------------------------->       ] 84 %Mapping redundancy: [-------------------------------->       ] 84 %Mapping redundancy: [-------------------------------->       ] 84 %Mapping redundancy: [---------------------------

In [4]:
# Generator factory without a noise augmentor (noiseAug stays at its default, None).
gen = RamGenerator(loadData, handler, noiseAug=None)

In [8]:
# --- Preprocessing switches passed to RamLoader ---
use_time_augmentor = True
use_noise_augmentor = True
use_scaler = True
use_minmax = False

# --- Filter settings passed to RamLoader (no filter name is selected) ---
filter_name = None
band_min = 2.0
band_max = 4.0
highpass_freq = 0.1

# --- Arguments for load_to_ram / test-set switch ---
is_lstm = False
load_test_set = False
num_channels = 3


ramLoader = RamLoader(
    loadData,
    handler,
    use_time_augmentor=use_time_augmentor,
    use_noise_augmentor=use_noise_augmentor,
    use_scaler=use_scaler,
    use_minmax=use_minmax,
    filter_name=filter_name,
    band_min=band_min,
    band_max=band_max,
    highpass_freq=highpass_freq,
    load_test_set=load_test_set,
)

# load_to_ram returns seven values: train/validation traces and labels,
# followed by the time augmentor, scaler and noise augmentor objects.
(train_trace, train_label,
 val_trace, val_label,
 timeAug, scaler, noiseAug) = ramLoader.load_to_ram(is_lstm, num_channels)


Fit process completed after 147.5186369419098 seconds. Total datapoints fitted: 26231.
Average time per datapoint: 0.005623828178182677


Stage one loading training set, timeAug: [--------------------------------------->] 100 %

Stage one loading validation set, timeAug: [--------------------------------------->] 100 %

Fitting scaler progress: [------------------->] 100 %

Stage two loading training set, labels and sscaler: [--------------------------------------->] 100 %

Stage two loading validation set, labels and sscaler: [--------------------------------------->] 100 %

Completed loading to RAM [--------------------------------------> ] 99 %


In [9]:
# One endless batch generator per split, both using the same batch size.
batch_size = 10
train_gen = gen.data_generator(traces=train_trace, labels=train_label, batch_size=batch_size)
val_gen = gen.data_generator(traces=val_trace, labels=val_label, batch_size=batch_size)

In [11]:
# Pull one (traces, labels) batch from the training generator and display it.
# NOTE(review): this prints the full arrays; showing only the shapes would keep
# the notebook output short.
next(train_gen)

(array([[[ -4.28926040e-01,  -2.83145426e-01,  -9.08361777e-02, ...,
            4.44642549e-02,   1.31019080e-01,   1.80893020e-01],
         [  1.85999456e+00,   1.23532910e+00,   4.47964167e-01, ...,
            4.53149899e-02,  -2.40458441e-02,  -4.80226970e-02],
         [ -1.37308638e+00,  -1.26878414e+00,  -9.23753790e-01, ...,
           -1.21583297e-02,  -9.63912400e-03,  -2.97755707e-02]],
 
        [[  1.66536648e-03,   8.39106777e-03,   9.05941231e-03, ...,
            2.50062933e-02,   2.30109424e-02,   1.84717072e-02],
         [  9.43343271e-02,   7.00060670e-02,   4.12360215e-02, ...,
           -2.51466545e-03,  -1.33251946e-03,   5.47695106e-03],
         [  6.13500274e-02,   3.69266026e-02,   1.18902186e-02, ...,
            1.43828884e-02,   8.54509724e-03,   3.11816817e-03]],
 
        [[  1.21663817e-02,   1.63910995e-02,   2.43824700e-02, ...,
            2.34849911e-02,   2.06963720e-02,   1.40579206e-02],
         [ -1.74574315e-02,  -3.49530133e-02,  -2.316776