In [1]:
import numpy as np
import os
import sys
import pandas as pd

import sklearn as sk
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

import pylab as pl
import h5py

# These environment variables must be in place BEFORE TensorFlow is imported:
# messages logged while the library loads are otherwise not filtered, and the
# device restriction is only guaranteed to apply if it is set before the
# CUDA runtime is initialised.
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
os.environ['CUDA_VISIBLE_DEVICES']="0"

import tensorflow as tf
from tensorflow.keras import mixed_precision

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    print(e)
# Run Keras layers with the mixed_float16 dtype policy from here on.
mixed_precision.set_global_policy('mixed_float16')

from sklearn.metrics import confusion_matrix


classes_dir = '/media/tord/T7/Thesis_ssd/MasterThesis3.0'
os.chdir(classes_dir)
from Classes.DataProcessing.LoadData import LoadData
from Classes.DataProcessing.HelperFunctions import HelperFunctions
from Classes.DataProcessing.DataHandler import DataHandler
from Classes.DataProcessing.NoiseAugmentor import NoiseAugmentor
from Classes.DataProcessing.RamLoader import RamLoader
from Classes.DataProcessing.RamGenerator import RamGenerator
from Classes.Modeling.DynamicModels import DynamicModels
from Classes.Modeling.StaticModels import StaticModels
from Classes.Modeling.InceptionTimeModel import InceptionTimeModel
from Classes.Modeling.NarrowSearchRam import NarrowSearchRam
from Classes.Modeling.CustomCallback import CustomCallback
from Classes.Modeling.ResultFitter import ResultFitter
from Classes.Scaling.ScalerFitter import ScalerFitter
from Classes.Scaling.MinMaxScalerFitter import MinMaxScalerFitter
from Classes.Scaling.StandardScalerFitter import StandardScalerFitter
import json
#from Classes import Tf_shutup
#Tf_shutup.Tf_shutup()

from livelossplot import PlotLossesKeras



from matplotlib.colors import ListedColormap

plt.rcParams["figure.figsize"]= (15,15)
helper = HelperFunctions()

import sys
ISCOLAB = 'google.colab' in sys.modules

import random
import pprint


1 Physical GPUs, 1 Logical GPUs
INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: GeForce RTX 3090, compute capability 8.6


In [2]:
# Options forwarded to LoadData as keyword arguments.
# NOTE(review): the meaning of each flag is defined in
# Classes.DataProcessing.LoadData, which is not visible here — confirm there
# before changing a value.
load_args = {
    'earth_explo_only' : False,
    'noise_earth_only' : False,
    'noise_not_noise' : True,
    'downsample' : True,
    'upsample' : True,
    'frac_diff' : 0.3,
    'seed' : 2,
    'subsample_size' : 0.3,
    'balance_non_train_set' : True,
    'use_true_test_set' : False
}
loadData = LoadData(**load_args)
# get_datasets() is unpacked into the full dataset plus the train/val/test splits.
full_ds, train_ds, val_ds, test_ds = loadData.get_datasets()
# Keep a direct handle on the loader's noise dataset; later cells concatenate it with full_ds.
noise_ds = loadData.noise_ds
handler = DataHandler(loadData)

Mapping redundancy: [--------------------------------------->] 100 %

In [3]:
# Sizes of the train, validation and test splits, in that order.
print(len(train_ds), len(val_ds), len(test_ds))
# NOTE(review): `classes` and `counts` are not used again in the visible cells;
# the call appears to be kept for the "Total: ..." summary it prints — TODO confirm.
classes, counts = handler.get_class_distribution_from_ds(train_ds)
print("Nr noise samples " + str(len(loadData.noise_ds)))

25390 5078 3386
Total: 25390, earthquake: 8446, explosion: 8337, noise: 8607
Nr noise samples 8607


In [4]:
print(full_ds)

[[ '/media/tord/T7/Thesis_ssd/norsar_data_nov/noise/2005-02-14T17.37.06.000000Z.h5'
  'noise' 0]
 [ '/media/tord/T7/Thesis_ssd/norsar_data_nov/noise/2017-02-13T13.55.42.000000Z.h5'
  'noise' 0]
 [ '/media/tord/T7/Thesis_ssd/norsar_data_nov/earthquakes/2016-02-11T23.43.25.442000Z.h5'
  'earthquake' 0]
 ..., 
 [ '/media/tord/T7/Thesis_ssd/norsar_data_nov/explosions/2016-06-16T12.22.17.857000Z.h5'
  'explosion' 0]
 [ '/media/tord/T7/Thesis_ssd/norsar_data_nov/explosions/2011-11-07T00.23.15.537000Z.h5'
  'explosion' 0]
 [ '/media/tord/T7/Thesis_ssd/norsar_data_nov/explosions/1996-09-08T21.00.07.474000Z.h5'
  'explosion' 0]]


In [5]:
import numpy as np
from tensorflow.keras import utils
import math
import random
import datetime
from dateutil import parser
import time
import h5py
import json


class TimeAugmentor():
    """Shift waveform events to random start positions inside a fixed-length window.

    ``fit`` draws, for every unique file path in ``ds``, the random start
    offset(s) to use and looks up the sample index at which the event begins.
    ``augment_event`` then rebuilds a ``(3, OUTPUT_LENGTH)`` trace in which the
    event begins at the drawn offset.

    Parameters
    ----------
    DataHandler :
        Object providing ``path_to_trace(path) -> (trace, info)``. ``trace`` is
        indexed as ``trace[channel][sample]`` and must expose ``shape[1]``.
    ds :
        2-D array whose rows are ``(path, label, redundancy_index)``. Labels
        handled by ``fit`` are "explosion", "noise" and "earthquake".
    seed :
        Seed for the offsets drawn in ``fit``. ``None`` keeps the historical
        fixed seed of 0.
    """

    # Number of samples in every augmented trace.
    OUTPUT_LENGTH = 6000
    # Exclusive upper bound of the random start offsets drawn in ``fit``.
    MAX_START_INDEX = 4500

    def __init__(self, DataHandler, ds, seed = None):
        self.handler = DataHandler
        self.ds = ds
        self.fitted_dict = {}
        self.seed = seed

    def fit(self):
        """Populate ``self.fitted_dict`` with one entry per unique path.

        Each entry is ``{'initial_index': int, 'random_start_index': ndarray}``.
        Explosion and noise paths get a single offset. Earthquake paths get
        ``max_redundancy + 1`` offsets so that every redundancy index from 0 up
        to and including the largest one in the dataset is a valid position.
        """
        time_start = time.time()
        n_rows = len(self.ds)
        # Honour the seed given to the constructor; fall back to the previous
        # hard-coded value so unseeded runs behave as before.
        np.random.seed(0 if self.seed is None else self.seed)
        print("Issues will occur if upsampling of explosions or noise is implemented")
        explo_ds = self.ds[self.ds[:,1] == "explosion"]
        earth_ds = self.ds[self.ds[:,1] == "earthquake"]
        noise_ds = self.ds[self.ds[:,1] == "noise"]
        # Redundancy indices are zero-based, so the largest index needs one
        # extra slot. An empty earthquake subset must not reach max().
        if len(earth_ds) > 0:
            n_earth_offsets = int(max(earth_ds[:,2])) + 1
        else:
            n_earth_offsets = 1

        fitted_dict = {}
        self._fit_group(explo_ds, 1, fitted_dict)
        print("Finished explosions")
        self._fit_group(noise_ds, 1, fitted_dict)
        print("Finished noise")
        self._fit_group(earth_ds, n_earth_offsets, fitted_dict)
        print("Finished earthquakes")

        self.fitted_dict = fitted_dict
        time_end = time.time()
        print(f"Fit process completed after {time_end - time_start} seconds. Total datapoints fitted: {n_rows}.")
        if n_rows > 0:
            print(f"Average time per datapoint: {(time_end - time_start) / n_rows}")

    def _fit_group(self, group_ds, n_offsets, fitted_dict):
        """Draw ``n_offsets`` start offsets for every unique path in ``group_ds``.

        Results are written into ``fitted_dict`` in place. Paths are visited in
        sorted order: iterating a plain ``set`` of strings can change order
        between interpreter runs, which would pair the seeded draws with
        different paths each time.
        """
        if len(group_ds) == 0:
            return
        unique_paths = sorted(set(group_ds[:,0]))
        n_paths = len(unique_paths)
        for count, path in enumerate(unique_paths, start=1):
            random_start_index = np.random.randint(0, self.MAX_START_INDEX, n_offsets)
            initial_index, _ = self.find_initial_event_index(path)
            fitted_dict[path] = {'initial_index': initial_index,
                                 'random_start_index' : random_start_index}
            self.progress_bar(count, n_paths)
        # The progress bar ends its line with '\r'; finish the line so the
        # next message does not overwrite it.
        print()

    def np_generator(self, path_red_ds):
        """Yield the rows of ``path_red_ds`` one at a time."""
        for row in path_red_ds:
            yield row

    def augment_event(self, path, redundancy_index):
        """Return a ``(3, OUTPUT_LENGTH)`` trace with the event moved to its fitted offset.

        ``fit`` must have been called first. ``redundancy_index`` selects which
        of the path's drawn offsets to use.
        """
        trace, info = self.handler.path_to_trace(path)
        fit = self.fitted_dict[path]
        augmented_trace = np.empty((3, self.OUTPUT_LENGTH))

        random_start_index = fit['random_start_index'][int(redundancy_index)]
        initial_index = fit['initial_index']
        # Everything from the event onset to the end of the recording.
        interesting_part_length = trace.shape[1] - initial_index
        # Samples still empty at the end of the window after the event part is placed.
        missing_length = (augmented_trace.shape[1] - random_start_index) - interesting_part_length

        for i in range(augmented_trace.shape[0]):
            augmented_trace[i] = self.fill_start(trace, augmented_trace, random_start_index, initial_index, i)
            augmented_trace[i] = self.fill_interesting_part(trace, augmented_trace, random_start_index, interesting_part_length, initial_index, i)
            if missing_length > 0:
                # The tail is padded with missing_length samples (an earlier
                # revision used interesting_part_length here).
                augmented_trace[i] = self.fill_lacking_ends(trace, augmented_trace, random_start_index, missing_length, i)
        return augmented_trace

    def fill_start(self, trace, augmented_trace, random_start_index, initial_index, i_channel):
        """Fill ``augmented_trace[i_channel][:random_start_index]``.

        If the offset lies before the event onset, the leading samples of the
        recording are copied. Otherwise the pre-event samples are copied and the
        remaining gap up to the offset is taken from the end of the recording.
        Writes in place and returns the channel row.
        """
        if random_start_index < initial_index:
            augmented_trace[i_channel][0:random_start_index] = trace[i_channel][0:random_start_index]
            return augmented_trace[i_channel]
        else:
            augmented_trace[i_channel][0:initial_index] = trace[i_channel][0:initial_index]
            trace_interval_start = trace.shape[1] - (random_start_index - initial_index)
            trace_interval_end = trace.shape[1]
            augmented_trace[i_channel][initial_index:random_start_index] = trace[i_channel][trace_interval_start:trace_interval_end]
            return augmented_trace[i_channel]

    def fill_interesting_part(self, trace, augmented_trace, random_start_index, interesting_length, initial_index, i_channel):
        """Copy the event part of the recording so that it starts at ``random_start_index``.

        Both ends are clipped so the copy never runs past the output window.
        Writes in place and returns the channel row.
        """
        aug_interval_end = min(random_start_index + interesting_length, augmented_trace.shape[1])
        trace_interval_end = min(initial_index + interesting_length, initial_index + (augmented_trace.shape[1] - random_start_index))
        augmented_trace[i_channel][random_start_index:aug_interval_end] = trace[i_channel][initial_index:trace_interval_end]
        return augmented_trace[i_channel]

    def fill_lacking_ends(self, trace, augmented_trace, random_start_index, missing_length, i_channel):
        """Pad the last ``missing_length`` samples of the window.

        The padding is read from the recording starting at ``random_start_index``.
        Writes in place and returns the channel row.
        """
        fill_interval_start = random_start_index
        fill_interval_end = random_start_index + missing_length
        augmented_trace[i_channel][augmented_trace.shape[1] - missing_length:augmented_trace.shape[1]] = trace[i_channel][fill_interval_start:fill_interval_end]
        return augmented_trace[i_channel]

    def find_initial_event_index(self, path):
        """Return ``(initial_index, info)`` for the file at ``path``.

        ``initial_index`` is the sample index of the event onset relative to the
        start of the recording, clamped to 0. The analyst pick time is used when
        present. Otherwise the estimated arrival time is used, moved earlier by
        the origin time uncertainty (capped at 15) when one is available.
        """
        info = self.path_to_info(path)
        start_time = parser.isoparse(info['trace_stats']['starttime']).replace(tzinfo=None)
        uncertainty = 0
        if info['analyst_pick_time'] is not None:
            event_time = parser.isoparse(info['analyst_pick_time']).replace(tzinfo=None)
        else:
            event_time = parser.isoparse(info['est_arrivaltime_arces']).replace(tzinfo=None)
            # Guard against a missing or empty 'origins' list before reading its first entry.
            if 'origins' in info and info['origins'] and 'time_errors' in info['origins'][0]:
                uncertainty = min(float(info['origins'][0]['time_errors']['uncertainty']), 15)
        sampling_rate = info['trace_stats']['sampling_rate']
        relative_seconds = (event_time - start_time).total_seconds()
        # Some events have a very large uncertainty, large enough that the event
        # could have occurred before the recording started; clamp at index 0.
        initial_index = max(math.floor((relative_seconds-uncertainty)*sampling_rate),0)
        return initial_index, info

    def path_to_info(self, path):
        """Read the ``event_info`` dataset of the HDF5 file at ``path`` and parse it as JSON."""
        with h5py.File(path, 'r') as dp:
            info = np.array(dp.get('event_info'))
            # The stored value is stringified and its first two and last
            # characters are stripped before JSON parsing.
            # NOTE(review): presumably this removes the b'...' wrapper of a
            # bytes repr — confirm against the file format before changing it.
            info = str(info)
            info = info[2:len(info)-1]
            info = json.loads(info)
        return info

    def progress_bar(self, current, total, barLength = 20):
        """Print a one-line progress bar that overwrites itself (ends with a carriage return)."""
        percent = float(current) * 100 / total
        arrow   = '-' * int(percent/100 * barLength - 1) + '>'
        spaces  = ' ' * (barLength - len(arrow))
        print('Fitting time augmentor: [%s%s] %d %%' % (arrow, spaces, percent), end='\r')
        

In [6]:
# Append the rows of noise_ds to the rows of full_ds and fit the augmentor on the result.
# NOTE(review): full_ds already contains rows labelled 'noise'; whether noise_ds
# repeats some of those paths cannot be seen from here — TODO confirm.
full_and_noise_ds = np.concatenate((full_ds, noise_ds))
timeAug = TimeAugmentor(handler, full_and_noise_ds)


In [7]:
timeAug.fit()

Issues will occur if upsampling of explosions or noise is implemented
Finished explosionstor: [------------------->] 100 %
Finished noiseugmentor: [------------------->] 100 %
Finished earthquakesor: [------------------->] 100 %
Fit process completed after 13.833172798156738 seconds. Total datapoints fitted: 42461.
Average time per datapoint: 0.000325785374771125


In [9]:
timeAug.augment_event(full_ds[0][0], full_ds[0][2])

array([[ 49.9425354 ,  36.26490021,  21.7385788 , ...,  30.713974  ,
         37.65123367,  40.28333664],
       [  1.04194868,  -8.6063633 , -28.35959053, ...,  68.97608185,
         54.63550949,  47.34148788],
       [ 58.01567459,  35.01927185,  30.62397385, ...,  24.47158623,
         19.21693802,  15.27928448]])

In [10]:
timeAug.fitted_dict[full_ds[0][0]]

{'initial_index': 2400, 'random_start_index': array([1980])}

In [None]:
# Pull one item from `gen` per dataset row and compare the final count with the dataset length.
# NOTE(review): `gen` is not defined in any visible cell, so this fails on a fresh
# kernel; presumably it was timeAug.np_generator(full_and_noise_ds) — TODO confirm.
# The manual `i += 1` is overwritten by the for-loop on each iteration; only the
# last increment survives, so the printed value equals len(full_and_noise_ds).
i = 0 
for i in range(len(full_and_noise_ds)):
    next(gen)
    i += 1
print(i)
print(len(full_and_noise_ds))

In [None]:
path_set = set(full_ds[:,0])

In [None]:
path_dict = dict.fromkeys(path_set)

In [None]:
# Per-class views of full_ds, selected on the label column (index 1).
# NOTE(review): this rebinds `noise_ds`, which an earlier cell set to
# loadData.noise_ds. Re-running the cell that builds full_and_noise_ds after this
# one would concatenate this subset instead of the loader's noise dataset.
explo_ds = full_ds[full_ds[:,1] == "explosion"]
earth_ds = full_ds[full_ds[:,1] == "earthquake"]
noise_ds = full_ds[full_ds[:,1] == "noise"]

In [None]:
import time
def fit(self):
    """Stand-alone draft of the augmentor's fit step.

    Expects ``self`` to expose ``ds`` (rows of ``(path, label, redundancy_index)``),
    ``find_initial_event_index(path)`` and ``progress_bar(current, total)``.
    Stores the result on ``self.fitted_dict``: every unique path is pre-seeded
    with ``None`` and then mapped to
    ``{'initial_index': ..., 'random_start_index': ndarray}``.

    Explosion and noise paths get one random start offset. Earthquake paths get
    ``max_redundancy + 1`` offsets so that the largest zero-based redundancy
    index is still a valid position.
    """
    time_start = time.time()
    n_rows = len(self.ds)
    np.random.seed(0)
    print("Issues will occur if upsampling of explosions or noise is implemented")
    explo_ds = self.ds[self.ds[:,1] == "explosion"]
    earth_ds = self.ds[self.ds[:,1] == "earthquake"]
    noise_ds = self.ds[self.ds[:,1] == "noise"]
    # max() must not see an empty earthquake subset.
    n_earth_offsets = int(max(earth_ds[:,2])) + 1 if len(earth_ds) > 0 else 1
    fitted_dict = dict.fromkeys(set(self.ds[:,0]))

    groups = (
        ("explosions", explo_ds, 1),
        ("noise", noise_ds, 1),
        ("earthquakes", earth_ds, n_earth_offsets),
    )
    for group_name, group_ds, n_offsets in groups:
        if len(group_ds) > 0:
            # Sorted so the seeded draws are paired with the same paths on every
            # run. The progress total is taken from this group's own paths.
            unique_paths = sorted(set(group_ds[:,0]))
            n_paths = len(unique_paths)
            for count, path in enumerate(unique_paths, start=1):
                random_start_index = np.random.randint(0,4500, n_offsets)
                initial_index, info = self.find_initial_event_index(path)
                fitted_dict[path] = {'initial_index': initial_index,
                                     'random_start_index' : random_start_index}
                self.progress_bar(count, n_paths)
        print("Finished " + group_name)

    self.fitted_dict = fitted_dict
    time_end = time.time()
    print(f"Fit process completed after {time_end - time_start} seconds. Total datapoints fitted: {n_rows}.")
    if n_rows > 0:
        print(f"Average time per datapoint: {(time_end - time_start) / n_rows}")
        
    

In [None]:
# NOTE(review): the stand-alone fit() defined above takes `self` and reads
# `self.ds`; passing full_ds binds the array itself to `self`, so the call fails
# with AttributeError. fit() also has no return statement, so `bar` would be
# None. Presumably superseded by timeAug.fit() — TODO confirm and remove.
bar = fit(full_ds)

In [None]:
def sanity_check(ds_fitted_to, fitted_dict=None):
    """Assert that every path has exactly one fitted offset per redundancy index.

    Parameters
    ----------
    ds_fitted_to :
        Rows of ``(path, label, redundancy_index)``.
    fitted_dict :
        Mapping ``path -> {'random_start_index': sequence, ...}``. Defaults to
        the notebook-level ``timeAug.fitted_dict``.

    Raises
    ------
    AssertionError
        If a path's ``random_start_index`` length differs from that path's
        largest redundancy index plus one.
    """
    if fitted_dict is None:
        fitted_dict = timeAug.fitted_dict

    # One pass over the rows instead of re-masking the whole dataset per path.
    max_redundancy = {}
    for row in ds_fitted_to:
        path, redundancy = row[0], int(row[2])
        if redundancy > max_redundancy.get(path, -1):
            max_redundancy[path] = redundancy

    for path, m in max_redundancy.items():
        offsets = fitted_dict[path]['random_start_index']
        # Report the offending path (the old message indexed the dataset with
        # the position in the unique-path list, which named an unrelated row).
        assert len(offsets) == m + 1, f"{offsets} should have length {m + 1}, where path is {path}"

sanity_check(full_and_noise_ds)

In [None]:
pprint.pprint(timeAug.fitted_dict)

In [None]:
import time
# Time one full pass of augment_event over every row of full_and_noise_ds and
# print the elapsed seconds. The augmented traces are discarded.
start = time.time()
# NOTE(review): `i` is incremented but never read afterwards.
i = 0
for path, label, redundency_index in full_and_noise_ds:
    _ = timeAug.augment_event(path, redundency_index)
    i += 1
    
end = time.time()
print(end - start)


In [None]:
print(full_and_noise_ds[np.where(full_and_noise_ds[:,2] == max(full_and_noise_ds[:,2]))])

In [None]:
timeAug.fitted_dict[full_and_noise_ds[np.where(full_and_noise_ds[:,2] == max(full_and_noise_ds[:,2]))][0][0]]

In [None]:
def find_initial_event_index(path):
    """Return ``(initial_index, trace, info)`` for the file at ``path``.

    ``initial_index`` is the sample index of the event onset relative to the
    start of the recording, moved earlier by the origin time uncertainty when
    one is available and clamped to 0. The analyst pick time is used when
    present, otherwise the estimated arrival time.

    Relies on the notebook-level ``handler`` for loading the trace.

    NOTE(review): unlike TimeAugmentor.find_initial_event_index, this draft
    subtracts the uncertainty even when an analyst pick exists and does not cap
    it — confirm which behaviour is intended before reusing this function.
    """
    trace, info = handler.path_to_trace(path)
    start_time = parser.isoparse(info['trace_stats']['starttime']).replace(tzinfo=None)
    if info['analyst_pick_time'] is not None:
        event_time = parser.isoparse(info['analyst_pick_time']).replace(tzinfo=None)
    else:
        event_time = parser.isoparse(info['est_arrivaltime_arces']).replace(tzinfo=None)
    sampling_rate = info['trace_stats']['sampling_rate']
    relative_seconds = (event_time - start_time).total_seconds()
    # Problem with uncertainty: some events have very large uncertainty.
    # It can be so high that the event could have occurred prior to the recording.
    # 'origins' may be missing or empty, so check before reading its first entry.
    if 'origins' in info and info['origins'] and 'time_errors' in info['origins'][0]:
        uncertainty = float(info['origins'][0]['time_errors']['uncertainty'])
    else:
        uncertainty = 0
    initial_index = max(math.floor((relative_seconds-uncertainty)*sampling_rate),0)

    return initial_index, trace, info

def shift_event(path):
    """Build a (3, 6000) trace in which the event from ``path`` starts at a random index.

    Uses the notebook-level find_initial_event_index(path), which returns the
    event onset index together with the loaded trace and its info dict.

    Returns
    -------
    (augmented_trace, info)
    """
    initial_index, trace, info = find_initial_event_index(path)
    pre_length = trace.shape[1]
    # Position in the output window at which the event will begin.
    random_start_index = np.random.randint(0, 5000)
    augmented_trace = np.empty((3, 6000))
    # Number of samples from the event onset to the end of the recording.
    interesting_part_length = pre_length - initial_index
    # Handling what happens when the duration of the interesting event is shorter than what is needed to fill the array:
    ideal_length = augmented_trace.shape[1] - random_start_index
    missing_length = ideal_length - interesting_part_length
    if missing_length > 0:
        # Pick a random pre-event stretch of the recording to use as filler.
        # NOTE(review): np.random.randint raises ValueError when
        # initial_index - missing_length <= 0 — confirm this cannot happen for the data used.
        filler_index_start = np.random.randint(0, (initial_index - missing_length))
        filler_index_end = filler_index_start + missing_length
        # First index of what requires more filling
        required_fill_index_start = augmented_trace.shape[1] - missing_length
    
    for i in range(augmented_trace.shape[0]):
        # Leading part: copied unchanged from the start of the recording.
        augmented_trace[i][0:random_start_index] = trace[i][0:random_start_index]
        # Event part: placed so that the onset lands on random_start_index.
        augmented_trace[i][random_start_index:random_start_index + interesting_part_length] = trace[i][initial_index: initial_index + (augmented_trace.shape[1] - random_start_index)]
        if missing_length > 0:
            # Tail: padded with the filler stretch chosen above.
            augmented_trace[i][required_fill_index_start:augmented_trace.shape[1]] = trace[i][filler_index_start:filler_index_end]

    return augmented_trace, info

    
    

In [None]:
import time
# Time 1000 calls to augment_event and print the elapsed seconds.
# NOTE(review): `train_aug` is not defined in any visible cell, and the call
# passes only a path, whereas TimeAugmentor.augment_event defined above also
# requires redundancy_index. This cell looks stale — TODO confirm.
start = time.time()
for i in range(1000):
    train_aug.augment_event(train_aug.ds[i][0])
end = time.time()
print(end-start)

In [None]:
# Load the info of one training sample, augment its trace and scale it.
# NOTE(review): `scaler` is not defined in any visible cell, and augment_event
# is called without the redundancy_index argument that TimeAugmentor.augment_event
# above requires — TODO confirm and update.
i = 2
_, info = handler.path_to_trace(train_ds[i][0])
trace = timeAug.augment_event(train_ds[i][0])
trace = scaler.transform(trace)

In [None]:
plot_event(trace, info)

In [None]:
pprint.pprint(no_info)

In [None]:
shift_trace, shift_info = shift_event(broken_eq)
print(shift_trace.shape)
plot_event(shift_trace, shift_info)

In [None]:
# Compare the onset index returned by find_initial_event_index with one
# recomputed directly from the estimated arrival time (no uncertainty, no clamping).
# NOTE(review): `broken_eq` is not defined in any visible cell, and
# `sampling_rate` is only assigned in a later cell, so this depends on
# out-of-order execution — TODO confirm.
initial_index, trace, info = find_initial_event_index(broken_eq)
print(initial_index)
pprint.pprint(info)
start_time = parser.isoparse(info['trace_stats']['starttime']).replace(tzinfo=None)
event_time = parser.isoparse(info['est_arrivaltime_arces']).replace(tzinfo=None)

relative_event_time = event_time - start_time
relative_seconds = relative_event_time.total_seconds()
initial_index = math.floor(relative_seconds*sampling_rate)
print(initial_index)

In [None]:
trace, info = handler.path_to_trace(some_eq)

In [None]:
def plot_event(trace, info):
    """Plot the three channels of ``trace`` as one stream.

    ``info`` supplies the first origin's time (used as each trace's start
    time) and, under 'trace_stats', the channel names, sampling rate and
    station. ``Trace`` and ``Stream`` must already be in scope.
    """
    origin_time = info['origins'][0]['time']
    stats = info['trace_stats']
    channel_names = stats['channels']
    rate = stats['sampling_rate']
    station_name = stats['station']

    # One Trace per channel row, in channel order 0, 1, 2.
    components = []
    for idx in range(3):
        header = {
            'station': station_name,
            'channel': channel_names[idx],
            'sampling_rate': rate,
            'starttime': origin_time,
        }
        components.append(Trace(data=trace[idx], header=header))

    Stream(components).plot()

In [None]:
eq_trace, eq_info = handler.path_to_trace(broken_eq)
plot_event(eq_trace, eq_info)

In [None]:
from dateutil import parser
#pprint.pprint(eq_info)

# Both timestamps are made timezone-naive before subtracting: Python refuses to
# subtract an aware datetime from a naive one (TypeError).
# NOTE(review): the reference time here is the first origin's time, whereas the
# functions above measure from info['trace_stats']['starttime'] — confirm which
# is intended.
start_time = parser.isoparse(eq_info['origins'][0]['time']).replace(tzinfo=None)
print(start_time)
event_time = parser.isoparse(eq_info['est_arrivaltime_arces']).replace(tzinfo=None)
print(event_time)
sampling_rate = eq_info['trace_stats']['sampling_rate']

relative_event_time = event_time - start_time
relative_seconds = relative_event_time.total_seconds()
# Offset of the estimated arrival from the reference time, in samples (not rounded).
initial_index = relative_seconds*sampling_rate
print(initial_index)

 - Which time should I look at?
 - The time uncertainty is in seconds, right?
 - How long do earthquakes recorded by ARCES normally last?
 - Some uncertainties are so high that the event could have occurred prior to the recording. For events such as this I assume that the event starts at index = 0.
 - Some events lack an uncertainty measure.
 - What does est_arrivaltime_arces actually mean? I assumed it meant event_start_time, but then I saw that noise has the same statistic.
 - Can I use noise augmentation when training a model which does not classify noise?
 - How do I draw vertical lines in Obspy?
 
For Pekka:
  - Can I use noise augmentation when training a model which does not classify noise?
  - Is noise augmentation even necessary anymore now that the time augmentation is here? 
  - Should I keep the test/validation set clear of events that are in the train set, even though their augmentation makes them different?
  - Do I need to make my upsampling/downsampling more sophisticated in order to keep the MSRD balanced?
  - Is using the large dataset worth it anymore?
      - Downsides:
          - Time consuming
          - Samples are very different from ARCES
              - No beamforming
              - Repeating channels
              - No explosion samples!
              - Destroys Noise Augmentor
          - Limited ROI
      - Upsides:
          - More real data
          - Metrics more reflective of reality, not impacted by the time augmentation process.
  - Do I need to write about why this work is important?
      - If so, why do you think it is important?
  - Making sure: Can report performances of the models prior to this new dataset and compare them to performance after the time augmentation implementation.
  - Do you have any suggestions of areas of improvement beyond what is already implemented?

In [None]:
"""
     Likely depreciated due to new fitting method.
         
    def find_initial_event_index(self, path):
        trace, info = self.handler.path_to_trace(path)
        start_time = parser.isoparse(info['trace_stats']['starttime']).replace(tzinfo=None)
        if info['analyst_pick_time'] != None:
            event_time = parser.isoparse(info['analyst_pick_time']).replace(tzinfo=None)
        else:
            event_time = parser.isoparse(info['est_arrivaltime_arces']).replace(tzinfo=None)
        sampling_rate = info['trace_stats']['sampling_rate']
        relative_seconds = (event_time - start_time).total_seconds()
        # Problem with uncertainty: Some events have very large uncertainty.
        # This can be so high that the interesting event could have potentially occured prior to the recording.
        if 'time_errors' in info['origins'][0]:
            uncertainty = float(info['origins'][0]['time_errors']['uncertainty'])
        else:
            uncertainty = 0
        initial_index = max(math.floor((relative_seconds-uncertainty)*sampling_rate),0)

        return initial_index, trace, info

    def shift_event(self, path):
        initial_index, trace, info = self.find_initial_event_index(path)
        pre_length = trace.shape[1]
        random_start_index = np.random.randint(0, 5000)
        augmented_trace = np.empty((3, 6000))
        interesting_part_length = pre_length - initial_index
        # Handling what happens when the duration of the interesting event is shorter than what is needed to fill the array:
        ideal_length = augmented_trace.shape[1] - random_start_index
        missing_length = ideal_length - interesting_part_length
        if missing_length > 0:
            filler_index_start = np.random.randint(0, (initial_index - missing_length))
            filler_index_end = filler_index_start + missing_length
            # First index of what requires more filling
            required_fill_index_start = augmented_trace.shape[1] - missing_length

        for i in range(augmented_trace.shape[0]):
            augmented_trace[i][0:random_start_index] = trace[i][0:random_start_index]
            augmented_trace[i][random_start_index:random_start_index + interesting_part_length] = trace[i][initial_index: initial_index + (augmented_trace.shape[1] - random_start_index)]
            if missing_length > 0:
                augmented_trace[i][required_fill_index_start:augmented_trace.shape[1]] = trace[i][filler_index_start:filler_index_end]
        return augmented_trace
"""

In [None]:
a = [1,1,1,1,1,1]
b = [2,2,2]
a[0:len(b)] = b
a

In [None]:
a = [['a',3],['b',6],['b',1],['b',5], ['b', 0],['a',1]]



def fit(a):
    """Toy version of the per-path fitting rule.

    ``a`` is a sequence of ``(path, red)`` pairs. For each path the result
    holds an array of random integers in ``[0, 5)``. An existing array is kept
    only when it is strictly longer than ``red + 1``; otherwise a fresh array
    of length ``red + 1`` is drawn.
    """
    offsets_by_path = {}
    for path, red in a:
        needed = red + 1
        # Skip only when the stored array is already strictly longer than needed.
        already_longer = path in offsets_by_path and needed < len(offsets_by_path[path])
        if not already_longer:
            offsets_by_path[path] = np.random.randint(0, 5, needed)
    return offsets_by_path
fit(a)