In [2]:
import json
import h5py
import numpy as np
import pandas as pd
import h5py as h5
import json

from sklearn.model_selection import train_test_split

# Loading datasets:

### Dataset paths:

In [28]:

earthquake_samples = 'earthquake_sample_events.h5'
explosion_samples = 'explosion_sample_events.h5'

### Create mixed dataset

In [3]:
def create_mixed_dataset(earthquake_dataset, explosion_dataset):
    with h5py.File(earthquake_dataset, 'r') as fin:
        earthquake_info_dataset = fin.get('event_info')
        earthquake_trace_dataset = fin.get('traces')
        print(type(earthquake_info_dataset))
        print(type(earthquake_trace_dataset))

        # Convert to numpy arrays
        earthquake_info = np.array(earthquake_info_dataset)
        earthquake_trace = np.array(earthquake_trace_dataset)
    event_type = (earthquake_dataset.split('_'))[0]
    print(f'Loaded {earthquake_info.shape[0]} {event_type}s')
    
    with h5py.File(explosion_dataset, 'r') as fin:
        explosion_info_dataset = fin.get('event_info')
        explosion_trace_dataset = fin.get('traces')

        # Convert to numpy arrays
        explosion_info = np.array(explosion_info_dataset)
        explosion_trace = np.array(explosion_trace_dataset)
    event_type = (explosion_dataset.split('_'))[0]
    print(f'Loaded {explosion_info.shape[0]} {event_type}s')
    
    # Mixing:
    n = explosion_info.shape[0] + earthquake_info.shape[0]
    mixed_info = np.empty((n,), dtype = type(earthquake_info))
    k,d = earthquake_trace.shape[1:3]
    mixed_trace = np.empty((n,k,d), dtype = type(earthquake_trace))
    
    i = 0
    for j in range(len(explosion_info)):
        mixed_info[i] = explosion_info[j]
        mixed_trace[i] = explosion_trace[j]
        i += 1
    for k in range(len(earthquake_info)):
        mixed_info[i] = earthquake_info[k]
        mixed_trace[i] = earthquake_trace[k]
        i += 1
        
    randomize = np.arange(len(mixed_info))
    np.random.shuffle(randomize)
    mixed_info = mixed_info[randomize]
    mixed_trace = mixed_trace[randomize]
    return explosion_info, earthquake_info, explosion_trace, earthquake_trace, mixed_info, mixed_trace

In [20]:
 explosion_info, earthquake_info, explosion_trace, earthquake_trace, mixed_info, mixed_trace = create_mixed_dataset(earthquake_samples, explosion_samples)

<class 'h5py._hl.dataset.Dataset'>
<class 'h5py._hl.dataset.Dataset'>
Loaded 152 earthquakes
Loaded 198 explosions


## Splitting data

In [21]:
training_test_ratio = 0.8
seed = 69420

train_trace, test_trace, train_info, test_info = train_test_split(mixed_trace, mixed_info, train_size = 0.8, random_state= seed)

### Saving datasets

In [30]:
def save_data(dataset, file_name, save_path):
    np.save(save_path + '/' + file_name + '.npy', dataset)
    print(f"Saved to: {save_path}/{file_name}.npy")

In [23]:
root = 'train_test_mixed'
train_folder = f'{root}/train'
test_folder = f'{root}/test'
mixed_folder = f'{root}/mixed'

In [24]:
train_info_name = 'train_info'
train_trace_name = 'train_trace'
save_data(train_info, train_info_name, train_folder)
save_data(train_trace, train_trace_name, train_folder)

Saved to: train_test_mixed/train/train_info.npy
Saved to: train_test_mixed/train/train_trace.npy


In [25]:
test_info_name = 'test_info'
test_trace_name = 'test_trace'
save_data(test_info, test_info_name, test_folder)
save_data(test_trace, test_trace_name, test_folder)

Saved to: train_test_mixed/test/test_info.npy
Saved to: train_test_mixed/test/test_trace.npy


In [26]:
mixed_info_name = 'mixed_info'
mixed_trace_name = 'mixed_trace'
save_data(mixed_info, mixed_info_name, mixed_folder)
save_data(mixed_trace, mixed_trace_name, mixed_folder)

Saved to: train_test_mixed/mixed/mixed_info.npy
Saved to: train_test_mixed/mixed/mixed_trace.npy


### Loading datasets:

In [6]:
# Folders:
root = 'train_test_mixed'
train_folder = f'{root}/train/'
test_folder = f'{root}/test/'
mixed_folder = f'{root}/mixed/'

earthquake_samples = 'earthquake_sample_events.h5'
explosion_samples = 'explosion_sample_events.h5'

# Mixed, train, test sets:
mixed_info = np.load(mixed_folder + 'mixed_info.npy', allow_pickle=True)
mixed_trace = np.load(mixed_folder + 'mixed_trace.npy', allow_pickle=True)

train_info = np.load(train_folder + 'train_info.npy', allow_pickle=True)
train_trace = np.load(train_folder + 'train_trace.npy', allow_pickle=True)

test_info = np.load(test_folder + 'test_info.npy', allow_pickle=True)
test_trace = np.load(test_folder + 'test_trace.npy', allow_pickle=True)

#### Adding filters:

In [8]:
def filter_tracer(stream):
    # Remove 'trend', e.g. remove linear offset from 0:
    stream.detrend('demean')

    # Taper the traces, meaning the ends will go gradually to 0 -- this is required before filtering
    stream.taper(max_percentage=0.05, type='cosine')

    # Apply a bandpass filter, selecting frequencies from 3 to 5 Hz
    stream.filter('bandpass', freqmin=3.0, freqmax=5.0)
    return stream

In [10]:
def create_stream(tracer, info):
    info = json_loads(info)
    station = info['trace_stats']['station']
    channels = info['trace_stats']['channels']
    sampl_rate = info['trace_stats']['sampling_rate']

    trace_BHE = Trace(
        data=tracer[0],
        header={
            'station': station,
            'channel': channels[0],
            'sampling_rate': sampl_rate,
            'starttime': start_time
        }
    )
    trace_BHN = Trace(
        data=tracer[0],
        header={
            'station': station,
            'channel': channels[1],
            'sampling_rate': sampl_rate, 
            'starttime': start_time
        }
    )
    trace_BHZ = Trace(
        data=tracer[0],
        header={
            'station': station,
            'channel': channels[2],
            'sampling_rate': sampl_rate,
            'starttime': start_time
        }
    )
    # Assemple into a Stream
    stream = Stream([trace_BHE, trace_BHN, trace_BHZ])
    return stream

In [None]:
def convert_every_tracer(tracer_set, info_set):
    streams = np.empty((tracer_set.shape), dtype=type(tracer_set))
    for idx, tracer in enumerate(tracer_set):
        streams[idx] = filter_tracer(create_stream(tracer, info_set[idx]))
        