In [4]:
import h5py
import pandas as pd
import numpy as np
import os
import sys
# Project root. Written as a raw string so the Windows backslashes stay literal
# instead of being parsed as (invalid) escape sequences such as "\T" and "\M".
base_dir = r'F:\Thesis_ssd\MasterThesis3.0'
# Make the project root the working directory before the `Classes.*` imports
# that follow (presumably so those project-local packages resolve -- confirm).
os.chdir(base_dir)

from Classes.DataProcessing.LoadData import LoadData
from Classes.DataProcessing.BaselineHelperFunctions import BaselineHelperFunctions
from Classes.DataProcessing.DataHandler import DataHandler
from Classes.DataProcessing.DataGenerator import DataGenerator
from Classes.Modeling.Models import Models
from Classes.Modeling.RandomGridSearch import RandomGridSearch
from Classes.Modeling.CustomCallback import CustomCallback
from Classes.Scaling.ScalerFitter import ScalerFitter
from Classes.Scaling.MinMaxScalerFitter import MinMaxScalerFitter
from Classes.Scaling.StandardScalerFitter import StandardScalerFitter

import pprint 

In [2]:
def parse_csv(csv_file):
    """Load the metadata CSV with an explicit dtype for every column.

    The header row is read first to discover the column names. Columns listed
    in ``non_string_or_time`` get the dtype given there; every other column is
    read as ``str``.

    The dtype keys must match the CSV header exactly. Earlier spellings
    ('receiver_latititude', 'p_travel_secs', 'code_end_sample') did not match
    the real header names, so those columns silently fell back to ``str``.

    Parameters
    ----------
    csv_file : str or path-like
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        One row per CSV record, with the dtypes described above.
    """
    # Header-only read: cheap way to learn which columns exist.
    col_names = pd.read_csv(csv_file, nrows=0).columns

    non_string_or_time = {
        'receiver_latitude': float,
        'receiver_longitude': float,
        'p_weight': float,
        'p_travel_sec': float,
        'source_latitude': float,
        'source_longitude': float,
        'source_magnitude': float,
        'source_distance_deg': float,
        'source_distance_km': float,
        'back_azimuth_deg': float,
        'snr_db': object,
        'coda_end_sample': object,
    }

    # Build the dtype map from the columns that are actually present, so a key
    # that does not exist in this particular file is never handed to pandas.
    dtypes = {col: non_string_or_time.get(col, str) for col in col_names}
    return pd.read_csv(csv_file, dtype=dtypes)
    


In [5]:
# Root folder of the dataset. Raw strings keep the Windows backslashes literal
# rather than relying on unrecognised escape sequences ("\T", "\L", "\m").
source_path = r"F:\Thesis_ssd"

filename = rf"{source_path}\LargeDataset\merge.hdf5"
csv_file = rf"{source_path}\LargeDataset\merge.csv"

# Metadata table, one row per trace.
info_file = parse_csv(csv_file)

# Keep a reference to the open HDF5 file so it can be closed explicitly later
# with h5_file.close(); rdcc_nbytes is passed through to h5py unchanged.
h5_file = h5py.File(filename, 'r', rdcc_nbytes=10**9)
data_file = h5_file.get('data')

# Project helpers used by the cells below.
loadData = LoadData(isBalanced=True)
shuffle = False
full_ds, train_ds, val_ds, test_ds = loadData.getDatasets(shuffle=shuffle)
handler = DataHandler()
pp = pprint.PrettyPrinter(indent=4)
helper = BaselineHelperFunctions()

In [7]:
# Pass the first element of the first entry of full_ds to the handler and
# unpack the returned pair (presumably the trace data and its info -- confirm).
se_trace, se_info = handler.path_to_trace(full_ds[0][0])

In [10]:
# Pretty-print the info object obtained from handler.path_to_trace.
pp.pprint(se_info)

{   'az_to_arces': 162.34661979077435,
    'baz_to_arces': 348.7606209297071,
    'dist_to_arces': 861.0427349253382,
    'event_type': 'earthquake',
    'event_type_certainty': 'known',
    'magnitude_dist_ratio': 0.0035875106713116277,
    'magnitude_sqrtdist_ratio': 0.10527022591256095,
    'magnitudes': [   {   'mag': 3.089,
                          'magnitude_type': 'Mb',
                          'origin_id': 'smi:local/cdde3f4d-f31d-4b0c-863b-bdab6826dc8e',
                          'resource_id': 'smi:local/5926fd9a-02c3-4454-b56a-63593b742780'},
                      {   'mag': 2.58,
                          'magnitude_type': 'Ml',
                          'origin_id': 'smi:local/cdde3f4d-f31d-4b0c-863b-bdab6826dc8e',
                          'resource_id': 'smi:local/a68ee372-0546-4de0-aa7b-a823c630f506'}],
    'origins': [   {   'comments': [   {   'resource_id': 'smi:local/2c6226d7-e3a1-4697-83de-872fd95c9b22',
                                           'text': 'orid: 1

In [13]:
# Materialise the member names found under data_file into a list.
event_names = list(data_file.keys())

In [25]:
# Shape of the first event's dataset. Reading `.shape` from the dataset itself
# avoids loading the whole array into memory just to inspect its dimensions.
data_file.get(event_names[0]).shape

(6000, 3)

In [44]:
# Use the first event name as the sample for the write experiment below.
test_name = event_names[0]
# `[:]` reads the full contents of that entry out of data_file.
test_trace = data_file.get(test_name)[:]

In [36]:
# Create (or truncate, mode 'w') a per-event HDF5 file under h5py_test_dir.
# Backslashes are escaped explicitly; the resulting path is unchanged, but the
# literal no longer relies on the invalid escape sequences "\h" and "\{".
# NOTE(review): this rebinds `test_ds`, which an earlier cell assigned from
# loadData.getDatasets(...) -- confirm the shadowing is intended.
test_ds = h5py.File(f"{source_path}\\h5py_test_dir\\{test_name}.h5", 'w')

In [None]:
# Bare dict literal: evaluating this cell only echoes the dictionary; nothing is assigned.
# NOTE(review): appears to be a pasted copy of the pp.pprint(se_info) printout
# without the *_to_arces fields -- confirm whether it is still needed.
{   'event_type': 'earthquake',
    'event_type_certainty': 'known',
    'magnitude_dist_ratio': 0.0035875106713116277,
    'magnitude_sqrtdist_ratio': 0.10527022591256095,
    'magnitudes': [   {   'mag': 3.089,
                          'magnitude_type': 'Mb',
                          'origin_id': 'smi:local/cdde3f4d-f31d-4b0c-863b-bdab6826dc8e',
                          'resource_id': 'smi:local/5926fd9a-02c3-4454-b56a-63593b742780'},
                      {   'mag': 2.58,
                          'magnitude_type': 'Ml',
                          'origin_id': 'smi:local/cdde3f4d-f31d-4b0c-863b-bdab6826dc8e',
                          'resource_id': 'smi:local/a68ee372-0546-4de0-aa7b-a823c630f506'}],
    'origins': [   {   'comments': [   {   'resource_id': 'smi:local/2c6226d7-e3a1-4697-83de-872fd95c9b22',
                                           'text': 'orid: 12196'}],
                       'creation_info': {   'agency_id': 'NORSAR',
                                            'author': 'ARS:berit',
                                            'creation_time': '2009-05-11T08:37:39.000000Z'},
                       'depth': 0.0,
                       'depth_errors': {'uncertainty': -1.0},
                       'latitude': 77.013931,
                       'longitude': 18.827457,
                       'quality': {   'associated_phase_count': 7,
                                      'used_phase_count': 7},
                       'resource_id': 'smi:local/cdde3f4d-f31d-4b0c-863b-bdab6826dc8e',
                       'time': '2009-05-06T23:54:45.633960Z',
                       'time_errors': {'uncertainty': 0.96696748}}],
    'resource_id': 'smi:local/405afb35-e102-446e-93fa-f4cce3b717cf'}

In [37]:
# Create the two top-level containers of the test file as groups.
# create_dataset() needs a shape, dtype or data and fails when given only a
# name; the recorded outputs of the following cells also show "/traces" as an
# HDF5 group. require_group() returns the existing group when the cell is
# re-run instead of failing because the name is already taken.
test_info = test_ds.require_group("info")
test_traces = test_ds.require_group("traces")

In [38]:
# Show the top-level member names currently present in the test file.
list(test_ds.keys())

['info', 'traces']

In [45]:
# NOTE(review): this only rebinds the Python name `test_traces` to the object
# held in `test_trace`; nothing is written into the HDF5 file by this line.
# If the goal is to store the trace in the file, it needs an explicit write -- confirm intent.
test_traces = test_trace

In [46]:
# Look up the "traces" member of the test file to inspect what it currently holds.
test_ds.get("traces")

<HDF5 group "/traces" (0 members)>

In [None]:
# NOTE(review): get() is called without a member name here; this looks like an
# unfinished cell and would presumably fail as written -- confirm which key was meant.
test_ds.get()

In [47]:
# Display the metadata DataFrame produced by parse_csv(csv_file).
info_file

Unnamed: 0,network_code,receiver_code,receiver_type,receiver_latitude,receiver_longitude,receiver_elevation_m,p_arrival_sample,p_status,p_weight,p_travel_sec,...,source_magnitude_author,source_mechanism_strike_dip_rake,source_distance_deg,source_distance_km,back_azimuth_deg,snr_db,coda_end_sample,trace_start_time,trace_category,trace_name
0,TA,109C,HH,32.8889,-117.1051,150.0,,,,,...,,,,,,,,2015-10-21 05:55:00,noise,109C.TA_201510210555_NO
1,TA,109C,HH,32.8889,-117.1051,150.0,,,,,...,,,,,,,,2015-11-06 14:50:00,noise,109C.TA_201511061450_NO
2,TA,109C,HH,32.8889,-117.1051,150.0,,,,,...,,,,,,,,2015-11-07 02:20:00,noise,109C.TA_201511070220_NO
3,TA,109C,HH,32.8889,-117.1051,150.0,,,,,...,,,,,,,,2015-11-14 05:15:00,noise,109C.TA_201511140515_NO
4,TA,109C,HH,32.8889,-117.1051,150.0,,,,,...,,,,,,,,2015-12-25 18:50:00,noise,109C.TA_201512251850_NO
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1265652,WY,YHL,HH,44.8509,-111.1830,2691.0,400.0,manual,0.68,4.210000038146973,...,,,0.1253,13.93,124.9,[34.40000153 33. 36.70000076],[[1239.]],2017-06-21 09:40:43.810000,earthquake_local,YHL.WY_20170621094042_EV
1265653,WY,YHL,HH,44.8509,-111.1830,2691.0,600.0,manual,0.65,4.840000152587891,...,,,0.1553,17.26,121.7,[44.20000076 43.79999924 41.40000153],[[1551.]],2017-06-21 12:21:05.390000,earthquake_local,YHL.WY_20170621122104_EV
1265654,WY,YHL,HH,44.8509,-111.1830,2691.0,500.0,manual,0.65,4.820000171661377,...,,,0.1534,17.05,121.5,[47.5 39.90000153 42.70000076],[[1443.]],2017-06-21 12:23:40.200000,earthquake_local,YHL.WY_20170621122339_EV
1265655,WY,YHL,HH,44.8509,-111.1830,2691.0,500.0,manual,0.59,4.260000228881836,...,,,0.1267,14.08,126.6,[24.70000076 21.89999962 21. ],[[1322.]],2017-06-21 13:34:40.330000,earthquake_local,YHL.WY_20170621133439_EV
