In [None]:
import os
import sys
from datetime import date
from glob import glob
import shutil
import warnings
warnings.filterwarnings("ignore")

import numpy as np

sys.path.append('../..') # for running tests py-script
sys.path.append('../../..') # for running this notebook directly
from seismiqb import SeismicGeometry
from seismiqb.src.geometry import export

In [None]:
""" The following constants parametrize the behavior of the notebook:

DATESTAMP : str
    Execution date in "YYYY-MM-DD" format.
    Used for saving data.
OUTPUT_DIR : str
    Path to the directory for saving data.

Parameters that control the cube generation:

CUBE_SHAPE : sequence of three integers
    Shape of a synthetic cube.
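SEED : int or None
    Seed used to create the random generator (check out `np.random.default_rng`).
CUBE_NAME : str
    File name of the created SEG-Y cube; includes DATESTAMP.

Parameters that control the tests run:

TESTS_ROOT_DIR : str
    Path to the tests root directory.
LOGS_DIR : str
    Path to the directory with logs.
USE_TMP_OUTPUT_DIR : bool
    If True, old timings logs are kept regardless of REMOVE_OUTDATED_FILES.
REMOVE_OUTDATED_FILES : bool
    Whether to remove timings logs left in LOGS_DIR by previous runs.
FORMATS : sequence of str
    Data formats for which tests are run.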
"""
# Workspace constants
DATESTAMP = date.today().isoformat()  # "YYYY-MM-DD", embedded into the cube file name
TESTS_ROOT_DIR = './'
LOGS_DIR = '../logs'

# Tests running parameters
USE_TMP_OUTPUT_DIR = False
REMOVE_OUTDATED_FILES = True

# Data creation parameters
CUBE_NAME = f'test_cube_{DATESTAMP}.sgy'
CUBE_SHAPE = (1000, 200, 400)
SEED = 42

# Data formats for which running tests
FORMATS = [
    'sgy',
    'hdf5',
    'qhdf5',
    'blosc',
    'qblosc',
]

# Prepare a workspace

Create directories for the test files, then generate a fake cube and save it.

**Storage structure:**
___


**geometry_test_files** (tests root directory)

&emsp;├── **test_array_{DATESTAMP}.npy**

&emsp;└── **test_cube_{DATESTAMP}.{DATAFORMAT}**


We save **geometry_timings.json** (a file with the timings of loading data slices and crops) and executed notebooks in the **tests/logs** directory.

---
 * **{DATAFORMAT}** means each data format from **FORMATS** list

In [None]:
# Locations of the test data and of the logs, both inside the tests root directory
OUTPUT_DIR = os.path.join(TESTS_ROOT_DIR, "geometry_test_files")
# NOTE(review): this assignment replaces the `LOGS_DIR` value set in the configuration cell;
# confirm which location is the intended one.
LOGS_DIR = os.path.join(TESTS_ROOT_DIR, 'logs')

# Timings logs of previous runs are deleted only when `USE_TMP_OUTPUT_DIR` is off
if REMOVE_OUTDATED_FILES and not USE_TMP_OUTPUT_DIR:
    for stale_log in glob(os.path.join(LOGS_DIR, 'geometry_timings_*.json')):
        os.remove(stale_log)

# Always start from an empty data directory: drop the old one (if any) and make a new one
if os.path.exists(OUTPUT_DIR):
    shutil.rmtree(OUTPUT_DIR)

os.makedirs(OUTPUT_DIR, exist_ok=True)

In [None]:
# Paths inside the output directory
CUBE_PATH = os.path.join(OUTPUT_DIR, CUBE_NAME)
TEST_PATH_DB = os.path.join(OUTPUT_DIR, 'geometry_test_db')

# Init logs: every format (upper-cased) and the preparation stage start as 'In queue'
states = dict.fromkeys([fmt.upper() for fmt in FORMATS] + ['preparation'], 'In queue')

timings = {}

# Create a fake cube

In [None]:
# A generator created from SEED, then normally distributed values cast to float32
rng = np.random.default_rng(SEED)
data_array = rng.normal(loc=0, scale=1000, size=CUBE_SHAPE).astype(np.float32)

In [None]:
# Dump the generated array into the output directory
array_path = os.path.join(OUTPUT_DIR, f'test_array_{DATESTAMP}.npy')
with open(array_path, 'wb') as array_file:
    np.save(array_file, data_array)

In [None]:
%%time
# Write the generated array to CUBE_PATH as an uncompressed SEG-Y file
export.make_segy_from_array(
    array=data_array,
    path_segy=CUBE_PATH,
    zip_segy=False,
    sample_rate=2.,
    delay=50,
    pbar='t',
)

In [None]:
%%time
# Geometry of the SEG-Y cube; it is the source for the format conversion later in the notebook
geometry_sgy = SeismicGeometry(
    path=CUBE_PATH,
    process=True,
    collect_stats=True,
    spatial=True,
    pbar='t',
)

# Check data loading

In [None]:
%%time
# Loading check with `INDEX_CDP` as index headers; the created instance itself is not needed
_ = SeismicGeometry(path=CUBE_PATH, headers=SeismicGeometry.HEADERS_POST_FULL,
                    index_headers=SeismicGeometry.INDEX_CDP)

In [None]:
%%time
# Loading check with `INDEX_POST` as index headers; the created instance itself is not needed
_ = SeismicGeometry(path=CUBE_PATH, headers=SeismicGeometry.HEADERS_POST_FULL,
                    index_headers=SeismicGeometry.INDEX_POST)

# Conversion

In [None]:
# Convert the SEG-Y geometry into every other format listed in FORMATS
for fmt in FORMATS:
    if fmt == 'sgy':
        # The source cube is already in this format
        continue
    _ = geometry_sgy.convert(format=fmt, quantize=False, store_meta=False, pbar='t')

In [None]:
# All the cells above have run: mark the preparation stage as finished
states.update(preparation='OK')