In [None]:
# This notebook runs preparation for SeismicGeometry tests and SeismicGeometry tests for different cube formats
import os
import sys
from datetime import date
import warnings
warnings.filterwarnings("ignore")

from glob import glob
import json
import pprint
from matplotlib import pyplot as plt
from py.path import local
from tqdm.notebook import tqdm

sys.path.append('../..') # for running tests py-script
sys.path.append('../../..') # for running this notebook directly

from seismiqb.tests.utils import remove_savings, prepare_local, execute_test_notebook

In [None]:
""" The behaviour of the test is parametrized by the following constants:

DATESTAMP : str
    Execution date in "YYYY-MM-DD" format.
    Used for saving notebooks executions and temporary files.
NOTEBOOKS_DIR : str
    Path to the directory with test .ipynb files.
USE_TMP_OUTPUT_DIR: bool
    Whether to use pytest tmpdir as a workspace.
    If True, then all files are saved in temporary directories.
    If False, then all files are saved in local directories.
OUTPUT_DIR : str
    Path to the directory for saving results and temporary files
    (executed notebooks, logs, data files like cubes, etc.).
LOGS_DIR : str
    Path to the directory for saving log files (executed notebooks, timings, messages).

You can manage parameters which relates to a cube generation:

CUBE_SHAPE : sequence of three integers
    Shape of a synthetic cube.
SEED: int or None
    Seed used for creation of random generator (check out `np.random.default_rng`).

And you can manage test running with parameters:

REMOVE_OUTDATED_FILES: bool
    Whether to remove outdated files which relate to previous executions.
REMOVE_EXTRA_FILES : bool
    Whether to remove extra files after execution.
    Extra files are temporary files and execution savings that relate to successful tests.
SHOW_TEST_ERROR_INFO : bool
    Whether to show error traceback in outputs.
    Notice that it only works with SHOW_MESSAGE = True.

Visualizations in saved execution notebooks are controlled with:

SCALE : int
    Figures scale.

Text outputs in executed notebooks controlled with:

VERBOSE : bool
    Whether to print information about successful tests during the execution of the cycles.
"""

# Workspace constants
DATESTAMP = date.today().strftime("%Y-%m-%d")
NOTEBOOKS_DIR = './'
USE_TMP_OUTPUT_DIR = False
OUTPUT_DIR = './geometry_test_files/'
LOGS_DIR = '../logs'

# Execution parameters
REMOVE_OUTDATED_FILES = True
REMOVE_EXTRA_FILES = True
SHOW_TEST_ERROR_INFO = True

# Data generation parameters
CUBE_SHAPE = (1000, 200, 400)
SEED = 42

# Visualization parameters
SCALE = 1

# Output parameters
VERBOSE = True

# Preparation

Create directories for the output files, then generate a synthetic cube and save it.

**Storage structure:**
___



**geometry_test_files** (tests root directory)

&emsp;├── **notebooks** (directory with notebooks executions)

&emsp;│&emsp;&emsp;├── **geometry_test_data_format_{DATAFORMAT}_out_{DATESTAMP}.ipynb**

&emsp;│&emsp;&emsp;└── **geometry_test_preparation_out.ipynb**

&emsp;├── **tmp** (directory with temporary files)

&emsp;│&emsp;&emsp;├── **test_array.npy**

&emsp;│&emsp;&emsp;├── **test_cube_{DATESTAMP}.{DATAFORMAT}**

&emsp;└──&nbsp; └── **timings_{DATAFORMAT}_{DATESTAMP}.json**

We save **geometry_message_{DATESTAMP}.txt** (the output message) and **geometry_timings_{DATESTAMP}.json** (slide and crop loading timings) in the **tests/logs** directory.

---
 * **{DATAFORMAT}** stands for each data format in the **FORMATS** list

In [None]:
# Constants and parameters:
# Name of the source cube file; names for the other formats are derived from it in the data format tests cell
CUBE_NAME = f'test_cube_{DATESTAMP}.sgy'
FORMATS = ['sgy', 'hdf5', 'qhdf5', 'blosc', 'qblosc']

# Storage preparation: pass the output directory and the log files found in LOGS_DIR to `prepare_local`
if not USE_TMP_OUTPUT_DIR:
    logs_paths = glob(os.path.join(LOGS_DIR, 'geometry_message_*.txt'))
    logs_paths.extend(glob(os.path.join(LOGS_DIR, 'geometry_timings_*.json')))

    prepare_local(output_dir=OUTPUT_DIR, remove_outdated_files=REMOVE_OUTDATED_FILES,
                  dirs_to_remove=[OUTPUT_DIR], paths_to_remove=logs_paths)

# Create the workspace subdirectories.
# `exist_ok=True` makes this a no-op for existing directories without a separate existence check
for dir_name in ['tmp/', 'notebooks/']:
    dir_path = os.path.join(OUTPUT_DIR, dir_name)
    os.makedirs(dir_path, exist_ok=True)

# Accumulator for the output message; every executed notebook appends its own message to it
message = DATESTAMP + '\n\n'

In [None]:
# Execute the notebook that prepares everything for the tests.
# It contains: data creation, data loading checking and cube conversion into different formats
timings = {}

path_ipynb = os.path.join(NOTEBOOKS_DIR, 'geometry_test_preparation.ipynb')
out_path_ipynb = os.path.join(OUTPUT_DIR, 'notebooks/geometry_test_preparation_out.ipynb')

# Parameters passed to the executed notebook: workspace constants first, data creation parameters second
nb_kwargs = dict(
    DATESTAMP=DATESTAMP,
    OUTPUT_DIR=OUTPUT_DIR,
    CUBE_NAME=CUBE_NAME,
    CUBE_SHAPE=CUBE_SHAPE,
    SEED=SEED,
)

current_message, failed = execute_test_notebook(
    path_ipynb=path_ipynb,
    nb_kwargs=nb_kwargs,
    out_path_ipynb=out_path_ipynb,
    show_test_error_info=SHOW_TEST_ERROR_INFO,
    remove_extra_files=REMOVE_EXTRA_FILES,
)

# The data format tests below are executed only if the preparation has succeeded
is_all_OK = not failed

message += current_message

print(current_message)

# Data format tests

In [None]:
# Run the test notebook for the cube in each data format.
# It contains: checking data; attributes, slides, crops loading test, data loading timings and visualization tests.
if is_all_OK:
    # The same test notebook is executed for every format, so its path is resolved once
    path_ipynb = os.path.join(NOTEBOOKS_DIR, 'geometry_test_data_format.ipynb')

    # Only the file extension must change between formats: `str.replace('sgy', ...)` would also
    # rewrite any 'sgy' substring that happens to be in the base name of the cube
    cube_stem = os.path.splitext(CUBE_NAME)[0]

    for data_format in tqdm(FORMATS):
        out_path_ipynb = os.path.join(
            OUTPUT_DIR,
            f'notebooks/geometry_test_data_format_{data_format.upper()}_out_{DATESTAMP}.ipynb'
        )

        nb_kwargs = {
            # Workspace constants
            'DATESTAMP': DATESTAMP,
            'OUTPUT_DIR': OUTPUT_DIR,

            # Tests running parameters
            'REMOVE_EXTRA_FILES': REMOVE_EXTRA_FILES,

            # Tests parameters
            'CUBE_NAME': f'{cube_stem}.{data_format}',
            'N_SLIDE': 1000,
            'N_CROP': 300,
            'SEED': SEED,

            # Visualization parameters
            'SCALE': SCALE,

            # Output parameters
            'VERBOSE': VERBOSE
        }

        current_message, failed = execute_test_notebook(path_ipynb=path_ipynb,
                                                        nb_kwargs=nb_kwargs,
                                                        out_path_ipynb=out_path_ipynb,
                                                        show_test_error_info=SHOW_TEST_ERROR_INFO,
                                                        remove_extra_files=REMOVE_EXTRA_FILES)

        if not failed:
            # Collect timings from the per-format file in the `tmp` directory
            # (presumably written by the executed notebook)
            timings_path = os.path.join(OUTPUT_DIR, f'tmp/timings_{data_format}_{DATESTAMP}.json')
            with open(timings_path, "r") as infile:
                timings.update(json.load(infile))
        else:
            # One failed format marks the whole run as failed, but the other formats are still tested
            is_all_OK = False

        print(current_message)
        message += current_message

In [None]:
# Show the accumulated message and the collected timings
pp = pprint.PrettyPrinter()

print(message)
pp.pprint(timings)

In [None]:
# Visualize timings
def plot_ax(dct, unit, title, ax):
    """ Draw a bar chart for a benchmark unit on the given axis.

    Parameters
    ----------
    dct : dict
        Mapping from a bar label to the bar height.
    unit : str
        Label of the y-axis.
    title : str
        Title of the axis.
    ax : axis-like object
        Where to draw; it must provide `bar`, `set_title`, `set_xlabel` and `set_ylabel` methods.

    Returns
    -------
    The same `ax` object that was passed in.
    """
    labels, heights = dct.keys(), dct.values()
    ax.bar(labels, heights)

    # (setter, text, font size) for the title and both axis labels
    captions = (
        (ax.set_title, title, 20),
        (ax.set_xlabel, 'Storage format', 16),
        (ax.set_ylabel, unit, 16),
    )
    for setter, text, size in captions:
        setter(text, fontsize=size)

    return ax

# The final cell stores a string under the 'state' key of `timings`.
# Only dictionary entries are benchmarks, so filtering them keeps this cell safe to re-run afterwards
benchmarks = {key: value for key, value in timings.items() if isinstance(value, dict)}

fig, axs = plt.subplots(1, 2, figsize=(15, 6))

# One axis per benchmark: (key in the timings of a format, axis title)
panels = [('slide', "Slide loading timings"), ('crop', "Crop loading timings")]

for ax, (benchmark, title) in zip(axs, panels):
    plot_ax(dct={key: value[benchmark]['wall'] for key, value in benchmarks.items()},
            unit="Time, ms", title=title, ax=ax)

plt.show()

# Exit

In [None]:
# Dump timings and message and remove extra files
if is_all_OK:
    timings['state'] = 'OK'

    if REMOVE_OUTDATED_FILES and not USE_TMP_OUTPUT_DIR:
        # Timings files found in LOGS_DIR are removed before the fresh one is dumped below
        paths_to_remove = glob(os.path.join(LOGS_DIR, 'geometry_timings*.json'))
        remove_savings(paths_to_remove=paths_to_remove)

    if REMOVE_EXTRA_FILES and not USE_TMP_OUTPUT_DIR:
        # All tests have passed, so the whole local workspace is passed for removal
        remove_savings(dirs_to_remove=[OUTPUT_DIR])

else:
    timings['state'] = 'FAIL'

    if REMOVE_EXTRA_FILES:
        # Remove timings for each data format
        timings_files = glob(os.path.join(str(OUTPUT_DIR), 'tmp/timings*.json'))
        remove_savings(paths_to_remove=timings_files)

# No visible cell creates LOGS_DIR, and `open(..., "w")` does not create missing parent directories
os.makedirs(LOGS_DIR, exist_ok=True)

# Dump timings
with open(os.path.join(LOGS_DIR, f'geometry_timings_{DATESTAMP}.json'), "w") as outfile:
    json.dump(timings, outfile)

# Message: remove old and save new
if REMOVE_OUTDATED_FILES and not USE_TMP_OUTPUT_DIR:
    msg_files = glob(os.path.join(LOGS_DIR, 'geometry_message*.txt'))
    remove_savings(paths_to_remove=msg_files)

with open(os.path.join(LOGS_DIR, f'geometry_message_{DATESTAMP}.txt'), "w") as outfile:
    outfile.write(message)