# Metrics for horizons

This notebook computes and shows various metrics for each horizon on each cube.

Pseudocode of this notebook looks like:

```python
for each cube:
    mkdir
    infer geometry

    for each horizon:
        mkdir
        if SHOW, log to std.out

        for each metric:
            compute metric
            save png
            if SAVE_POINT_CLOUDS, save point cloud
            if SHOW, draw in std.out  
```

#### The following parameters are available:

* `paths` controls which cubes and horizons are used
* `metrics` selects which metrics are computed
* `script_dir` is a location to save metric maps
* `local_kwargs`, `support_kwargs` define exact parameters of metric computation
* `ADD_PREFIX` stores parameters of metric computation in the name of saved metric map
* `SAVE_POINT_CLOUDS` stores obtained values next to the metric map
* `SHOW` controls whether images are displayed in the notebook

In [None]:
import os
import sys
from tqdm.auto import tqdm
from copy import copy
from glob import glob
import matplotlib.pyplot as plt

import pandas as pd
import numpy as np

sys.path.append('..')
from seismiqb import SeismicGeometry, Horizon, HorizonMetrics

In [None]:
# Cubes to evaluate, each paired with a glob pattern that selects its horizons.
# The patterns are expanded immediately, so every item of `paths` ends up as
# `(cube_path, [horizon_path, ...])`.
paths = [
    (cube_path, glob(horizon_pattern))
    for cube_path, horizon_pattern in (
        ('/data/seismic/CUBE_1/E_anon.hdf5', '/data/seismic/CUBE_1/RAW/*'),
        ('/data/seismic/CUBE_2/M_cube.hdf5', '/data/seismic/CUBE_2/RAW/*'),
        ('/data/seismic/CUBE_3/P_cube.hdf5', '/data/seismic/CUBE_3/RAW/prb*'),
        ('/data/seismic/CUBE_4/R_cube.hdf5', '/data/seismic/CUBE_4/BEST_HORIZONS/*'),
        ('/data/seismic/CUBE_5/AMP.hdf5', '/data/seismic/CUBE_5/RAW/*'),
        ('/data/seismic/CUBE_7/S_cube.hdf5', '/data/seismic/CUBE_7/RAW/*'),

        ('/data/seismic/CUBE_12/A_cube.hdf5', '/data/seismic/CUBE_12/FULL_CONVERTED/*'),

        ('/data/seismic/CUBE_15/15_cube.hdf5', '/data/seismic/CUBE_15/RAW/*'),

        ('/data/seismic/CUBE_16/PSDM_CUB_ANON.hdf5', '/data/seismic/CUBE_16/PREDICTIONS/saved_22_07/*'),
        ('/data/seismic/CUBE_17/ACH.hdf5', '/data/seismic/CUBE_17/PREDICTIONS/saved_01_07/*'),
        ('/data/seismic/CUBE_18/Cube.hdf5', '/data/seismic/CUBE_18/RAW/*'),
        ('/data/seismic/CUBE_19/Cube.hdf5', '/data/seismic/CUBE_19/RAW/*'),
        ('/data/seismic/CUBE_20/Cube.hdf5', '/data/seismic/CUBE_20/RAW/*'),
        ('/data/seismic/CUBE_21/Cube.hdf5', '/data/seismic/CUBE_21/RAW/*'),
        ('/data/seismic/CUBE_22/Cube.hdf5', '/data/seismic/CUBE_22/RAW/*'),
    )
]

# Names of the metrics to evaluate for every horizon.
# Uncomment an entry to enable it.
metrics = [
    # 'instantaneous_phase',
    # 'local_corrs',
    'support_corrs',
    # 'local_crosscorrs',
    # 'support_crosscorrs',
]

In [None]:
# Root output directory; one sub-directory per cube is created inside it
script_dir = 'horizon_metrics'

# Keyword arguments for metrics whose name starts with `local`, e.g. `local_corrs`
local_kwargs = {'agg': None, 'kernel_size': 7, 'reduce_func': 'mean'}

# Keyword arguments for `support` metrics, e.g. `support_corrs`
support_kwargs = {'agg': 'nanmean', 'supports': 100}

# Keyword arguments forwarded to the plotting routine
plot_kwargs = {'figsize': (15, 15)}


# If True, the parameters of metric computation are appended to saved file names
ADD_PREFIX = False

# If True, a point cloud with metric values is saved next to each image
SAVE_POINT_CLOUDS = False

# If True, progress is logged and images are drawn to std.out
SHOW = True

In [None]:
def save_point_cloud(metric, save_path, geometry=None):
    """Write all non-NaN values of a 2D metric map to a space-separated text file.

    Every output row has the form `iline xline metric_value`; rows are sorted by
    iline, then by xline. The file is written to `save_path + '.txt'` without
    a header or an index column.

    Parameters
    ----------
    metric : np.ndarray
        Two-dimensional array of metric values. NaN entries are skipped.
    save_path : str
        Destination path without extension; `.txt` is appended.
    geometry : object, optional
        If given, its `ilines_offset` and `xlines_offset` attributes are added
        to the row and column indices respectively.
    """
    ilines, xlines = np.asarray(~np.isnan(metric)).nonzero()
    # Cast to float64 so values are written with the same precision as when
    # they were stacked together with the integer indices.
    values = metric[ilines, xlines].astype(np.float64)

    if geometry is not None:
        ilines = ilines + geometry.ilines_offset
        xlines = xlines + geometry.xlines_offset

    # Columns are kept separate (instead of stacked into one float array) so that
    # coordinates stay integers and are written as `12`, not `12.0`.
    columns = ['iline', 'xline', 'metric_value']
    df = pd.DataFrame({'iline': ilines, 'xline': xlines, 'metric_value': values},
                      columns=columns)
    df = df.sort_values(['iline', 'xline'])
    df.to_csv(save_path + '.txt', sep=' ', columns=columns,
              index=False, header=False)
    
def safe_mkdir(path):
    """Create directory `path` if it does not exist yet.

    Missing parent directories are created as well, so nested output locations
    work out of the box. An already existing directory is left untouched.

    Raises
    ------
    FileExistsError
        If `path` exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)

In [None]:
%%time
# Evaluate every requested metric for every horizon of every cube.
# Output layout: script_dir / <cube long name> / <horizon name> / {info.txt, <metric>.png, depth_map.png}
safe_mkdir(script_dir)

for path_cube, horizon_paths in tqdm(paths, ncols=1100):
    geometry = SeismicGeometry(path_cube)

    cube_dir = '/'.join((script_dir, geometry.long_name))
    safe_mkdir(cube_dir)

    for horizon_path in tqdm(horizon_paths, ncols=800):
        horizon = Horizon(horizon_path, geometry=geometry)
        horizon.filter()
        hm = HorizonMetrics(horizon)

        horizon_dir = '/'.join((cube_dir, horizon.name))
        safe_mkdir(horizon_dir)

        # Explicit encoding: logged messages must not depend on the platform's default codec
        with open('/'.join((horizon_dir, 'info.txt')), 'w', encoding='utf-8') as result_txt:

            if SHOW:
                # Visual separator between horizons in the notebook output
                print('▆'*130)
                print('▆'*130)
                print(horizon)

            for metric_name in tqdm(metrics, ncols=600):
                save_path = '/'.join((horizon_dir, metric_name))

                # `insta*` metrics take no extra parameters, `local*` metrics use
                # `local_kwargs`, everything else uses `support_kwargs`
                if metric_name.startswith('insta'):
                    kwargs = {}
                elif metric_name.startswith('local'):
                    kwargs = copy(local_kwargs)
                else:
                    kwargs = copy(support_kwargs)

                if ADD_PREFIX:
                    # Encode computation parameters into the file name: `name@key:value|key:value`
                    save_path += '@'
                    save_path += '|'.join([':'.join([key, str(value)])
                                           for key, value in kwargs.items()])

                metric = hm.evaluate(metric_name, **kwargs,
                                     plot=True, show_plot=SHOW, plot_kwargs=plot_kwargs,
                                     savepath=save_path + '.png')
                plt.show()

                if SAVE_POINT_CLOUDS:
                    save_point_cloud(metric, save_path, geometry=geometry)

                # The original `""*20` was an empty string, so no padding was emitted;
                # use 20 spaces to separate the label from the value
                print(f'{metric_name} avg value: {" "*20} {np.nanmean(metric):5.5}', file=result_txt)

            horizon.show(show=False, savepath='/'.join((horizon_dir, 'depth_map.png')))

            # Redirect the horizon's own evaluation report into `info.txt`
            corrs = horizon.evaluate(printer=lambda msg: print(msg, file=result_txt),
                                     plot=False)