# Iterative research

In this advanced notebook, we apply the [carcass interpolation model](./01_Demo_E.ipynb), as well as the [horizon extension](./../Horizon_extension/Demo_E.ipynb) and enhancement models, in quick succession with the help of [research](./../Research_template.ipynb). It is advised to check out our notebooks on these techniques prior to looking at this one. 

In [1]:
# Necessary imports
import os
import sys
import shutil
import random
import warnings

from glob import glob
from tqdm.auto import tqdm
from datetime import date
import matplotlib.pyplot as plt

import torch
import numpy as np

sys.path.append('../../..')
from seismiqb.batchflow import Pipeline, Dataset
from seismiqb.batchflow.models.torch import ResBlock
from seismiqb.batchflow.research import Research, Option, Domain, FileLogger
from seismiqb.batchflow.research import RP, RC, KV

from seismiqb import SeismicGeometry, Horizon, HorizonMetrics, plot_image

from seismiqb.src.controllers.torch_models import EncoderDecoder, ExtensionModel
from seismiqb import MODEL_CONFIG_DETECTION, MODEL_CONFIG_EXTENSION
from seismiqb import BaseController, Interpolator, Enhancer, Extender

warnings.filterwarnings("ignore")

In [2]:
def l1_metric_in_holes(horizon_with_holes, true_horizon, predicted_horizon):
    """ Point-wise absolute difference between the true and the predicted horizons, restricted to holes.

    Parameters
    ----------
    horizon_with_holes, true_horizon, predicted_horizon
        Objects exposing a `full_matrix` array and a `FILL_VALUE` marker of absent points.

    Returns
    -------
    np.ndarray
        `|true - predicted|` at points that are present in both the true and the predicted horizons
        and absent from `horizon_with_holes`; `np.nan` everywhere else.
    """
    truth = true_horizon.full_matrix
    prediction = predicted_horizon.full_matrix

    # A point contributes only if it is labeled, predicted and was missing in the holed horizon
    is_labeled = truth != true_horizon.FILL_VALUE
    is_predicted = prediction != predicted_horizon.FILL_VALUE
    is_hole = horizon_with_holes.full_matrix == horizon_with_holes.FILL_VALUE
    comparable = is_labeled & is_predicted & is_hole

    return np.where(comparable, np.abs(truth - prediction), np.nan)

In [3]:
# Global parameters
DEVICES = [1, 3, 4]         # physical device numbers
WORKERS = len(DEVICES)      # as many research workers as there are devices

# Name of the research; also the root directory of the custom results of each experiment
RESEARCH_NAME = 'Research_horizons'

# Dumped horizons go to a directory named by today's date followed by
# the research name without its leading 'Research' part
DUMP_NAME = f"{date.today():%Y-%m-%d}{RESEARCH_NAME[len('Research'):]}"
N_REPS = 1                  # repetitions of every domain item

FREQUENCIES = (200, 200)
ITERATIONS = 1              # number of extension rounds inside one experiment

In [4]:
# Baseline controller settings; `perform_one_experiment` overrides `savedir` for every experiment
BASE_CONFIG = dict(
    savedir=None,
    logger=None,
    monitor=False,
    bar=False,
    plot=False,
)

# Extension

In [5]:
# Settings of the `train` stage of the extension controller
_EXTENSION_TRAIN = {
    'model_class': ExtensionModel,
    # Library defaults with the microbatch size overridden
    'model_config': {**MODEL_CONFIG_EXTENSION, 'microbatch': 16},

    'batch_size': 64,
    'crop_shape': (1, 128, 128),

    'adaptive_slices': False,
    'side_view': True,
    'width': 5,

    'rebatch_threshold': 0.7,
    'rescale_batch_size': True,

    'prefetch': 3,
    'n_iters': 300,
    'early_stopping': True,
}

# Settings of the `inference` stage; crop shape and width repeat the values used for training
_EXTENSION_INFERENCE = {
    'batch_size': 128,
    'crop_shape': (1, 128, 128),
    'prefetch': 0,

    'width': 5,

    'n_steps': 50,
    'stride': 32,
}

# Settings of the `evaluate` stage
_EXTENSION_EVALUATE = {
    'n': 1,
    'supports': 100,
    'dump': True,
    'device': 'gpu',
}

# Full configuration of the extension controller.
# `savedir` and `logger` are filled in per experiment inside `perform_one_experiment`.
EXTENSION_CONFIG = {
    'savedir': None,
    'monitor': True,
    'bar': False,
    'plot': False,
    'sampler': {},
    'train': _EXTENSION_TRAIN,
    'inference': _EXTENSION_INFERENCE,
    'evaluate': _EXTENSION_EVALUATE,
}

In [6]:
# Root directory of all the seismic data used in this notebook
DATA_ROOT = '/data/seismic_data/seismic_interpretation'

# Each entry pairs a path to a cube with a glob pattern of its horizon files
paths = [
    (f'{DATA_ROOT}/CUBE_01_ETP/amplitudes_01_ETP.hdf5',
     f'{DATA_ROOT}/CUBE_01_ETP/INPUTS/HORIZONS/RAW/etp*'),
    (f'{DATA_ROOT}/CUBE_18_UNKNOWN/amplitudes_18_UNKNOWN.hdf5',
     f'{DATA_ROOT}/CUBE_18_UNKNOWN/INPUTS/HORIZONS/RAW/*'),
]

In [7]:
def _short_name(path):
    """ File name of `path` without its directories and without everything after the first dot. """
    return path.split('/')[-1].split('.')[0]


# Pair every cube with each horizon file matched by its glob pattern.
# `glob` returns matches in arbitrary order, so they are sorted to keep the list stable between runs.
unrolled = [
    (cube_path, horizon_path)
    for cube_path, horizon_pattern in paths
    for horizon_path in sorted(glob(horizon_pattern))
]

# Fail early and clearly instead of starting a research over an empty domain
if not unrolled:
    patterns = [horizon_pattern for _, horizon_pattern in paths]
    raise FileNotFoundError(f'No horizon files matched any of the patterns: {patterns}')

# One option per (cube, horizon) pair, aliased as `<cube name>+<horizon name>`
options = [
    KV((cube_path, horizon_path),
       '+'.join((_short_name(cube_path), _short_name(horizon_path))))
    for cube_path, horizon_path in unrolled
]

# Experiments are still shuffled, but with a dedicated seeded generator:
# the order is reproducible and the global `random` state is left untouched
random.Random(0).shuffle(options)

# Every (cube, horizon) pair is processed with each of the seeds
domain = (
    Option('cube_and_horizon', options) *
    Option('seed', [0, 1, 2])
)

In [8]:
def perform_one_experiment(config):
    """ Run one experiment of the research: cut holes in a horizon, extend it back, evaluate and dump the result.

    The stages are:
        - parse the cube path, horizon path, seed and repetition number from `config`;
        - load the horizon and filter it with a matrix of random holes, made with the given seed;
        - for each of `ITERATIONS` rounds, train an `Extender` model on the current horizon, run inference
          and postprocessing, evaluate the result and log the l1 error inside the holes;
        - dump the final horizon into `PREDICTIONS/HORIZONS/<DUMP_NAME>` next to the cube.

    Parameters
    ----------
    config
        Research config wrapper: its `config()` call must return a mapping
        with `cube_and_horizon`, `seed` and `repetition` keys.

    Returns
    -------
    list of four lists
        Coverages, window rates, support correlations and phases, one entry per extension round.
        The order must match the `returns` names passed to `Research.add_callable`.
    """
    # Names of the returned lists, used only for the final log message.
    # Kept local, so that the function does not depend on a global defined in another cell.
    result_names = ('coverages', 'window_rates', 'corrs', 'phases')

    # Indentation of continuation lines of multi-line log messages
    log_indent = ' ' * 60

    ###################################################################################
    ################################   PARSE CONFIGS   ################################
    ###################################################################################
    # Get all the params from configs
    config = config.config()
    cube_path, horizon_path = config['cube_and_horizon']
    seed = config['seed']
    n_rep = config['repetition']

    # Directory to save results to. The seed is a part of the path: experiments that differ
    # only in seed would otherwise write into the same directory and overwrite each other
    results_dir = os.path.join(RESEARCH_NAME, 'custom_results')

    short_name_cube = cube_path.split('/')[-1].split('.')[0]
    short_name_horizon = horizon_path.split('/')[-1].split('.')[0]
    alias = os.path.join(short_name_cube, short_name_horizon, f'seed_{seed}', f'{n_rep}')
    savedir = os.path.join(results_dir, alias)

    return_value = [[], [], [], []]   # coverages, window rates, support corrs, phases


    ###################################################################################
    ####################################    BASE    ###################################
    ###################################################################################
    # Controller that owns the log of this experiment; all messages below go through it
    controller = BaseController({**BASE_CONFIG, 'savedir': savedir})
    controller.log(f'Seed at this exp: {seed}')


    ###################################################################################
    #############################   HORIZON PREPARATION   #############################
    ###################################################################################
    interpolator = Interpolator()
    train_dataset = interpolator.make_dataset(cube_paths=cube_path, horizon_paths=horizon_path)

    # Work on a copy, so that the horizon stored in the dataset stays intact and serves as the ground truth
    horizon = train_dataset.labels[0][0].__copy__()
    filtering_matrix = horizon.make_random_holes_matrix(n=20, points_proportion=1e-5, points_shape=5,
                                                        noise_level=25, seed=seed)
    horizon.filter(filtering_matrix=filtering_matrix)

    # Snapshot of the holed horizon: `horizon` itself is replaced by predictions in the loop below
    horizon_with_holes = horizon.__copy__()


    ###################################################################################
    ##################################   EXTENSION   ##################################
    ###################################################################################
    for i in range(ITERATIONS):
        torch.cuda.empty_cache()

        extension_config = {
            **EXTENSION_CONFIG,
            'savedir': f'{savedir}/{1+i}_extension',
            'logger': controller.filelogger,
        }
        extender = Extender(extension_config)

        # The output of each round is the input of the next one
        model = extender.train(horizon=horizon)
        horizon = extender.inference(horizon, model)
        horizon = extender.postprocess(horizon)

        info = extender.evaluate(horizon, dataset=train_dataset)[0]

        return_value[0].append(horizon.coverage)
        return_value[1].append(info['window_rate'])
        return_value[2].append(info['corrs'])
        return_value[3].append(info['phase'])

        # Error of the restored surface, measured only where the holes were made
        l1_metric_matrix = l1_metric_in_holes(horizon_with_holes, train_dataset.labels[0][0], horizon)
        l1 = np.nanmean(l1_metric_matrix)
        controller.log(f'l1 in holes at this exp: {l1}')
        plot_image(l1_metric_matrix,
                   plot=True, show=extender.plot,
                   savepath=extender.make_savepath('my_custom_l1.png'))


    ###################################################################################
    ##############################   SAVE NEXT TO CUBE   ##############################
    ###################################################################################
    cube_dir = os.path.dirname(horizon.geometry.path)
    savepath = os.path.join(cube_dir, 'PREDICTIONS/HORIZONS', DUMP_NAME)
    os.makedirs(savepath, exist_ok=True)

    # The seed goes into the name for the same reason as into `savedir`: to keep dumps of
    # experiments on the same horizon apart. The repetition number, if any, stays the last token
    horizon.name = '+' + horizon.name.replace('enhanced_', '').replace('extended_', '')
    horizon.name += f'_seed{seed}'
    if N_REPS != 1:
        horizon.name += f'_{n_rep}'

    savepath = os.path.join(savepath, horizon.name)
    horizon.dump_float(savepath, add_height=False)
    controller.log(f'Dumped horizon to {savepath}')


    ###################################################################################
    ###################################   RETURNS   ###################################
    ###################################################################################
    msg = f'Finished experiment:\n{log_indent}{horizon.name}\n'
    for name, value in zip(result_names, return_value):
        msg += f'{log_indent}{name} -> {value}\n'
    controller.log(msg)
    return return_value

In [9]:
# Remove results of a previous research with the same name.
# Done in pure Python: nothing is interpolated into a shell command, and the cell runs outside of IPython too
shutil.rmtree(RESEARCH_NAME, ignore_errors=True)

# Names of the four lists returned by `perform_one_experiment`, in the order they are returned
returned_values = [
    'coverages', 'window_rates', 'corrs', 'phases',
]

# Fake pipeline is needed to pass parameters around
fake_ppl = Pipeline().set_dataset(Dataset(10)).run_later(1, n_iters=1)

research = (
    Research()
    .add_logger(FileLogger)
    .init_domain(domain, n_reps=N_REPS)
    .add_pipeline(fake_ppl, run=True, name='fake')
    .add_callable(
        perform_one_experiment,                         # Callable to run
        returns=returned_values,                        # Names of returned results
        execute='#0',                                   # Execute immediately
        config=RC('fake'),                              # Pass config to the callable
        name='perform_one_experiment'                   # Name to be shown in the dataframe
    )
)

research.run(
    n_iters=1,
    name=RESEARCH_NAME,
    bar=True,
    workers=WORKERS,
    devices=DEVICES,
    timeout=10000,
)

Research Research_horizons is starting...


Domain updated: 0: 100%|██████████| 3/3.0 [07:51<00:00, 157.23s/it]


<seismiqb.batchflow.research.research.Research at 0x7f5174011390>

# Average horizons
If each carcass is interpolated multiple times, we can aggregate repetitions into an averaged surface:

In [10]:
if N_REPS > 1:

    for cube_path, _ in paths:
        # Parse paths and make directory for saving averaged horizons
        cube_dir = os.path.dirname(cube_path)
        horizon_dir = os.path.join(cube_dir, 'PREDICTIONS/HORIZONS', DUMP_NAME)
        savedir = horizon_dir + '_AVERAGED'
        os.makedirs(savedir, exist_ok=True)

        # Load all the predictions for a given cube; sorted, so that the order does not depend on the filesystem
        geometry = SeismicGeometry(cube_path)
        horizons = [Horizon(path, geometry) for path in sorted(glob(horizon_dir + '/*'))]

        # Group repetitions by the exact base name: the horizon name without its last `_`-separated token.
        # Matching by prefix would put e.g. `+h_10_0` into the group of `+h_1` as well
        grouped = {}
        for horizon in horizons:
            base_name = '_'.join(horizon.name.split('_')[:-1])
            grouped.setdefault(base_name, []).append(horizon)

        # Average
        for name in sorted(grouped):
            current_horizons = grouped[name]

            averaged, dct = Horizon.average_horizons(current_horizons)
            plot_image(dct['std_matrix'], title=f'Averaged {name}')
            plt.show()

            savepath = os.path.join(savedir, name)
            averaged.dump_float(savepath)
            print('Dumped to', savepath)
        print()

# Select best horizon
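If there are multiple repetitions, we can select the best one based on a quality metric (the mean support correlation). The averaged surfaces from the previous section take part in the selection as well.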

In [11]:
if N_REPS > 1:

    for cube_path, _ in paths:
        # Parse paths and make directory for saving the best horizons
        cube_dir = os.path.dirname(cube_path)
        horizon_dir = os.path.join(cube_dir, 'PREDICTIONS/HORIZONS', DUMP_NAME)
        averaged_dir = horizon_dir + '_AVERAGED'

        savedir = horizon_dir + '_BEST'
        os.makedirs(savedir, exist_ok=True)

        # Load all the predictions for a given cube, as well as averaged versions of them.
        # Sorted, so that the order of candidates does not depend on the filesystem
        geometry = SeismicGeometry(cube_path)
        horizons = [Horizon(path, geometry) for path in sorted(glob(horizon_dir + '/*'))]
        averaged = [Horizon(path, geometry) for path in sorted(glob(averaged_dir + '/*'))]

        # Group repetitions by the exact base name: the horizon name without its last `_`-separated token.
        # Matching by prefix would put e.g. `+h_10_0` into the group of `+h_1` as well
        grouped = {}
        for horizon in horizons:
            base_name = '_'.join(horizon.name.split('_')[:-1])
            grouped.setdefault(base_name, []).append(horizon)

        # Averaged horizons are matched by substring, as their exact naming is not fixed here.
        # To avoid the same collision, each of them goes to the longest base name it contains
        averaged_grouped = {}
        for horizon in averaged:
            matching = [name for name in grouped if name in horizon.name]
            if matching:
                averaged_grouped.setdefault(max(matching, key=len), []).append(horizon)

        # Select the best one
        for name in sorted(grouped):
            current_horizons = grouped[name] + averaged_grouped.get(name, [])

            values = []
            for horizon in current_horizons:
                hm = HorizonMetrics(horizon)
                correlation_map = hm.evaluate('support_corrs', supports=100, device='gpu', plot=False, show=False)
                values.append(np.nanmean(correlation_map))

            # `np.argmax` treats NaN as the maximum, so a horizon without a valid metric would win.
            # Ignore NaN scores; if every score is NaN, keep the first candidate
            scores = np.asarray(values, dtype=float)
            if np.isnan(scores).all():
                idx = 0
            else:
                idx = int(np.nanargmax(scores))
            best = current_horizons[idx]

            savepath = os.path.join(savedir, name)
            best.dump_float(savepath)
            print(f'Dumped {idx} to {savepath}')
            print(f'MeanMetrics are {[round(item, 3) for item in values]}')
            print(f'Coverages   are {[round(horizon.coverage, 3) for horizon in current_horizons]}\n')
        print()