# Fault detection (Research)

Here we demonstrate the research process for fault detection models. We will train multiple 2D and 3D models on all possible combinations of seismic cubes.

In [None]:
import sys
import os
from copy import copy
import itertools

import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm_notebook


sys.path.append('../../..')

from seismiqb import *

from seismiqb.batchflow import FilesIndex, Pipeline
from seismiqb.batchflow.research import Option, Research, RP, RC, RD, REP, KV, RI
from seismiqb.batchflow.models.torch import EncoderDecoder, ResBlock
from seismiqb.batchflow import D, B, V, P, R, L, W, C

Here we describe the model configuration. In general, it is a UNet-like architecture in which we fix some parameters, although all of them are also subject to research.

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Dice(nn.Module):
    """Soft Dice loss evaluated on raw (pre-sigmoid) model outputs.

    The input is squashed with a sigmoid, then the Dice coefficient is computed
    over all elements of the tensors at once (no per-item averaging).
    """
    def forward(self, input, target):
        """Return `1 - dice`, a scalar tensor.

        Parameters
        ----------
        input : torch.Tensor
            Raw model outputs; a sigmoid is applied here.
        target : torch.Tensor
            Ground-truth mask, multiplied elementwise with the sigmoid of `input`.
        """
        probabilities = torch.sigmoid(input)
        overlap = (probabilities * target).sum()
        # The small constant keeps the denominator strictly positive.
        total = probabilities.sum() + target.sum() + 1e-7
        return 1 - 2. * overlap / total

# NOTE(review): ITERS is not referenced in the cells visible here (the pipeline
# below is configured with n_iters=10) — confirm where it is consumed.
ITERS = 2000
# Number of crop locations drawn from the sampler for every batch.
BATCH_SIZE = 96
# Filter counts shared by the model parts below: the first four entries go to the
# encoder, the last one to the embedding, and the first four in reverse order
# to the decoder.
FILTERS = [64, 96, 128, 192, 256]

# Configuration dict passed to `EncoderDecoder` when the model is initialized.
MODEL_CONFIG = {
    # Model layout
    'initial_block': {
        'base_block': ResBlock,
        'filters': FILTERS[0] // 2,  # half of the first encoder filter count (32)
        'kernel_size': 5,
        'downsample': False,
        'attention': 'scse'
    },

    'body/encoder': {
        'num_stages': 4,
        'order': 'sbd',
        'blocks': {
            'base': ResBlock,
            'n_reps': 1,
            'filters': FILTERS[:-1],  # [64, 96, 128, 192]
            'attention': 'scse',
        },
    },
    'body/embedding': {
        'base': ResBlock,
        'n_reps': 1,
        'filters': FILTERS[-1],  # 256
        'attention': 'scse',
    },
    'body/decoder': {
        'num_stages': 4,
        'upsample': {
            'layout': 'tna',
            'kernel_size': 2,
        },
        'blocks': {
            'base': ResBlock,
            'filters': FILTERS[-2::-1],  # [192, 128, 96, 64], mirrors the encoder
            'attention': 'scse',
        },
    },
    'head': {
        'base_block': ResBlock,
        'filters': [16, 8],
        'attention': 'scse'
    },
    # NOTE(review): `Dice.forward` also applies a sigmoid to its input; confirm the
    # loss receives raw outputs rather than the result of this op (double sigmoid).
    'output': torch.sigmoid,
    # Train configuration
    'loss': Dice(),
    'optimizer': {'name': 'Adam', 'lr': 0.005,},
    "decay": {'name': 'exp', 'gamma': 0.1, 'frequency': 150},
    # Presumably splits each batch into chunks of 8 items — TODO confirm.
    'microbatch': 8,
    'common/activation': 'relu6',
}

The whole training process is described by the following pipeline. We will vary the crop shape (2D (1, 128, 256) and 3D (32, 128, 256)), so we define it as `C('crop')` to use with `Research` from `batchflow`.

In [None]:
# Build the dataset from a single seismic cube and attach fault labels to it.
# NOTE(review): both paths are hard-coded absolute locations; consider moving
# them to a configurable data directory.
PATH = '/data/seismic_data/seismic_interpretation/CUBE_16_PSDM/amplitudes_16_PSDM.hdf5'
LABELS_PATH = '/data/seismic_data/seismic_interpretation/CUBE_16_PSDM/INPUTS/FAULTS/HDF5/faults.hdf5'
dataset = SeismicCubeset(FilesIndex(path=PATH, no_ext=True))

# Load `Fault` labels for the cube.
# NOTE(review): `label_dir` also lists 'amplitudes_01_ETP', which is not among the
# cubes indexed above — presumably a leftover from a multi-cube setup; confirm.
dataset.load(label_dir={
    'amplitudes_01_ETP': '/INPUTS/FAULTS/NPY/*',
    'amplitudes_16_PSDM': '/INPUTS/FAULTS/NPY/*',
}, labels_class=Fault, transform=True, verify=True)

# Create the 'train_sampler' that the training pipeline reads via D('train_sampler').
dataset.modify_sampler(dst='train_sampler', finish=True, low=0.0, high=1.0)
# Replace the labels of the cube with a geometry opened from LABELS_PATH; the pipeline
# loads masks from it with `load_cubes(..., src_geometry='labels')`.
# NOTE(review): this happens after the sampler has been built from the labels loaded
# above — confirm the order is intentional.
dataset.labels['amplitudes_16_PSDM'] = SeismicGeometry(LABELS_PATH, geometry=dataset.geometries[0])

In [None]:
# Training pipeline template: sample crop locations, load amplitude crops and the
# matching fault masks, then run one training step of the model per batch.
# The crop shape is taken from the pipeline config through C('crop').
train_pipeline = (
    Pipeline()
    # Initialize pipeline variables and model
    .init_variable('loss_history', [])
    .init_model('dynamic', EncoderDecoder, 'model', MODEL_CONFIG)
    # Load data/masks
    # BATCH_SIZE locations are drawn from the dataset's 'train_sampler' for each batch.
    .crop(points=D('train_sampler')(BATCH_SIZE), shape=C('crop'), side_view=False)
    #.create_masks(dst='masks', width=1)
    #.mask_rebatch(src='masks', threshold=0.5, axis=(0, 1))
    .load_cubes(dst='images')
    # Masks are read from the 'labels' geometry rather than created from label objects
    # (the `create_masks` call above is disabled).
    .load_cubes(dst='masks', src_geometry='labels')
    .adaptive_reshape(src=['images', 'masks'], shape=C('crop'))
#     .scale(mode='q', src='images')

    # NOTE(review): normalization and all augmentations below are disabled.
#     # Augmentations
#     .transpose(src=['images', 'masks'], order=(1, 2, 0))
#     .flip(axis=1, src=['images', 'masks'], seed=P(R('uniform', 0, 1)), p=0.3)
#     .additive_noise(scale=0.005, src='images', dst='images', p=0.3)
#     .rotate(angle=P(R('uniform', -15, 15)),
#             src=['images', 'masks'], p=0.3)
#     .scale_2d(scale=P(R('uniform', 0.85, 1.15)),
#               src=['images', 'masks'], p=0.3)
#     .transpose(src=['images', 'masks'], order=(2, 0, 1))

    # Training
    # NOTE(review): 'loss_history' starts as a list but is saved with mode='w' —
    # presumably each step overwrites the value instead of appending; confirm.
    .train_model('model',
                 fetches='loss',
                 images=B('images'),
                 masks=B('masks'),
                 save_to=V('loss_history', mode='w'))
    # Deferred run settings: 10 iterations with profiling and a progress bar.
    # NOTE(review): ITERS is not used here — confirm n_iters=10 is a debug setting.
    .run_later(D('size'), n_iters=10, profile=True, bar=True)
)

In [None]:
# Provide the value read by C('crop') in the pipeline: a crop with a leading dimension of 1.
train_pipeline.set_config({'crop': (1, 128, 128)})

In [None]:
# Attach the dataset to the pipeline template to get a runnable pipeline.
ppl = train_pipeline << dataset

In [None]:
# Pull a single batch through the pipeline so its components can be inspected below.
# NOTE(review): this presumably also executes the `train_model` step once — confirm.
b = ppl.next_batch()

In [None]:
# Visual sanity check of the fetched batch: 'images' and 'masks' drawn in 'overlap' mode.
# `idx=9` presumably selects one item of the batch — TODO confirm.
b.plot_components('images', 'masks',
                  slide=0, mode='overlap',
                  idx=9, alpha=[0.9, 0.5],
                  title='_e_psdm')

In [None]:
# Display the profiling info; the pipeline was configured with profile=True in `run_later`.
ppl.show_profile_info(per_iter=False, detailed=False)