In [None]:
# Shell out to `nvidia-smi` to display the GPUs on this machine and their current load.
!nvidia-smi

In [None]:
# Necessary imports
import os
import shutil
import sys
import warnings
from datetime import date
import matplotlib.pyplot as plt
warnings.filterwarnings("ignore")

import numpy as np
import torch
from torch import nn

from sklearn.linear_model import LinearRegression
from skimage.metrics import structural_similarity as ssim, peak_signal_noise_ratio, mean_squared_error

sys.path.append('../../seismiqb')
from seismiqb import SyntheticGenerator
from seismiqb import SeismicDataset, SeismicCropBatch, SeismicSampler
from seismiqb import plot_image, plot_loss

from seismiqb.batchflow import Dataset, Pipeline, NumpySampler as NS, B, C, V, P, R, I, M
from seismiqb.batchflow import set_gpus, get_notebook_name
from seismiqb.batchflow.models.torch import EncoderDecoder, ResBlock


from utils import make_data, impedance_to_seismic, adjust_seismic,\
                  normalize_seismic, normalize_impedance, denormalize_impedance,\
                  compute_metric, show_results_real, show_result_synthetic, show_progress
# Set GPU
# `set_gpus` is imported from seismiqb.batchflow; n=1 presumably makes a single device
# available to this kernel — TODO confirm against the batchflow docs.
# The trailing `;` suppresses the returned value in the cell output.
set_gpus(n=1);

# Global definitions

In [None]:
# Spatial shape of one synthetic sample; SHAPE prepends a singleton axis and is the
# crop shape requested from the real-data sampler.
SYNTHETIC_SHAPE = (128, 256)
SHAPE = (1, *SYNTHETIC_SHAPE)

BATCH_SIZE = 32          # synthetic batch size used for training
REAL_BATCH_SIZE = 256    # default number of real crops requested per batch
N_ITERS = 15000          # number of training iterations
TEST_FREQUENCY = 100     # metric/plot actions trigger every TEST_FREQUENCY iterations

# Per-run output directory named <YYYY-MM-DD>_<NOTEBOOK NAME>; it is wiped and
# recreated on every execution of this cell.
SAVE_DIR = date.today().strftime("%Y-%m-%d") + '_' + get_notebook_name().upper()
# Use shutil instead of `!rm -rf {SAVE_DIR}`: the shell form interpolates the name
# unquoted, so a notebook name containing spaces or shell metacharacters would make
# `rm -rf` act on unintended paths. ignore_errors=True keeps the `-f` semantics
# (no failure when the directory does not exist yet).
shutil.rmtree(SAVE_DIR, ignore_errors=True)
os.mkdir(SAVE_DIR)

# Load real data

In [None]:
%%time
# Mapping: path to a seismic cube -> glob pattern of the horizon files to load as labels.
# NOTE(review): the active cube path is absolute and machine-specific; the commented-out
# entries are alternative cubes kept for reference.
index = {
    '/notebooks/repos/seismic_data/seismic_interpretation/CUBE_02_M/amplitudes_02_M.qhdf5' : '~/INPUTS/HORIZONS/RAW/*',
#     '/data/seismic_data/seismic_interpretation/002_M/002_M.qblosc' : '~/INPUTS/HORIZONS/FINAL/*',
#     '/data/seismic_data/seismic_interpretation/001_YETYPUR/001_YETYPUR.qblosc' : '~/INPUTS/HORIZONS/FINAL/*',
}

# Dataset over the cubes in `index`, with labels loaded as horizons.
real_dataset = SeismicDataset(index, labels_class='horizon')

# Sampler of crop locations of shape SHAPE, driven by the dataset's horizon labels.
# The exact meaning of `threshold` / `shift_height` is defined by SeismicSampler — see its docs.
real_sampler = SeismicSampler(labels=real_dataset.labels, crop_shape=SHAPE,
                              threshold=0.7, shift_height=(0.9, 0.1), mode='horizon')

# Real-data pipeline: sample crop locations -> build horizon masks -> re-batch by mask
# threshold -> load amplitudes into `images` -> reshape `images` and `masks`.
# The number of crops comes from the pipeline config key 'batch_size'
# (REAL_BATCH_SIZE unless overridden via `set_config`).
real_pipeline = (
    Pipeline()
    .make_locations(generator=real_sampler, batch_size=C('batch_size', default=REAL_BATCH_SIZE))
    .create_masks(dst='masks', width=4)
    .mask_rebatch(src='masks', threshold=0.3)
    .load_cubes(dst='images')
    .adaptive_reshape(src=['images', 'masks'])
) << real_dataset

# BIG_BATCH: one batch requested with batch_size=1000.
# NOTE(review): BIG_BATCH is not referenced by any other cell shown in this notebook.
BIG_BATCH = real_pipeline.set_config({'batch_size' : 1000}).next_batch()
# FIXED_BATCH: one batch with the default size; it is passed to the metric/plot actions
# of the training pipeline. This second `set_config` also restores 'batch_size' to
# REAL_BATCH_SIZE for later `real_pipeline.next_batch` calls.
FIXED_BATCH = real_pipeline.set_config({'batch_size' : REAL_BATCH_SIZE}).next_batch()

# Data generation parameters

In [None]:
def parameter_generator(rng=None):
    """Draw one random set of parameters for synthetic data generation.

    Parameters
    ----------
    rng : np.random.RandomState or None
        Source of randomness exposing the legacy ``randint`` / ``uniform`` / ``choice``
        methods. ``None`` (default) uses the global ``np.random`` state, exactly as the
        argument-less version did. Pass ``np.random.RandomState(seed)`` to make the
        draw reproducible without touching the global state.

    Returns
    -------
    dict
        'num_reflections' : int in [25, 50)
        'horizon_n' : int in [1, 6)
        'horizon_height' : array of `horizon_n` floats in [0.05, 0.95)
        'horizon_multipliers' : array of `horizon_n` distinct ints from -13..-8 and 9..15
        'grid_size' : int in [5, 10)
        'ricker_width' : float in [3.3, 5.5)
        'ricker_points' : int in [50, 130)
        'noise_mul' : float in [0.1, 0.3)
    """
    rng = np.random if rng is None else rng

    # Horizons
    n_reflections = rng.randint(low=25, high=50)
    horizon_n = rng.randint(low=1, high=6)
    horizon_heights = rng.uniform(low=0.05, high=0.95, size=horizon_n)
    # Sampling without replacement is safe: the pool has 13 values, horizon_n is at most 5
    multiplier_pool = list(range(-13, -7)) + list(range(9, 16))
    horizon_multipliers = rng.choice(multiplier_pool, size=horizon_n, replace=False)

    # Impedance creation
    grid_size = rng.randint(low=5, high=10)

    # Conversion to seismic
    ricker_width = rng.uniform(low=3.3, high=5.5)
    ricker_points = rng.randint(low=50, high=130)
    noise_mul = rng.uniform(low=0.1, high=0.3)

    params = {
        # Horizons
        'num_reflections' : n_reflections,
        'horizon_n' : horizon_n,
        'horizon_height' : horizon_heights,
        'horizon_multipliers': horizon_multipliers,

        # Faults: no fault parameters are generated

        # Impedance creation
        'grid_size' : grid_size,

        # Conversion to seismic
        'ricker_width' : ricker_width,
        'ricker_points' : ricker_points,
        'noise_mul' : noise_mul,
    }
    return params

# Normalization / denormalization parameters

In [None]:
%%time
synthetic, impedance = make_data(1000, SYNTHETIC_SHAPE, parameter_generator)
IMPEDANCE_MEAN, IMPEDANCE_STD = impedance.mean(), impedance.std()


SYNTHETIC_SEISMIC_NORMALIZATION_MODE = {'function': 'mean-std'}
REAL_SEISMIC_NORMALIZATION_MODE = {'function': 'mean-std'}

# IMPEDANCE_NORMALIZATION_MODE = {'function': 'mean-std'}
IMPEDANCE_NORMALIZATION_MODE = {'function': 'global_mean-std',
                                'mean' : IMPEDANCE_MEAN,
                                'std' : IMPEDANCE_STD}
IMPEDANCE_DENORMALIZATION_MODE = {'function': 'mean-std',
                                  'mean' : IMPEDANCE_MEAN,
                                  'std' : IMPEDANCE_STD}

# Model architecture

In [None]:
from networks import ResNet
# Network used as the model body: ResNet from the local `networks` module.
model = ResNet(start_channels=16, factor=2, kernel_size=(5, 5), padding=2)

# Configuration passed to `init_model` of the training pipeline.
MODEL_CONFIG = dict(
    # The model consists of a single part, 'body', which is the network created above
    order=['body'],
    body=model,

    # Optimization: L1 loss, Adam with a learning rate of 5e-4
    loss='l1',
    optimizer=dict(name='Adam', lr=0.0005),

    benchmark=False,
    amp=False,
)

# Training pipeline

In [None]:
# Keyword arguments shared by every metric/plot action of the training pipeline:
# current iteration, whether to show figures, where to save them, and a trigger that
# holds on every TEST_FREQUENCY-th iteration.
# NOTE(review): the name `logging` would shadow the stdlib `logging` module if that
# module is ever imported in this notebook.
logging = {
    'iteration': I(),
    'show' : False,
    'savedir' : SAVE_DIR,
    'trigger' : (I() % TEST_FREQUENCY == 0),
}

# Training pipeline: generate synthetic data -> normalize -> train -> evaluate on real data.
# `make_data`, `normalize_*`, `compute_metric` and `show_*` are the functions imported
# from `utils`, invoked here as pipeline actions.
train_pipeline = (
    Pipeline()
    # Initialize pipeline variables and model
    .init_variable('loss_history', [])
    .init_variable('ssim_on_fixed_real', [0.0])
    .init_variable('ssim_on_random_real', [0.0])
    .init_model(name='model', model_class=EncoderDecoder, config=MODEL_CONFIG)

    # Load data/masks
    # NOTE(review): `samplers` is not defined in any cell visible in this notebook, so
    # this cell raises NameError on a fresh kernel (Restart & Run All). Define it before
    # this cell — presumably a dict carrying the parameter generator expected by
    # `make_data`; confirm against utils.make_data.
    .make_data(size=B.size, synthetic_shape=SYNTHETIC_SHAPE,
               **samplers, save_to=(B.images, B.impedance))
    
    .normalize_seismic(array=B('images'), **SYNTHETIC_SEISMIC_NORMALIZATION_MODE,
                       save_to=B('images'))
    .normalize_impedance(array=B('impedance'), **IMPEDANCE_NORMALIZATION_MODE,
                         save_to=B('impedance'))

    # Augmentations
    # (currently disabled: transpose -> random flips along both axes -> transpose back)
#     .transpose(src=['images', 'impedance'], order=(1, 2, 0))
#     .flip(axis=0, src=['images', 'impedance'],
#           seed=P(R('uniform', 0, 1)), p=0.3)
#     .flip(axis=1, src=['images', 'impedance'],
#           seed=P(R('uniform', 0, 1)), p=0.3)
#     .transpose(src=['images', 'impedance'], order=(2, 0, 1))

    # Training
    # Normalized impedance is passed as the `masks` (target) argument; the fetched loss
    # is appended to the 'loss_history' pipeline variable.
    .train_model('model',
                 fetches='loss',
                 images=B('images'),
                 masks=B('impedance'),
                 save_to=V('loss_history', mode='a'))

    # Compute metrics
    # Once on the fixed real batch (comparable between iterations) ...
    .compute_metric(model=M('model'), batch=FIXED_BATCH,
                    seismic_normalize=REAL_SEISMIC_NORMALIZATION_MODE,
                    impedance_denormalize=IMPEDANCE_DENORMALIZATION_MODE,
                    **logging, save_to=V('ssim_on_fixed_real', mode='a'))

    # ... and once with `real_pipeline` handed over instead of a ready batch.
    .compute_metric(model=M('model'), pipeline=real_pipeline,
                    seismic_normalize=REAL_SEISMIC_NORMALIZATION_MODE,
                    impedance_denormalize=IMPEDANCE_DENORMALIZATION_MODE,
                    **logging, save_to=V('ssim_on_random_real', mode='a'))
    
    # Visualizations (same fixed-batch / pipeline pair as the metrics above), then the
    # current synthetic batch and the overall progress of 'ssim_on_random_real'.
    .show_results_real(model=M('model'), batch=FIXED_BATCH,
                       seismic_normalize=REAL_SEISMIC_NORMALIZATION_MODE,
                       impedance_denormalize=IMPEDANCE_DENORMALIZATION_MODE,
                       **logging)
    .show_results_real(model=M('model'), pipeline=real_pipeline,
                       seismic_normalize=REAL_SEISMIC_NORMALIZATION_MODE,
                       impedance_denormalize=IMPEDANCE_DENORMALIZATION_MODE,
                       **logging)
    .show_result_synthetic(model=M('model'), batch=B(), **logging)
    .show_progress(model=M('model'), metrics=V('ssim_on_random_real'), **logging)
) << Dataset(100000, batch_class=SeismicCropBatch)

In [None]:
%%time
# Pull one batch of BATCH_SIZE items through the whole training pipeline and time it.
# NOTE(review): the pipeline contains `train_model` and the metric/plot actions, so this
# presumably performs one training step before the main run — confirm that is intended.
SYNTHETIC_BATCH = train_pipeline.next_batch(BATCH_SIZE)

In [None]:
# Display the `iter_info` attribute of the model stored in the training pipeline.
train_pipeline.m('model').iter_info

In [None]:
%%time
# Main training run: N_ITERS iterations with batches of BATCH_SIZE. Exceptions are not
# ignored, so any failing action stops the run. The progress bar is refreshed every 10
# iterations and is configured with the loss, both real-data SSIM series and 'gpu'
# as its graphs.
train_pipeline.run(batch_size=BATCH_SIZE, n_iters=N_ITERS, ignore_exceptions=False,
                   bar={'bar': 'n', 'frequency': 10,
                        'graphs': ['loss_history', 'ssim_on_random_real', 'ssim_on_fixed_real', 'gpu']})

# Plot the recorded loss, skipping its first 100 values.
plot_loss(train_pipeline.v('loss_history')[100:])

# Validation synthetic

In [None]:
# Validation pipeline on freshly generated synthetic data: same generation and
# normalization steps as in training, followed by prediction with the trained model.
val_pipeline = (
    Pipeline()
    # Initialize pipeline variables and model
    # The model is imported from `train_pipeline`, so that pipeline must be trained first.
    .import_model(name='model', source=train_pipeline)

    # Load data/masks
    # NOTE(review): `samplers` is not defined in any cell visible in this notebook —
    # see the same call in the training pipeline; it must exist before this cell runs.
    .make_data(size=B.size, synthetic_shape=SYNTHETIC_SHAPE,
               **samplers, save_to=(B.images, B.impedance))
    
    .normalize_seismic(array=B('images'), **SYNTHETIC_SEISMIC_NORMALIZATION_MODE,
                       save_to=B('images'))
    # Identity "normalization": stores the impedance under `impedance_original`
    # before the next action overwrites `impedance` with its normalized version.
    .normalize_impedance(array=B('impedance'), function=lambda array: array,
                         save_to=B('impedance_original'))
    .normalize_impedance(array=B('impedance'), **IMPEDANCE_NORMALIZATION_MODE,
                         save_to=B('impedance'))
    
    # Predict with model
    # Predictions are stored in the batch as `predictions`.
    .predict_model('model',
                   B('images'),
                   fetches='predictions',
                   save_to=B('predictions'))

) << Dataset(100000, batch_class=SeismicCropBatch)

In [None]:
# One validation batch of size 32; the plotting cells below read `images`, `impedance`
# and `predictions` from it.
batch = val_pipeline.next_batch(32)

In [None]:
# For the first four validation samples: compare true vs predicted impedance and check
# how well the seismic re-created from the prediction matches the input seismic.
for idx in range(4):
    # Make data
    synthetic = batch.images[idx, 0, :, :]
    true_impedance = batch.impedance[idx, 0, :, :]
    predicted_impedance = batch.predictions[idx, 0, :, :]
    diff_impedance = np.diff(predicted_impedance, axis=-1)

    # Denormalize the prediction, convert it to seismic and adjust it to the input.
    # ricker_width is fixed at 4.3 here, while `parameter_generator` samples it from
    # [3.3, 5.5) for every generated sample.
    denormalized_impedance = denormalize_impedance(predicted_impedance, **IMPEDANCE_DENORMALIZATION_MODE)
    predicted_seismic = impedance_to_seismic(denormalized_impedance, ricker_width=4.3)
    predicted_seismic = adjust_seismic(predicted_seismic, real=synthetic)

    # The inputs are floating point, so `data_range` must be given explicitly: recent
    # scikit-image raises ValueError without it, older releases silently assume the
    # dtype range. The range of the reference image is used.
    # NOTE(review): confirm `utils.compute_metric` uses the same convention so that the
    # numbers here stay comparable with the training-time SSIM curves.
    ssim_coefficient = ssim(synthetic, predicted_seismic,
                            data_range=synthetic.max() - synthetic.min())

    # Actual plot
    plot_image([synthetic,
                true_impedance, predicted_impedance,
                diff_impedance, predicted_seismic],
               separate=True, figsize=(24, 8),
               title=['Synthetic',
                      'True\nimpedance', 'Predicted\nimpedance',
                      'Predicted\nimpedance diff', 'Restored seismic'],
               suptitle_label=f'idx={idx}\nSSIM={ssim_coefficient:2.2f}',
               labeltop=False,
               cmap='gray', colorbar=True,)

### Traces

In [None]:
# Traces are picked along the first spatial axis; only the central one is shown.
width = SYNTHETIC_SHAPE[0]
trace_nums = (width // 2,)


for idx in range(4):
    # (true, predicted) impedance of the sample and their differences along the last axis
    impedances = [batch.impedance[idx, 0, :, :], batch.predictions[idx, 0, :, :]]
    impedance_diffs = [np.diff(section, axis=-1) for section in impedances]

    for trace_num in trace_nums:
        # Make data: one subplot for the impedance curves, one for their differences
        plot_data = [[section[trace_num] for section in impedances],
                     [section[trace_num] for section in impedance_diffs]]

        # Actual plot
        plot_image(plot_data,
                   mode='curve', figsize=(22, 4),
                   xlabel='HEIGHT',
                   ylabel=['Impedance', 'Impedance diff'],
                   title=['Impedance', 'Impedance diff'],
                   suptitle_label=f'idx={idx}, trace_num={trace_num}',
                   legend_label=[['True', 'Prediction']] * 2)

# Validation real (crops)

In [None]:
# Fresh batch of real crops; FIXED_BATCH could be used here instead.
# NOTE(review): the number of crops is set by the pipeline config key 'batch_size'
# inside `make_locations` (last set to REAL_BATCH_SIZE) — confirm what the positional
# 32 controls for this pipeline.
batch = real_pipeline.next_batch(32) # FIXED_BATCH
# Normalization overwrites `batch.images` in place. If FIXED_BATCH is substituted above,
# this would also modify the batch used by the training pipeline, and re-running the
# cell would normalize it again.
batch.images = normalize_seismic(batch.images, **REAL_SEISMIC_NORMALIZATION_MODE)
# From here on `model` is the trained model taken from the pipeline; it replaces the
# network object bound to the same name in the model-configuration cell.
model = train_pipeline.m('model')

# Make predictions and seismic
# These names (`seismic_images`, `horizon_masks`, `predicted_impedance`,
# `predicted_seismic`) are read by the plotting cells that follow.
seismic_images = batch.images
horizon_masks = batch.masks
predicted_impedance = model.predict(images=seismic_images, fetches='predictions')

# Denormalize the predicted impedance and convert it back to seismic.
denormalized_impedance = denormalize_impedance(predicted_impedance, **IMPEDANCE_DENORMALIZATION_MODE)
predicted_seismic = impedance_to_seismic(denormalized_impedance)

In [None]:
# For the first three real crops: show the crop, its horizon mask, the predicted
# impedance and the seismic re-created from that prediction.
for idx in range(3):
    field_name = batch.unsalt(batch.indices[idx])

    # Make data
    seismic_image = seismic_images[idx, 0, :, :]
    horizon_mask = horizon_masks[idx, 0, :, :]
    predicted_impedance_ = predicted_impedance[idx, 0, :, :]
    diff_impedance = np.diff(predicted_impedance_, axis=-1)
    # `predicted_seismic` is indexed without a channel axis here
    predicted_seismic_ = predicted_seismic[idx, :, :]
    predicted_seismic_ = adjust_seismic(predicted_seismic_, real=seismic_image)

    # The inputs are floating point, so `data_range` must be given explicitly: recent
    # scikit-image raises ValueError without it, older releases silently assume the
    # dtype range. The range of the real crop is used.
    # NOTE(review): confirm `utils.compute_metric` uses the same convention so that the
    # numbers here stay comparable with the training-time SSIM curves.
    ssim_coefficient = ssim(seismic_image, predicted_seismic_,
                            data_range=seismic_image.max() - seismic_image.min())

    # Actual plot
    plot_image([seismic_image, horizon_mask,
                predicted_impedance_, diff_impedance,
                predicted_seismic_],
               separate=True, figsize=(24, 8),
               title=[f'Crop from\n{field_name}', 'Horizon mask',
                      'Predicted\nimpedance', 'Predicted\nimpedance diff',
                      'Restored seismic'],
               suptitle_label=f'idx={idx}\nSSIM={ssim_coefficient:2.2f}',
               labeltop=False,
               cmap='gray', colorbar=True,)

### Traces

In [None]:
# Three traces along the first spatial axis: at one third, one half and five sixths
# of its length.
width = SYNTHETIC_SHAPE[0]
trace_nums = (width // 3, width // 2, width // 2 + width // 3)

for idx in range(1):
    field_name = batch.unsalt(batch.indices[idx])

    # Make data
    predicted_impedance_ = predicted_impedance[idx, 0, :, :]
    diff_impedance = np.diff(predicted_impedance_, axis=-1)

    impedance_traces = [predicted_impedance_[trace_num] for trace_num in trace_nums]
    impedance_diff_traces = [diff_impedance[trace_num] for trace_num in trace_nums]

    # Actual plot
    # The field name and crop index go into the suptitle so the figure identifies what
    # it shows, as the synthetic trace plots do; `field_name` was computed but unused.
    plot_image([impedance_traces, impedance_diff_traces],
               mode='curve', figsize=(22, 4),
               xlabel='HEIGHT',
               ylabel=['Impedance', 'Impedance diff'],
               title=['Impedance', 'Impedance diff'],
               suptitle_label=f'{field_name}, idx={idx}',
               legend_label=[[f'Trace {trace_num}' for trace_num in trace_nums]] * 2)

# Crossplot: seismic VS re-created seismic

In [None]:
# Crossplot of real seismic amplitudes against the re-created ones for the first `n` crops.
n = 3
# squeeze=False makes `plt.subplots` always return a 2D array of axes, so `ax[idx]`
# also works for n == 1 (where the default would return a single Axes object).
fig, ax = plt.subplots(1, n, figsize=(22, 4), squeeze=False)
ax = ax.ravel()
# None keeps every trace: indexing with None only adds an axis, which `.ravel()` below
# undoes. Set it to a slice to restrict the plotted points.
trace_slice = None

for idx in range(n):

    # Make data
    seismic_image = seismic_images[idx, 0, :, :][trace_slice]
    predicted_seismic_ = predicted_seismic[idx, :, :][trace_slice]

    ax[idx].plot(seismic_image.ravel(), predicted_seismic_.ravel(), 'go', alpha=.5)
    # A dummy one-point curve is drawn on the same axis so that `plot_image` applies
    # the title and the axis labels.
    plot_image([0.0], mode='curve', ax=ax[idx],
               title=f'idx={idx}, trace_slice={trace_slice}',
               xlabel='true seismic',
               ylabel='re-created seismic')

# Validation: slide

In [None]:
# Full-slide validation: for every field, predict impedance on slide 100 along each of
# the two axes and compare the re-created seismic with the real slide.
# `model` is the trained model taken from `train_pipeline` in the crop-validation cell.
# Slide-specific names are used so that this cell does not overwrite the
# `predicted_impedance` / `predicted_seismic` arrays of the crop-validation cells
# (re-running those plotting cells afterwards would otherwise pick up slide data).
for field in real_dataset:
    for location, axis in [[100, 0], [100, 1]]:
        # Make data: add batch and channel axes and crop both spatial axes to multiples
        # of 32 — presumably so that the network's resampling stages divide evenly; confirm.
        slide = field.load_slide(loc=location, axis=axis)
        slide = slide[np.newaxis, np.newaxis, :(slide.shape[0] // 32) * 32, :(slide.shape[1] // 32) * 32]
        slide = normalize_seismic(slide, **REAL_SEISMIC_NORMALIZATION_MODE)

        slide_impedance = model.predict(images=slide, fetches='predictions')
        slide_denormalized = denormalize_impedance(slide_impedance, **IMPEDANCE_DENORMALIZATION_MODE)
        slide_seismic = impedance_to_seismic(slide_denormalized)
        slide_seismic = adjust_seismic(slide_seismic, real=slide)

        # Drop the batch/channel axes for plotting
        slide = slide[0, 0, :, :]
        slide_impedance = slide_impedance[0, 0, :, :]
        slide_impedance_diff = np.diff(slide_impedance, axis=-1)
        slide_seismic = slide_seismic[0, :, :]

        # The inputs are floating point, so `data_range` must be given explicitly:
        # recent scikit-image raises ValueError without it, older releases silently
        # assume the dtype range. The range of the real slide is used.
        # NOTE(review): confirm `utils.compute_metric` uses the same convention.
        ssim_coefficient = ssim(slide, slide_seismic,
                                data_range=slide.max() - slide.min())

        # Actual plot
        plot_image([slide, slide_impedance, slide_impedance_diff, slide_seismic],
                   separate=True, figsize=(24, 8),
                   title=['Seismic slide',
                          'Predicted\nimpedance', 'Predicted\nimpedance diff',
                          'Restored seismic'],
                   suptitle_label=f'Slide at {location} {"ILINE" if axis==0 else "CROSSLINE"}\nSSIM={ssim_coefficient:2.2f}',
                   labeltop=False,
                   cmap='gray', colorbar=True,)