In [None]:
import warnings
warnings.filterwarnings('ignore')
import os
import gc
import cv2
import sys
import json
import time
import pickle
import shutil
import numba
import numpy as np
import pandas as pd 
import tifffile as tiff
import rasterio
from rasterio.windows import Window
import tensorflow as tf
import matplotlib.pyplot as plt
import tensorflow.keras.backend as K
from tensorflow.keras import Model, Sequential
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import Sequence
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import *
import segmentation_models as sm
from segmentation_models import Unet
from tqdm import tqdm
# Report the TensorFlow build the kernel is running.
print('tensorflow version:', tf.__version__)
# Expose only the CUDA device with index 1 to this process.
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
# Looping over an empty list is a no-op, so no emptiness check is needed.
for gpu_device in gpu_devices:
    print('device available:', gpu_device)
# Never truncate columns when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

In [None]:
# --- run switches ---------------------------------------------------------
TEST = True      # True: predict on the test images; False: on the train images
KAGGLE = False   # True: use the Kaggle input layout; False: local folders
VER = 'v0'       # version tag of the trained models to load

# --- locations ------------------------------------------------------------
DATA_PATH = '../input/hubmap-kidney-segmentation' if KAGGLE else './data'
MDLS_PATH = f'../input/kidney-models-{VER}' if KAGGLE else f'./models_{VER}'

# --- inference settings ---------------------------------------------------
THRESHOLD = .5            # a tile pixel votes "mask" when its prediction exceeds this
VOTERS = 3                # minimum accumulated votes for a pixel to be kept
PRED_BATCH_SIZE = 256     # not referenced by the visible inference code
TTAS = [4, 5.25, 6.5, 8]  # tile-size multipliers, one per TTA pass; the pass index selects the flip
MIN_OVERLAP = 32          # minimum overlap handed to make_grid
IDNT = rasterio.Affine(1, 0, 0, 0, 1, 0)  # identity transform passed to rasterio.open
SUB_PATH = f'{DATA_PATH}/test' if TEST else f'{DATA_PATH}/train'

# Reference point for the "time elapsed" reports printed later.
start_time = time.time()

In [None]:
def enc2mask(encs, shape):
    """Decode run-length encodings into one labelled mask.

    Args:
        encs: iterable of RLE strings of the form
            ``"start length start length ..."`` with 1-based starts.
            Missing entries (``None`` or a float NaN) are skipped.
        shape: two-element shape of the flat buffer before transposition;
            callers in this notebook pass ``(img.shape[1], img.shape[0])``.

    Returns:
        ``np.uint8`` array of shape ``(shape[1], shape[0])``. Pixels covered
        by the m-th encoding hold ``m + 1``; all other pixels are 0. Runs
        advance along the first axis of the returned array.
    """
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for m, enc in enumerate(encs):
        # The `np.float` alias was removed in NumPy 1.24; the builtin `float`
        # is equivalent and also matches `np.float64` NaNs coming from pandas.
        if enc is None or (isinstance(enc, float) and np.isnan(enc)):
            continue
        s = enc.split()
        # Tokens come in (start, length) pairs; a trailing unpaired token is ignored.
        for start, length in zip(s[0::2], s[1::2]):
            begin = int(start) - 1  # RLE starts are 1-based
            img[begin:begin + int(length)] = 1 + m
    return img.reshape(shape).T

def rle_encode_less_memory(img):
    """Run-length encode a binary mask in column-major order.

    Args:
        img: 2-D array whose non-zero pixels form the mask (expected to be
            binary, e.g. ``np.uint8`` with values 0/1).

    Returns:
        String ``"start length start length ..."`` with 1-based starts, or an
        empty string when the mask has no foreground (or no pixels at all).

    Runs that touch the very first or very last pixel are encoded exactly.
    The boundary handling is done on the (small) array of run boundaries, so
    no padded copy of the full pixel buffer is needed and the flattened copy
    is never modified.
    """
    pixels = img.T.flatten()
    if pixels.size == 0:
        return ''
    # 1-based positions at which the value changes with respect to the previous pixel.
    runs = np.flatnonzero(pixels[1:] != pixels[:-1]) + 2
    if pixels[0]:
        # A run is already open at the first pixel.
        runs = np.concatenate(([1], runs))
    if pixels[-1]:
        # Close the run that reaches the last pixel.
        runs = np.concatenate((runs, [pixels.size + 1]))
    # Boundaries now alternate start/end; turn every end into a length.
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)

@numba.njit()
def rle_numba(pixels):
    """Run-length encode a flat binary array.

    Args:
        pixels: 1-D array in which the value 1 marks foreground.

    Returns:
        List of integers alternating ``start, length, start, length, ...``
        with 1-based starts. Empty when there is no foreground or no input.
    """
    size = len(pixels)
    points = []
    if size == 0:
        # Nothing to index; avoids reading pixels[0] of an empty array.
        return points
    # `in_run` is True while the most recently appended value is an open start.
    in_run = pixels[0] == 1
    if in_run:
        # A run beginning at the first pixel starts at position 1 (1-based).
        points.append(1)
    for i in range(1, size):
        if pixels[i] != pixels[i - 1]:
            if in_run:
                # The run ended just before position i + 1: store its length.
                points.append(i + 1 - points[-1])
            else:
                # A new run starts at 1-based position i + 1.
                points.append(i + 1)
            in_run = not in_run
    if in_run:
        # Close a run that reaches the last pixel.
        points.append(size - points[-1] + 1)
    return points

def rle_numba_encode(image):
    """Return the RLE string of `image`, scanned in column-major order.

    The array is flattened in Fortran order, encoded by `rle_numba`, and the
    resulting integers are joined with single spaces.
    """
    column_major = image.flatten(order='F')
    return ' '.join(map(str, rle_numba(column_major)))

In [None]:
def dice_coef(y_true, y_pred, smooth=1):
    """Smoothed Dice coefficient computed over all elements of both tensors.

    Both inputs are flattened; the result is
    ``(2 * sum(t * p) + smooth) / (sum(t) + sum(p) + smooth)``.
    """
    truth = K.flatten(y_true)
    guess = K.flatten(y_pred)
    overlap = K.sum(truth * guess)
    numerator = 2 * overlap + smooth
    denominator = K.sum(truth) + K.sum(guess) + smooth
    return numerator / denominator

def dice_loss(y_true, y_pred, smooth=1):
    """Dice loss: one minus the smoothed Dice coefficient."""
    coefficient = dice_coef(y_true, y_pred, smooth)
    return 1 - coefficient

def bce_dice_loss(y_true, y_pred):
    """Sum of binary cross-entropy and Dice loss for the same prediction."""
    cross_entropy = binary_crossentropy(y_true, y_pred)
    overlap_penalty = dice_loss(y_true, y_pred)
    return cross_entropy + overlap_penalty

def get_model(backbone, path, input_shape, classes=1, learning_rate=.001):
    """Build and compile a U-Net with an EfficientNet encoder.

    Args:
        backbone: one of ``'efficientnetb0'``, ``'efficientnetb1'``,
            ``'efficientnetb2'``.
        path: directory holding the matching
            ``efficientnet-bN_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5``
            encoder weights file.
        input_shape: shape passed to ``Unet`` as ``input_shape``.
        classes: number of output channels.
        learning_rate: learning rate of the Adam optimizer.

    Returns:
        The model compiled with ``bce_dice_loss`` and the ``dice_coef`` metric.

    Raises:
        AttributeError: if ``backbone`` is not one of the supported names.
    """
    supported = ('efficientnetb0', 'efficientnetb1', 'efficientnetb2')
    if backbone not in supported:
        raise AttributeError(
            f'unsupported backbone {backbone!r}; expected one of {supported}'
        )
    # The weight files differ only in the 'b0' / 'b1' / 'b2' variant tag.
    variant = backbone[-2:]
    weights = f'{path}/efficientnet-{variant}_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5'
    # `lr` is a deprecated alias that newer Keras releases reject;
    # `learning_rate` is the supported keyword.
    optimizer = Adam(learning_rate=learning_rate)
    model = Unet(backbone_name=backbone,
                 input_shape=input_shape,
                 classes=classes,
                 activation='sigmoid',
                 encoder_weights=weights)
    model.compile(optimizer=optimizer,
                  loss=bce_dice_loss,
                  metrics=[dice_coef])
    return model

In [None]:
def make_grid(shape, window=256, min_overlap=32):
    """Tile a 2-D extent with overlapping windows.

    Args:
        shape: ``(x, y)`` extent to cover.
        window: side length of each window.
        min_overlap: overlap used to decide how many windows are placed
            along each axis.

    Returns:
        ``np.int64`` array of shape ``(nx * ny, 4)`` whose rows are
        ``(x1, x2, y1, y2)``, ordered with the y index varying fastest.
    """
    def _axis_bounds(length):
        # Number of windows, their evenly spaced starts and their clipped ends.
        count = length // (window - min_overlap) + 1
        starts = np.linspace(0, length, num=count, endpoint=False, dtype=np.int64)
        starts[-1] = length - window  # last window is aligned with the far edge
        stops = (starts + window).clip(0, length)
        return count, starts, stops

    x, y = shape
    nx, x1, x2 = _axis_bounds(x)
    ny, y1, y2 = _axis_bounds(y)

    grid = np.zeros((nx, ny, 4), dtype=np.int64)
    # Broadcast the per-axis bounds over the (nx, ny) lattice.
    grid[:, :, 0] = x1[:, None]
    grid[:, :, 1] = x2[:, None]
    grid[:, :, 2] = y1[None, :]
    grid[:, :, 3] = y2[None, :]
    return grid.reshape(nx * ny, 4)

def flip(img, axis=0):
    """Reverse an array along its first and/or second dimension.

    ``axis`` is a mode code, not a NumPy axis:
    1 reverses the first dimension, 2 reverses the second, 3 reverses both,
    and any other value returns ``img`` unchanged (the same object).
    """
    backwards = slice(None, None, -1)
    everything = slice(None)
    index_by_mode = {
        1: (backwards, everything),
        2: (everything, backwards),
        3: (backwards, backwards),
    }
    index = index_by_mode.get(axis)
    if index is None:
        return img
    return img[index]

In [None]:
# Read the settings stored next to the model checkpoints; later cells use the
# 'img_size', 'resize', 'folds' and 'backbone' entries.
params_file = f'{MDLS_PATH}/params.json'
with open(params_file) as file:
    params = json.load(file)
print('loaded params:', params)

In [None]:
# Collect the TIFF images to segment.
# - `endswith` keeps out names that merely contain '.tiff'
#   (for example sidecar files such as 'x.tiff.aux.xml').
# - `sorted` makes the processing order, and therefore the row order of the
#   submission, reproducible; os.listdir returns entries in arbitrary order.
img_files = sorted(x for x in os.listdir(SUB_PATH) if x.endswith('.tiff'))
print('images idxs:', img_files)

In [None]:
# Predict a mask for every image: each TTA pass tiles the image at its own
# tile size, averages the fold models per tile, thresholds the result and adds
# one vote per predicted pixel. Pixels with at least VOTERS votes are kept.
subm = {}
for i_img, img_file in enumerate(img_files):
    print('-' * 20, img_file, '-' * 20)
    # Context manager: the dataset handle is closed even if inference raises.
    with rasterio.open(os.path.join(SUB_PATH, img_file), transform=IDNT) as img_data:
        print('img shape:', img_data.shape)
        img_preds = np.zeros(img_data.shape, dtype=np.uint8)  # per-pixel vote counter
        for i_mode, mode in enumerate(TTAS):
            tile_size = int(params['img_size'] * mode)          # side of the model input
            tile_resized = int(tile_size * params['resize'])    # side of the window read from the image
            slices = make_grid(
                img_data.shape,
                window=tile_resized,
                min_overlap=MIN_OVERLAP
            )
            # The input shape depends on the TTA pass, so the fold models are
            # rebuilt for every pass (and, as written, for every image).
            models = []
            folds = list(range(params['folds']))
            for n_fold in folds:
                checkpoint_path = f'{MDLS_PATH}/model_{n_fold}.hdf5'
                model = get_model(
                    params['backbone'],
                    MDLS_PATH,
                    input_shape=(tile_size, tile_size, 3)
                )
                model.load_weights(checkpoint_path)
                models.append(model)
                print('loaded:', checkpoint_path)
            for (x1, x2, y1, y2) in tqdm(slices, desc=f'{img_file} tta {i_mode}'):
                # NOTE(review): assumes bands 1-3 hold the colour channels of a
                # single-dataset TIFF — confirm for every input file.
                img = img_data.read(
                    [1, 2, 3],
                    window=Window.from_slices((x1, x2), (y1, y2))
                )
                img = np.moveaxis(img, 0, -1)  # (bands, rows, cols) -> (rows, cols, bands)
                img = cv2.resize(img, (tile_size, tile_size))
                img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
                img = flip(img, axis=i_mode)   # the pass index selects the flip
                img = np.expand_dims(img, 0)
                img = img.astype(np.float32) / 255
                pred = np.zeros((tile_size, tile_size), dtype=np.float32)
                for model in models:
                    # verbose=0: one call per tile would otherwise flood the
                    # cell output with progress bars on recent TensorFlow.
                    pred += np.squeeze(model.predict(img, verbose=0)) / len(models)
                pred = flip(pred, axis=i_mode)  # undo the flip before voting
                pred = cv2.resize(pred, (tile_resized, tile_resized))
                # NOTE(review): tiles of the same pass overlap, and every tile
                # adds its own vote, so counts inside overlaps can exceed
                # len(TTAS) — confirm this is the intended voting rule.
                img_preds[x1:x2, y1:y2] += (pred > THRESHOLD).astype(np.uint8)
            del model, models, img, pred, slices
            # Models are created in a loop; reset Keras' global state so it
            # does not keep growing across passes and images.
            K.clear_session()
            gc.collect()
    print('img max:', np.max(img_preds), '| voters:', VOTERS)
    rle_pred = rle_encode_less_memory((img_preds >= VOTERS).astype(np.uint8))
    del img_preds; gc.collect()
    subm[i_img] = {'id':img_file.replace('.tiff', ''), 'predicted': rle_pred}
    del img_data, rle_pred; gc.collect()

# Wall-clock time since the config cell set `start_time`.
elapsed_time = time.time() - start_time
minutes, seconds = divmod(elapsed_time, 60)
print(f'time elapsed: {minutes:.0f} min {seconds:.0f} sec')

In [None]:
# One row per processed image, with the columns 'id' and 'predicted'.
df_sub = pd.DataFrame.from_dict(subm, orient='index')
df_sub

In [None]:
# Visual sanity check: overlay the predicted mask of the first submission row
# on its image and, when running on the train set, the ground-truth mask too.
idx = df_sub.iloc[0].id
img = tiff.imread(os.path.join(SUB_PATH, idx + '.tiff'))
# 5-D arrays: drop the singleton axes and move the leading axis to the end.
if len(img.shape) == 5: img = np.transpose(img.squeeze(), (1, 2, 0))
mask_shape = (img.shape[1], img.shape[0])
msk_p = enc2mask([df_sub.iloc[0].predicted], mask_shape)
print(img.shape)
print(msk_p.shape)
plt.figure(figsize=(16, 16))
plt.imshow(img)
plt.imshow(msk_p, alpha=.4)
if not TEST:
    # Reference masks are only available for the train images.
    df_masks = pd.read_csv(f'{DATA_PATH}/train.csv').set_index('id')
    msk = enc2mask([df_masks.loc[idx, 'encoding']], mask_shape)
    print(msk.shape)
    plt.imshow(msk, alpha=.2)
plt.title(idx)
plt.show()
elapsed_time = time.time() - start_time
print(f'time elapsed: {elapsed_time // 60:.0f} min {elapsed_time % 60:.0f} sec')

In [None]:
# Write the submission without the DataFrame index column.
submission_file = 'submission.csv'
df_sub.to_csv(submission_file, index=False)