In [1]:
% matplotlib inline
import matplotlib.pyplot as plt

In [2]:
import os
import sys
import random
from glob import glob

from multiprocessing import Pool
import numpy as np
import pandas as pd

from tqdm import tqdm
import scipy.misc
from skimage.morphology import label
import skimage

import warnings
warnings.filterwarnings('ignore')

In [3]:
# Notebook-wide parameters.
# Images are cut to their first IMG_CHANNELS channels when they are read.
IMG_CHANNELS = 3
DATA = {
    'TRAIN': '../data/train/',
    'TEST': '../data/test/'
}

# One entry per item found directly under the train / test roots.
train_paths = glob(os.path.join(DATA['TRAIN'], '*'))
test_paths = glob(os.path.join(DATA['TEST'], '*'))

# Stem of the per-sample ``.npy`` mask file that ``load`` stacks onto the image.
MASK_POSTFIX = 'mask_ibn'
seed = 42
P_THRESHOLD = .5
BATCH_SIZE = 16
SIDE = 256
STEP = SIDE // 4

# Number of categories predicted per pixel.
nb_classes = 1

# Seed NumPy's global RNG. This must be a *call*: assigning to
# ``np.random.seed`` would replace the function with an int and seed nothing.
np.random.seed(seed)

SPLIT = .9

In [4]:
def load(path, test_mode=False):
    """Load one sample directory as a single-channel float image.

    The image is read from ``<path>/images/<basename(path)>.png`` and cut to
    its first ``IMG_CHANNELS`` channels.

    * If the channels differ enough (summed per-pixel channel std > 10), every
      channel is min-max normalised on its own, the channels are averaged and
      the result is inverted (``abs(mean - 1)``).
    * Otherwise only the first channel is kept and min-max normalised.

    Parameters
    ----------
    path : str
        Sample directory.
    test_mode : bool
        When true, return the image only.

    Returns
    -------
    numpy.ndarray
        The 2-D image when ``test_mode`` is true; otherwise the image stacked
        depth-wise (``np.dstack``) with the array stored in
        ``<path>/<MASK_POSTFIX>.npy``.
    """
    # NOTE(review): scipy.misc.imread was removed in SciPy 1.2; this call needs
    # an old SciPy (with Pillow) or a replacement reader.
    im = scipy.misc.imread(os.path.join(path, 'images', os.path.basename(path) + '.png'))[..., :IMG_CHANNELS]

    if im.std(-1).sum() > 10:
        low = im.min((0, 1))
        span = im.max((0, 1)) - low
        # A flat channel has zero span; divide by 1 there so it maps to 0
        # instead of producing NaN from 0 / 0.
        span = np.where(span == 0, 1, span)
        im = (im - low) / span
        # ``float`` replaces the ``np.float`` alias removed in NumPy 1.24.
        im = np.abs(im.astype(float).mean(-1) - 1.)
    else:
        im = im[..., 0].astype(float)
        low = im.min()
        span = im.max() - low
        # Same zero-span guard for a completely flat image.
        im = (im - low) / (span if span else 1.)

    if test_mode:
        return im

    return np.dstack([
        im,
        np.load(os.path.join(path, MASK_POSTFIX + '.npy')),
    ])

In [5]:
# std_paths = list()
# for n, path in tqdm(enumerate(train_paths), total=len(train_paths)):
#     img = imread(os.path.join(path, 'images', os.path.basename(path) + '.png'))[..., :IMG_CHANNELS]
#     mask = list()
#     for mask_file in next(os.walk(path + '/masks/'))[2]:
#         mask.append(imread(path + '/masks/' + mask_file))
#     std_paths.append((path, img.std(-1).sum(), len(mask)))
#     mask = np.array(mask).max(0)
#     np.save(path + '/mask', mask)


# std_paths = list()
# for n, path in tqdm(enumerate(test_paths), total=len(test_paths)):
#     img = scipy.misc.imread(os.path.join(path, 'images', os.path.basename(path) + '.png'))[..., :IMG_CHANNELS]
#     std_paths.append((path, img.std(-1).sum()))


# for n, path in tqdm(enumerate(train_paths), total=len(train_paths)):
#     mask = list()
#     for mask_file in next(os.walk(path + '/masks/'))[2]:
#         im = scipy.misc.imread(path + '/masks/' + mask_file)
#         im = scipy.ndimage.distance_transform_edt(im)
#         im[im == 0] = 1e+7
#         mask.append(im)
#     mask = np.array(mask).min(0)
#     mask[mask == 1e+7] = 0
#     mask[scipy.ndimage.binary_dilation((mask == 1), iterations=2) & np.logical_not(mask >= 1.1)] = -1
#     mask[mask > 0] = 1
#     np.save(path + '/mask_edt_bn', mask)


# for n, path in tqdm(enumerate(train_paths), total=len(train_paths)):
#     masks = list()
#     for mask_file in next(os.walk(path + '/masks/'))[2]:
#         masks.append(scipy.misc.imread(path + '/masks/' + mask_file) > 0)
#     mask = np.array(masks).max(0)
#     imask = mask.copy().astype(np.float)
#     borders = np.zeros(np.array(imask.shape) - 4, dtype=np.bool_)
#     borders = np.pad(borders, pad_width=2, mode='constant', constant_values=1)
#     for im in masks:
#         tmp = mask.copy()
#         tmp_borders = borders.copy()
#         tmp[im] = False
#         tmp_borders[im] = False
#         dilated_im = scipy.ndimage.binary_dilation(im)
#         intersection = (
#             (scipy.ndimage.binary_dilation(tmp) & dilated_im) |
#             (tmp_borders & scipy.ndimage.binary_dilation(dilated_im))
#         )
#         imask[intersection] = -1.5
#     np.save(path + '/mask_ibn', imask)


# glob_areas = list()
# for n, path in tqdm(enumerate(train_paths), total=len(train_paths)):

#     masks = list()
#     for mask_file in next(os.walk(path + '/masks/'))[2]:
#         masks.append(scipy.misc.imread(path + '/masks/' + mask_file) > 0)

#     mask = np.array(masks).max(0)
#     imask = mask.copy().astype(np.float)
#     borders = np.zeros(np.array(imask.shape) - 4, dtype=np.bool_)
#     borders = np.pad(borders, pad_width=2, mode='constant', constant_values=1)
    
#     areas = list()
#     for im in masks:
#         areas.append(im.sum())
        
#     mean = np.median(areas)
#     for i, im in enumerate(masks):
#         tmp = mask.copy()
#         tmp[im] = False
#         tmp_borders = borders.copy()
#         tmp_borders[im] = False
#         dilated_im = scipy.ndimage.binary_dilation(im)
#         intersection = (
#             (scipy.ndimage.binary_dilation(tmp) & dilated_im) |
#             (tmp_borders & scipy.ndimage.binary_dilation(dilated_im))
#         )

#         weight = (mean / intersection.sum()) ** (2 / 3)
#         imask[intersection] = -1 * weight

#     for i, im in enumerate(masks):
#         residual_im = im & (imask > 0)
#         imask[residual_im] = mean / residual_im.sum()

#     np.save(path + '/mask_ibn_rel', imask)
#     glob_areas.append(areas)


def _read_image_sizes(paths):
    """Map each sample directory to the ``[rows, columns]`` of its image.

    The image is read from ``<path>/images/<basename(path)>.png`` and cut to
    its first ``IMG_CHANNELS`` channels; only its first two dimensions are
    recorded.
    """
    sizes = dict()
    for path in tqdm(paths, total=len(paths)):
        img = scipy.misc.imread(os.path.join(path, 'images', os.path.basename(path) + '.png'))[..., :IMG_CHANNELS]
        sizes[path] = [img.shape[0], img.shape[1]]
    return sizes


# The messages name the split actually being read; nothing is resized here.
print('Getting train image sizes ... ')
sys.stdout.flush()
sizes_train = _read_image_sizes(train_paths)


print('Getting test image sizes ... ')
sys.stdout.flush()
sizes_test = _read_image_sizes(test_paths)

Getting and resizing test images ... 


100%|██████████| 670/670 [00:03<00:00, 212.00it/s]

Getting and resizing test images ... 



100%|██████████| 65/65 [00:00<00:00, 173.65it/s]


In [6]:
import pickle

# pickle.dump(std_paths, open('../data/test_std_paths.pkl', 'wb'))

# NOTE: unpickling can execute arbitrary code; only load files produced by
# this project. ``with`` closes each file handle once it has been read.
with open('../data/std_paths.pkl', 'rb') as pkl_file:
    std_paths = pickle.load(pkl_file)
with open('../data/test_std_paths.pkl', 'rb') as pkl_file:
    test_std_paths = pickle.load(pkl_file)

In [7]:
# Shuffle the first field of every ``std_paths`` record with a dedicated,
# fixed-seed generator so the order is the same on every run.
rs = np.random.RandomState(seed=12)
paths = [record[0] for record in std_paths]
rs.shuffle(paths)

In [8]:
# Collect, for every test sample, the two saved prediction arrays and the
# normalised input image.
preds_15 = list()
preds_ib = list()
imgs = list()
for path in test_paths:
    y = np.load(os.path.join(path, 'borders_mask_elu16_bce1dice_soft_merging_LB_34.npy'))
    # Disabled experiment: average with the x1.3-zoom prediction. Its load is
    # kept commented out with it, so no unused array is read from disk for
    # every image.
#     x = [
#         np.load(os.path.join(path, 'borders_mask_elu16_bce1dice_soft_merging_LB_34_zoom_x_1.3.npy'))]
#     x = np.mean([y, scipy.ndimage.zoom(x, 1 / 1.3)], axis=0)
#     plt.imshow((x[..., 0] > .95) ^ (y[..., 0] > .95))
#     plt.show()
    preds_15.append(y)
    preds_ib.append(np.load(os.path.join(path, 'filled_mask_elu16_bce1dice_soft_merging_LB_34.npy')))
    imgs.append(load(path, test_mode=True))

In [40]:
def _split_by_seeds(pred9, pred5):
    """Label one image, splitting ``pred5 > .5`` components by ``pred9 > .9`` seeds.

    Both thresholded predictions are labelled with ``scipy.ndimage.label``.
    If the ``pred5`` labelling has at most 8 components it is returned
    unchanged. Otherwise each ``pred5`` component is handled by the number
    of distinct seed labels found inside it:

    * none  - the component is left as it is in the seed labelling;
    * one   - the whole component takes that seed's label;
    * more  - the seeds are grown one dilation step at a time, in ascending
      label order, until every pixel of the component is claimed.
    """
    core_labels, _ = scipy.ndimage.label(pred9 > .9)
    blob_labels, n_blobs = scipy.ndimage.label(pred5 > .5)
    if n_blobs <= 8:
        return blob_labels

    for blob_id in range(1, n_blobs + 1):
        blob = blob_labels == blob_id
        inside = core_labels[blob]
        core_ids = np.unique(inside[inside != 0])
        if len(core_ids) == 0:
            continue
        if len(core_ids) == 1:
            core_labels[blob] = core_ids[0]
            continue

        # Pixels of this component that no seed has claimed yet.
        unclaimed = blob & (core_labels == 0)
        while unclaimed.any():
            for core_id in core_ids:
                grown = scipy.ndimage.binary_dilation(core_labels == core_id)
                core_labels[unclaimed & grown] = core_id
                unclaimed = unclaimed & ~grown

    return core_labels


seeds = list()
for img, pred9, pred5 in zip(imgs, preds_15, preds_ib):
    seeds.append(_split_by_seeds(pred9, pred5))
#     plt.imshow(img)
#     plt.axis('off')
#     plt.show()
#     plt.imshow(seeds[-1][..., 0])
#     plt.axis('off')
#     plt.show()

In [41]:
# Run-length encoding stolen from https://www.kaggle.com/rakhlin/fast-run-length-encoding-python
def rle_encoding(x):
    """Run-length encode the pixels of ``x`` that equal 1.

    ``x`` is transposed and flattened before scanning. The result is a flat
    list ``[start, length, start, length, ...]`` where ``start`` is the
    1-based position of a run in that flattened order.
    """
    positions = np.where(x.T.flatten() == 1)[0]
    runs = []
    last_seen = None
    for position in positions:
        if last_seen is not None and position == last_seen + 1:
            # Directly follows the previous pixel: lengthen the current run.
            runs[-1][1] += 1
        else:
            runs.append([position + 1, 1])
        last_seen = position
    return [value for run in runs for value in run]

def prob_to_rles(x):
    """Yield the run-length encoding of every label ``1 .. x.max()`` in ``x``.

    One encoding is produced per label value, in ascending order, by
    encoding the mask ``x == label``.
    """
    yield from (
        rle_encoding(x == label_id)
        for label_id in range(1, x.max() + 1)
    )

In [42]:
# Flatten the per-image encodings into two parallel lists: one encoding per
# entry in ``rles`` and the id of the owning image in ``new_test_ids``.
new_test_ids = []
rles = []
for idx, sample_dir in tqdm(enumerate(test_paths)):
    sample_rles = list(prob_to_rles(seeds[idx]))
    rles += sample_rles
    new_test_ids += [os.path.basename(sample_dir)] * len(sample_rles)


65it [00:01, 51.29it/s]


In [43]:
# Create submission DataFrame
sub = pd.DataFrame()
sub['ImageId'] = new_test_ids
sub['EncodedPixels'] = pd.Series(rles).apply(lambda x: ' '.join(str(y) for y in x))
sub.to_csv('../data/dsb18.csv', index=False)