In [11]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
%matplotlib inline
import skimage.transform            
import skimage.morphology           
import os
from skimage.io import imread, imshow, imread_collection, concatenate_images
import pandas as pd    
import re

In [12]:
def sorted_aphanumeric(data):
    """Return the strings in ``data`` sorted in natural (human) order.

    Runs of ASCII digits are compared by their numeric value and all other
    text is compared case-insensitively, so ``'mask2.png'`` sorts before
    ``'mask10.png'``.

    Parameters
    ----------
    data : iterable of str
        Names to sort.

    Returns
    -------
    list of str
        A new sorted list.
    """
    def alphanum_key(key):
        # Because the pattern has a capturing group, re.split alternates
        # text / digit-run / text / ...: odd positions are always runs of
        # ASCII digits. Choosing the conversion by position instead of
        # str.isdigit() avoids int() raising on characters (e.g. superscript
        # digits) for which isdigit() is True but int() has no value.
        chunks = re.split(r'([0-9]+)', key)
        return [int(chunk) if idx % 2 else chunk.lower()
                for idx, chunk in enumerate(chunks)]

    return sorted(data, key=alphanum_key)

def rle_of_binary(x):
    """Run-length encode the pixels of ``x`` that are equal to 1.

    ``x`` is transposed and flattened before scanning, so for a 2-D array the
    pixels are visited column by column. The result is a flat list
    ``[start_1, length_1, start_2, length_2, ...]`` in which every start is
    a 1-based position in that flattened order. An array without any
    matching pixel gives an empty list.
    """
    positions = np.where(x.T.flatten() == 1)[0]
    runs = []
    previous = -2  # sentinel: the first hit can never be adjacent to it
    for pos in positions:
        if pos - previous > 1:
            # Gap since the last hit: open a new run of length one.
            runs += [pos + 1, 1]
        else:
            # Adjacent to the last hit: grow the current run.
            runs[-1] += 1
        previous = pos
    return runs

def mask_to_rle(mask, cutoff=.1, min_object_size=1.):
    """Yield one run-length encoding per labelled object in ``mask``.

    The mask is thresholded with ``mask > cutoff`` and split into labelled
    regions with ``skimage.morphology.label``. Regions with fewer than
    ``min_object_size`` pixels are discarded. Each remaining region is
    encoded with ``rle_of_binary``.

    Parameters
    ----------
    mask : numpy.ndarray
        Mask image; pixels strictly greater than ``cutoff`` are foreground.
    cutoff : float
        Foreground threshold applied to the values of ``mask``.
    min_object_size : float
        Minimum pixel count a region needs in order to be kept.

    Yields
    ------
    list
        The ``rle_of_binary`` encoding of one region, in label order.
    """
    lab_mask = skimage.morphology.label(mask > cutoff)

    (mask_labels, mask_sizes) = np.unique(lab_mask, return_counts=True)
    too_small = mask_sizes < min_object_size
    if too_small.any():
        for n in mask_labels[too_small]:
            lab_mask[lab_mask == n] = 0
        # Relabel so the surviving regions are numbered 1..N without gaps.
        # lab_mask now holds label ids, not intensities, so it is compared
        # against 0: using `cutoff` here would drop every region whose id is
        # <= cutoff whenever a cutoff of 1 or more is passed.
        lab_mask = skimage.morphology.label(lab_mask > 0)

    for i in range(1, lab_mask.max() + 1):
        yield rle_of_binary(lab_mask == i)
        
def rle_to_mask(rle, img_shape):
    """Rebuild a mask from a collection of run-length encodings.

    ``rle`` is a sequence of encodings, each a flat sequence
    ``[start, length, start, length, ...]`` with 1-based starts. Every
    encoded pixel is set to 1 in a zero-filled flat buffer, which is then
    reshaped to ``(img_shape[1], img_shape[0])`` and transposed, i.e. the
    runs are interpreted column by column.

    Assumes ``img_shape`` is a 2-D ``(rows, cols)`` shape - TODO confirm
    against callers.
    """
    flat = np.zeros(img_shape).flatten()
    for encoding in rle:
        for k in range(0, len(encoding), 2):
            run_length = encoding[k + 1]
            for offset in range(run_length):
                # Starts are 1-based, the buffer is 0-based.
                flat[encoding[k] - 1 + offset] = 1
    n_rows, n_cols = img_shape[0], img_shape[1]
    return flat.reshape(n_cols, n_rows).T

In [13]:
train_dir = '../data/stage1_train/'
img_dir_name = 'images'
mask_dir_name = 'masks'

# Preview: print "<sample id> , <RLE>" for every mask file of the first
# training sample only (see the `break` at the end of the outer loop).
# The sample directories are sorted because os.walk lists them in arbitrary
# order, which would make "the first sample" differ between runs.
for dir_name in sorted(next(os.walk(train_dir))[1]):
    img_dir = os.path.join(train_dir, dir_name, img_dir_name)  # not used in this cell
    mask_dir = os.path.join(train_dir, dir_name, mask_dir_name)
    dirlist = sorted_aphanumeric(os.listdir(mask_dir))
    for mask_file in dirlist:
        # Build the path from mask_dir so it always matches the directory
        # that was actually listed above.
        fl = os.path.join(mask_dir, mask_file)
        mask_ = cv2.imread(fl, 0)  # flag 0: load as single-channel grayscale
        if mask_ is None:
            # cv2.imread signals an unreadable / non-image file by returning
            # None; fail here with the offending path instead of deep inside
            # mask_to_rle.
            raise OSError('could not read mask image: ' + fl)
        rle = list(mask_to_rle(mask_, min_object_size=1))
        if not rle:
            # Nothing above the cutoff in this mask: there is no RLE to print.
            continue
        rle1 = pd.Series(rle).apply(lambda x: ' '.join(str(y) for y in x))
        # Only the first object of each mask file is printed.
        print(dir_name, ',', rle1.iloc[0])
    break  # stop after the first sample directory

00071198d059ba7f5914a526d124d28e6d010c92466da21d4a04cd5413362552 , 6908 1 7161 8 7417 8 7672 9 7928 9 8184 9 8440 9 8696 9 8952 9 9209 8 9465 8 9722 7 9978 7 10235 6 10493 4 10752 1
00071198d059ba7f5914a526d124d28e6d010c92466da21d4a04cd5413362552 , 36269 7 36523 11 36778 13 37033 15 37288 17 37543 18 37799 18 38054 19 38310 19 38565 20 38821 20 39077 20 39333 19 39589 19 39845 18 40101 18 40357 17 40614 15 40870 15 41127 13 41384 10 41641 8 41899 4
00071198d059ba7f5914a526d124d28e6d010c92466da21d4a04cd5413362552 , 19919 6 20174 8 20429 10 20685 11 20941 12 21197 12 21453 13 21709 13 21965 13 22221 13 22477 13 22734 12 22990 12 23246 11 23503 9 23761 6
00071198d059ba7f5914a526d124d28e6d010c92466da21d4a04cd5413362552 , 18671 6 18926 8 19181 9 19436 10 19691 11 19947 11 20203 12 20459 12 20715 12 20971 13 21227 14 21483 14 21739 14 21995 14 22252 13 22510 10 22768 7 23025 5 23284 2 23541 1
00071198d059ba7f5914a526d124d28e6d010c92466da21d4a04cd5413362552 , 40158 3 40413 5 40669 5 40925 5 4