In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import time
import shutil
import os
import matplotlib.pyplot as plt
from tqdm import tqdm

# ref.: https://www.kaggle.com/stainsby/fast-tested-rle
def rle_encode(img):
    '''
    Run-length encode a binary mask.

    img: numpy array, 1 - mask, 0 - background
    Returns a string of space-separated "start length" pairs, where each
    start is a 1-indexed position in the flattened (row-major) array.
    An all-background mask yields an empty string.
    '''
    # Pad both ends with background so a run touching either edge of the
    # flattened mask still produces a start and an end transition.
    padded = np.concatenate([[0], img.flatten(), [0]])
    # 1-indexed positions at which the value changes; these alternate
    # between run starts (even slots) and one-past-run ends (odd slots).
    boundaries = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Turn every end position into a run length (end - start), in place.
    boundaries[1::2] -= boundaries[::2]
    return ' '.join(map(str, boundaries))
 
def rle_decode(mask_rle, shape):
    '''
    Rebuild a binary mask from its run-length encoding.

    mask_rle: run-length string of whitespace-separated "start length"
              pairs, with 1-indexed starts into the flattened mask
    shape: (height, width) of the array to return
    Returns numpy uint8 array, 1 - mask, 0 - background
    '''
    tokens = mask_rle.split()
    # Even tokens are starts, odd tokens are lengths; shift the starts to
    # 0-indexed positions so they can be used directly as slice bounds.
    starts = np.asarray(tokens[0::2], dtype=int) - 1
    lengths = np.asarray(tokens[1::2], dtype=int)
    ends = starts + lengths

    flat_mask = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, stop in zip(starts, ends):
        flat_mask[begin:stop] = 1
    return flat_mask.reshape(shape)

In [2]:
# Name of the model whose predicted masks are turned into a submission.
model_name = 'SegUNet_new_version_coord_2channel_at_middle'
# Directory (with trailing slash) holding that model's mask image files.
image_path = '../../data/test_masks/{}/'.format(model_name)
# Sample submission CSV that is read in the next cell.
sample_submission_name = '../../data/test_masks/sample_submission.csv'
# Output CSV the run-length-encoded masks are written to.
submission_name = '../../data/test_masks/{}.csv'.format(model_name)

In [3]:
# Read the sample submission into a list of raw text lines (line endings kept).
with open(sample_submission_name) as sample_file:
    lines = list(sample_file)

In [4]:
# Every entry in the mask directory, in lexicographic order, so submission
# rows are written in a deterministic sequence.
mask_names = os.listdir(image_path)
mask_names.sort()

In [None]:
# Write the submission CSV: a header row followed by one "<image>.jpg,<rle>"
# row per mask file. Each row is emitted with its newline in front, so the
# finished file has no trailing newline after the last row.
with open(submission_name, 'w') as f:
    f.write('img,rle_mask')
    # The loop index was never used, so iterate over the names directly.
    for mask_name in tqdm(mask_names):
        # Rows are keyed by the text before the first '.' in the mask file
        # name, with a '.jpg' extension appended.
        image_name = mask_name.split('.')[0] + '.jpg'
        # NOTE(review): rle_encode documents a 1/0 mask as input — confirm the
        # files on disk load as single-channel binary images.
        image_array = plt.imread(image_path + mask_name)
        image_str = rle_encode(image_array)
        f.write('\n' + image_name + ',' + image_str)

  9%|▉         | 8799/100064 [07:10<1:14:29, 20.42it/s]