In [4]:
import os

import numpy as np
from matplotlib import pyplot as plt

import skimage
import skimage.io
from skimage import measure
from skimage import color
from skimage.util import view_as_blocks

In [5]:
def tail_contour(imgpath, threshold_ratio=0.2):
    """Return the longest contour around the dark region of a tail image.

    The image is converted to grayscale, every pixel darker than a threshold
    derived from the image's own gray range is masked, and the contours of
    that mask are traced. The contour with the most points is returned.

    Parameters
    ----------
    imgpath : str
        Path of the image file, read with ``skimage.io.imread``.
    threshold_ratio : float, optional
        Fraction of the image's gray range (``min`` .. ``max``) below which a
        pixel is considered part of the tail. Defaults to ``0.2``, i.e. the
        darkest 20% of the range.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_points, 2)`` holding the ``(row, column)``
        coordinates of the longest contour.

    Raises
    ------
    ValueError
        If no contour can be found (for example when the image is uniform).
    """
    tail_img = skimage.io.imread(imgpath)
    if tail_img.ndim == 2:
        # Already single-channel: rgb2gray expects a colour image, and the
        # threshold below is relative, so the value scale does not matter.
        tail_img_gray = tail_img.astype(float)
    else:
        tail_img_gray = color.rgb2gray(tail_img)

    max_gray = tail_img_gray.max()
    min_gray = tail_img_gray.min()
    # There is no fixed threshold that fits all images, so the cut-off is
    # expressed relative to each image's own darkest/brightest values.
    mask_threshold = min_gray + (max_gray - min_gray) * threshold_ratio
    tail_img_mask = tail_img_gray < mask_threshold

    # The mask only holds False/True (0/1); a level between the two traces
    # the boundary of the masked region.
    contours = measure.find_contours(tail_img_mask, 0.9)
    if not contours:
        raise ValueError("no contour found in image: {}".format(imgpath))

    # Longest contour = the one with the most points (first one wins on ties).
    return max(contours, key=len)

## Store all contours

### Function to find the contour in a tail image and save it to an output path
#### Keep only the bottom half of the contour image

In [61]:
def save_contour(source_img_path, output_path):
    """Extract the tail contour of an image and save half of it as a line drawing.

    The contour returned by ``tail_contour`` is shifted so that its smallest
    row and column coordinates are 0, only the points whose row coordinate is
    below the midpoint of the contour's row extent are kept, and those points
    are drawn as a thin black line on an axis-free figure written to
    ``output_path``.

    Parameters
    ----------
    source_img_path : str
        Path of the tail image to process.
    output_path : str
        Destination file for the rendered contour. The folder must already
        exist.

    Returns
    -------
    None
    """
    # Find contour from tail image
    c = tail_contour(source_img_path)

    # Move the contour towards the origin (0, 0)
    c[:, 0] -= c[:, 0].min()
    c[:, 1] -= c[:, 1].min()

    # Keep only the points in the lower half of the row range. The row minimum
    # is 0 after the shift above, so the midpoint is simply max / 2.
    # NOTE: the y axis is not inverted when plotting, so these small-row points
    # end up in the bottom part of the saved figure.
    middle_y = c[:, 0].max() / 2
    c_bottom = c[c[:, 0] < middle_y]

    # Save image to output_path
    fig, ax = plt.subplots(figsize=(8, 3))
    try:
        ax.plot(c_bottom[:, 1], c_bottom[:, 0], linewidth=1, color='black')
        ax.axis('off')
        # savefig has no `cmap` option; the line colour is already set above.
        fig.savefig(output_path, bbox_inches='tight')
    finally:
        # Always release the figure: it keeps it off the screen and avoids
        # accumulating open figures when this runs over thousands of images.
        plt.close(fig)

In [57]:
# Smoke test: run the contour extraction on a single validation image
in_file, out_file = (
    "../data/test_val/PM-WWA-20170321-046.jpg",
    "../experiments/PM-WWA-20170321-046.jpg",
)
save_contour(source_img_path=in_file, output_path=out_file)

(4715, 2)


### Store test-val files

In [62]:
# Render the contour of every test/validation image into the output folder.
testval_path = "../data/test_val/"
testval_contour_path = "../experiments/contour_mobilenet_0.2_bottom_half/test-val/"

# Saving fails if the destination folder is missing, so create it up front.
os.makedirs(testval_contour_path, exist_ok=True)

# Sorted for a reproducible processing order; sub-directories are skipped
# because they cannot be read as images.
images = sorted(
    name for name in os.listdir(testval_path)
    if os.path.isfile(os.path.join(testval_path, name))
)

img_count = len(images)
i = 0

for i, img in enumerate(images, start=1):
    in_file = os.path.join(testval_path, img)
    out_file = os.path.join(testval_contour_path, img)
    save_contour(in_file, out_file)

    # progress tracking
    if i % 50 == 0:
        print("processed {}/{} images".format(i, img_count))

processed 50/808 images
processed 100/808 images
processed 150/808 images
processed 200/808 images
processed 250/808 images
processed 300/808 images
processed 350/808 images
processed 400/808 images
processed 450/808 images
processed 500/808 images
processed 550/808 images
processed 600/808 images
processed 650/808 images
processed 700/808 images
processed 750/808 images
processed 800/808 images


### Store train files

In [63]:
# Render the contour of every training image, mirroring the one-sub-folder-
# per-class layout of the training set in the output folder.
train_path = "../data/train/"
train_contour_path = "../experiments/contour_mobilenet_0.2_bottom_half/train/"

# Only directories are treated as sub-folders; stray files in train_path
# would otherwise make the per-folder listing below fail.
subfolders = sorted(
    name for name in os.listdir(train_path)
    if os.path.isdir(os.path.join(train_path, name))
)

# List every sub-folder once so the total can be computed instead of being
# hard-coded; nested directories are skipped since they are not images.
images_by_subfolder = {
    sf: sorted(
        name for name in os.listdir(os.path.join(train_path, sf))
        if os.path.isfile(os.path.join(train_path, sf, name))
    )
    for sf in subfolders
}

train_file_count = sum(len(names) for names in images_by_subfolder.values())
i = 0

for sf in subfolders:
    # Create sub-folder for train contours if it does not exist yet
    train_contour_path_subfolder = os.path.join(train_contour_path, sf)
    os.makedirs(train_contour_path_subfolder, exist_ok=True)

    # Find contour for each image of this sub-folder
    images = images_by_subfolder[sf]
    for img in images:
        in_file = os.path.join(train_path, sf, img)
        out_file = os.path.join(train_contour_path_subfolder, img)
        save_contour(in_file, out_file)

        # progress tracking
        i += 1
        if i % 50 == 0:
            print("processed {}/{} images ({}%)".format(i, train_file_count, round(i / train_file_count * 100, 2)))

processed 50/4539 images (1.1%)
processed 100/4539 images (2.2%)
processed 150/4539 images (3.3%)
processed 200/4539 images (4.41%)
processed 250/4539 images (5.51%)
processed 300/4539 images (6.61%)
processed 350/4539 images (7.71%)
processed 400/4539 images (8.81%)
processed 450/4539 images (9.91%)
processed 500/4539 images (11.02%)
processed 550/4539 images (12.12%)
processed 600/4539 images (13.22%)
processed 650/4539 images (14.32%)
processed 700/4539 images (15.42%)
processed 750/4539 images (16.52%)
processed 800/4539 images (17.63%)
processed 850/4539 images (18.73%)
processed 900/4539 images (19.83%)
processed 950/4539 images (20.93%)
processed 1000/4539 images (22.03%)
processed 1050/4539 images (23.13%)
processed 1100/4539 images (24.23%)
processed 1150/4539 images (25.34%)
processed 1200/4539 images (26.44%)
processed 1250/4539 images (27.54%)
processed 1300/4539 images (28.64%)
processed 1350/4539 images (29.74%)
processed 1400/4539 images (30.84%)
processed 1450/4539 imag