In [9]:
def preproc(imgPath):
    """
    Pre-process one image and save the result next to the input data.

    Pipeline:
    Read Image -> Histogram equalize -> Gaussian Blurring -> Otsu thresholding -> Morphology opening ->
    Labeling and Clustering -> Remove small clusters -> Morphology closing -> Save image

    Input
    imgPath: String with the path of the image to process.
             NOTE(review): presumably a single-channel (grayscale) image, since the
             Otsu threshold is computed directly on the loaded array -- confirm.

    Output
    None. The processed image is converted to RGB and written to
    '<folder of imgPath>/../processed_data/proc_<file name of imgPath>'.
    The 'processed_data' folder is created when it does not exist yet.

    """
    import os

    import cv2
    import numpy as np
    from PIL import Image
    from scipy.ndimage import label
    from skimage import exposure, io, img_as_ubyte
    from skimage.morphology import binary_opening, closing, square

    # Read image (scipy.misc.imread no longer exists in current SciPy releases,
    # so the already-imported skimage reader is used instead)
    img = io.imread(imgPath)
    filename = os.path.basename(imgPath)
    dirname = os.path.dirname(imgPath)

    # Histogram equalize; equalize_hist returns floats, img_as_ubyte brings them back to uint8
    img_eq = img_as_ubyte(exposure.equalize_hist(img))

    # Gaussian Blurring with 5x5 kernel
    blur = cv2.GaussianBlur(img_eq, (5, 5), 0)

    # Otsu thresholding: only the computed threshold value is needed, not the binary image
    otsu_level, _ = cv2.threshold(blur, 0, 1, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Using threshold 20 greater than the calculated otsu threshold, being more liberal in thresholding
    img_eq[img_eq <= otsu_level + 20] = 0

    # Morphology opening with 5x5 kernel (every non-zero pixel counts as foreground)
    img_opening = binary_opening(img_eq, square(5))

    # Labeling mask and removing the small clusters.
    # cluster_size[0] is the background pixel count, cluster_size[i] the size of cluster i.
    labeled_img, nlabels = label(img_opening)
    cluster_size = np.bincount(labeled_img.ravel())
    if cluster_size.size >= 3:
        # A cluster survives only if it is strictly larger than the third-largest
        # entry of cluster_size.
        # NOTE(review): the background count takes part in this ranking, as it
        # always did -- confirm whether it should be excluded.
        third_large = np.sort(cluster_size)[-3]
        keep = cluster_size > third_large
    else:
        # Fewer than two clusters: there is nothing to rank, so keep what was found
        keep = np.ones(cluster_size.shape, dtype=bool)
    keep[0] = False  # background never belongs to the mask

    # Look the keep-flag up per pixel. This covers every label 1..nlabels,
    # including the last one, which the former range(1, nlabels) loop skipped.
    mask = keep[labeled_img]

    # Apply the 0/1 mask in the image's own dtype so the result stays 8-bit
    img_after_label = img_eq * mask.astype(img_eq.dtype)

    # Morphology closing with 5x5 kernel
    img_closing = closing(img_after_label, square(5))

    # Save image
    im = Image.fromarray(img_closing)
    if im.mode != 'RGB':
        im = im.convert('RGB')
    out_dir = os.path.join(dirname, '..', 'processed_data')
    if not os.path.exists(out_dir):
        # exist_ok guards against another run creating the folder in the meantime
        os.makedirs(out_dir, exist_ok=True)
        print("A new folder is created ", out_dir)
    im.save(os.path.join(out_dir, 'proc_' + filename))



In [12]:
from glob import glob
import os

# Collect the path of every PNG image in the training dataset folder
paths = glob('./boneage-training-dataset/*.png')

# Sizes for a split of the images: one fifth (rounded down) for testing,
# the remainder for training
total_set = len(paths)
num_testing_set = total_set // 5
num_training_set = total_set - num_testing_set

# Run the pre-processing pipeline on each image; preproc returns nothing and
# writes its result to disk
for path in paths:
    preproc(path)

  "%s to %s" % (dtypeobj_in, dtypeobj))
