In [1]:
import cv2
import matplotlib.pyplot as plt
import numpy as np

from skimage import feature, exposure
from skimage.morphology import closing
from skimage.morphology import disk
from skimage.morphology import label as sk_label
from tqdm.notebook import tqdm

In [2]:
def label_sort(in_img, cutoff = 0.01):
    """Label connected components and renumber them by decreasing area.

    The image is labelled with ``skimage.morphology.label``. Components whose
    pixel count is not strictly greater than ``cutoff`` times the number of
    positive pixels in ``in_img`` are discarded (set to 0). The surviving
    components are renumbered 1, 2, 3, ... from largest to smallest; components
    of equal area keep their original relative label order.

    Parameters
    ----------
    in_img : array-like
        Input image; pixels with value ``> 0`` count towards the total used
        for the cutoff.
    cutoff : float, optional
        Minimum component area, expressed as a fraction of the count of
        positive pixels. Defaults to 0.01.

    Returns
    -------
    numpy.ndarray
        Array with the same shape and dtype as the label image, holding the
        new component ids (0 for background and discarded components).
    """
    total_cnt = np.sum(in_img > 0)
    lab_img = sk_label(in_img)

    # A single pass gives the area of every label, instead of rescanning the
    # whole image once per component.
    areas = np.bincount(lab_img.ravel(), minlength=1)

    # Labels actually present in the image (background 0 excluded), ascending.
    labels = np.flatnonzero(areas[1:]) + 1
    sizes = areas[labels]

    big_enough = sizes > total_cnt * cutoff
    labels = labels[big_enough]
    sizes = sizes[big_enough]

    # Stable sort on negated area: largest first, ties keep ascending label order.
    order = np.argsort(-sizes, kind="stable")

    # Lookup table old label -> new label; anything not kept maps to 0.
    remap = np.zeros(areas.size, dtype=lab_img.dtype)
    remap[labels[order]] = np.arange(1, labels.size + 1)

    return remap[lab_img]

In [3]:
def _show_preprocessing_steps(label, resize_img, gray_img, threshold_img, noiseless_img, hog_vis):
    """Plot the intermediate images of one sample side by side."""
    hog_vis = exposure.rescale_intensity(hog_vis, out_range = (0, 255)).astype("uint8")

    # OpenCV loads colour images as BGR while matplotlib expects RGB;
    # convert so the "Original" panel is displayed with the right colours.
    rgb_img = cv2.cvtColor(resize_img, cv2.COLOR_BGR2RGB)

    panels = [
        (label + ' (Original)', rgb_img),
        (label + ' (Gray)', gray_img),
        (label + ' (Threshold)', threshold_img),
        (label + ' (Noiseless)', noiseless_img),
        (label + ' (Hog)', hog_vis),
    ]

    fig, axes = plt.subplots(1, len(panels), figsize=(14, 4))
    for ax, (title, img) in zip(axes, panels):
        ax.imshow(img, cmap='gray')  # cmap is ignored for the RGB panel
        ax.set_title(title)
        ax.set_xticks([])
        ax.set_yticks([])

    fig.tight_layout()
    plt.show()
    plt.close(fig)  # avoid accumulating open figures across samples


def preprocessing_img(df_dataset, show_sample_img = True):
    """Load every image listed in ``df_dataset`` and extract HOG features.

    Each image is read with OpenCV, resized to 200x200, converted to
    grayscale, binarised with an inverted Otsu threshold, cleaned with
    ``label_sort`` followed by a morphological closing, and finally described
    with a HOG feature vector.

    Parameters
    ----------
    df_dataset : pandas.DataFrame
        Must provide the columns ``'file_path'``, ``'disease'``,
        ``'geometry_type'`` and ``'validation'``. The ``'validation'`` value
        of the first row is used as the progress-bar caption.
    show_sample_img : bool, optional
        When true, the processing steps are plotted for the first sample of
        every distinct ``geometry_type``/``disease`` combination.

    Returns
    -------
    tuple of numpy.ndarray
        ``(features, labels)``: the stacked HOG vectors and the matching
        ``'disease'`` labels, in row order.

    Raises
    ------
    FileNotFoundError
        If an image listed in ``'file_path'`` cannot be read.
    """
    list_data = []
    list_labels = []
    seen_keys = set()  # geometry/disease combinations already encountered

    # Shared HOG settings so the feature and visualisation calls cannot drift apart.
    hog_kwargs = dict(orientations = 9, pixels_per_cell = (10, 10),
                      cells_per_block = (2, 2), transform_sqrt = True, block_norm = "L1")

    n_rows = df_dataset.shape[0]
    # iloc[0] would raise on an empty frame; fall back to a generic caption.
    desc = (df_dataset.iloc[0]['validation'] + ' data load') if n_rows else 'data load'

    for index, row in tqdm(df_dataset.iterrows(), total = n_rows, desc = desc):
        key = row['geometry_type'] + '_' + row['disease']
        first_of_kind = key not in seen_keys
        seen_keys.add(key)

        ori_img = cv2.imread(str(row['file_path']))
        if ori_img is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError("Could not read image: " + str(row['file_path']))
        label = row['disease']

        resize_img = cv2.resize(ori_img, (200, 200))
        gray_img = cv2.cvtColor(resize_img, cv2.COLOR_BGR2GRAY)
        threshold_img = cv2.threshold(gray_img, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
        noiseless_img = closing(label_sort(threshold_img)>0, disk(1))

        if first_of_kind and show_sample_img:
            # Ask for the visualisation in the same call instead of running HOG twice.
            hog_features, hog_vis = feature.hog(noiseless_img, visualize=True, **hog_kwargs)
            _show_preprocessing_steps(label, resize_img, gray_img, threshold_img,
                                      noiseless_img, hog_vis)
        else:
            hog_features = feature.hog(noiseless_img, **hog_kwargs)

        list_data.append(hog_features)
        list_labels.append(label)

    return (np.array(list_data), np.array(list_labels))