In [1]:
import math
from pathlib import Path
import cv2
from PIL import Image
from PIL.ImageEnhance import Contrast
import numpy as np
import os
from tqdm import tqdm

import cv2
import numpy as np
from skimage.measure import regionprops

SRC_FOLDER = Path('./ThickBloodSmears_150/')
DST_FOLDER = Path('./preprocessed_images/')
GT_FOLDER = Path('./ThickBloodSmears_150/GT_updated/')
BINMASK_FOLDER = Path('./preprocessed_images/masks/')
LEISH_OUTPUT_PATH = './patches/leish/'
NO_LEISH_OUTPUT_PATH = './patches/no-leish/'

WITH_LEISH_STRIDE = 12
NO_LEISH_STRIDE = 96
ALPHA = 0.20
WINDOW_SIZE = 96

MIN_RADIUS = 1350 # distance from roi center to right border; obtained by manual measurement of one image example
CONTRAST_FACTOR = 1.5

# ROI Crop, Binary Masks with Parasite-only locations and Contrast Enhancement

In [4]:
def create_binary_mask(image_shape, annotations):
    mask = np.zeros(image_shape, dtype=np.uint8)
    for annotation in annotations:
        x, y = int(float(annotation[5])), int(float(annotation[6]))
        if annotation[3] == "Circle":
            radius = int(np.sqrt((x - int(float(annotation[7])))**2 + (y - int(float(annotation[8])))**2))
            cv2.circle(mask, (x, y), radius, 255, -1)
        elif annotation[3] == "Point":
            mask[y, x] = 255
    return mask

In [90]:
# preprocess step 1: Binary masks creation
# first line, 3 integers: total number of parasites and white blood cells in the image
#                         Second number is the image height
#                         Third number is the image width
# preprocess step 2: Hough Circles and Contrast Enhancement

def draw_parasite_bin_mask(h, w, annotations):
    bin_mask = np.zeros((h, w), dtype=np.uint8)
    parasite_count = 0

    for annotation in annotations:
        if annotation[1] == 'Parasite':
            parasite_count += 1
            x, y = int(float(annotation[5])), int(float(annotation[6])) # x, y coords
            if annotation[3] == 'Circle':
                radius = int(np.sqrt((x - int(float(annotation[7])))**2 + (y - int(float(annotation[8])))**2))
                cv2.circle(bin_mask, (x, y), radius, 255, -1)
            elif annotation[3] == 'Point':
                bin_mask[x, y] = 255

    return bin_mask, parasite_count

total_parasites = 0
for folder in SRC_FOLDER.iterdir():
    if not folder.name.endswith(('GT_updated', '.docx')): # only folders with patient prefix
        for image_file in folder.glob('*.JPG'): # image names
            patient_prefix = folder.name

            gt_file = GT_FOLDER / patient_prefix / (image_file.stem + '.txt')

            if gt_file.exists():

                with open(gt_file, 'r') as f:
                    lines = f.readlines()
                    _, image_height, image_width = map(int, lines[0].strip().split(','))
                    annotations = [line.strip().split(',') for line in lines[1:]]

                image = cv2.imread(str(image_file))
                gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
                image_bin_mask, parasite_count = draw_parasite_bin_mask(image_height, image_width, annotations)
                total_parasites += parasite_count
                circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, dp=1, minDist=50,
                            param1=200, param2=30, minRadius=MIN_RADIUS, maxRadius=0)

                if circles is not None:
                    min_radius = math.inf
                    min_circle = None
                    circles = np.round(circles[0, :]).astype("int")
                    for (x, y, r) in circles:
                        if r < min_radius:
                            min_radius = r
                            min_circle = (x, y, r)

                    if min_circle is not None:
                        (x, y, r) = min_circle

                        mask = np.zeros_like(gray)
                        cv2.circle(mask, (x, y), r, 255, -1)
                        (x, y, w, h) = cv2.boundingRect(mask)
                        roi = image[y:y+h, x:x+w]
                        cropped_mask = image_bin_mask[y:y+h, x:x+w]

                        img_pil = Image.fromarray(cv2.cvtColor(roi, cv2.COLOR_BGR2RGB))
                        contrast = Contrast(img_pil)
                        img_enhanced = contrast.enhance(CONTRAST_FACTOR)

                    out_filename = f"{patient_prefix}-{image_file.name}"
                    img_enhanced.save(DST_FOLDER / out_filename)
                    cv2.imwrite(str(BINMASK_FOLDER / out_filename), cropped_mask)
                else:
                    print(f'No circles found, thus, no saving. Stopped at {image_file}')
            else:
                print(f'No annotation file found for {gt_file}.')


No annotation file found for ThickBloodSmears_150\GT_updated\TF108_256\20170830_153525.txt.
No annotation file found for ThickBloodSmears_150\GT_updated\TF108_256\20170830_160538.txt.
No annotation file found for ThickBloodSmears_150\GT_updated\TF10_C65P26\20170612_145535.txt.
No annotation file found for ThickBloodSmears_150\GT_updated\TF112_CJ10\20170830_203623.txt.
No annotation file found for ThickBloodSmears_150\GT_updated\TF114_SC518\20170830_220137.txt.
No annotation file found for ThickBloodSmears_150\GT_updated\TF125_S537\20170831_141701.txt.
No annotation file found for ThickBloodSmears_150\GT_updated\TF130_S521\20170831_170959.txt.
No annotation file found for ThickBloodSmears_150\GT_updated\TF133_SW322\20170831_182142.txt.
No annotation file found for ThickBloodSmears_150\GT_updated\TF134_US28\20170831_202341.txt.
No annotation file found for ThickBloodSmears_150\GT_updated\TF134_US28\20170831_202628.txt.
No annotation file found for ThickBloodSmears_150\GT_updated\TF141_CT

In [86]:
print(f'Total number of annotated parasites = {total_parasites}')

Total number of annotated objects = 119981


In [91]:
print(f'Total number of annotated parasites = {total_parasites}')

Total number of annotated parasites = 84509


# Patching

In [118]:
def is_alpha(mask_patch, tot_img_area):
    """
    Determines if the Leishmania area in a given mask patch exceeds a certain threshold.

    Parameters:
    - mask_patch (np.array): The patch of the mask image.
    - tot_img_area (int): Total area of the image patch.

    Returns:
    bool: True if the area of Leishmania in the mask patch is greater than or equal to the specified threshold, False otherwise.
    """
    leish_area = regionprops(mask_patch)[0].area
    return (leish_area/tot_img_area) >= ALPHA

def crop(img, mask, img_id, WITH_LEISH_STRIDE, NO_LEISH_STRIDE):
    """
    Crops an image and its corresponding mask into smaller patches and saves the patches with sufficient Leishmania area.

    Parameters:
    - img (np.array): The image to be cropped.
    - mask (np.array): The corresponding mask image.
    - img_id (str): Identifier of the image.
    - WITH_LEISH_STRIDE (int): The stride to use for cropping when Leishmania is present.
    - NO_LEISH_STRIDE (int): The stride to use for cropping when Leishmania is not present.

    Returns:
    None
    """
    x, y = 0,0
    end_h, end_w = img.shape[0], img.shape[1]
    stride = NO_LEISH_STRIDE
    tot_img_area = WINDOW_SIZE * WINDOW_SIZE

    is_looping = True
    while((x + WINDOW_SIZE) <= end_h):
        while((y + WINDOW_SIZE) <= end_w):
            if (x + WINDOW_SIZE) > end_h or (y + WINDOW_SIZE) > end_w:
                is_looping = False
                break

            img_patch = img[x:x+WINDOW_SIZE, y:y+WINDOW_SIZE]
            mask_patch = mask[x:x+WINDOW_SIZE, y:y+WINDOW_SIZE]

            out_name = f'{img_id[0:-4]}-{x}-{y}.png'
            # mask_out_name = f'{img_id[0:-4]}-{x}-{y}.png'
            if np.any(mask_patch == 1):
                has_enough_leish = is_alpha(mask_patch, tot_img_area)
                stride = WITH_LEISH_STRIDE
                if has_enough_leish and img_patch.size > 0:
                    cv2.imwrite(os.path.join(LEISH_OUTPUT_PATH, out_name), img_patch)
                    # cv2.imwrite(LEISH_MASKS_OUTPUT_PATH+mask_out_name, mask_patch)
            else:
                stride = NO_LEISH_STRIDE
                if np.count_nonzero(img_patch == 0) < 0.5 * tot_img_area and img_patch.size > 0: # change 255 to 0
                    cv2.imwrite(os.path.join(NO_LEISH_OUTPUT_PATH, out_name), img_patch)

            y += stride

        x += stride
        y = 0
        if not is_looping:
            break

def dynamic_patcher(
        IMGS_FOLDER_PATH=DST_FOLDER,
        MASKS_FOLDER_PATH=BINMASK_FOLDER,
        WITH_LEISH_STRIDE=WITH_LEISH_STRIDE,
        NO_LEISH_STRIDE=NO_LEISH_STRIDE,
    ):
    """
    Processes a set of images and their corresponding masks, cropping and saving patches based on the presence of Leishmania.

    Parameters:
    - IMGS_FOLDER_PATH (str): Path to the folder containing images. Default from `config` module.
    - MASKS_FOLDER_PATH (str): Path to the folder containing mask images. Default `config` module.
    - WITH_LEISH_STRIDE (int): Stride for cropping images with Leishmania. Default `config` module.
    - NO_LEISH_STRIDE (int): Stride for cropping images without Leishmania. Default `config` module.

    Returns:
    None
    """

    all_imgs = os.listdir(IMGS_FOLDER_PATH)
    all_imgs.remove('masks') # removed masks folder
    all_masks = os.listdir(MASKS_FOLDER_PATH)

    imgs_n_masks = zip(all_imgs, all_masks)
    print('Total images = ', len(all_imgs), '\nTotal masks = ', len(all_masks))

    for img_id, mask_id in tqdm(imgs_n_masks, total=len(all_imgs)):
        img = cv2.imread(os.path.join(IMGS_FOLDER_PATH,img_id))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # img = img / 255.0

        mask = cv2.imread(os.path.join(MASKS_FOLDER_PATH,mask_id), 0)
        mask = (mask > 0).astype(int)

        crop(img, mask, img_id, WITH_LEISH_STRIDE, NO_LEISH_STRIDE)

In [119]:
dynamic_patcher()

Total images =  1825 
Total masks =  1825


 40%|███▉      | 728/1825 [16:23<24:42,  1.35s/it]  


KeyboardInterrupt: 

# Downsample

In [10]:
LEISH_OUTPUT_PATH = './patches-reduced/mal/'
NO_LEISH_OUTPUT_PATH = './patches-reduced/no-mal/'
leish_count = os.listdir(LEISH_OUTPUT_PATH)
no_leish_count = os.listdir(NO_LEISH_OUTPUT_PATH)

print(f'Total leish images = {len(leish_count)}\nTotal NO leish images = {len(no_leish_count)}')

Total leish images = 114789
Total NO leish images = 43400


In [5]:
k = 93089 # positive class
w = 224608 # negative class

In [11]:
import random
exclude_pos = random.sample(leish_count, k)
# exclude_neg = random.sample(no_leish_count, w)

for img_path in exclude_pos:
    os.remove(os.path.join(LEISH_OUTPUT_PATH,img_path))

print(f'All {k} images were removed from list of other images.')

# for img_path in exclude_neg:
#     os.remove(os.path.join(NO_LEISH_OUTPUT_PATH,img_path))

# print(f'All {w} images were removed from list of other images.')
# leish_count = os.listdir(LEISH_OUTPUT_PATH)
# no_leish_count = os.listdir(NO_LEISH_OUTPUT_PATH)

print(f'Total mal images = {len(leish_count)}\nTotal NO mal images = {len(no_leish_count)}')

All 93089 images were removed from list of other images.
Total mal images = 114789
Total NO mal images = 43400


In [142]:
# priori_imgs = []

# def is_all_black(img):
#     return cv2.countNonZero(img) == 0

# # Função para verificar se a imagem é totalmente branca
# def is_all_white(img):
#     return cv2.countNonZero(cv2.bitwise_not(img)) == 0

# # Função para verificar se pelo menos 30% dos pixels são pretos
# def has_threshold_black(img):
#     total_pixels = img.shape[0] * img.shape[1]
#     return cv2.countNonZero(img) / total_pixels < 0.7

# for filename in leish_count:
#     img_path = os.path.join(LEISH_OUTPUT_PATH, filename)

#     # Carrega a imagem
#     img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

#     # Verifica se a imagem é totalmente preta
#     if is_all_black(img) or is_all_white(img) or has_threshold_black(img):
#         priori_imgs.append(img_path)

# k = len(no_leish_count) - len(leish_count) * 2
k = 0
for img_path in priori_imgs:
    os.remove(img_path)
    k += 1

print(f'Removed {k} images from list of priority images.')

Removed 19215 images from list of priority images.
