In [109]:
import imgaug.augmenters as iaa
import imgaug
import os
import pandas as pd
import skimage
from sklearn.utils import shuffle
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from moles import MolesConfig

In [22]:
# Project configuration object; its IMAGE_MIN_DIM attribute is read in
# aug_image to set the square size every image and mask is resized to.
config = MolesConfig()

In [2]:
# Get all the training files.
# Ids are derived by stripping the ".jpg" extension instead of slicing a fixed
# 12 characters, so ids of any length work and non-JPEG directory entries are
# skipped (aug_image can only load "<id>.jpg").
train_image_files = os.listdir("data/train-src/images/")
train_ids = [
    stem
    for stem, ext in map(os.path.splitext, train_image_files)
    if ext.lower() == ".jpg"
]

In [7]:
# Load the ground truth table and derive a single class label per row:
#   1 -> neither flag set, 2 -> melanoma, 3 -> seborrheic keratosis
gt = pd.read_csv("data/ISIC-2017_Training_Part3_GroundTruth.csv").assign(
    label=lambda frame: frame["melanoma"] + 2 * frame["seborrheic_keratosis"] + 1
)

In [27]:
# Keep only the ground-truth rows whose image id was found in the training folder
train_gt = gt.loc[gt["image_id"].isin(train_ids)]

# One frame per class label (1 = nevus, 2 = melanoma, 3 = seborrheic keratosis)
nevus_gt = train_gt.loc[train_gt["label"].eq(1)]
melanoma_gt = train_gt.loc[train_gt["label"].eq(2)]
sk_gt = train_gt.loc[train_gt["label"].eq(3)]

In [142]:
# Number of augmented copies to generate per source image of each class
# (order: nevus, melanoma, seborrheic keratosis)
nevus_iter, melanoma_iter, sk_iter = 1, 6, 6

In [20]:
# Augmentation pipeline: SomeOf draws between 2 and 5 of the children below
# for each image; the blur child itself picks exactly one blur variant.
augmentation = iaa.SomeOf(
    n=(2, 5),
    children=[
        iaa.Fliplr(p=1.0),
        iaa.OneOf(children=[
            iaa.GaussianBlur(sigma=(0, 2.0)),
            iaa.AverageBlur(k=(1, 3)),
            iaa.MedianBlur(k=(3, 5)),
        ]),
        iaa.Flipud(p=1.0),
        iaa.Rot90(k=(1, 3)),
        iaa.CropAndPad(percent=(-0.25, 0.25)),
    ],
)

In [135]:
def aug_image(image_id, augmentation, label):
    """Resize one source image and its mask, optionally augment both, and save them.

    The pair is read from ``data/train-src``, resized to a square of
    ``config.IMAGE_MIN_DIM`` pixels and written to ``data/train`` under a new
    id of the form ``ISIC_<7-digit counter>``.

    Args:
        image_id: Id used to build the source image and mask file names.
        augmentation: imgaug augmenter applied to the image and the mask, or a
            falsy value (e.g. ``None``) to save the resized originals as-is.
        label: Class label recorded for the new image.

    Side effects:
        Writes two files, appends to the module-level ``augmented_X`` and
        ``augmented_y`` lists, and increments the module-level ``counter``.
    """
    global counter

    target_size = (config.IMAGE_MIN_DIM, config.IMAGE_MIN_DIM)

    image = skimage.io.imread("data/train-src/images/{}.jpg".format(image_id))
    image = cv2.resize(image, target_size)

    mask = skimage.io.imread("data/train-src/masks/{}_segmentation.png".format(image_id))
    # Nearest-neighbour keeps the mask's original pixel values; cv2's default
    # bilinear interpolation would blend them along the lesion boundary.
    mask = cv2.resize(mask, target_size, interpolation=cv2.INTER_NEAREST)

    new_image_id = "ISIC_{:07}".format(counter)

    # Augmenters that may touch the mask: containers plus geometric
    # transforms. Blur augmenters are absent, so they only affect the image.
    MASK_AUGMENTERS = ["Sequential", "SomeOf", "OneOf", "Sometimes",
                       "Fliplr", "Flipud", "CropAndPad",
                       "Affine", "PiecewiseAffine", "Rot90"]

    def hook(images, augmenter, parents, default):
        """Determines which augmenters to apply to masks."""
        return augmenter.__class__.__name__ in MASK_AUGMENTERS

    if augmentation:
        # Freeze the random choices so the image and its mask receive the
        # same transform.
        det = augmentation.to_deterministic()
        image = det.augment_image(image)
        mask = det.augment_image(mask.astype(np.uint8),
                                 hooks=imgaug.HooksImages(activator=hook))
    # Otherwise: no augmentation, so the resized original image and mask are
    # written unchanged.

    skimage.io.imsave("data/train/images/{}.jpg".format(new_image_id), image)
    skimage.io.imsave("data/train/masks/{}_segmentation.png".format(new_image_id), mask)

    # Update lists of ids and labels as well as the counter
    augmented_X.append(new_image_id)
    augmented_y.append(label)
    counter += 1

In [89]:
# Build the oversampled work list: each class's ids (and matching labels) are
# repeated by that class's iteration count, concatenated in the order
# nevus, melanoma, seborrheic keratosis.
X = [
    image_id
    for class_gt, repeats in ((nevus_gt, nevus_iter),
                              (melanoma_gt, melanoma_iter),
                              (sk_gt, sk_iter))
    for image_id in class_gt["image_id"].to_list() * repeats
]

y = [
    class_label
    for class_gt, repeats in ((nevus_gt, nevus_iter),
                              (melanoma_gt, melanoma_iter),
                              (sk_gt, sk_iter))
    for class_label in class_gt["label"].to_list() * repeats
]

In [90]:
# Shuffle ids and labels together. A fixed random_state makes the order (and
# therefore the numbering of the generated files) the same on every run.
X_s, y_s = shuffle(X, y, random_state=42)

In [136]:
# Accumulators filled by aug_image: new image ids, their labels, and the
# running counter used to number the output files.
augmented_X = []
augmented_y = []
counter = 0

# aug_image writes into these folders; create them up front so the first
# save does not fail when the output tree does not exist yet.
os.makedirs("data/train/images", exist_ok=True)
os.makedirs("data/train/masks", exist_ok=True)

# Seed imgaug's global RNG so the augmented set can be regenerated.
# NOTE(review): this relies on the augmenters having been built without an
# explicit seed, in which case they draw from the global RNG.
imgaug.seed(42)

# First make copies of original images without augmentations
for image_id, label in tqdm(list(zip(train_gt["image_id"].to_list(), train_gt["label"].to_list()))):
    aug_image(image_id, None, label)

# Then write one augmented copy per entry of the shuffled, oversampled list
for image_id, label in tqdm(list(zip(X_s, y_s))):
    aug_image(image_id, augmentation, label)



100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1600/1600 [04:26<00:00,  6.01it/s]








100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5208/5208 [14:25<00:00,  6.02it/s]


In [139]:
# Pair every generated image id with the label recorded for it
labels = pd.DataFrame(dict(image_id=augmented_X, label=augmented_y))

In [140]:
# Display how many generated images fall into each class label, to check the
# class balance after oversampling.
labels["label"].value_counts()

1.0    3294
2.0    2093
3.0    1421
Name: label, dtype: int64

In [141]:
# Persist the id -> label table alongside the generated images.
# NOTE: index=False is not passed, so the DataFrame index is written as the
# first (unnamed) column of the CSV.
labels.to_csv("data/train/labels.csv")