In [2]:
# Import the required libraries

import os
import zipfile
import random
import tensorflow as tf
from shutil import copyfile, rmtree, move
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from functools import partial
from matplotlib.image import imread
import matplotlib.image as mpimg
from tensorflow import keras
from keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense, Conv2D, MaxPool2D

In [4]:
source_path = '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/source'
source_benign_path = os.path.join(source_path, 'benign')
source_melignant_path = os.path.join(source_path, 'melignant')
data_path = '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/data'
train_path = os.path.join(data_path, 'train')
train_benign_path = os.path.join(train_path, 'benign')
train_melignant_path = os.path.join(train_path, 'melignant')
validation_path = os.path.join(data_path, 'validation')
validation_benign_path = os.path.join(validation_path, 'benign')
validation_melignant_path = os.path.join(validation_path, 'melignant')

# Create new directories for small train and validation data
s_data_path = '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/s_data'
s_train_path = os.path.join(s_data_path, 's_train')
s_train_benign_path = os.path.join(s_train_path, 's_benign')
s_train_melignant_path = os.path.join(s_train_path, 's_melignant')
s_validation_path = os.path.join(s_data_path, 's_validation')
s_validation_benign_path = os.path.join(s_validation_path, 's_benign')
s_validation_melignant_path = os.path.join(s_validation_path, 's_melignant')

In [5]:
len_source_benign = len(os.listdir(source_benign_path))
len_source_melignant = len(os.listdir(source_melignant_path))
len_source_total = len_source_benign + len_source_melignant
source_melignant_benign_ratio =  len_source_melignant/len_source_benign

len_train_benign = len(os.listdir(train_benign_path))
len_train_melignant = len(os.listdir(train_melignant_path))
len_train_total = len_train_benign + len_train_melignant
train_melignant_benign_ratio =  len_train_melignant/len_train_benign

len_validation_benign = len(os.listdir(validation_benign_path))
len_validation_melignant = len(os.listdir(validation_melignant_path))
len_validation_total = len_validation_benign + len_validation_melignant
validation_melignant_benign_ratio =  len_validation_melignant/len_validation_benign

len_s_train_benign = len(os.listdir(s_train_benign_path))
len_s_train_melignant = len(os.listdir(s_train_melignant_path))
len_s_train_total = len_train_benign + len_train_melignant
train_s_melignant_benign_ratio =  len_s_train_melignant/len_s_train_benign

len_s_validation_benign = len(os.listdir(s_validation_benign_path))
len_s_validation_melignant = len(os.listdir(s_validation_melignant_path))
len_s_validation_total = len_validation_benign + len_validation_melignant
validation_s_melignant_benign_ratio =  len_s_validation_melignant/len_s_validation_benign

In [6]:
print('Total Source Benign:', len_source_benign)
print('Total Source Melignant:', len_source_melignant)
print('Source Total:', len_source_total)
print('Source Melignant/Benign:',source_melignant_benign_ratio)

print('\nTotal Train Benign:', len_train_benign)
print('Total Train Melignant:', len_train_melignant)
print('Train Total:', len_train_total)
print('Train Melignant/Benign:', train_melignant_benign_ratio)

print('\nTotal Validation Benign:', len_validation_benign)
print('Total Validation Melignant:', len_validation_melignant)
print('Validation Total:', len_validation_total)
print('Validation Melignant/Benign:',validation_melignant_benign_ratio)

print('\nTotal s_Train Benign:', len_s_train_benign)
print('Total s_Train Melignant:', len_s_train_melignant)
print('Train s_Total:', len_s_train_total)
print('Train s_Melignant/Benign:', train_s_melignant_benign_ratio)

print('\nTotal s_Validation Benign:', len_s_validation_benign)
print('Total s_Validation Melignant:', len_s_validation_melignant)
print('Validation s_Total:', len_s_validation_total)
print('Validation s_Melignant/Benign:',validation_s_melignant_benign_ratio)

Total Source Benign: 32542
Total Source Melignant: 584
Source Total: 33126
Source Melignant/Benign: 0.017946038965029807

Total Train Benign: 26033
Total Train Melignant: 467
Train Total: 26500
Train Melignant/Benign: 0.01793877002266354

Total Validation Benign: 6509
Total Validation Melignant: 117
Validation Total: 6626
Validation Melignant/Benign: 0.01797511138423721

Total s_Train Benign: 2603
Total s_Train Melignant: 46
Train s_Total: 26500
Train s_Melignant/Benign: 0.017671917018824434

Total s_Validation Benign: 651
Total s_Validation Melignant: 12
Validation s_Total: 6626
Validation s_Melignant/Benign: 0.018433179723502304


In [7]:
def calculate_avg_size(source):
    
    dim1 = []
    dim2 = []
    
    for item in os.listdir(source):
        img = os.path.join(source,item)
        img_pixels = imread(img)
        d1, d2, colors = img_pixels.shape
        dim1.append(d1)
        dim2.append(d2)
    
    avg_dim1 = np.mean(dim1)
    avg_dim2 = np.mean(dim2)
    image_shape = (avg_dim1,avg_dim2,3)
    
    return image_shape 

In [8]:
#calculate_avg_size(s_validation_benign_path)

(2676.304147465438, 4032.2273425499234, 3)

In [9]:
input_shape = (300, 300, 3)

In [13]:
# Prepare the data using ImageDataGenerator API from keras and also include data augmentation

s_train_datagen = ImageDataGenerator(rescale = 1./255)

s_train_generator = s_train_datagen.flow_from_directory(
    s_train_path,
    batch_size = 32,
    target_size = input_shape[:2],
    class_mode = 'binary'
) 

s_validation_datagen = ImageDataGenerator(rescale = 1./255)

s_validation_generator = s_validation_datagen.flow_from_directory(
    s_validation_path,
    batch_size = 32,
    target_size = input_shape[:2],
    class_mode = 'binary'
)

Found 2649 images belonging to 2 classes.
Found 663 images belonging to 2 classes.


In [14]:
DefaultConv2D = partial(keras.layers.Conv2D, kernel_size=3, activation='relu', padding="SAME")

model = keras.models.Sequential([
    DefaultConv2D(filters=64, kernel_size=3, input_shape=input_shape),
    keras.layers.MaxPooling2D(pool_size=2),
    DefaultConv2D(filters=128),
    DefaultConv2D(filters=128),
    keras.layers.MaxPooling2D(pool_size=2),
    DefaultConv2D(filters=256),
    DefaultConv2D(filters=256),
    keras.layers.MaxPooling2D(pool_size=2),
    keras.layers.Flatten(),
    keras.layers.Dense(units=128, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(units=64, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(units=1, activation='sigmoid'),
])

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=[tf.keras.metrics.AUC()])

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_5 (Conv2D)            (None, 300, 300, 64)      1792      
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 150, 150, 64)      0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 150, 150, 128)     73856     
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 150, 150, 128)     147584    
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 75, 75, 128)       0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 75, 75, 256)       295168    
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 75, 75, 256)      

In [16]:
history = model.fit(s_train_generator,
                    epochs=20,
                    validation_data=s_validation_generator)

  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 83 steps, validate for 21 steps
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
