In [1]:
import os
import tensorflow as tf
from keras.models import Model
from keras.optimizers import Adam
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.layers import Dense, Dropout, Flatten
from pathlib import Path
import numpy as np
import pandas as pd

In [2]:
cur_path = os.getcwd()

base_skin_dir = os.path.join('E:\\Research\\Skin-Cancer-Classification-Using-CNN-Deep-Learning-Algorithm-master\\archive\\', 'skin_img')

imageid_path_dict = {}

for images in os.listdir(base_skin_dir):
    # check if the image ends with png or jpg or jpeg
    if (images.endswith(".png") or images.endswith(".jpg")
        or images.endswith(".jpeg")):
        # display
        temp = os.path.join(base_skin_dir, images)
        # temp = os.path.splitext(os.path.basename(temp))[0]
        imageid_path_dict[temp] = str(images)

# imageid_path_dict = {os.path.splitext(os.path.basename(x))[0]: x
#                    for x in glob(os.path.join(base_skin_dir, '*.','jpg'))}
# print(imageid_path_dict)

lesion_dict = {     
                  'nv': 'Melanocytic nevi',
                  'mel': 'Melanoma',
                  'bkl': 'Benign keratosis-like lesions ',
                  'bcc': 'Basal cell carcinoma',
                  'akiec': 'Actinic keratoses',
                  'vasc': 'Vascular lesions',
                  'df': 'Dermatofibroma'
              }

In [3]:
dataset = pd.read_csv('E:\\Research\\Skin-Cancer-Classification-Using-CNN-Deep-Learning-Algorithm-master\\archive\\HAM10000_metadata.csv')
temp = imageid_path_dict.keys()
path = []
id = []
for i in temp:
    head_tail = os.path.split(i)
    path.append(str(i))
    id.append(str(head_tail[1]))
dataset['path'] = path
dataset['image_id'] = id 
dataset['cell_type'] = dataset['dx'].map(lesion_dict.get)
dataset['cell_codes'] = pd.Categorical(dataset['cell_type']).codes
dataset['age'].fillna((dataset['age'].mean()), inplace=True)
dataset.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,path,cell_type,cell_codes
0,HAM_0000118,ISIC_0024306.jpg,bkl,histo,80.0,male,scalp,E:\Research\Skin-Cancer-Classification-Using-C...,Benign keratosis-like lesions,2
1,HAM_0000118,ISIC_0024307.jpg,bkl,histo,80.0,male,scalp,E:\Research\Skin-Cancer-Classification-Using-C...,Benign keratosis-like lesions,2
2,HAM_0002730,ISIC_0024308.jpg,bkl,histo,80.0,male,scalp,E:\Research\Skin-Cancer-Classification-Using-C...,Benign keratosis-like lesions,2
3,HAM_0002730,ISIC_0024309.jpg,bkl,histo,80.0,male,scalp,E:\Research\Skin-Cancer-Classification-Using-C...,Benign keratosis-like lesions,2
4,HAM_0001466,ISIC_0024310.jpg,bkl,histo,75.0,male,ear,E:\Research\Skin-Cancer-Classification-Using-C...,Benign keratosis-like lesions,2


In [4]:
train_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_dataframe(dataframe=dataset,
x_col='path',
y_col='cell_type',
batch_size=15, target_size=(224, 224), class_mode='categorical',subset = 'training')

# validation_generator =  train_datagen.flow_from_dataframe(dataframe=dataset, 
# x_col='path',
# y_col='cell_type',
# batch_size=15, target_size=(224, 224), class_mode='categorical',subset = 'validation')

# test_generator =  test_datagen.flow_from_dataframe(dataframe=test_df, 
# x_col='path',
# y_col='cell_type',
# batch_size=15, target_size=(224, 224), class_mode='categorical')

Found 10015 validated image filenames belonging to 7 classes.


In [5]:
input_shape = (224, 224, 3)
n_classes=7
n_steps = train_generator.samples // 15
# n_val_steps = validation_generator.samples // 15
n_epochs = 3

# VGG16 featture extraction

In [6]:
conv_base_vgg16 = VGG16(include_top=False,
                     weights='imagenet', 
                     input_shape=input_shape)
conv_base_vgg16.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [6]:
from sklearn.preprocessing import LabelEncoder
import h5py

In [7]:
label = train_generator.labels
BATCH_SIZE = 15

In [10]:
features_vgg16 = conv_base_vgg16.predict(train_generator, batch_size=15)



In [11]:
print(type(features_vgg16))
features_vgg16.flatten()
print(features_vgg16.shape)
features_vgg16 = features_vgg16.reshape((features_vgg16.shape[0], 7 * 7 * 512))
print(features_vgg16.shape)

<class 'numpy.ndarray'>
(10015, 7, 7, 512)
(10015, 25088)


In [12]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

# Fit vào dữ liệu
scaler.fit(features_vgg16)

# Thực hiện transform scale
scale_features_vgg16 = scaler.transform(features_vgg16)

In [13]:
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
pca.fit(scale_features_vgg16)
pca_features_vgg16 = pca.transform(scale_features_vgg16)

In [18]:
hf = h5py.File('vgg16_data.h5', 'w')

In [19]:
hf.create_dataset('features', data=pca_features_vgg16, dtype="uint16", compression="gzip")
hf.create_dataset('label', data=label, compression="gzip")
hf.close()

In [21]:
hf = h5py.File('vgg16_data.h5', 'r')
hf.keys()
n1 = hf.get('features')
print(np.array(n1))
n2 = hf.get('label')
print(np.array(n2))
hf.close()

[[ 0  0]
 [ 0  2]
 [ 0 47]
 ...
 [ 1 50]
 [ 0  0]
 [ 0  0]]
[2 2 2 ... 0 0 5]


# VGG19 feature extraction

In [9]:
from keras.applications.vgg19 import VGG19, preprocess_input

In [10]:
conv_base_vgg19 = VGG19(include_top=False,
                weights='imagenet', 
                input_shape=input_shape)

In [11]:
features_vgg19 = conv_base_vgg19.predict(train_generator, batch_size=15)



In [12]:
print(type(features_vgg19))
features_vgg19.flatten()
print(features_vgg19.shape)
features_vgg19 = features_vgg19.reshape((features_vgg19.shape[0], 7 * 7 * 512))
print(features_vgg19.shape)

<class 'numpy.ndarray'>
(10015, 7, 7, 512)
(10015, 25088)


In [15]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Fit vào dữ liệu
scaler.fit(features_vgg19)
# Thực hiện transform scale
scale_features_vgg19 = scaler.transform(features_vgg19)

from sklearn.decomposition import PCA
pca = PCA(n_components=2)
pca.fit(scale_features_vgg19)
pca_features_vgg19 = pca.transform(scale_features_vgg19)

In [16]:
hf = h5py.File('vgg19_data.h5', 'w')
hf.create_dataset('features', data=pca_features_vgg19, dtype="uint16", compression="gzip")
hf.create_dataset('label', data=label, compression="gzip")
hf.close()

In [17]:
hf = h5py.File('vgg19_data.h5', 'r')
hf.keys()
n1 = hf.get('features')
print(np.array(n1))
n2 = hf.get('label')
print(np.array(n2))
hf.close()

[[ 0  0]
 [ 0  0]
 [ 2 22]
 ...
 [ 0 18]
 [ 0  7]
 [ 0  0]]
[2 2 2 ... 0 0 5]


# Resnet 50 feature extraction

In [18]:
model_resnet50 = tf.keras.applications.ResNet50(include_top=False,

                   input_shape=(180,180,3),

                   pooling='avg',classes=7,

                   weights='imagenet')

In [19]:
train_generator_1 = train_datagen.flow_from_dataframe(dataframe=dataset,
x_col='path',
y_col='cell_type',
batch_size=15, target_size=(180, 180), class_mode='categorical',subset = 'training')

Found 10015 validated image filenames belonging to 7 classes.


In [20]:
features_resnet50 = model_resnet50.predict(train_generator_1, batch_size=BATCH_SIZE)



In [21]:
print(type(features_resnet50))
features_resnet50.flatten()
print(features_resnet50.shape)
# features_resnet50 = features_resnet50.reshape((features_resnet50.shape[0], 7 * 7 * 512))
print(features_resnet50.shape)

<class 'numpy.ndarray'>
(10015, 2048)
(10015, 2048)


In [22]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Fit vào dữ liệu
scaler.fit(features_resnet50)
# Thực hiện transform scale
scale_features_resnet50 = scaler.transform(features_resnet50)

from sklearn.decomposition import PCA
pca = PCA(n_components=2)
pca.fit(scale_features_resnet50)
pca_features_resnet50 = pca.transform(scale_features_resnet50)

In [23]:
hf = h5py.File('resnet50_data.h5', 'w')
hf.create_dataset('features', data=pca_features_resnet50, dtype="uint16", compression="gzip")
hf.create_dataset('label', data=label, compression="gzip")
hf.close()

In [24]:
hf = h5py.File('resnet50_data.h5', 'r')
hf.keys()
n1 = hf.get('features')
print(np.array(n1))
n2 = hf.get('label')
print(np.array(n2))

[[7 0]
 [4 0]
 [0 0]
 ...
 [0 0]
 [0 5]
 [0 0]]
[2 2 2 ... 0 0 5]


# Alexnet feature extraction

In [25]:
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
import keras

In [26]:
def gen_model():
    # Defines &amp; compiles the model
    classifier=keras.models.Sequential()
    classifier.add(Conv2D(96, kernel_size=(11,11), strides= 4,
                            padding= 'valid', activation= 'relu',
                            input_shape= (227,227,3),
                            kernel_initializer= 'he_normal'))
    classifier.add(MaxPool2D(pool_size=(3,3), strides= (2,2),
                            padding= 'valid', data_format= None))

    classifier.add(Conv2D(256, kernel_size=(5,5), strides= 1,
                    padding= 'same', activation= 'relu',
                    kernel_initializer= 'he_normal'))
    classifier.add(MaxPool2D(pool_size=(3,3), strides= (2,2),
                            padding= 'valid', data_format= None)) 

    classifier.add(Conv2D(384, kernel_size=(3,3), strides= 1,
                    padding= 'same', activation= 'relu',
                    kernel_initializer= 'he_normal'))

    classifier.add(Conv2D(384, kernel_size=(3,3), strides= 1,
                    padding= 'same', activation= 'relu',
                    kernel_initializer= 'he_normal'))

    classifier.add(Conv2D(256, kernel_size=(3,3), strides= 1,
                    padding= 'same', activation= 'relu',
                    kernel_initializer= 'he_normal'))

    classifier.add(MaxPool2D(pool_size=(3,3), strides= (2,2),
                            padding= 'valid', data_format= None))
    return classifier

In [27]:
train_generator_2 = train_datagen.flow_from_dataframe(dataframe=dataset,
x_col='path',
y_col='cell_type',
batch_size=15, target_size=(227, 227), class_mode='categorical',subset = 'training')

Found 10015 validated image filenames belonging to 7 classes.


In [28]:
alexnet_model = gen_model()
features_alexnet = alexnet_model.predict(train_generator_2, batch_size=BATCH_SIZE)



In [31]:
print(type(features_alexnet))
features_alexnet.flatten()
print(features_alexnet.shape)
features_alexnet = features_alexnet.reshape((features_alexnet.shape[0], 6 * 6 * 256))
print(features_alexnet.shape)

<class 'numpy.ndarray'>
(10015, 6, 6, 256)
(10015, 9216)


In [32]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Fit vào dữ liệu
scaler.fit(features_alexnet)
# Thực hiện transform scale
scale_features_alexnet = scaler.transform(features_alexnet)

from sklearn.decomposition import PCA
pca = PCA(n_components=2)
pca.fit(scale_features_alexnet)
pca_features_alexnet = pca.transform(scale_features_alexnet)

In [33]:
hf = h5py.File('alexnet_data.h5', 'w')
hf.create_dataset('features', data=pca_features_alexnet, dtype="uint16", compression="gzip")
hf.create_dataset('label', data=label, compression="gzip")
hf.close()

In [34]:
hf = h5py.File('alexnet_data.h5', 'r')
hf.keys()
n1 = hf.get('features')
print(np.array(n1))
n2 = hf.get('label')
print(np.array(n2))

[[26  5]
 [48  0]
 [ 0  0]
 ...
 [ 0 22]
 [20  0]
 [ 0  0]]
[2 2 2 ... 0 0 5]
