In [1]:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras import optimizers
from keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D
from keras.applications import VGG16
import os
import subprocess as sbp
from google.colab import files
from zipfile import ZipFile
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras_preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint

In [2]:
!pip install kaggle



In [3]:
files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"risingsun20","key":"e28e911d6662461fad8a0914a14d1497"}'}

In [4]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

#change permission
!chmod 600 ~/.kaggle/kaggle.json

In [5]:
!kaggle datasets download -d kmader/skin-cancer-mnist-ham10000

Downloading skin-cancer-mnist-ham10000.zip to /content
100% 5.20G/5.20G [01:48<00:00, 44.6MB/s]
100% 5.20G/5.20G [01:48<00:00, 51.3MB/s]


In [6]:
file_name = "skin-cancer-mnist-ham10000.zip"

with ZipFile(file_name, 'r') as zip1:
  zip1.extractall()
  print("Done")

Done


In [7]:
path='/content/ham10000_images_part_1'
fol = os.listdir(path)
p2 = '/content/ham10000_images_part_2'

for i in fol:
    p1 = os.path.join(path,i)
    p3 = 'cp -r ' + p1 +' ' + p2+'/.'
    sbp.Popen(p3,shell=True)

In [8]:
data=pd.read_csv("HAM10000_metadata.csv")
data['image_name']=data['image_id']+'.jpg'
X=data[['image_name','dx','lesion_id']]

In [9]:
Y=X.pop('dx').to_frame()
X_train, X_test, y_train, y_test   = train_test_split(X,Y, test_size=0.2, random_state=42)
X_train,X_val,y_train,y_val        = train_test_split(X_train, y_train, test_size=0.25, random_state=42)

In [10]:
train=pd.concat([X_train,y_train],axis=1)
val=pd.concat([X_val,y_val],axis=1)
test=pd.concat([X_test,y_test],axis=1)

In [11]:
evaluater= LabelEncoder()
evaluater.fit(val['dx'])
name_as_indexes_train=evaluater.transform(val['dx']) 
val['label']=name_as_indexes_train

In [12]:
evaluater=LabelEncoder()
evaluater.fit(test['dx'])
name_as_indexes_test=evaluater.transform(test['dx']) 
test['label']=name_as_indexes_test

In [13]:
train_generator = ImageDataGenerator(rescale = 1./255,
                                     rotation_range=10,  
                                     zoom_range = 0.1, 
                                     width_shift_range=0.1,  height_shift_range=0.1) 

train_data= train_generator.flow_from_dataframe(dataframe=train,x_col="image_name",y_col="dx",
                                                batch_size=64,directory="/content/ham10000_images_part_2",
                                                shuffle=True,class_mode="categorical",target_size=(224,224))

Found 6009 validated image filenames belonging to 7 classes.


In [14]:
test_generator=ImageDataGenerator(rescale = 1./255)

test_data= test_generator.flow_from_dataframe(dataframe=test,x_col="image_name",y_col="dx",
                                              directory="/content/ham10000_images_part_2",
                                              shuffle=False,batch_size=1,class_mode=None,target_size=(224,224))

Found 2003 validated image filenames.


In [15]:
val_data=test_generator.flow_from_dataframe(dataframe=val,x_col="image_name",y_col="dx",
                                            directory="/content/ham10000_images_part_2",
                                            batch_size=64,shuffle=False,class_mode="categorical",target_size=(224,224))

Found 2003 validated image filenames belonging to 7 classes.


In [16]:
learningrate = ReduceLROnPlateau(monitor='val_loss', patience=3, verbose=1, factor=.5, min_lr=0.0001)

In [17]:
checkpoints = ModelCheckpoint('/content/best.h5', verbose=1,save_best_only=True)

In [18]:
vgg_model= VGG16(include_top=False, weights="imagenet", input_shape=(224,224,3))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [19]:
model= Sequential()
model.add(vgg_model)
model.add(Conv2D(64, (3, 3), activation = 'relu'))
model.add(Dropout(0.40))
model.add(Conv2D(64, (3, 3), activation = 'relu'))
model.add(MaxPooling2D(pool_size = (2, 2)))
model.add(Dropout(0.40))
model.add(Flatten())
model.add(Dense(512,activation='relu'))
model.add(Dropout(0.4))
model.add(Dense(7, activation='softmax'))

In [20]:
model.compile(optimizer=optimizers.Adam(lr=0.0001),loss="categorical_crossentropy",metrics=["accuracy"])
model.fit_generator(generator=train_data,
                            steps_per_epoch=train_data.samples//train_data.batch_size,
                            validation_data=val_data,
                            verbose=1,
                            validation_steps=val_data.samples//val_data.batch_size,
                            epochs=100,callbacks=[learningrate,checkpoints])

Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/100
Epoch 00001: val_loss improved from inf to 0.98058, saving model to /content/best.h5
Epoch 2/100
Epoch 00002: val_loss improved from 0.98058 to 0.87493, saving model to /content/best.h5
Epoch 3/100
Epoch 00003: val_loss improved from 0.87493 to 0.86591, saving model to /content/best.h5
Epoch 4/100
Epoch 00004: val_loss improved from 0.86591 to 0.81022, saving model to /content/best.h5
Epoch 5/100
Epoch 00005: val_loss improved from 0.81022 to 0.80019, saving model to /content/best.h5
Epoch 6/100
Epoch 00006: val_loss improved from 0.80019 to 0.76362, saving model to /content/best.h5
Epoch 7/100
Epoch 00007: val_loss did not improve from 0.76362
Epoch 8/100
Epoch 00008: val_loss improved from 0.76362 to 0.74416, saving model to /content/best.h5
Epoch 9/100
Epoch 00009: val_loss improved from 0.74416 to 0.70559, saving model to /content/best.h5
Epoch 10/100
Epoch 00010: val_loss improved from 0.70559 

<tensorflow.python.keras.callbacks.History at 0x7f100034b0f0>

In [21]:
model.save("skin_model.h5")

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: model_e100/assets


In [22]:
test_data.reset()
predictions = model.predict_generator(test_data, steps=test_data.samples/test_data.batch_size,verbose=1)
y_pred= np.argmax(predictions, axis=1)

Instructions for updating:
Please use Model.predict, which supports generators.
