In [18]:
import tensorflow as tf 
import numpy as np
import pandas as pd
import datetime, os
%load_ext tensorboard


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [19]:
# from google.colab import drive
# drive.mount('/content/drive', force_remount=True)

In [20]:
data = pd.read_csv("/Users/xxx/Documents/Jedha/final_project/Data/HAM10000_metadata.csv")
data.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear


In [21]:
data.shape

(10015, 7)

In [22]:
data["image_id"] = data["image_id"].astype("str") + ".jpg"
data.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
0,HAM_0000118,ISIC_0027419.jpg,bkl,histo,80.0,male,scalp
1,HAM_0000118,ISIC_0025030.jpg,bkl,histo,80.0,male,scalp
2,HAM_0002730,ISIC_0026769.jpg,bkl,histo,80.0,male,scalp
3,HAM_0002730,ISIC_0025661.jpg,bkl,histo,80.0,male,scalp
4,HAM_0001466,ISIC_0031633.jpg,bkl,histo,75.0,male,ear


In [23]:
#Rows suffle (because the values in the target column "dx" are grouped by value, it creates a bias)
data = data.sample(frac=1,random_state=0).reset_index(drop=True)

In [24]:
data.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
0,HAM_0002695,ISIC_0028664.jpg,nv,follow_up,45.0,male,back
1,HAM_0000370,ISIC_0025998.jpg,nv,follow_up,70.0,male,trunk
2,HAM_0006372,ISIC_0032817.jpg,nv,histo,35.0,male,back
3,HAM_0006835,ISIC_0026577.jpg,nv,histo,75.0,male,chest
4,HAM_0005536,ISIC_0026798.jpg,bcc,histo,45.0,male,lower extremity


In [65]:
diagnostics = data.dx.unique()

In [72]:
diagnostics

array(['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc'], dtype=object)

In [70]:
sorted = diagnostics.sort()

In [71]:
print(sorted)

None


In [25]:
data.shape

(10015, 7)

In [26]:
#Check of missing values
print(100*data.isna().sum()/data.shape[0])

lesion_id       0.000000
image_id        0.000000
dx              0.000000
dx_type         0.000000
age             0.569146
sex             0.000000
localization    0.000000
dtype: float64


In [27]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
img_generator = ImageDataGenerator(
    rotation_range=180,
    horizontal_flip=True,
    vertical_flip=True, 
    rescale=1/255., 
    validation_split=0.3
    )

In [28]:
train_generator = img_generator.flow_from_dataframe(
    dataframe=data,
    directory="/Users/xxx/Documents/Jedha/final_project/Data/HAM10000_images",
    x_col="image_id", 
    y_col="dx",
    target_size=(128,128),
    class_mode = "sparse", 
    batch_size=200, 
    shuffle = True,
    seed=0,
    subset = "training"
)

val_generator = img_generator.flow_from_dataframe(
    dataframe=data,
    directory="/Users/xxx/Documents/Jedha/final_project/Data/HAM10000_images",
    x_col="image_id",
    y_col="dx",
    target_size=(128,128),
    class_mode = "sparse", 
    batch_size=200, 
    shuffle = True,
    seed=0,
    subset = "validation"
)

Found 7011 validated image filenames belonging to 7 classes.
Found 3004 validated image filenames belonging to 7 classes.


In [29]:
imgs, targets = next(iter(train_generator))

In [30]:
imgs.shape

(200, 128, 128, 3)

In [31]:
from tensorflow.keras.layers import Conv2D, MaxPool2D

modelconv = tf.keras.Sequential(
    [   
        Conv2D(
            filters = 32, 
            kernel_size = (3,3), 
            strides = 1, 
            padding = "same",
            activation = "relu", 
            input_shape = (imgs.shape[1], imgs.shape[2], imgs.shape[3])
        ), # the input shape (height, width, channels)
     MaxPool2D(
         pool_size=2, # the size of the pooling window
         strides=2
     ), # the movement of the pooling on the input
     Conv2D(
         filters = 64, 
         kernel_size = (3,3), 
         strides = 1, 
         padding = "same",
         activation = "relu"
     ),
     MaxPool2D(2,2),
     Conv2D(
         filters = 128, 
         kernel_size = (3,3), 
         strides = 1, 
         padding = "same",
         activation = "relu"
     ),
     MaxPool2D(2,2),
     Conv2D(
         filters = 256, 
         kernel_size = (3,3), 
         strides = 1, 
         padding = "same",
         activation = "relu"
     ),
     MaxPool2D(2,2),
     tf.keras.layers.Flatten(), # this layer turns multi-dimensional images into flat objects
     tf.keras.layers.Dense(len(pd.unique(data['dx'])),"softmax")
     
    ]
)

In [32]:
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.metrics import SparseCategoricalAccuracy
from tensorflow.keras.optimizers import Adam

modelconv.compile(optimizer=Adam(),
              loss=SparseCategoricalCrossentropy(),
              metrics=[SparseCategoricalAccuracy()])

In [33]:
modelconv.fit(train_generator, epochs = 2, validation_data=val_generator)

Epoch 1/2


2021-11-18 17:43:35.282083: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.




2021-11-18 17:44:26.134235: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/2


<keras.callbacks.History at 0x17b062bb0>

In [34]:
#tf.keras.models.save_model(modelconv,'skin_model.hdf5')

In [42]:
img = Image.open(img_path)
img = img.resize((12,image_size))
img = np.array(img)
img = img / 255.0
img = img.reshape(1,image_size,image_size,3)
m.predict_classes(img)

In [46]:
from PIL import Image

In [54]:
img = Image.open("/Users/xxx/Desktop/ISIC_0024347.jpg")
img = img.resize((128,128))
img = np.array(img)
img = img / 255.0
img = img.reshape(1,128,128,3)
modelconv.predict(img)

2021-11-18 21:16:22.399770: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


array([[0.00657241, 0.00373647, 0.01245109, 0.00157609, 0.08168465,
        0.8885637 , 0.00541566]], dtype=float32)

In [57]:
predict = modelconv.predict(img)

In [58]:
predict

array([[0.00657241, 0.00373647, 0.01245109, 0.00157609, 0.08168465,
        0.8885637 , 0.00541566]], dtype=float32)

In [61]:
(modelconv.predict(img) > 0.5).astype("int32")

array([[0, 0, 0, 0, 0, 1, 0]], dtype=int32)

In [59]:

np.argmax(predict,axis=1)

array([5])

In [62]:
modelconv.predict_classes(img)

AttributeError: 'Sequential' object has no attribute 'predict_classes'