# SKIN CANCER PREDICTION WITH IMAGES USING HAM10000

This tutorial is based on a self-modified version of the HAM10000 dataset; please adapt the code to your specific needs.

In [None]:
# Install the libraries this notebook imports (TensorFlow, pandas, matplotlib,
# scikit-learn, OpenCV) together with tensorflowjs.
# NOTE(review): versions are unpinned, so a later run may pull different
# releases — consider pinning them.
!pip install tensorflow tensorflowjs pandas matplotlib scikit-learn opencv-python

In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from google.colab import drive
import cv2

In [None]:
# Mount Google Drive so the archives and CSV files under MyDrive/HAM10000 can be read.
drive.mount('/content/drive')

# Extract both training archives into ./HAM10000_images and the test archive
# into ./ISIC2018_Test_Images (-q keeps unzip from listing every file).
!unzip -q '/content/drive/MyDrive/HAM10000/HAM10000_images_part_1.zip' -d ./HAM10000_images
!unzip -q '/content/drive/MyDrive/HAM10000/HAM10000_images_part_2.zip' -d ./HAM10000_images
!unzip -q '/content/drive/MyDrive/HAM10000/ISIC2018_Task3_Test_Images.zip' -d ./ISIC2018_Test_Images

# Folders the image archives are extracted into.
image_dir = './HAM10000_images'
test_image_dir = './ISIC2018_Test_Images'

# Label tables: one row per image, with the diagnosis in the `dx` column.
metadata = pd.read_csv('/content/drive/MyDrive/HAM10000/HAM10000_metadata.csv')
test_metadata = pd.read_csv('/content/drive/MyDrive/HAM10000/ISIC2018_Task3_Test_GroundTruth.csv')

# Derive every image's location on disk from its id: <folder>/<image_id>.jpg
metadata['image_path'] = [
    os.path.join(image_dir, f'{image_id}.jpg')
    for image_id in metadata['image_id']
]
test_metadata['image_path'] = [
    os.path.join(test_image_dir, f'{image_id}.jpg')
    for image_id in test_metadata['image_id']
]

# Quick look at both tables to confirm the new column.
print(metadata.head())
print(test_metadata.head())

Mounted at /content/drive
     lesion_id      image_id   dx dx_type   age   sex localization  \
0  HAM_0000118  ISIC_0027419  bkl   histo  80.0  male        scalp   
1  HAM_0000118  ISIC_0025030  bkl   histo  80.0  male        scalp   
2  HAM_0002730  ISIC_0026769  bkl   histo  80.0  male        scalp   
3  HAM_0002730  ISIC_0025661  bkl   histo  80.0  male        scalp   
4  HAM_0001466  ISIC_0031633  bkl   histo  75.0  male          ear   

        dataset                          image_path  
0  vidir_modern  ./HAM10000_images/ISIC_0027419.jpg  
1  vidir_modern  ./HAM10000_images/ISIC_0025030.jpg  
2  vidir_modern  ./HAM10000_images/ISIC_0026769.jpg  
3  vidir_modern  ./HAM10000_images/ISIC_0025661.jpg  
4  vidir_modern  ./HAM10000_images/ISIC_0031633.jpg  
         lesion_id      image_id   dx    dx_type   age     sex localization  \
0  HAMTEST_0000000  ISIC_0034524   nv  follow_up  40.0  female         back   
1  HAMTEST_0000001  ISIC_0034525   nv      histo  70.0    male      abd

In [None]:
# Move everything inside the nested HAM10000_images_part_1 folder up into
# /content/HAM10000_images, which is where the image_path column points
# (assuming the notebook's working directory is /content — TODO confirm).
# NOTE(review): once the files have been moved, re-running this cell has
# nothing left to move; also confirm whether part_2 needs the same step.
!mv /content/HAM10000_images/HAM10000_images_part_1/* /content/HAM10000_images/

In [None]:
# Check that the sample image is where the later cells expect it. Running
# `cat` on a JPEG only dumps raw binary bytes into the cell output, so report
# the file's presence instead.
sample_image_path = '/content/HAM10000_images/Normal_Img_1.jpg'
print(f"{sample_image_path} exists: {os.path.exists(sample_image_path)}")

In [None]:
IMAGE_SIZE = 128
BATCH_SIZE = 64
VALIDATION_SPLIT = 0.2
SEED = 42

# flow_from_dataframe carves out the validation subset by row position, not at
# random. Shuffle the rows first (with a fixed seed, so both generators below
# see the same order) so that the split is not dominated by whatever diagnosis
# happens to come first in the CSV.
metadata_shuffled = metadata.sample(frac=1.0, random_state=SEED).reset_index(drop=True)

# Training images are rescaled to [0, 1] and randomly augmented.
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=VALIDATION_SPLIT,
    horizontal_flip=True,
    vertical_flip=True,
    zoom_range=0.2,
    shear_range=0.2,
    rotation_range=20
)

# Validation images get the same rescaling and the same split fraction but no
# random transforms, so validation metrics are measured on unaltered images.
val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=VALIDATION_SPLIT
)

train_generator = datagen.flow_from_dataframe(
    dataframe=metadata_shuffled,
    x_col='image_path',
    y_col='dx',
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training',
    seed=SEED
)

val_generator = val_datagen.flow_from_dataframe(
    dataframe=metadata_shuffled,
    x_col='image_path',
    y_col='dx',
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation',
    shuffle=False
)

test_datagen = ImageDataGenerator(rescale=1./255)

# Reuse the training label order so that a one-hot index means the same
# diagnosis in the test set even if the test CSV lacks one of the classes.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_metadata,
    x_col='image_path',
    y_col='dx',
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    classes=list(train_generator.class_indices),
    shuffle=False
)

In [None]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model

EPOCHS = 14

# Size the output layer from the labels the generator actually found instead of
# hard-coding it, so the model matches the dataset whatever its class count is.
num_classes = len(train_generator.class_indices)

# The data generators (and the prediction code) feed pixels scaled to [0, 1].
# Keras' EfficientNet already contains its own input preprocessing and expects
# raw [0, 255] pixel values, so scale the inputs back up inside the model.
inputs = tf.keras.Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
x = tf.keras.layers.Rescaling(255.0)(inputs)

# ImageNet-pretrained backbone, frozen so that only the new head is trained.
base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
base_model.trainable = False
x = base_model(x, training=False)

# Classification head: pooled features -> hidden layer -> class probabilities.
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)
predictions = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=inputs, outputs=predictions)

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=EPOCHS
)

model.save('skin_cancer_model.h5')

In [None]:
import json

# Persist the label -> index mapping used during training so that predictions
# can be translated back into diagnosis codes later on.
class_indices = train_generator.class_indices
serialized_indices = json.dumps(class_indices)
with open('class_indices.json', 'w') as json_file:
    json_file.write(serialized_indices)

# Prediction

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import json
from tensorflow.keras.preprocessing import image as keras_image
from tensorflow.keras.models import load_model

# Must match the input size the model was trained with.
IMAGE_SIZE = 128

# Load the model written by the training cell. The notebook only ever saves
# 'skin_cancer_model.h5', so that is the file to read back.
model = load_model('skin_cancer_model.h5')

# Label -> output-index mapping written next to the model at training time.
with open('class_indices.json', 'r') as json_file:
    class_indices = json.load(json_file)

# Human-readable description for every diagnosis code the model can output.
cancer_descriptions = {
    'akiec': 'Actinic keratoses and intraepithelial carcinoma / Bowen\'s disease',
    'bcc': 'Basal cell carcinoma',
    'bkl': 'Benign keratosis-like lesions (solar lentigines / seborrheic keratoses and lichen-planus like keratoses)',
    'df': 'Dermatofibroma',
    'mel': 'Melanoma',
    'nv': 'Melanocytic nevi',
    'vasc': 'Vascular lesions (angiomas, angiokeratomas, pyogenic granulomas, and hemorrhage)',
    'healthy': 'This is healthy skin'
}

def generate_grid_image(image_array, grid_size=30):
    """Summarise an image as a grid of mean cell intensities.

    The first two axes of ``image_array`` are divided into
    ``grid_size`` x ``grid_size`` equally sized cells (using integer division,
    so trailing rows/columns that do not fill a whole cell are ignored), and
    each cell is reduced to the mean of all values it contains.

    Args:
        image_array: Array with at least two dimensions.
        grid_size: Number of cells along each of the first two axes.

    Returns:
        A ``(grid_size, grid_size)`` float array of per-cell means.
    """
    cell_means = np.zeros((grid_size, grid_size))
    cell_height = image_array.shape[0] // grid_size
    cell_width = image_array.shape[1] // grid_size

    for row, col in np.ndindex(grid_size, grid_size):
        top = row * cell_height
        left = col * cell_width
        cell = image_array[top:top + cell_height, left:left + cell_width]
        cell_means[row, col] = np.mean(cell)

    return cell_means

def generate_numerical_grid_image(image_array, threshold=0.5, grid_size=30):
    """Build a binary mask marking the grid cells whose mean reaches a threshold.

    The first two axes of ``image_array`` are divided into
    ``grid_size`` x ``grid_size`` equally sized cells (integer division, so a
    remainder at the far edges is ignored). A cell is marked 1 when the mean of
    its values is greater than or equal to ``threshold`` and 0 otherwise.

    Args:
        image_array: Array with at least two dimensions.
        threshold: Minimum mean value for a cell to be marked.
        grid_size: Number of cells along each of the first two axes.

    Returns:
        A ``(grid_size, grid_size)`` float array containing only 0s and 1s.
    """
    mask = np.zeros((grid_size, grid_size))
    cell_height = image_array.shape[0] // grid_size
    cell_width = image_array.shape[1] // grid_size

    for row in range(grid_size):
        rows = slice(row * cell_height, (row + 1) * cell_height)
        for col in range(grid_size):
            cols = slice(col * cell_width, (col + 1) * cell_width)
            # Cells start at 0, so only the ones that pass need to be written.
            if np.mean(image_array[rows, cols]) >= threshold:
                mask[row, col] = 1

    return mask

def generate_bounding_box(image_array, threshold=0.5, brightness_cutoff=200):
    """Draw rectangles around the bright regions of an RGB image.

    The image is converted to grayscale, pixels brighter than
    ``brightness_cutoff`` are kept, and a bounding rectangle is drawn around
    every external contour of the resulting mask. The input array itself is not
    drawn on; the rectangles go onto a converted copy.

    Args:
        image_array: RGB image with 0-255 pixel values (the grayscale cutoff
            is on that scale; presumably uint8 — confirm against callers).
        threshold: Minimum mean brightness of the boxed region, on a 0-1
            scale, for the rectangle to be drawn in green. Regions below it
            are outlined in black.
        brightness_cutoff: Grayscale value (0-255) a pixel must exceed to count
            as part of a bright region.

    Returns:
        An RGB copy of the image with the rectangles drawn on it.
    """
    img = cv2.cvtColor(image_array, cv2.COLOR_RGB2BGR)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, brightness_cutoff, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        # Pixel values are on a 0-255 scale while `threshold` is a 0-1
        # fraction, so normalise before comparing; comparing the raw mean
        # against 0.5 would accept practically every region.
        region_brightness = np.mean(image_array[y:y+h, x:x+w]) / 255.0
        confidence = 1 if region_brightness >= threshold else 0
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255 * confidence, 0), 2)
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

def generate_confidence_grid(predictions, grid_size):
    """Lay the first row of ``predictions`` out on a square grid.

    The values ``predictions[0, k]`` are written into the grid in row-major
    order (cell ``(i, j)`` receives index ``i * grid_size + j``). Cells beyond
    the number of available values stay 0, and values that do not fit into the
    grid are dropped.

    Args:
        predictions: 2-D array; only its first row is used.
        grid_size: Number of rows and columns of the output grid.

    Returns:
        A ``(grid_size, grid_size)`` float array.
    """
    confidence_grid = np.zeros((grid_size, grid_size))
    # Flat view onto the grid: writing into it fills the grid row by row.
    flat_cells = confidence_grid.reshape(-1)
    if flat_cells.size:
        usable = min(flat_cells.size, predictions.shape[1])
        flat_cells[:usable] = predictions[0, :usable]
    return confidence_grid

def predict_cancer(image_path):
    """Classify a skin image and build the visual summaries shown next to it.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        A 7-tuple ``(predicted_label, confidence, grid_image,
        numerical_grid_image, bounding_box_image, confidence_grid, image)``.
        When the predicted label is ``'healthy'`` the four visual summaries are
        ``None`` and ``image`` is the normalised input batch that was fed to
        the model; otherwise ``image`` is the resized image as read by OpenCV
        (BGR channel order).

    Raises:
        ValueError: If OpenCV cannot read ``image_path``.
    """
    # Model input: resized, scaled to [0, 1], with a leading batch axis.
    img = keras_image.load_img(image_path, target_size=(IMAGE_SIZE, IMAGE_SIZE))
    img_array = keras_image.img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)

    predictions = model.predict(img_array)
    predicted_index = int(np.argmax(predictions, axis=1)[0])
    # Look the label up by its stored index rather than by key position, so the
    # result does not depend on the ordering of the JSON mapping.
    index_to_label = {index: label for label, index in class_indices.items()}
    predicted_label = index_to_label[predicted_index]
    confidence = predictions[0][predicted_index]

    if predicted_label == 'healthy':
        return predicted_label, confidence, None, None, None, None, img_array

    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError(f"OpenCV could not read image: {image_path}")
    img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))

    grid_image = generate_grid_image(img_array[0])
    numerical_grid_image = generate_numerical_grid_image(img_array[0])
    # cv2.imread yields BGR, while generate_bounding_box converts from and back
    # to RGB; hand it an RGB copy so the colours of its result are not swapped.
    bounding_box_image = generate_bounding_box(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    confidence_grid = generate_confidence_grid(predictions, grid_size=numerical_grid_image.shape[0])

    return predicted_label, confidence, grid_image, numerical_grid_image, bounding_box_image, confidence_grid, img

image_path = '/content/ISIC_0034065.jpg'
predicted_label, confidence, grid_image, numerical_grid_image, bounding_box_image, confidence_grid, original_image_array = predict_cancer(image_path)

print(f"Predicted cancer type: {predicted_label}")
print(f"Confidence: {confidence}")
print(f"Description: {cancer_descriptions[predicted_label]}")

# predict_cancer returns None for the visual summaries when the image is
# classified as healthy (its last return value is never None), so the grid is
# the value to test before plotting.
if grid_image is not None:
    fig, axs = plt.subplots(1, 4, figsize=(20, 20))
    axs[0].imshow(grid_image, cmap='hot', interpolation='nearest')
    axs[0].set_title('30x30 Grid with Markings')
    axs[1].imshow(numerical_grid_image, cmap='binary', interpolation='nearest')
    axs[1].set_title('30x30 Grid Mask')
    # The image comes from OpenCV in BGR order; matplotlib expects RGB.
    axs[2].imshow(cv2.cvtColor(original_image_array, cv2.COLOR_BGR2RGB))
    axs[2].set_title('Original Image')
    axs[3].imshow(bounding_box_image)
    axs[3].set_title('Bounding Marking Area')

    plt.show()