<a href="https://colab.research.google.com/github/virgiawan/yog2a-melanoma/blob/master/melanoma_reizkian_ipnyb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Melanoma prediction on the Skin Cancer MNIST (HAM10000) dataset**


## Load and prepare data

The dataset is downloaded from Kaggle (Skin Cancer MNIST: HAM10000):
https://www.kaggle.com/kmader/skin-cancer-mnist-ham10000

In [0]:
import csv
import numpy as np
import tensorflow as tf
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [0]:
# Colab-only step: open the browser file picker so the Kaggle API token
# (kaggle.json) can be uploaded; the setup cell below copies it from the
# working directory into ~/.kaggle.
from google.colab import files
files.upload() # upload kaggle.json

In [0]:
# CODE: enabling import data from kaggle
# Install kaggle library 
!pip install -q kaggle
# Make ".kaggle" directory in root directory
!mkdir -p ~/.kaggle
# Copy the API token to the kaggle directory
!cp kaggle.json ~/.kaggle/
# Check the directory
!ls ~/.kaggle
# Adjust access permissions
!chmod 600 /root/.kaggle/kaggle.json

In [0]:
# Download the dataset archive (skin-cancer-mnist-ham10000.zip) with the
# Kaggle CLI; relies on the API token installed in ~/.kaggle by the cell above.
!kaggle datasets download -d kmader/skin-cancer-mnist-ham10000

In [0]:
# unzip data
!unzip -q skin-cancer-mnist-ham10000.zip -d .
!ls

In [0]:
# File names of the CSV tables read by the cells below.
meta_file = "HAM10000_metadata.csv"   # metadata table
bw_file = "hmnist_28_28_L.csv"        # 28x28 single-channel (grayscale) pixels
rgb_file = "hmnist_28_28_RGB.csv"     # 28x28 three-channel (RGB) pixels

In [0]:
# Grayscale data: extract pixels and labels from the CSV in a single pass.
# Each data row is read as 784 pixel values (reshaped to one 28x28 image)
# followed by the class label in column 784.
with open(bw_file, newline='') as opened_file:
  reader = csv.reader(opened_file)
  next(reader, None)  # skip the header row (no-op on an empty file)
  bw_rows = list(reader)

# `np.int` was only an alias of the builtin `int` and has been removed from
# NumPy, so the builtin is used directly (same resulting dtype).
bw_images_dataset = np.array(
    [row[0:784] for row in bw_rows], dtype=int
).reshape(-1, 28, 28)
bw_labels_dataset = np.array([row[784] for row in bw_rows], dtype=int)
bw_labels = bw_labels_dataset

# One image and one label per CSV row
assert bw_images_dataset.shape[0] == bw_labels.shape[0]

# Expected: (number of images, 28, 28) and (number of images,)
print(bw_images_dataset.shape)
print(bw_labels.shape)

In [0]:
# RGB data: extract pixels and labels from the CSV in a single pass.
# Each data row is read as 2352 pixel values followed by the class label.
with open(rgb_file, newline='') as opened_file:
  reader = csv.reader(opened_file)
  next(reader, None)  # skip the header row (no-op on an empty file)
  rgb_rows = list(reader)

# `np.int` has been removed from NumPy; the builtin `int` is equivalent.
rgb_pixels = np.array(
    [row[0:2352] for row in rgb_rows], dtype=int
).reshape(-1, 2352)

# Columns 0-783 are taken as red, 784-1567 as green and 1568-2351 as blue.
# The previous version built all three channels from the red slice, so
# green and blue were silently discarded.
# NOTE(review): this keeps the notebook's assumption that a row is stored
# channel-planar (all R, then all G, then all B). If the CSV is instead a
# flattened (28, 28, 3) image with interleaved channels, replace the two
# steps below with `rgb_pixels.reshape(-1, 28, 28, 3)` -- confirm against
# the dataset.
rgb_planes = rgb_pixels.reshape(-1, 3, 28, 28)          # (N, channel, row, col)
rgb_images_dataset = rgb_planes.transpose(0, 2, 3, 1)   # (N, row, col, channel)

# The label is the last column. After 2352 pixel columns that is index 2352;
# the previous hard-coded index 2353 is one past it.
rgb_labels_dataset = np.array([row[-1] for row in rgb_rows], dtype=int)
rgb_labels = rgb_labels_dataset

# One image and one label per CSV row
assert rgb_images_dataset.shape[0] == rgb_labels.shape[0]

# Expected: (number of images, 28, 28, 3) and (number of images,)
print(rgb_images_dataset.shape)
print(rgb_labels.shape)

In [0]:
# Split the grayscale dataset into a leading training part and a trailing
# test part.
train_fraction = 0.7  # fraction of the whole dataset used for training

# Index of the first test sample
bw_split_index = int(bw_images_dataset.shape[0] * train_fraction)

# Train dataset (grayscale)
trainImagesBW = bw_images_dataset[:bw_split_index]
trainLabelsBW = bw_labels_dataset[:bw_split_index]
# Test dataset (grayscale)
testImagesBW = bw_images_dataset[bw_split_index:]
testLabelsBW = bw_labels_dataset[bw_split_index:]

# NOTE(review): rows are split in file order without shuffling -- confirm the
# CSV is not sorted by label, otherwise the two parts have different class mixes.

# Print the variables defined above; the previous version printed
# `trainImages`/`trainLabels`/`testImages`/`testLabels`, which do not exist
# at this point and raise NameError on a fresh kernel.
print(trainImagesBW.shape)
print(trainLabelsBW.shape)
print(testImagesBW.shape)
print(testLabelsBW.shape)

In [0]:
# Split the RGB dataset: the leading `train_fraction` of the rows is used for
# training, the remainder for testing.
train_fraction = 0.7

# Cut positions, each computed from the length of its own array
rgb_image_cut = int(len(rgb_images_dataset) * train_fraction)
rgb_label_cut = int(len(rgb_labels_dataset) * train_fraction)

# Train / test images (RGB)
trainImagesRGB, testImagesRGB = (
    rgb_images_dataset[:rgb_image_cut],
    rgb_images_dataset[rgb_image_cut:],
)
# Train / test labels (RGB)
trainLabelsRGB, testLabelsRGB = (
    rgb_labels_dataset[:rgb_label_cut],
    rgb_labels_dataset[rgb_label_cut:],
)

# Report the shape of every part
for rgb_part in (trainImagesRGB, trainLabelsRGB, testImagesRGB, testLabelsRGB):
    print(rgb_part.shape)

In [0]:
# Add a trailing channel axis to the grayscale images for image preprocessing:
# (N, 28, 28) -> (N, 28, 28, 1).
# Reads from trainImagesBW/testImagesBW (the names produced by the grayscale
# split cell); the previous version read `trainImages`/`testImages` before they
# were ever defined. Writing to separate names also keeps the cell idempotent.
trainImages = np.expand_dims(trainImagesBW, axis=3)
testImages = np.expand_dims(testImagesBW, axis=3)
print(trainImages.shape)
print(testImages.shape)

## Images preprocessing

In [0]:
# Image generators: the training generator rescales pixel values by 1/255 and
# applies random augmentation; the validation generator only rescales.
augmentation_options = dict(
    rotation_range=40,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)

validation_datagen = ImageDataGenerator(rescale=1./255)

## Build Model

In [0]:
from tensorflow.keras.callbacks import Callback, EarlyStopping

class myCallback(Callback):
    """Stop training as soon as the training accuracy exceeds 95%."""

    def on_epoch_end(self, epoch, logs=None):
        """Check the 'accuracy' entry of `logs` at the end of each epoch.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Mapping of metric names to values for this epoch; may be
                None or lack an 'accuracy' entry, in which case nothing happens.
        """
        # `logs=None` avoids a shared mutable default; a missing metric no
        # longer raises TypeError from comparing None with a float.
        accuracy = (logs or {}).get('accuracy')
        if accuracy is not None and accuracy > 0.95:
            print("\nReached 95% accuracy so cancelling training!")
            self.model.stop_training = True

# callbacks = myCallback()
# Stop when the validation loss has not improved for 3 consecutive epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=3)

In [0]:
# Small CNN for 28x28 RGB inputs: two conv/max-pool stages followed by a stack
# of dense layers that ends in a 7-way softmax.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(64, (2,2), activation='relu', input_shape=(28, 28, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(64, (2,2), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(7, activation='softmax')
    ])

# Compile the model; the sparse loss takes the integer labels directly.
model.compile(loss = 'sparse_categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy']
             )

# Train the model on the RGB data.
# Validation samples go through `validation_datagen` so they get the same
# 1/255 rescaling as the training samples. Passing the raw 0-255 arrays, as
# before, evaluated the model on a different input scale and made the
# `val_loss` watched by early stopping unreliable.
# `Model.fit` accepts these iterators directly; `fit_generator` is deprecated.
history = model.fit(
            train_datagen.flow(trainImagesRGB, trainLabelsRGB, batch_size=20),
            validation_data = validation_datagen.flow(
                testImagesRGB, testLabelsRGB, batch_size=20, shuffle=False),
            epochs = 50,
            verbose=2,
            callbacks=[early_stopping])

In [0]:
# Plot the chart for accuracy and loss on both training and validation
%matplotlib inline
import matplotlib.pyplot as plt
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()

plt.plot(epochs, loss, 'r', label='Training Loss')
plt.plot(epochs, val_loss, 'b', label='Validation Loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()