In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator,load_img, img_to_array
from tensorflow.keras.optimizers import Adam

import numpy as np
import os
import pandas as pd

from sklearn.model_selection import train_test_split

In [2]:
# Read the metadata table (one row per image) into a DataFrame.
data = pd.read_csv(
    'dataset/train-metadata.csv',
)

# Plain-text preview of the first five rows to sanity-check the columns.
print(data.head(n=5))

        isic_id  target  patient_id  age_approx   sex anatom_site_general  \
0  ISIC_0015670       0  IP_1235828        60.0  male     lower extremity   
1  ISIC_0015845       0  IP_8170065        60.0  male           head/neck   
2  ISIC_0015864       0  IP_6724798        60.0  male     posterior torso   
3  ISIC_0015902       0  IP_4111386        65.0  male      anterior torso   
4  ISIC_0024200       0  IP_8313778        55.0  male      anterior torso   

   clin_size_long_diam_mm          image_type tbp_tile_type   tbp_lv_A  ...  \
0                    3.04  TBP tile: close-up     3D: white  20.244422  ...   
1                    1.10  TBP tile: close-up     3D: white  31.712570  ...   
2                    3.40  TBP tile: close-up        3D: XP  22.575830  ...   
3                    3.22  TBP tile: close-up        3D: XP  14.242329  ...   
4                    2.73  TBP tile: close-up     3D: white  24.725520  ...   

    lesion_id  iddx_full  iddx_1  iddx_2  iddx_3  iddx_4  iddx

In [3]:
# Two-stage split with a fixed seed:
#   1) hold out 30% of the rows (temp_data); the remaining 70% is train_data
#   2) cut the hold-out into validation (2/3 of it) and test (1/3 of it)
# which gives roughly 70% / 20% / 10% of the rows overall.
train_data, temp_data = train_test_split(
    data,
    test_size=0.3,
    random_state=42,
)
val_data, test_data = train_test_split(
    temp_data,
    test_size=(1/3),
    random_state=42,
)

In [4]:
# Number of samples per batch when the datasets are batched.
batch_size = 32
# Directory the image paths are built from (trailing slash kept as-is).
image_dir = 'dataset/train-images/'

In [5]:
def load_images_and_labels(img_path, label):
    """Load one image from disk and pair it with its label.

    The file at ``img_path`` is read, decoded as a 3-channel JPEG, resized
    to 224x224 and divided by 255.0 so 8-bit pixel values land in [0, 1].

    Args:
        img_path: Path of the image file to read.
        label: Label belonging to the image; passed through untouched.

    Returns:
        A ``(image, label)`` tuple with the preprocessed image tensor.
    """
    raw_bytes = tf.io.read_file(img_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, [224, 224])
    # Scale pixel values into the [0, 1] range
    return resized / 255.0, label

def dataframe_to_dataset(dataframe, image_dir, image_ext='.jpg', label_col='iddx_full'):
    """Build a ``tf.data.Dataset`` of (image, label) pairs from a metadata frame.

    Each ``isic_id`` is turned into a file path under ``image_dir``. The ids
    shown in the metadata preview carry no file extension, so ``image_ext``
    is appended whenever an id does not already have one; without it the
    path handed to ``tf.io.read_file`` would not name the JPEG file.

    Args:
        dataframe: Frame with an ``isic_id`` column and a ``label_col`` column.
        image_dir: Directory that contains the image files.
        image_ext: Extension added to ids that have none. Pass ``''`` to use
            the ids verbatim as file names.
        label_col: Name of the column used as the label.

    Returns:
        A dataset mapped through ``load_images_and_labels``.

    Raises:
        KeyError: If ``isic_id`` or ``label_col`` is missing from the frame.
    """
    def _image_path(image_id):
        # Only add the extension when the id does not already carry one.
        if image_ext and not os.path.splitext(image_id)[1]:
            image_id = image_id + image_ext
        return os.path.join(image_dir, image_id)

    img_paths = dataframe['isic_id'].map(_image_path).values
    # NOTE(review): the metadata preview also shows a numeric `target` column;
    # confirm `iddx_full` is the intended label and that it is encoded to
    # numbers before being fed to a model.
    labels = dataframe[label_col].values
    dataset = tf.data.Dataset.from_tensor_slices((img_paths, labels))
    dataset = dataset.map(load_images_and_labels, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    return dataset


In [6]:
# Build the three input pipelines the same way: convert each split to a
# dataset, batch it, and prefetch so the next batch is prepared in advance.
train_dataset, val_dataset, test_dataset = (
    dataframe_to_dataset(split_frame, image_dir)
    .batch(batch_size)
    .prefetch(tf.data.experimental.AUTOTUNE)
    for split_frame in (train_data, val_data, test_data)
)

In [7]:
# import ResNet50 model
from tensorflow.keras.applications.resnet50 import ResNet50

In [12]:
# Load ResNet-50 with pre-trained weights.
#   weights='imagenet'        -> use the weights that were learned on the ImageNet dataset
#   include_top=False         -> remove the fully connected layer at the top of the network
#   input_shape=(224, 224, 3) -> shape of the input image
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)