In [1]:
import os
import random
import shutil

# Root folder of the raw dataset (adjust to the dataset path on Kaggle).
data_path = '/kaggle/input/dataset/dataset'  # Kaggle input folder
output_path = '/kaggle/working'  # Output folder that receives the split

# Class folders expected directly under data_path.
subfolders = ['melanoma', 'basal cell carcinoma', 'squamous cell carcinoma', 'melanocytic nevus', 'seborrheic keratosis', 'actinic keratosis']

# Split proportions. The test split is whatever remains after train and val,
# so test_ratio is only used by the sanity check below.
train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15
assert abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-9, \
    "train/val/test ratios must sum to 1"

# Destination folders for the train, val and test splits.
train_folder = os.path.join(output_path, 'train')
val_folder = os.path.join(output_path, 'val')
test_folder = os.path.join(output_path, 'test')

# Create the destination folders if they do not exist yet.
for folder in [train_folder, val_folder, test_folder]:
    os.makedirs(folder, exist_ok=True)

# Accepted image file extensions (compared case-insensitively); add more if needed.
image_extensions = ['.jpg', '.jpeg']

for subfolder in subfolders:
    # Source folder of this class inside the raw dataset.
    subfolder_path = os.path.join(data_path, subfolder)

    # Create the class sub-folder inside train, val and test.
    os.makedirs(os.path.join(train_folder, subfolder), exist_ok=True)
    os.makedirs(os.path.join(val_folder, subfolder), exist_ok=True)
    os.makedirs(os.path.join(test_folder, subfolder), exist_ok=True)

    # Collect every image of this class. os.listdir() returns entries in
    # arbitrary order, so the names are sorted first; without this the seeded
    # shuffle below would not yield the same split on every run.
    imgs_list = sorted(filename for filename in os.listdir(subfolder_path)
                       if os.path.splitext(filename)[-1].lower() in image_extensions)

    # Shuffle with a fixed seed. Re-seeding per class makes each class's
    # permutation depend only on its own (sorted) file list.
    random.seed(42)
    random.shuffle(imgs_list)

    # Number of images per split; int() truncates, so the leftovers go to test.
    train_size = int(len(imgs_list) * train_ratio)
    val_size = int(len(imgs_list) * val_ratio)
    test_size = len(imgs_list) - train_size - val_size  # remainder after train and val

    # Copy every image into the folder of the split its shuffled index falls in.
    for i, img_name in enumerate(imgs_list):
        src_path = os.path.join(subfolder_path, img_name)

        if i < train_size:
            dest_folder = os.path.join(train_folder, subfolder)
        elif i < train_size + val_size:
            dest_folder = os.path.join(val_folder, subfolder)
        else:
            dest_folder = os.path.join(test_folder, subfolder)

        shutil.copy(src_path, os.path.join(dest_folder, img_name))

print("Pembagian dataset selesai!")


Pembagian dataset selesai!


In [2]:
import os
# Folder that holds the split dataset (train/val/test sub-folders).
base_dir = '/kaggle/working'

# Show what the split produced: the top-level folder, then the class folders
# inside the train and validation splits.
print("Contents of base directory:")
print(os.listdir(base_dir))

print("\nContents of train directory:")
print(os.listdir(f'{base_dir}/train'))

print("\nContents of validation directory:")
print(os.listdir(f'{base_dir}/val'))

Contents of base directory:
['train', '.virtual_documents', 'val', 'test']
Contents of base directory:
['train', '.virtual_documents', 'val', 'test']

Contents of train directory:
['seborrheic keratosis', 'actinic keratosis', 'basal cell carcinoma', 'squamous cell carcinoma', 'melanocytic nevus', 'melanoma']

Contents of validation directory:
['seborrheic keratosis', 'actinic keratosis', 'basal cell carcinoma', 'squamous cell carcinoma', 'melanocytic nevus', 'melanoma']


In [3]:
# Root folder of each split under base_dir.
train_dir, validation_dir, test_dir = (
    os.path.join(base_dir, split_name) for split_name in ('train', 'val', 'test')
)

# Class folder names, listed in the same order as the per-class variables
# unpacked below (melanoma, bcc, scc, nv, sk, ak).
_class_dir_names = (
    'melanoma',
    'basal cell carcinoma',
    'squamous cell carcinoma',
    'melanocytic nevus',
    'seborrheic keratosis',
    'actinic keratosis',
)

# Per-class folders inside the training split.
(train_melanoma_dir, train_bcc_dir, train_scc_dir,
 train_nv_dir, train_sk_dir, train_ak_dir) = (
    os.path.join(train_dir, class_name) for class_name in _class_dir_names
)

# Per-class folders inside the validation split.
(validation_melanoma_dir, validation_bcc_dir, validation_scc_dir,
 validation_nv_dir, validation_sk_dir, validation_ak_dir) = (
    os.path.join(validation_dir, class_name) for class_name in _class_dir_names
)

# Per-class folders inside the test split.
(test_melanoma_dir, test_bcc_dir, test_scc_dir,
 test_nv_dir, test_sk_dir, test_ak_dir) = (
    os.path.join(test_dir, class_name) for class_name in _class_dir_names
)

In [4]:
# (message, folder) pairs in display order: training, validation, then test.
# The count printed is the number of directory entries in each folder.
_count_report = (
    ('total training melanoma images :', train_melanoma_dir),
    ('total training basal cell carcinoma images :', train_bcc_dir),
    ('total training squamous cell carcinoma images :', train_scc_dir),
    ('total training melanocytic nevus images :', train_nv_dir),
    ('total training seborrheic keratosis images :', train_sk_dir),
    ('total training actinic keratosis images :', train_ak_dir),
    ('total validation melanoma images :', validation_melanoma_dir),
    ('total validation basal cell carcinoma images :', validation_bcc_dir),
    ('total validation squamous cell carcinoma images :', validation_scc_dir),
    ('total validation melanocytic nevus images :', validation_nv_dir),
    ('total validation seborrheic keratosis images :', validation_sk_dir),
    ('total validation actinic keratosis images :', validation_ak_dir),
    ('total test melanoma images :', test_melanoma_dir),
    ('total test basal cell carcinoma images :', test_bcc_dir),
    ('total test squamous cell carcinoma images :', test_scc_dir),
    ('total test melanocytic nevus images :', test_nv_dir),
    ('total test seborrheic keratosis images :', test_sk_dir),
    ('total test actinic keratosis images :', test_ak_dir),
)

for message, class_dir in _count_report:
    print(message, len(os.listdir(class_dir)))

total training melanoma images : 5253
total training basal cell carcinoma images : 3558
total training squamous cell carcinoma images : 1011
total training melanocytic nevus images : 5299
total training seborrheic keratosis images : 1388
total training actinic keratosis images : 984
total validation melanoma images : 1125
total validation basal cell carcinoma images : 762
total validation squamous cell carcinoma images : 216
total validation melanocytic nevus images : 1135
total validation seborrheic keratosis images : 297
total validation actinic keratosis images : 210
total test melanoma images : 1127
total test basal cell carcinoma images : 764
total test squamous cell carcinoma images : 218
total test melanocytic nevus images : 1136
total test seborrheic keratosis images : 298
total test actinic keratosis images : 212


In [5]:
# Total number of directory entries per split, summed over the six class folders.
total_train_images = sum(
    len(os.listdir(class_dir))
    for class_dir in (train_melanoma_dir, train_bcc_dir, train_scc_dir,
                      train_nv_dir, train_sk_dir, train_ak_dir)
)

total_validation_images = sum(
    len(os.listdir(class_dir))
    for class_dir in (validation_melanoma_dir, validation_bcc_dir, validation_scc_dir,
                      validation_nv_dir, validation_sk_dir, validation_ak_dir)
)

total_test_images = sum(
    len(os.listdir(class_dir))
    for class_dir in (test_melanoma_dir, test_bcc_dir, test_scc_dir,
                      test_nv_dir, test_sk_dir, test_ak_dir)
)

# Grand total across the three splits.
total_images = sum((total_train_images, total_validation_images, total_test_images))

# Report the per-split totals followed by the grand total.
for caption, count in (
    ('Total training images:', total_train_images),
    ('Total validation images:', total_validation_images),
    ('Total test images:', total_test_images),
    ('Total images overall:', total_images),
):
    print(caption, count)


Total training images: 17493
Total validation images: 3745
Total test images: 3755
Total images overall: 24993


In [6]:
import keras_cv
import keras_core as keras
import numpy as np

Using TensorFlow backend


In [7]:
# Load the EfficientNetV2 backbone from the named KerasCV preset, configured
# for 224x224 RGB inputs. (The preset name suggests ImageNet weights.)
_backbone_cls = keras_cv.models.EfficientNetV2Backbone
efficientnet = _backbone_cls.from_preset("efficientnetv2_b0_imagenet", input_shape=(224, 224, 3))

In [8]:
# Print the backbone's layer-by-layer summary.
efficientnet.summary()

In [9]:
# Number of top-level layers in the backbone model.
print(len(efficientnet.layers))

29


In [10]:
# Make the backbone trainable so its weights are updated together with the
# classification head (the whole network is fine-tuned, nothing is frozen).
efficientnet.trainable = True

In [11]:
# Symbolic output tensor of the backbone; the classification head is attached to it.
last_output = efficientnet.output

In [12]:
# Display the backbone's output tensor to check its shape and dtype.
print(last_output)

<KerasTensor shape=(None, 7, 7, 1280), dtype=float32, sparse=False, name=keras_tensor_28>


In [13]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers, Model

# Classification head stacked on the backbone output: global average pooling,
# a 512-unit ReLU layer, dropout, and a 6-way softmax (one unit per class).
_head_layers = [
    layers.GlobalAveragePooling2D(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(6, activation='softmax'),
]

# Thread the backbone output through the head, one layer at a time.
x = last_output
for _head_layer in _head_layers:
    x = _head_layer(x)

# Full model: backbone input -> head output.
model = Model(inputs=efficientnet.input, outputs=x)

# Print the model summary; the head appears after the backbone layers.
model.summary()

In [14]:
# Configure training: Adam optimizer, categorical cross-entropy for the
# one-hot labels produced by the generators, and accuracy as the tracked metric.
_optimizer = Adam(learning_rate=0.0007)
model.compile(
    optimizer=_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [15]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.efficientnet_v2 import preprocess_input

# Random augmentations applied to training images only.
_augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Training generator: preprocessing function plus augmentation.
# NOTE(review): Keras documents efficientnet_v2.preprocess_input as a
# pass-through, so pixel scaling presumably relies on the backbone itself; confirm.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    **_augmentation_options,
)

# Validation/test generator: same preprocessing function, no augmentation.
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Settings shared by the training and validation streams: images resized to
# the model's input size, batches of 32, one-hot labels.
_flow_options = dict(
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
)

# Stream training images in batches from the per-class folders.
train_generator = train_datagen.flow_from_directory(train_dir, **_flow_options)

# Stream validation images in batches from the per-class folders.
validation_generator = test_datagen.flow_from_directory(validation_dir, **_flow_options)

Found 17493 images belonging to 6 classes.
Found 3745 images belonging to 6 classes.


In [16]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Both callbacks track the same validation metric.
_monitored_metric = 'val_accuracy'

# Save the model to disk only when the monitored metric reaches a new maximum.
checkpoint = ModelCheckpoint(
    'best_model.keras',
    monitor=_monitored_metric,
    mode='max',
    save_best_only=True,
    verbose=1,
)

# Stop after 3 epochs without improvement and roll back to the best weights.
early_stopping = EarlyStopping(
    monitor=_monitored_metric,
    patience=3,
    restore_best_weights=True,
    verbose=1,
)

In [17]:
import tensorflow as tf
from tensorflow.keras.callbacks import ReduceLROnPlateau
# Reduce the learning rate when the validation loss stops improving.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',    # Watch the validation loss
    factor=0.5,            # Multiply the learning rate by 0.5 (halve it) on each reduction
    patience=2,            # Wait 2 epochs without improvement before reducing
    min_lr=1e-6,           # Lower bound for the learning rate
    verbose=1              # Log a message whenever the learning rate is reduced
)


In [18]:
# Callbacks used during training: best-model checkpointing, early stopping,
# and learning-rate reduction on plateau.
_training_callbacks = [checkpoint, early_stopping, reduce_lr]

# Train for at most 15 epochs, validating on the validation generator each epoch.
history = model.fit(
    train_generator,
    epochs=15,
    validation_data=validation_generator,
    callbacks=_training_callbacks,
    verbose=1,
)

Epoch 1/15


  self._warn_if_super_not_called()
I0000 00:00:1731647042.246520      97 service.cc:145] XLA service 0x782860003890 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1731647042.246582      97 service.cc:153]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1731647042.246588      97 service.cc:153]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5

I0000 00:00:1731647111.406893      97 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 956ms/step - accuracy: 0.5922 - loss: 1.0805
Epoch 1: val_accuracy improved from -inf to 0.67744, saving model to best_model.keras
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m708s[0m 1s/step - accuracy: 0.5923 - loss: 1.0802 - val_accuracy: 0.6774 - val_loss: 0.8822 - learning_rate: 7.0000e-04
Epoch 2/15
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 826ms/step - accuracy: 0.7120 - loss: 0.7943
Epoch 2: val_accuracy improved from 0.67744 to 0.74419, saving model to best_model.keras
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m514s[0m 928ms/step - accuracy: 0.7120 - loss: 0.7942 - val_accuracy: 0.7442 - val_loss: 0.7107 - learning_rate: 7.0000e-04
Epoch 3/15
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 831ms/step - accuracy: 0.7507 - loss: 0.6951
Epoch 3: val_accuracy improved from 0.74419 to 0.74499, saving model to best_model.keras
[1m547/547[0m [

In [19]:
from tensorflow.keras.models import load_model

# Reload the model saved under the ModelCheckpoint file name, replacing the
# in-memory model for the evaluation cells that follow.
model = load_model('best_model.keras')


In [20]:
# Settings for streaming the test split.
_test_flow_options = dict(
    target_size=(224, 224),     # same image size as the model input
    batch_size=32,
    class_mode='categorical',   # one-hot ground-truth labels
    shuffle=False,              # keep file order so predictions line up with labels
)

# Stream test images in batches from the per-class folders.
test_generator = test_datagen.flow_from_directory(test_dir, **_test_flow_options)


Found 3755 images belonging to 6 classes.


In [21]:
from sklearn.metrics import accuracy_score, classification_report

# Class probabilities for every test image, in generator (file) order.
predictions = model.predict(test_generator)

# Predicted class = index of the highest probability in each row.
predicted_classes = np.argmax(predictions, axis=1)

# Ground-truth class indices and the class names, in class-index mapping order.
true_classes = test_generator.classes
class_labels = list(test_generator.class_indices.keys())

# Overall accuracy computed from the predicted and true class indices.
accuracy = accuracy_score(true_classes, predicted_classes)
print(f'Akurasi manual pada data test: {accuracy:.2f}')

# Per-class precision, recall and F1.
_report = classification_report(
    true_classes,
    predicted_classes,
    target_names=class_labels,
)
print(_report)


[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 507ms/step
Akurasi manual pada data test: 0.81
                         precision    recall  f1-score   support

      actinic keratosis       0.61      0.33      0.43       212
   basal cell carcinoma       0.77      0.86      0.81       764
      melanocytic nevus       0.91      0.95      0.93      1136
               melanoma       0.83      0.85      0.84      1127
   seborrheic keratosis       0.65      0.57      0.61       298
squamous cell carcinoma       0.58      0.45      0.51       218

               accuracy                           0.81      3755
              macro avg       0.72      0.67      0.69      3755
           weighted avg       0.80      0.81      0.80      3755

