In [102]:
import os
import random
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms

# Set input and output paths
input_data_path = "/kaggle/input/siim-isic-melanoma-classification/jpeg/train"
labels_csv_path = "/kaggle/input/siim-isic-melanoma-classification/train.csv"  # Ground truth 파일 경로

In [103]:
# 라벨 데이터 로드 및 매핑 설정
df = pd.read_csv(labels_csv_path)
label_map = {1: "Melanoma", 0: "Benign"}
df['label'] = df['target'].map(label_map)  # 'target' 컬럼에 따라 라벨 매핑

In [104]:
df.head()

Unnamed: 0,image_name,patient_id,sex,age_approx,anatom_site_general_challenge,diagnosis,benign_malignant,target,label
0,ISIC_2637011,IP_7279968,male,45.0,head/neck,unknown,benign,0,Benign
1,ISIC_0015719,IP_3075186,female,45.0,upper extremity,unknown,benign,0,Benign
2,ISIC_0052212,IP_2842074,female,50.0,lower extremity,nevus,benign,0,Benign
3,ISIC_0068279,IP_6890425,female,45.0,head/neck,unknown,benign,0,Benign
4,ISIC_0074268,IP_8723313,female,55.0,upper extremity,unknown,benign,0,Benign


In [105]:
# 이미지 파일 리스트 생성 및 셔플
file_names = df['image_name'].tolist()
file_names = [os.path.join(input_data_path, f"{name}.jpg") for name in file_names]
random.shuffle(file_names)

# 이미지 파일에 해당하는 라벨 생성
labels = df['label'].tolist()

In [106]:
# 데이터셋 나누기
test_split_ratio = 0.2
val_split_ratio = 0.3

# 인덱스 계산
test_split_index = int(len(file_names) * (1 - test_split_ratio))
val_split_index = int(test_split_index * (1 - val_split_ratio))

# 파일 및 라벨 나누기
train_files = file_names[:val_split_index]
train_labels = labels[:val_split_index]

val_files = file_names[val_split_index:test_split_index]
val_labels = labels[val_split_index:test_split_index]

test_files = file_names[test_split_index:]
test_labels = labels[test_split_index:]

In [107]:
import torch  # PyTorch library
import torch.nn as nn  # Module for neural networks
import torch.optim as optim  # Optimization algorithms
import torchvision.transforms as transforms  # Module for image transformations
from torchvision import datasets  # Image datasets
from torch.utils.data import DataLoader  # Data loaders 
import matplotlib.pyplot as plt  # Visualization tool
import numpy as np  # Mathematical and array manipulation tool
from collections import Counter  # Tool for counting elements
import torchvision.models as models  # Pre-trained models
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score  # Evaluation metrics
from sklearn.model_selection import train_test_split  # Dataset splitting
import optuna  # Hyperparameter optimization tool
import os  # Operating system-related tool
import csv  # Tool for handling CSV files

In [108]:
# Plotting the training and validation loss
def draw_train_val_curve(train_losses, val_losses, val_accuracies, val_micro_aurocs):
    plt.figure(figsize=(10, 6))
    plt.plot(train_losses, label='Training Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.title("Training and Validation Losses per Epoch")
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.savefig('train_val_losses_p_epoch.png')
    plt.close()
    plt.clf()

    plt.figure(figsize=(10, 6))
    val_accuracies_cpu = [acc.cpu().numpy() for acc in val_accuracies]
    plt.plot(val_accuracies_cpu, label='Validation Accuracy')
    plt.title('Validation Accuracy per Epoch')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.savefig('val_acc_p_epoch.png')
    plt.close()
    plt.clf()

    plt.figure(figsize=(10, 6))
    plt.plot(val_micro_aurocs, label='Micro-average AUROC (Training)')
    plt.title('Micro-average AUROC per Epoch (Training)')
    plt.xlabel('Epochs')
    plt.ylabel('AUROC')
    plt.legend()
    plt.savefig('val_micro_auroc_p_epoch.png')
    plt.close()
    plt.clf()


In [109]:
torch.set_num_threads(1)
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

Image_Size = 227 
Num_Epochs = 5
Learning_Rate = 0.01 
Batch_Size = 128
Model_Name = 'Model_name'

In [110]:
train_transform = transforms.Compose([
    transforms.Resize((227, 227)),          
    transforms.ToTensor(), 
])

val_transform = transforms.Compose([
        transforms.Resize((227, 227)), 
        transforms.ToTensor()
])

In [111]:
from torch.utils.data import Dataset, DataLoader
from PIL import Image


# 커스텀 데이터셋 클래스
class CustomImageDataset(Dataset):
    def __init__(self, file_list, labels, transform=None):
        self.file_list = file_list
        self.labels = labels
        self.transform = transform
        self.classes = sorted(list(set(labels)))  # 클래스 이름 추출 및 정렬
        self.class_to_idx = {cls_name: idx for idx, cls_name in enumerate(self.classes)}

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, idx):
        img_path = self.file_list[idx]
        label = self.labels[idx]
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        label_idx = self.class_to_idx[label]
        return image, label_idx

# 데이터셋 및 데이터로더 생성
train_dataset = CustomImageDataset(train_files, train_labels, transform=train_transform)
val_dataset = CustomImageDataset(val_files, val_labels, transform=val_transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# 클래스 이름 및 개수 확인
class_names = train_dataset.classes
num_classes = len(class_names)
print("Class names:", class_names)
print("Number of classes:", num_classes)

Class names: ['Benign', 'Melanoma']
Number of classes: 2


In [112]:
weights = models.ResNet50_Weights.IMAGENET1K_V1
model = models.resnet50(weights=weights)
classifier = model.fc
last_layer_in_features = classifier.in_features
model.fc = nn.Linear(last_layer_in_features, num_classes)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = Learning_Rate)

device = torch.device("cuda" if torch.cuda.is_available() else"cpu")
model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [None]:
import numpy as np
from sklearn.metrics import roc_auc_score

best_val_loss = float('inf')
best_val_acc = float(0.0)
best_val_auroc = float(0.0)
p_acc_counter = 0
p_loss_counter = 0
p_auroc_counter = 0

patience = 300
train_losses = []
val_losses = []
val_accuracies = []
val_micro_aurocs = []
log_file_path = 'training_log.txt'
csv_file_path = 'training_metrics.csv'

for epoch in range(Num_Epochs):
    model.train()
    train_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * images.size(0)
        
    model.eval()
    val_loss = 0.0
    val_corrects = 0
    val_labels = []
    val_probas = []
    
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * images.size(0)
            _, preds = torch.max(outputs, 1)
            probabilities = torch.nn.functional.softmax(outputs, dim=1)

            val_corrects += torch.sum(preds == labels.data)
            val_labels.extend(labels.cpu().numpy())
            val_probas.extend(probabilities.cpu().numpy())

        val_labels = np.array(val_labels)
        val_probas = np.array(val_probas)

        # If labels are one-hot encoded, convert to class indices
        if val_labels.ndim > 1:
            val_labels = np.argmax(val_labels, axis=1)

        train_loss = train_loss / len(train_loader.dataset)
        val_loss = val_loss / len(val_loader.dataset)
        val_accuracy = val_corrects.double() / len(val_loader.dataset)

        # AUROC 계산
        val_auroc = roc_auc_score(val_labels, val_probas[:, 1], average='micro')

        train_losses.append(train_loss)
        val_losses.append(val_loss)
        val_accuracies.append(val_accuracy)
        val_micro_aurocs.append(val_auroc)
        draw_train_val_curve(train_losses, val_losses, val_accuracies, val_micro_aurocs)

    # Write metrics to CSV file
    with open(csv_file_path, 'a', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow([epoch + 1, train_loss, val_auroc, val_loss, val_accuracy])

    # Check for improvement
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), 'valloss.pth')
        p_loss_counter = 0
    else:
        p_loss_counter += 1

    if val_accuracy > best_val_acc:
        best_val_acc = val_accuracy
        torch.save(model.state_dict(), 'valacc.pth')
        p_acc_counter = 0
    else:
        p_acc_counter += 1
    
    if val_auroc > best_val_auroc:
        best_val_auroc = val_auroc
        torch.save(model.state_dict(), 'auroc.pth')
        p_auroc_counter = 0
    else:
        p_auroc_counter += 1

    # Early stopping check (수정)
    if p_loss_counter >= patience and p_acc_counter >= patience and p_auroc_counter >= patience:
        print("Stopping early due to no improvement in validation metrics.")
        break

    torch.save(model.state_dict(), 'epoch.pth')

    # Log training progress
    with open(log_file_path, 'a') as log_file:
        log_file.write(f"Epoch {epoch+1}/{Num_Epochs}, Train Loss: {train_loss:.4f}, Val auroc: {val_auroc:.4f}, "
                       f"Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.4f}, "
                       f"patience counter (acc, loss, auroc): {p_acc_counter}, {p_loss_counter}, {p_auroc_counter}\n")