## module 

In [None]:
!pip install tensorflow scikit-learn pandas matplotlib seaborn opencv-python

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2, EfficientNetB0, ResNet50, DenseNet121
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input as mobilenet_preprocess
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import tensorflow as tf

## dataset 🔧

In [None]:
# dataset split
# Copies every class directory of the original dataset into
# <BASE_OUTPUT_DIR>/{train,val,test}/<class>/ using a seeded random split.
import os
import shutil
from sklearn.model_selection import train_test_split

# 🔧 Root of the original dataset (expected to contain one directory per class)
ORIGINAL_DATASET_DIR = "/your_data_root"  # 🔧 edit this
BASE_OUTPUT_DIR = "/split_dataset"        # 🔧 where the split copy is written

# Split ratios (train / val / test)
train_ratio, val_ratio, test_ratio = 0.7, 0.15, 0.15

# Create the output root; a no-op when it already exists
os.makedirs(BASE_OUTPUT_DIR, exist_ok=True)

for cls in sorted(os.listdir(ORIGINAL_DATASET_DIR)):
    cls_path = os.path.join(ORIGINAL_DATASET_DIR, cls)
    if not os.path.isdir(cls_path):
        continue

    # os.listdir returns entries in arbitrary order, so the list is sorted to
    # make the seeded split reproducible: re-running the cell must never move
    # an image from one split to another (which would leak test data into
    # train). Only regular files are kept because shutil.copy cannot copy
    # nested directories.
    images = sorted(
        name for name in os.listdir(cls_path)
        if os.path.isfile(os.path.join(cls_path, name))
    )
    train_imgs, temp_imgs = train_test_split(images, train_size=train_ratio, random_state=42)
    # The remainder is divided between val and test in proportion to their ratios
    val_imgs, test_imgs = train_test_split(
        temp_imgs, test_size=test_ratio / (val_ratio + test_ratio), random_state=42
    )

    for category, category_imgs in zip(['train', 'val', 'test'], [train_imgs, val_imgs, test_imgs]):
        save_path = os.path.join(BASE_OUTPUT_DIR, category, cls)
        os.makedirs(save_path, exist_ok=True)
        for img in category_imgs:
            shutil.copy(os.path.join(cls_path, img), os.path.join(save_path, img))

print("✅ 데이터셋 분할 완료. 경로:", BASE_OUTPUT_DIR)


In [None]:
# mapping preprocess per model
# Each backbone expects its own input normalisation, so the matching
# `preprocess_input` function is looked up by model name.
# The three imports below are needed here: only the MobileNetV2 preprocessing
# function is imported in the notebook's import cell.
from tensorflow.keras.applications.densenet import preprocess_input as densenet_preprocess
from tensorflow.keras.applications.efficientnet import preprocess_input as efficientnet_preprocess
from tensorflow.keras.applications.resnet50 import preprocess_input as resnet_preprocess

preprocess_map = {
    "MobileNetV2": mobilenet_preprocess,
    "EfficientNetB0": efficientnet_preprocess,
    "ResNet50": resnet_preprocess,
    "DenseNet121": densenet_preprocess,
}

In [None]:
# data generators with preprocessing
def get_data_generators(model_name, data_dir, img_size=(224, 224), batch_size=32):
    """Build the train / validation / test directory iterators for one model.

    Args:
        model_name: Key into ``preprocess_map`` selecting the preprocessing
            function applied to every image.
        data_dir: Root directory containing ``train`` and ``test``
            sub-directories (one folder per class), and optionally ``val``.
        img_size: ``(height, width)`` every image is resized to.
        batch_size: Number of images per batch.

    Returns:
        Tuple ``(train_gen, val_gen, test_gen)`` of directory iterators that
        yield categorical (one-hot) labels.

    Raises:
        KeyError: If ``model_name`` is not a key of ``preprocess_map``.
    """
    preprocess_func = preprocess_map[model_name]

    train_dir = os.path.join(data_dir, "train")
    val_dir = os.path.join(data_dir, "val")
    test_dir = os.path.join(data_dir, "test")

    # Options shared by every iterator
    flow_kwargs = dict(target_size=img_size, batch_size=batch_size, class_mode='categorical')
    # Augmentation is applied to training images only
    augmentation = dict(rotation_range=15, horizontal_flip=True, brightness_range=[0.8, 1.2])

    if os.path.isdir(val_dir):
        # A dedicated validation split exists on disk: use it, so the whole
        # training split is available for training.
        train_gen = ImageDataGenerator(
            preprocessing_function=preprocess_func, **augmentation
        ).flow_from_directory(train_dir, **flow_kwargs)
        val_gen = ImageDataGenerator(
            preprocessing_function=preprocess_func
        ).flow_from_directory(val_dir, shuffle=False, **flow_kwargs)
    else:
        # No "val" directory: fall back to holding out 20% of the training
        # directory for validation.
        train_gen = ImageDataGenerator(
            preprocessing_function=preprocess_func, validation_split=0.2, **augmentation
        ).flow_from_directory(train_dir, subset='training', **flow_kwargs)
        val_gen = ImageDataGenerator(
            preprocessing_function=preprocess_func, validation_split=0.2
        ).flow_from_directory(train_dir, subset='validation', **flow_kwargs)

    # The test iterator is not shuffled so predictions keep the same order as
    # the iterator's file list.
    test_gen = ImageDataGenerator(
        preprocessing_function=preprocess_func
    ).flow_from_directory(test_dir, shuffle=False, **flow_kwargs)

    return train_gen, val_gen, test_gen

## base model

In [None]:
def build_model(base, input_shape, n_classes):
    """Stack a softmax classification head on an ImageNet-pretrained backbone.

    Args:
        base: Callable that builds the backbone; it is called with
            ``include_top=False``, ``weights='imagenet'`` and ``input_tensor``.
        input_shape: ``(height, width)`` tuple; a channel dimension of 3 is
            appended to it.
        n_classes: Number of units in the final softmax layer.

    Returns:
        A ``Model`` mapping the backbone input to class probabilities.
    """
    image_shape = input_shape + (3,)
    image_input = Input(shape=image_shape)

    backbone = base(include_top=False, weights='imagenet', input_tensor=image_input)

    # Collapse the spatial feature map into one vector per image, then classify
    pooled_features = GlobalAveragePooling2D()(backbone.output)
    class_probs = Dense(n_classes, activation='softmax')(pooled_features)

    return Model(inputs=backbone.input, outputs=class_probs)


## train

### train setting 🔧

In [None]:
# 🔧 Set the name of the model to run (must be one of the keys of base_models)
model_name = "MobileNetV2"
# Maps each supported model name to the Keras application constructor that
# builds its backbone.
base_models = {
    "MobileNetV2": MobileNetV2,
    "EfficientNetB0": EfficientNetB0,
    "ResNet50": ResNet50,
    "DenseNet121": DenseNet121
}

### training and evaluation

In [None]:
# Single source of truth for the image size: the same value is used to resize
# the images in the generators and to build the model input, so the two
# cannot drift apart.
input_shape = (224, 224)
train_gen, val_gen, test_gen = get_data_generators(model_name, BASE_OUTPUT_DIR, img_size=input_shape)
n_classes = train_gen.num_classes

model = build_model(base_models[model_name], input_shape, n_classes)
model.compile(optimizer=Adam(1e-4), loss='categorical_crossentropy', metrics=['accuracy'])

# The checkpoint keeps the weights with the best validation accuracy on disk.
# Early stopping watches validation loss (the Keras default, spelled out here)
# and restores the best-loss weights into `model` when training stops.
ckpt = ModelCheckpoint(f'{model_name}.h5', save_best_only=True, monitor='val_accuracy')
early = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=30,
    callbacks=[ckpt, early]
)

# Evaluation
# Rewind the test iterator so predictions start at its first file and line up
# with `test_gen.classes`.
test_gen.reset()
pred_probs = model.predict(test_gen)
y_true = test_gen.classes
y_pred = np.argmax(pred_probs, axis=1)
class_names = list(test_gen.class_indices.keys())

report = classification_report(y_true, y_pred, target_names=class_names, output_dict=True)
cm = confusion_matrix(y_true, y_pred)


## visualization, result save

In [None]:
# confusion matrix, train history, classification report
def plot_confusion_matrix(cm, class_names, title, save_path=None):
    """Draw a confusion matrix as an annotated heatmap.

    Args:
        cm: Confusion matrix of integer counts (rows are labelled "True",
            columns "Predicted").
        class_names: Tick labels used on both axes.
        title: Figure title.
        save_path: If truthy, the figure is also written to this path.
    """
    plt.figure(figsize=(6, 6))

    heatmap_options = {
        'annot': True,
        'fmt': 'd',
        'cmap': 'Blues',
        'xticklabels': class_names,
        'yticklabels': class_names,
    }
    sns.heatmap(cm, **heatmap_options)

    plt.title(title)
    plt.xlabel('Predicted')
    plt.ylabel('True')

    # Save before showing so the written image contains the drawn figure
    if save_path:
        plt.savefig(save_path)
    plt.show()

def plot_train_history(history, title_prefix, save_path=None):
    """Plot training vs. validation accuracy and loss side by side.

    Args:
        history: Object whose ``history`` attribute is a mapping with the keys
            ``accuracy``, ``val_accuracy``, ``loss`` and ``val_loss``.
        title_prefix: Text placed before "Accuracy" / "Loss" in the panel titles.
        save_path: If truthy, the figure is also written to this path.
    """
    curves = history.history

    # (subplot position, train key, val key, train label, val label, title suffix)
    panels = (
        (1, 'accuracy', 'val_accuracy', 'Train Acc', 'Val Acc', 'Accuracy'),
        (2, 'loss', 'val_loss', 'Train Loss', 'Val Loss', 'Loss'),
    )

    plt.figure(figsize=(12, 4))
    for position, train_key, val_key, train_label, val_label, metric in panels:
        plt.subplot(1, 2, position)
        plt.plot(curves[train_key], label=train_label)
        plt.plot(curves[val_key], label=val_label)
        plt.title(f'{title_prefix} {metric}')
        plt.legend()

    if save_path:
        plt.savefig(save_path)
    plt.show()

def save_results(model_name, history, cm, class_names, report):
    """Write the evaluation artefacts of one model to ``result/<model_name>-result``.

    Saves the confusion-matrix plot, the accuracy/loss curves and the
    classification report (as JSON). The directory is created if needed.

    Args:
        model_name: Used for the output directory name and the plot titles.
        history: Training history passed on to ``plot_train_history``.
        cm: Confusion matrix passed on to ``plot_confusion_matrix``.
        class_names: Class labels for the confusion-matrix axes.
        report: JSON-serialisable classification report.
    """
    # Imported locally: the notebook's import cell does not import json
    import json

    result_dir = f"result/{model_name}-result"
    os.makedirs(result_dir, exist_ok=True)

    plot_confusion_matrix(cm, class_names, title=f'{model_name} Confusion Matrix',
                          save_path=os.path.join(result_dir, "confusion_matrix.png"))
    plot_train_history(history, title_prefix=model_name,
                       save_path=os.path.join(result_dir, "accuracy_loss.png"))
    with open(os.path.join(result_dir, "classification_report.json"), "w", encoding="utf-8") as f:
        json.dump(report, f, indent=4)


In [None]:
# top-N most confident misclassifications

def show_top_misclassified(y_true, y_pred, probs, class_names, generator, model_name, save_dir, top_n=3):
    """Show and save the misclassified images the model was most confident about.

    Args:
        y_true: 1-D array of true class indices.
        y_pred: 1-D array of predicted class indices.
        probs: 2-D array of predicted probabilities, indexed ``[sample, class]``.
        class_names: Class labels indexed by class index.
        generator: Object whose ``filepaths`` list gives the image path of
            each sample.
        model_name: Unused; kept so existing callers keep working.
        save_dir: Directory the figures are written to (created if missing).
        top_n: Maximum number of images to show; values <= 0 show none.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    probs = np.asarray(probs)

    error_indices = np.flatnonzero(y_true != y_pred)
    # Probability the model assigned to its (wrong) prediction, one per error
    error_conf = probs[error_indices, y_pred[error_indices]]

    # Positions into error_indices / error_conf, most confident first.
    # Slicing after the reversal makes top_n == 0 select nothing; a "[-top_n:]"
    # slice would select every error in that case.
    n_show = max(int(top_n), 0)
    ranked = np.argsort(error_conf)[::-1][:n_show]

    os.makedirs(save_dir, exist_ok=True)

    for rank, pos in enumerate(ranked, start=1):
        idx = error_indices[pos]
        img_path = generator.filepaths[idx]
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None instead of raising on unreadable files
            print(f"Skipping unreadable image: {img_path}")
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        plt.figure(figsize=(4, 4))
        plt.imshow(img)
        # error_conf is indexed by position among the errors, not by rank, so
        # `pos` is used to report the confidence of this very sample.
        plt.title(
            f"True: {class_names[y_true[idx]]}\nPred: {class_names[y_pred[idx]]}\nConf: {error_conf[pos]:.2f}"
        )
        plt.axis('off')

        save_path = os.path.join(save_dir, f"misclassified_{rank}.png")
        plt.savefig(save_path)
        plt.show()
        plt.close()

    print(f"✅ 오분류 top-{top_n} 이미지 저장 완료: {save_dir}")


In [None]:
# results
# Write the confusion matrix, training curves and classification report for
# the trained model.
save_results(model_name, history, cm, class_names, report)
# Show and save the most confident misclassified test images into the same
# result directory.
show_top_misclassified(
    y_true=y_true,
    y_pred=y_pred,
    probs=pred_probs,
    class_names=class_names,
    generator=test_gen,
    model_name=model_name,
    save_dir=f"result/{model_name}-result",
    top_n=3
)
print(f"\u2705 저장 완료: result/{model_name}-result")