### 1. setup and data

In [4]:
#install module
!pip install --upgrade pip
!pip install pyyaml pandas numpy matplotlib seaborn scikit-learn tensorflow-macos keras

Collecting pyyaml
  Downloading PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl.metadata (2.1 kB)
Collecting pandas
  Using cached pandas-2.3.0-cp39-cp39-macosx_11_0_arm64.whl.metadata (91 kB)
Collecting numpy
  Using cached numpy-2.0.2-cp39-cp39-macosx_14_0_arm64.whl.metadata (60 kB)
Collecting matplotlib
  Using cached matplotlib-3.9.4-cp39-cp39-macosx_11_0_arm64.whl.metadata (11 kB)
Collecting seaborn
  Using cached seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Collecting scikit-learn
  Using cached scikit_learn-1.6.1-cp39-cp39-macosx_12_0_arm64.whl.metadata (31 kB)
Collecting tensorflow-macos
  Using cached tensorflow_macos-2.16.2-cp39-cp39-macosx_12_0_arm64.whl.metadata (3.3 kB)
Collecting keras
  Using cached keras-3.10.0-py3-none-any.whl.metadata (6.0 kB)
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Using cached tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting cont

In [3]:
#import module
import os
import yaml
import pandas as pd
from config.load_config import load_config
from data.split_dataset import split_dataframe, split_dataset_by_class
from data.dataloader import get_generators
from data.dataloader import show_images
from models.build_model import get_model
from models.build_model import build_model
from train.optimizer import get_optimizer
from train.callbacks import get_callbacks
from train.trainer import train_model
from utils.evaluation import plot_confusion_matrix
from utils.evaluation import plot_train_history
from utils.evaluation import show_top_misclassified
from utils.save_results import save_results


ModuleNotFoundError: No module named 'config'

In [None]:
# Load configuration
# `load_config` is imported directly (from config.load_config) in the import cell;
# there is no `config_loader` name in this notebook, so call the function itself.
# The result is indexed with string keys (e.g. config['seed']) by the cells below.
config = load_config('config/config.yaml')

In [None]:
# split dataset
# split_dataframe returns three values; `images` and `min_count` are forwarded to
# split_dataset_by_class below (`class_image_counts` is not used in the visible cells).
class_image_counts, images, min_count = split_dataframe(
    df=None,
    original_dataset_dir=config['original_dataset_dir']
)

# Write the split under a per-seed directory: "<base_output_dir>/seed<seed>".
# The directory name must not contain quote characters, so `seed` is plain text
# inside the f-string rather than a quoted token.
# NOTE(review): confirm nothing else expects a directory literally named 'seed'<n>.
split_dataset_by_class(
    original_dataset_dir=config['original_dataset_dir'],
    min_count=min_count,
    images=images,
    base_output_dir=f"{config['base_output_dir']}/seed{config['seed']}",
    train_ratio=config['train_ratio'],
    val_ratio=config['val_ratio'],
    test_ratio=config['test_ratio'],
    seed=config['seed']
)

In [None]:
# data generators - preprocessing and augmentation
# NOTE(review): train_df / val_df / test_df are not assigned in any cell visible in
# this notebook — confirm where they are produced before running this cell.
generator_kwargs = {
    'preprocessing_func': config['preprocessing'],
    # only the first two entries of input_shape are passed as the target size
    'target_size': tuple(config['input_shape'][:2]),
    'batch_size': config['batch_size'],
    'augmentations': config['augmentations'],
}
train_gen, val_gen, test_gen = get_generators(
    train_df, val_df, test_df, **generator_kwargs
)

In [None]:
# show dataset
# Print a caption and then display sample images for the training and validation generators.
preview_items = (
    ("Training Data image ex:", train_gen),
    ("Validation Data image ex:", val_gen),
)
for preview_caption, preview_gen in preview_items:
    print(preview_caption)
    show_images(preview_gen)


### 2. model training

In [None]:
# model structure
# All hyperparameters are read from `config`; the full input_shape is passed as a tuple.
model_input_shape = tuple(config['input_shape'])
model = build_model(
    backbone_name=config['backbone'],
    input_shape=model_input_shape,
    num_classes=config['num_classes'],
    dropout_rate=config['dropout_rate'],
)

# optimizer and callbacks (the model is not compiled in this cell; both objects
# are handed to train_model in the training cell)
optimizer = get_optimizer(config['optimizer'], config['learning_rate'])
results_dir = f"results/{config['experiment_id']}"
callbacks = get_callbacks(
    model_name=config['model_name'],
    save_dir=results_dir,
    patience=config['patience'],
)

In [None]:
# model training
# The optimizer and callbacks built earlier are passed in explicitly; the return
# value is kept as `history` for the history plot and the saved results later on.
num_epochs = config['epochs']
history = train_model(
    model, train_gen, val_gen,
    epochs=num_epochs, callbacks=callbacks, optimizer=optimizer,
)

### 3. evaluation and results

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

In [None]:
# Run the trained model on the test generator.
# NOTE(review): `evaluate_model` is not imported or defined in any cell visible in
# this notebook — confirm which module provides it and add it to the import cell.
# The three results are presumably true labels, predicted labels and predicted
# probabilities (inferred from the names; verify against evaluate_model).
y_true, y_pred, y_prob = evaluate_model(model, test_gen)

In [None]:
# results visualization
cm = confusion_matrix(y_true, y_pred)
# class_indices is a mapping keyed by class name; keep only the names, as a list.
class_names = list(test_gen.class_indices.keys())

plot_confusion_matrix(cm, class_names, title="Confusion Matrix")
plot_train_history(history)

# Pass the same `class_names` list used for the confusion matrix (and for
# save_results) instead of the raw class_indices mapping, so every `class_names`
# argument in this notebook has the same type.
# NOTE(review): confirm show_top_misclassified expects a list of names.
show_top_misclassified(
    y_true,
    y_pred,
    y_prob,
    class_names=class_names,
    generator=test_gen,
    model_name=config['experiment_id'],
    save_dir=f"results/{config['experiment_id']}",
    top_n=3
)

### 4. save results

In [None]:

# Rebuild the label list, confusion matrix and classification report from
# y_true / y_pred so this cell does not rely on `cm` / `class_names` from the
# visualization cell, then hand everything to save_results.
label_names = list(test_gen.class_indices.keys())
results_cm = confusion_matrix(y_true, y_pred)
report_dict = classification_report(
    y_true, y_pred, target_names=label_names, output_dict=True
)

save_results(
    model_name=config['experiment_id'],
    history=history,
    cm=results_cm,
    class_names=label_names,
    report=report_dict,
)

# Completion message ("저장 완료" = "save complete") with the results path.
print(f"\u2705 저장 완료: results/{config['experiment_id']}")
