In [None]:
import sys
import os
sys.path.append(os.path.abspath('..'))

import warnings
warnings.filterwarnings('ignore')

import torch
from torch import nn, optim
from torch.utils.tensorboard import SummaryWriter
from timm import create_model
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts

import pandas as pd
from datetime import datetime
from sklearn.metrics import accuracy_score

from dataloaders.loaders import get_dataloaders
from models.train_utils import train_one_epoch, evaluate, EarlyStopping, multiclass_log_loss
from analysis.result_plotter import analyze_model_output

from timm.data import Mixup
from timm.loss import SoftTargetCrossEntropy

import matplotlib.pyplot as plt
import random
import numpy as np

import torch.nn.functional as F

def rand_bbox(size, lam):
    """Sample a random rectangle for CutMix.

    Args:
        size: Sequence whose element 2 is the image height and element 3
            is the image width (read as ``size[2]`` and ``size[3]``).
        lam: Mixing ratio; the unclipped patch has side ratio
            ``sqrt(1 - lam)`` relative to the image.

    Returns:
        ``(x1, y1, x2, y2)`` corner coordinates clipped to the image.
        Because of clipping the patch may be smaller than requested.
    """
    height, width = size[2], size[3]

    # Half extents of the patch along each axis.
    side_ratio = np.sqrt(1. - lam)
    half_w = int(width * side_ratio) // 2
    half_h = int(height * side_ratio) // 2

    # The centre is drawn x first, then y, from NumPy's global RNG.
    center_x = np.random.randint(width)
    center_y = np.random.randint(height)

    x_lo = np.clip(center_x - half_w, 0, width)
    y_lo = np.clip(center_y - half_h, 0, height)
    x_hi = np.clip(center_x + half_w, 0, width)
    y_hi = np.clip(center_y + half_h, 0, height)
    return x_lo, y_lo, x_hi, y_hi

def apply_cutmix(x, y, num_classes, beta=1.0):
    """Apply CutMix to a batch and return mixed images with soft labels.

    Every sample is paired with another sample of the same batch through a
    random permutation. One rectangle, shared by the whole batch, is copied
    from each partner into the sample, and the one-hot labels are blended
    by the fraction of pixels that were kept.

    Args:
        x: Image batch, indexed as ``[batch, channel, row, col]``.
        y: Integer class labels, one per sample (passed to ``F.one_hot``).
        num_classes: Width of the one-hot label encoding.
        beta: Both shape parameters of the Beta distribution used to draw
            the initial mixing ratio.

    Returns:
        ``(x_mixed, y_mix)``. ``x_mixed`` is a new tensor; the input batch
        is left unmodified. ``y_mix`` is a float tensor with one row of
        ``num_classes`` soft-label weights per sample.
    """
    lam = np.random.beta(beta, beta)
    rand_index = torch.randperm(x.size(0)).to(x.device)
    y1 = F.one_hot(y, num_classes=num_classes).float()
    y2 = y1[rand_index]

    bbx1, bby1, bbx2, bby2 = rand_bbox(x.size(), lam)

    # Patch a copy so the caller's tensor is not silently overwritten.
    x_mixed = x.clone()
    x_mixed[:, :, bby1:bby2, bbx1:bbx2] = x[rand_index, :, bby1:bby2, bbx1:bbx2]

    # The box is clipped at the image border, so recompute the ratio from
    # the area that was actually replaced.
    replaced_area = float((bbx2 - bbx1) * (bby2 - bby1))
    lam = 1.0 - replaced_area / (x.size(-1) * x.size(-2))
    y_mix = y1 * lam + y2 * (1.0 - lam)
    return x_mixed, y_mix

# Fix the random seeds
def seed_everything(seed=28):
    """Seed Python, NumPy and PyTorch RNGs and set the cuDNN flags.

    Args:
        seed: Value passed to each library's seeding function.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Request deterministic cuDNN kernels and switch off the autotuner.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

seed_everything(seed=28)

# Font with Korean glyphs for matplotlib; keep the minus sign renderable.
plt.rcParams.update({
    'font.family': 'Malgun Gothic',
    'axes.unicode_minus': False,
})

# ✅ Settings
data_root = '../data/train2'
base_log_dir = "../runs"
num_epochs, patience, batch_size = 40, 4, 32
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
os.makedirs(base_log_dir, exist_ok=True)

# ✅ Build the data loaders (a mapping keyed by model name)
dataloaders = get_dataloaders(data_root, batch_size=batch_size)

# Report the dataset sizes seen by the first model in the mapping
first_model_name = list(dataloaders)[0]
train_loader, val_loader = (
    dataloaders[first_model_name][split] for split in ('train', 'val')
)
print(f"학습 데이터 개수: {len(train_loader.dataset)}")
print(f"검증 데이터 개수: {len(val_loader.dataset)}")
print(f"총 데이터 개수: {len(train_loader.dataset) + len(val_loader.dataset)}")

def _validation_metrics(y_pred, y_prob, y_true, y_id):
    """Return ``(accuracy, log_loss)`` for one validation pass.

    Args:
        y_pred: Predicted class labels.
        y_prob: 2-D array of class probabilities, one column per class.
        y_true: Ground-truth class labels.
        y_id: Sample identifiers, used as the ``ID`` column of both frames.
    """
    class_list = [str(i) for i in range(y_prob.shape[1])]
    prob_df = pd.DataFrame(y_prob, columns=class_list)
    prob_df.insert(0, 'ID', y_id)
    label_df = pd.DataFrame({'ID': y_id, 'label': [str(label) for label in y_true]})
    return accuracy_score(y_true, y_pred), multiclass_log_loss(label_df, prob_df)


results = []
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

for model_name, loaders in dataloaders.items():
    print(f"\n🚀 학습 시작: {model_name}")

    num_classes = len(loaders['classes'])

    model = create_model(model_name, pretrained=True, num_classes=num_classes).to(device)
    optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=0.01)
    scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs)
    # Alternative schedule (disabled):
    # scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)

    # Alternative augmentation tried earlier (disabled): timm's
    # Mixup(mixup_alpha=0.8, cutmix_alpha=1.0, cutmix_minmax=None, prob=0.5,
    #       switch_prob=0.5, mode='batch', label_smoothing=0.1,
    #       num_classes=num_classes)

    # Batch-level CutMix passed to train_one_epoch.
    cutmix_fn = lambda x, y: apply_cutmix(x, y, num_classes=num_classes, beta=1.0)

    # Soft-label loss, required because CutMix produces mixed targets.
    criterion = SoftTargetCrossEntropy()
    # criterion = nn.CrossEntropyLoss()   # use when CutMix/Mixup is disabled

    early_stopping = EarlyStopping(patience=patience)

    log_dir = os.path.join(base_log_dir, f"{model_name}_{timestamp}")
    writer = SummaryWriter(log_dir=log_dir)
    try:
        for epoch in range(num_epochs):
            train_loss = train_one_epoch(model, loaders['train'], criterion, optimizer, device, cutmix_fn)
            y_pred, y_prob, y_true, y_id, _ = evaluate(model, loaders['val'], device)
            acc, logloss = _validation_metrics(y_pred, y_prob, y_true, y_id)

            # Learning rate that was in effect during this epoch.
            current_lr = scheduler.get_last_lr()[0]

            writer.add_scalar("Loss/Train", train_loss, epoch)
            writer.add_scalar("Loss/Validation", logloss, epoch)
            writer.add_scalar("Accuracy/Validation", acc, epoch)
            writer.add_scalar("LearningRate", current_lr, epoch)

            print(f"📘 Epoch {epoch + 1} train_loss: {train_loss:.4f} | learning_rate: {current_lr:.6f} | ✅ Acc: {acc:.4f} | LogLoss: {logloss:.4f}")
            scheduler.step()
            early_stopping(logloss, model)
            if early_stopping.early_stop:
                print("⛔ Early stopping triggered.")
                break
    finally:
        # Flush and close the event file even if training is interrupted.
        writer.close()

    # Restore the best checkpoint, then evaluate it again so that the
    # analysis and the summary row describe the restored weights rather
    # than the last (possibly worse) epoch.
    model.load_state_dict(early_stopping.best_model_state)
    y_pred, y_prob, y_true, y_id, _ = evaluate(model, loaders['val'], device)
    acc, logloss = _validation_metrics(y_pred, y_prob, y_true, y_id)

    # ✅ Save the analysis results automatically
    val_dataset = loaders['val'].dataset
    if hasattr(val_dataset, 'indices'):
        # Subset-style wrapper: map positions back to the wrapped dataset.
        val_indices = val_dataset.indices
        base_dataset = val_dataset.dataset
    else:
        val_indices = list(range(len(val_dataset)))
        base_dataset = val_dataset
    # NOTE(review): assumes the validation loader iterates in dataset order
    # so that image_paths lines up with y_pred -- confirm in get_dataloaders.
    image_paths = [base_dataset.samples[i][0] for i in val_indices]

    analyze_model_output(
        model_name=model_name,
        timestamp=timestamp,
        image_paths=image_paths,
        y_pred=y_pred,
        y_prob=y_prob,
        y_true=y_true,
        class_names=loaders['classes']
    )

    results.append({'model': model_name, 'accuracy': acc, 'log_loss': logloss, 'timestamp': timestamp})

# ✅ Final comparison across models, best (lowest) log loss first
df_result = pd.DataFrame(results).sort_values(by='log_loss')
display(df_result)


계층적 분할 확인
학습 데이터 개수: 25112
검증 데이터 개수: 6279
총 데이터 개수: 31391

🚀 학습 시작: vit_base_patch16_224


                                                        

📘 Epoch 1 train_loss: 6.0251 | learning_rate: 0.000100 | ✅ Acc: 0.0037 | LogLoss: 5.9683


                                                        

📘 Epoch 2 train_loss: 5.9736 | learning_rate: 0.000100 | ✅ Acc: 0.0097 | LogLoss: 5.8251


                                                        

📘 Epoch 3 train_loss: 5.8780 | learning_rate: 0.000099 | ✅ Acc: 0.0137 | LogLoss: 5.6525


                                                        

📘 Epoch 4 train_loss: 5.7304 | learning_rate: 0.000099 | ✅ Acc: 0.0374 | LogLoss: 5.2720


                                                        

📘 Epoch 5 train_loss: 5.5528 | learning_rate: 0.000098 | ✅ Acc: 0.0771 | LogLoss: 4.7478


                                                        

📘 Epoch 6 train_loss: 5.3310 | learning_rate: 0.000096 | ✅ Acc: 0.1473 | LogLoss: 4.0873


                                                       

KeyboardInterrupt: 

In [2]:
import torch
# Report the installed PyTorch build and the CUDA version it was built with.
print(torch.__version__, torch.version.cuda, sep="\n")
  

2.7.0+cu118
11.8
