In [1]:
import os
import shutil
import tempfile

import matplotlib.pyplot as plt
from tqdm import tqdm

import random
import numpy as np
import torch


from monai.losses import DiceCELoss
from monai.inferers import sliding_window_inference
from monai.transforms import (
    AsDiscrete,
    EnsureChannelFirstd,
    Compose,
    CropForegroundd,
    LoadImaged,
    Orientationd,
    RandFlipd,
    RandCropByPosNegLabeld,
    RandShiftIntensityd,
    ScaleIntensityRanged,
    Spacingd,
    RandRotate90d,
)

from monai.config import print_config
from monai.metrics import DiceMetric
from src.models.swincspunetr import SwinCSPUNETR

from monai.data import (
    DataLoader,
    CacheDataset,
    load_decathlon_datalist,
    decollate_batch,
)

# 랜덤 시드 고정
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

set_seed(42)


print_config()

MONAI version: 1.4.0
Numpy version: 1.26.3
Pytorch version: 2.4.1+cu121
MONAI flags: HAS_EXT = False, USE_COMPILED = False, USE_META_DICT = False
MONAI rev id: 46a5272196a6c2590ca2589029eed8e4d56ff008
MONAI __file__: c:\ProgramData\anaconda3\envs\ship\Lib\site-packages\monai\__init__.py

Optional dependencies:
Pytorch Ignite version: NOT INSTALLED or UNKNOWN VERSION.
ITK version: NOT INSTALLED or UNKNOWN VERSION.
Nibabel version: 5.3.2
scikit-image version: 0.24.0
scipy version: 1.14.1
Pillow version: 10.2.0
Tensorboard version: NOT INSTALLED or UNKNOWN VERSION.
gdown version: 5.2.0
TorchVision version: 0.19.1+cu121
tqdm version: 4.66.5
lmdb version: NOT INSTALLED or UNKNOWN VERSION.
psutil version: 6.0.0
pandas version: 2.2.3
einops version: 0.8.0
transformers version: NOT INSTALLED or UNKNOWN VERSION.
mlflow version: NOT INSTALLED or UNKNOWN VERSION.
pynrrd version: NOT INSTALLED or UNKNOWN VERSION.
clearml version: NOT INSTALLED or UNKNOWN VERSION.

For details about installing the 

In [2]:
class_info = {
    0: {"name": "background", "weight": 0},  # weight 없음
    1: {"name": "apo-ferritin", "weight": 1000},
    2: {"name": "beta-amylase", "weight": 100}, # 4130
    3: {"name": "beta-galactosidase", "weight": 1500}, #3080
    4: {"name": "ribosome", "weight": 1000},
    5: {"name": "thyroglobulin", "weight": 1500},
    6: {"name": "virus-like-particle", "weight": 1000},
}

# 가중치에 비례한 비율 계산
raw_ratios = {
    k: (v["weight"] if v["weight"] is not None else 0.01)  # 가중치 비례, None일 경우 기본값a
    for k, v in class_info.items()
}
total = sum(raw_ratios.values())
ratios = {k: v / total for k, v in raw_ratios.items()}

# 최종 합계가 1인지 확인
final_total = sum(ratios.values())
print("클래스 비율:", ratios)
print("최종 합계:", final_total)

# 비율을 리스트로 변환
ratios_list = [ratios[k] for k in sorted(ratios.keys())]
print("클래스 비율 리스트:", ratios_list)

클래스 비율: {0: 0.0, 1: 0.16393442622950818, 2: 0.01639344262295082, 3: 0.2459016393442623, 4: 0.16393442622950818, 5: 0.2459016393442623, 6: 0.16393442622950818}
최종 합계: 1.0
클래스 비율 리스트: [0.0, 0.16393442622950818, 0.01639344262295082, 0.2459016393442623, 0.16393442622950818, 0.2459016393442623, 0.16393442622950818]


# 모델 설정

In [3]:
from src.dataset.dataset import create_dataloaders
from monai.transforms import (
    Compose, LoadImaged, EnsureChannelFirstd, NormalizeIntensityd,
    Orientationd, CropForegroundd, GaussianSmoothd, ScaleIntensityd,
    RandSpatialCropd, RandRotate90d, RandFlipd, RandGaussianNoised,
    ToTensord, RandCropByLabelClassesd
)
from monai.transforms import CastToTyped
import numpy as np

train_img_dir = "./datasets/train/images"
train_label_dir = "./datasets/train/labels"
val_img_dir = "./datasets/val/images"
val_label_dir = "./datasets/val/labels"
# DATA CONFIG
img_size =  96 # Match your patch size
img_depth = img_size
n_classes = 7
batch_size = 1 # 13.8GB GPU memory required for 128x128 img size
num_samples = batch_size # 한 이미지에서 뽑을 샘플 수
loader_batch = 1
num_repeat = 60
accumulation_steps = 16

# MODEL CONFIG
feature_size = 48
use_checkpoint = True
use_v2 = True
drop_rate= 0.25
attn_drop_rate = 0.25
num_bottleneck = 2

# TRAINING CONFIG
num_epochs = 4000

lr = 0.001

# LOSS
warmup_epochs = 5
schedule_epochs = 10
warmup_ce = 8.0
warmup_tv = 0.1
warmup_hd = 0.1
ce_end = 0.2
tv_end = 0.4
hd_end = 0.4
include_background = False
reduction = "mean"
softmax = True
tversky_alpha = 0.52  # Tversky loss의 alpha 값 = lamda
tversky_beta = 1.0 - tversky_alpha
tversky_smooth = 1e-5
tv_boost = 1.2
hd_boost = 1.2


class_weights = None
class_weights = torch.tensor([0.0001, 1, 0.001, 1.1, 1, 1.1, 1], dtype=torch.float32)  # 클래스별 가중치

# INIT
start_epoch = 0
best_val_loss = float('inf')
best_val_fbeta_score = 0

non_random_transforms = Compose([
    EnsureChannelFirstd(keys=["image", "label"], channel_dim="no_channel"),
    NormalizeIntensityd(keys="image"),
    Orientationd(keys=["image", "label"], axcodes="RAS"),
    CastToTyped(keys=["image"], dtype=np.float16),
    GaussianSmoothd(
        keys=["image"],      # 변환을 적용할 키
        sigma=[1.0, 1.0, 1.0]  # 각 축(x, y, z)의 시그마 값
        ),
])
random_transforms = Compose([
    RandCropByLabelClassesd(
        keys=["image", "label"],
        label_key="label",
        spatial_size=[img_depth, img_size, img_size],
        num_classes=n_classes,
        num_samples=num_samples, 
        ratios=ratios_list,
    ),
    RandRotate90d(keys=["image", "label"], prob=0.5, spatial_axes=[1, 2]),
    RandFlipd(keys=["image", "label"], prob=0.5, spatial_axis=0),
    RandFlipd(keys=["image", "label"], prob=0.5, spatial_axis=1),
    RandFlipd(keys=["image", "label"], prob=0.5, spatial_axis=2),
])

In [4]:
train_loader, val_loader = None, None
train_loader, val_loader = create_dataloaders(
    train_img_dir, 
    train_label_dir, 
    val_img_dir, 
    val_label_dir, 
    non_random_transforms = non_random_transforms, 
    random_transforms = random_transforms, 
    batch_size = loader_batch,
    num_workers=0,train_num_repeat=num_repeat)

Loading dataset: 100%|██████████| 24/24 [00:39<00:00,  1.63s/it]
Loading dataset: 100%|██████████| 4/4 [00:12<00:00,  3.19s/it]


https://monai.io/model-zoo.html

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from monai.losses import TverskyLoss, LogHausdorffDTLoss

class AdaptiveCombinedLoss(nn.Module):
    """
    요구사항:
    1) Warm-up 단계 (0 ~ warmup_epochs):
       - CE 위주로 학습(예: CE=1.0), Tversky/HD는 작게(예: 0.1)
    2) Warm-up 종료 시점:
       - 해당 시점 손실값 참고해 Kendall(\log\sigma) 초기값/가중치 보정
    3) Kendall(2018) 식:
       - (1/(2σ_i^2))*L_i + log(σ_i^2)
       - Tversky/HD는 더 강조(상수 w_i를 크게) + CE 최소 한도 유지
    4) Warm-up 이후 스케줄링:
       - CE 점진 감소(1.0→0.3), Tversky/HD 점진 증가(0.1→1.0)
       - 필요시 Tversky/HD를 추가로 boost
    """

    def __init__(
        self,
        # 1) Warm-up & 스케줄
        warmup_epochs: int = 5,
        schedule_epochs: int = 10,

        # 2) Warm-up 시 고정 가중치
        warmup_ce=1.0,
        warmup_tv=0.1,
        warmup_hd=0.1,

        # 3) 스케줄에서의 최종 가중치
        ce_end=0.3,
        tv_end=1.0,
        hd_end=1.0,

        # 4) Tversky/HD를 추가로 더 키우고 싶다면 boost 사용
        #    (예: 2.0이면 TV/HD가 2배 더 강조)
        tv_boost=1.2,
        hd_boost=1.2,

        # MONAI Loss 설정
        include_background=True,
        reduction="mean",
        softmax=True,

        # Tversky 파라미터
        tversky_alpha=0.52,
        tversky_beta=None,  # None이면 1 - alpha
        tversky_smooth=1e-5,

        # Kendall(2018) 식 사용 여부
        use_uncertainty=True,
    ):
        super().__init__()
        self.warmup_epochs = warmup_epochs
        self.schedule_epochs = schedule_epochs

        # Warm-up 고정 가중치
        self.warmup_ce = warmup_ce
        self.warmup_tv = warmup_tv
        self.warmup_hd = warmup_hd

        # Warm-up 이후 스케줄 start/end
        self.ce_start, self.ce_end = warmup_ce, ce_end
        self.tv_start, self.tv_end = warmup_tv, tv_end
        self.hd_start, self.hd_end = warmup_hd, hd_end

        # Tversky/HD 추가 배율(강조)
        self.tv_boost = tv_boost
        self.hd_boost = hd_boost

        self.use_uncertainty = use_uncertainty

        if tversky_beta is None:
            tversky_beta = 1.0 - tversky_alpha

        # 배경 채널 무시 설정
        if not include_background:
            ignore_index = 0
        else:
            ignore_index = -100

        # (1) 개별 손실 정의
        self.ce_loss = nn.CrossEntropyLoss()
        self.tversky_loss = TverskyLoss(
            alpha=tversky_alpha,
            beta=tversky_beta,
            smooth_nr=tversky_smooth,
            smooth_dr=tversky_smooth,
            softmax=softmax,
            reduction=reduction,
            include_background=include_background
        )
        self.haus_loss = LogHausdorffDTLoss(
            softmax=softmax,
            reduction=reduction,
            include_background=include_background
        )

        # (2) Kendall을 위한 log_sigma 파라미터
        if self.use_uncertainty:
            self.log_sigma_ce   = nn.Parameter(torch.zeros(1))
            self.log_sigma_tv   = nn.Parameter(torch.zeros(1))
            self.log_sigma_haus = nn.Parameter(torch.zeros(1))

        # (3) 현재 epoch, warm-up 손실 기록
        self.current_epoch = 0
        self.last_warmup_ce   = 0.0
        self.last_warmup_tv   = 0.0
        self.last_warmup_haus = 0.0

    def set_epoch(self, epoch: int):
        """학습 루프에서 매 epoch마다 호출하여 현재 epoch 갱신."""
        self.current_epoch = epoch

    def record_warmup_losses(self, ce_val, tv_val, hd_val):
        """
        Warm-up 단계 손실값을 기록 -> 이후 log_sigma 초기값 조정 등 활용 가능
        """
        self.last_warmup_ce   = ce_val
        self.last_warmup_tv   = tv_val
        self.last_warmup_haus = hd_val

    def end_of_warmup_init(self):
        """
        Warm-up → Kendall 전환 시점에서,
        warm-up 손실값 등을 참고해 log_sigma 등 초기 설정
        """
        if self.use_uncertainty:
            with torch.no_grad():
                # 예시: warm-up에서 CE가 안정적, TV/HD가 크면
                # TV/HD 강조 위해 log_sigma를 음수로 조정
                self.log_sigma_ce[0]   = 0.0   # CE
                self.log_sigma_tv[0]   = -0.5  # TV
                self.log_sigma_haus[0] = -0.5  # HD

    def forward(self, preds, targets):
        """
        preds: (B, C, D, H, W) - logit
        targets: (B, D, H, W) - 정수 라벨
        """
        # 1) 개별 손실 계산
        loss_ce   = self.ce_loss(preds, targets)
        loss_tv   = self.tversky_loss(preds, targets)
        loss_haus = self.haus_loss(preds, targets)

        # 2) Warm-up 단계
        if self.current_epoch < self.warmup_epochs:
            total_loss = (
                self.warmup_ce * loss_ce
                + self.warmup_tv * loss_tv
                + self.warmup_hd * loss_haus
            )
        else:
            # Warm-up 이후 → Kendall + 스케줄
            # (a) ratio 계산
            progress = self.current_epoch - self.warmup_epochs
            ratio = float(progress) / float(self.schedule_epochs)
            ratio = max(0.0, min(1.0, ratio))  # 0 ~ 1

            # (b) CE, TV, HAUS 선형 보간 가중치
            w_ce   = self.ce_start + (self.ce_end - self.ce_start)*ratio
            w_tv   = self.tv_start + (self.tv_end - self.tv_start)*ratio
            w_haus = self.hd_start + (self.hd_end - self.hd_start)*ratio

            

            if self.use_uncertainty:
                # (c) Kendall 식
                sigma_ce   = torch.exp(self.log_sigma_ce)
                sigma_tv   = torch.exp(self.log_sigma_tv)
                sigma_haus = torch.exp(self.log_sigma_haus)

                ce_term   = (1.0/(2.0*sigma_ce**2))   * loss_ce   + torch.log(sigma_ce**2)
                tv_term   = (1.0/(2.0*sigma_tv**2))   * loss_tv   + torch.log(sigma_tv**2)
                haus_term = (1.0/(2.0*sigma_haus**2)) * loss_haus + torch.log(sigma_haus**2)

                tv_term   = tv_term   * self.tv_boost
                haus_term = haus_term * self.hd_boost
                
                # (d) 최종 합산 (TV/HD > CE가 되도록 w_tv, w_haus를 크게)
                total_loss = w_ce*ce_term + w_tv*tv_term + w_haus*haus_term
            else:
                # Kendall 미사용 시 단순 가중합
                total_loss = w_ce*loss_ce + w_tv*loss_tv + w_haus*loss_haus

        return total_loss

  """


In [6]:
import torch.optim as optim
from tqdm import tqdm
import numpy as np
import torch
from pathlib import Path
from monai.metrics import DiceMetric

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = SwinCSPUNETR(
    img_size=(img_depth, img_size, img_size),
    in_channels=1,
    out_channels=n_classes,
    feature_size=feature_size,
    use_checkpoint=True,
    drop_rate = drop_rate,
    attn_drop_rate = attn_drop_rate,
    use_v2 = use_v2,
    n=num_bottleneck
).to(device)
# Pretrained weights 불러오기
# if use_checkpoint:
#     pretrain_path = "./swin_unetr_btcv_segmentation/models/model.pt"
#     weight = torch.load(pretrain_path, map_location=device)

#     # 출력 레이어의 키를 제외한 나머지 가중치만 로드
#     filtered_weights = {k: v for k, v in weight.items() if "out.conv.conv" not in k}

#     # strict=False로 로드하여 불일치하는 부분 무시
#     model.load_state_dict(filtered_weights, strict=False)
#     print("Filtered weights loaded successfully. Output layer will be trained from scratch.")


# Loss function
criterion = AdaptiveCombinedLoss(
    warmup_epochs=warmup_epochs,
    schedule_epochs=schedule_epochs,
    warmup_ce=warmup_ce,
    warmup_tv=warmup_tv,
    warmup_hd=warmup_hd,
    ce_end=ce_end,
    tv_end=tv_end,
    hd_end=hd_end,
    include_background=include_background,
    reduction=reduction,
    tversky_alpha=tversky_alpha,
    tversky_beta=tversky_beta,
    tversky_smooth=tversky_smooth,
    tv_boost=tv_boost,
    hd_boost=hd_boost,
)

pretrain_str = "yes" if use_checkpoint else "no"
weight_str = "weighted" if class_weights is not None else ""
if tv_boost == hd_boost == 1.0:
    boost_str = f"b{tv_boost:.2f}"
else:
    boost_str = f"tvb{tv_boost:.2f}_hb{hd_boost:.2f}"
# 체크포인트 디렉토리 및 파일 설정
checkpoint_base_dir = Path("./model_checkpoints")
folder_name = f"SwinCSPUNETr_CETVHF_{weight_str}_f{feature_size}s{img_size}_numb{num_bottleneck}_lr{lr:.0e}_T-a{tversky_alpha:.2f}b{tversky_beta:.2f}Wc{warmup_ce}_Wt{warmup_tv}Wh{warmup_hd}_We{warmup_epochs}_Se{schedule_epochs}_{boost_str}_b{batch_size}_r{num_repeat}"
checkpoint_dir = checkpoint_base_dir / folder_name
optimizer = optim.AdamW(list(model.parameters()) + list(criterion.parameters()), lr=lr, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, factor=0.5)
# 체크포인트 디렉토리 생성
checkpoint_dir.mkdir(parents=True, exist_ok=True)

if checkpoint_dir.exists():
    best_model_path = checkpoint_dir / 'best_model.pt'
    if best_model_path.exists():
        print(f"기존 best model 발견: {best_model_path}")
        try:
            checkpoint = torch.load(best_model_path, map_location=device)
            # 체크포인트 내부 키 검증
            required_keys = ['model_state_dict', 'optimizer_state_dict', 'epoch', 'best_val_loss']
            if all(k in checkpoint for k in required_keys):
                model.load_state_dict(checkpoint['model_state_dict'])
                optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
                start_epoch = checkpoint['epoch']
                best_val_loss = checkpoint['best_val_loss']
                print("기존 학습된 가중치를 성공적으로 로드했습니다.")
                checkpoint= None
            else:
                raise ValueError("체크포인트 파일에 필요한 key가 없습니다.")
        except Exception as e:
            print(f"체크포인트 파일을 로드하는 중 오류 발생: {e}")



기존 best model 발견: model_checkpoints\SwinUNETRv2_CETVHF_weighted_f48s96lr1e-03_T-a0.52b0.48Wc8.0_Wt0.1Wh0.1_We5_Se10_tvb1.20_hb1.20_b1_r60\best_model.pt


  checkpoint = torch.load(best_model_path, map_location=device)


기존 학습된 가중치를 성공적으로 로드했습니다.


In [7]:
batch = next(iter(val_loader))
images, labels = batch["image"], batch["label"]
print(images.shape, labels.shape)

torch.Size([1, 1, 96, 96, 96]) torch.Size([1, 1, 96, 96, 96])


In [8]:
torch.backends.cudnn.benchmark = True

In [9]:
import wandb
from datetime import datetime

current_time = datetime.now().strftime('%Y%m%d_%H%M%S')
run_name = folder_name

# wandb 초기화
wandb.init(
    project='czii_SwinUnetR',  # 프로젝트 이름 설정
    name=run_name,         # 실행(run) 이름 설정
    config={
        'num_epochs': num_epochs,
        'learning_rate': lr,
        'batch_size': batch_size,
        'lambda': tversky_alpha,
        "cross_entropy_weight": warmup_ce,
        "tversky_weight": warmup_tv,
        "hausdorff_weight": warmup_hd,
        "cross_entropy_weight_end": ce_end,
        "tversky_weight_end": tv_end,
        "hausdorff_weight_end": hd_end,
        "tversky_weight_boost": tv_boost,
        "hausdorff_weight_boost": hd_boost,
        "include_background": include_background,
        "wramup_epochs": warmup_epochs,
        "schedule_epochs": schedule_epochs,
        "include_background": include_background,
        "reduction": reduction,
        'feature_size': feature_size,
        'img_size': img_size,
        'sampling_ratio': ratios_list,
        'device': device.type,
        "checkpoint_dir": str(folder_name),
        "class_weights": class_weights.tolist() if class_weights is not None else None,
        "use_checkpoint": use_checkpoint,
        "drop_rate": drop_rate,
        "attn_drop_rate": attn_drop_rate,
        "use_v2": use_v2,
        "accumulation_steps": accumulation_steps,
        "num_repeat": num_repeat,
        
        # 필요한 하이퍼파라미터 추가
    }
)
# 모델을 wandb에 연결
wandb.watch(model, log='all')

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mwoow070840[0m ([33mwaooang[0m). Use [1m`wandb login --relogin`[0m to force relogin


[]

# 학습

In [10]:
from monai.metrics import DiceMetric
    
def processing(batch_data, model, criterion, device):
    images = batch_data['image'].to(device)  # Input 이미지 (B, 1, 96, 96, 96)
    labels = batch_data['label'].to(device)  # 라벨 (B, 96, 96, 96)

    labels = labels.squeeze(1)  # (B, 1, 96, 96, 96) → (B, 96, 96, 96)
    labels = labels.long()  # 라벨을 정수형으로 변환

    # 원핫 인코딩 (B, H, W, D) → (B, num_classes, H, W, D)
    
    labels_onehot = torch.nn.functional.one_hot(labels, num_classes=n_classes)
    labels_onehot = labels_onehot.permute(0, 4, 1, 2, 3).float()  # (B, num_classes, H, W, D)

    # 모델 예측
    outputs = model(images)  # outputs: (B, num_classes, H, W, D)

    # Loss 계산
    # loss = criterion(outputs, labels_onehot)
    loss = criterion(outputs, labels_onehot)
    return loss, outputs, labels, outputs.argmax(dim=1)

def train_one_epoch(model, train_loader, criterion, optimizer, device, epoch, accumulation_steps=4):
    model.train()
    epoch_loss = 0
    optimizer.zero_grad()  # 그래디언트 초기화
    with tqdm(train_loader, desc='Training') as pbar:
        for i, batch_data in enumerate(pbar):
            # 손실 계산
            loss, _, _, _ = processing(batch_data, model, criterion, device)

            # 그래디언트를 계산하고 누적
            loss = loss / accumulation_steps  # 그래디언트 누적을 위한 스케일링
            loss.backward()  # 그래디언트 계산 및 누적
            
            # 그래디언트 업데이트 (accumulation_steps마다 한 번)
            if (i + 1) % accumulation_steps == 0 or (i + 1) == len(train_loader):
                optimizer.step()  # 파라미터 업데이트
                optimizer.zero_grad()  # 누적된 그래디언트 초기화
            
            # 손실값 누적 (스케일링 복구)
            epoch_loss += loss.item() * accumulation_steps  # 실제 손실값 반영
            pbar.set_postfix(loss=loss.item() * accumulation_steps)  # 실제 손실값 출력
    avg_loss = epoch_loss / len(train_loader)
    wandb.log({'train_epoch_loss': avg_loss, 'epoch': epoch + 1})
    return avg_loss


def validate_one_epoch(model, val_loader, criterion, device, epoch, calculate_dice_interval):
    model.eval()
    val_loss = 0
    
    class_dice_scores = {i: [] for i in range(n_classes)}
    class_f_beta_scores = {i: [] for i in range(n_classes)}
    with torch.no_grad():
        with tqdm(val_loader, desc='Validation') as pbar:
            for batch_data in pbar:
                loss, _, labels, preds = processing(batch_data, model, criterion, device)
                val_loss += loss.item()
                pbar.set_postfix(loss=loss.item())

                # 각 클래스별 Dice 점수 계산
                if epoch % calculate_dice_interval == 0:
                    for i in range(n_classes):
                        pred_i = (preds == i)
                        label_i = (labels == i)
                        dice_score = (2.0 * torch.sum(pred_i & label_i)) / (torch.sum(pred_i) + torch.sum(label_i) + 1e-8)
                        class_dice_scores[i].append(dice_score.item())
                        precision = (torch.sum(pred_i & label_i) + 1e-8) / (torch.sum(pred_i) + 1e-8)
                        recall = (torch.sum(pred_i & label_i) + 1e-8) / (torch.sum(label_i) + 1e-8)
                        f_beta_score = (1 + 4**2) * (precision * recall) / (4**2 * precision + recall + 1e-8)
                        class_f_beta_scores[i].append(f_beta_score.item())

    avg_loss = val_loss / len(val_loader)
    # 에포크별 평균 손실 로깅
    wandb.log({'val_epoch_loss': avg_loss, 'epoch': epoch + 1})
    
    # 각 클래스별 평균 Dice 점수 출력
    if epoch % calculate_dice_interval == 0:
        print("Validation Dice Score")
        all_classes_dice_scores = []
        for i in range(n_classes):
            mean_dice = np.mean(class_dice_scores[i])
            wandb.log({f'class_{i}_dice_score': mean_dice, 'epoch': epoch + 1})
            print(f"Class {i}: {mean_dice:.4f}", end=", ")
            if i not in [0, 2]:  # 평균에 포함할 클래스만 추가
                all_classes_dice_scores.append(mean_dice)
            
        print()
    if epoch % calculate_dice_interval == 0:
        print("Validation F-beta Score")
        all_classes_fbeta_scores = []
        for i in range(n_classes):
            mean_fbeta = np.mean(class_f_beta_scores[i])
            wandb.log({f'class_{i}_f_beta_score': mean_fbeta, 'epoch': epoch + 1})
            print(f"Class {i}: {mean_fbeta:.4f}", end=", ")
            if i not in [0, 2]:  # 평균에 포함할 클래스만 추가
                all_classes_fbeta_scores.append(mean_fbeta)
        print()
        overall_mean_dice = np.mean(all_classes_dice_scores)
        overall_mean_fbeta = np.mean(all_classes_fbeta_scores)
        wandb.log({'overall_mean_f_beta_score': overall_mean_fbeta, 'overall_mean_dice_score': overall_mean_dice, 'epoch': epoch + 1})
        print(f"\nOverall Mean Dice Score: {overall_mean_dice:.4f}\nOverall Mean F-beta Score: {overall_mean_fbeta:.4f}\n")

    if overall_mean_fbeta is None:
        overall_mean_fbeta = 0

    return val_loss / len(val_loader), overall_mean_fbeta

def train_model(
    model, train_loader, val_loader, criterion, optimizer, num_epochs, patience, 
    device, start_epoch, best_val_loss, best_val_fbeta_score, calculate_dice_interval=1,
    accumulation_steps=4
):
    """
    모델을 학습하고 검증하는 함수
    Args:
        model: 학습할 모델
        train_loader: 학습 데이터 로더
        val_loader: 검증 데이터 로더
        criterion: 손실 함수
        optimizer: 최적화 알고리즘
        num_epochs: 총 학습 epoch 수
        patience: early stopping 기준
        device: GPU/CPU 장치
        start_epoch: 시작 epoch
        best_val_loss: 이전 최적 validation loss
        best_val_fbeta_score: 이전 최적 validation f-beta score
        calculate_dice_interval: Dice 점수 계산 주기
    """
    epochs_no_improve = 0

    for epoch in range(start_epoch, num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")

        # Train One Epoch
        train_loss = train_one_epoch(
            model=model, 
            train_loader=train_loader, 
            criterion=criterion, 
            optimizer=optimizer, 
            device=device,
            epoch=epoch,
            accumulation_steps= accumulation_steps
        )
        
        scheduler.step(train_loss)
        # Validate One Epoch
        val_loss, overall_mean_fbeta_score = validate_one_epoch(
            model=model, 
            val_loader=val_loader, 
            criterion=criterion, 
            device=device, 
            epoch=epoch, 
            calculate_dice_interval=calculate_dice_interval
        )

        
        print(f"Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}, Validation F-beta: {overall_mean_fbeta_score:.4f}")

        if val_loss < best_val_loss and overall_mean_fbeta_score > best_val_fbeta_score:
            best_val_loss = val_loss
            best_val_fbeta_score = overall_mean_fbeta_score
            epochs_no_improve = 0
            checkpoint_path = os.path.join(checkpoint_dir, 'best_model.pt')
            torch.save({
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'best_val_loss': best_val_loss,
                'best_val_fbeta_score': best_val_fbeta_score
            }, checkpoint_path)
            print(f"========================================================")
            print(f"SUPER Best model saved. Loss:{best_val_loss:.4f}, Score:{best_val_fbeta_score:.4f}")
            print(f"========================================================")

        # Early stopping 조건 체크
        if val_loss >= best_val_loss and overall_mean_fbeta_score <= best_val_fbeta_score:
            epochs_no_improve += 1
        else:
            epochs_no_improve = 0

        if epochs_no_improve >= patience:
            print("Early stopping")
            checkpoint_path = os.path.join(checkpoint_dir, 'last.pt')
            torch.save({
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'best_val_loss': best_val_loss,
                'best_val_fbeta_score': best_val_fbeta_score
            }, checkpoint_path)
            break
        # if epochs_no_improve%6 == 0:
        #     # 손실이 개선되지 않았으므로 lambda 감소
        #     new_lamda = max(criterion.lamda - 0.01, 0.1)  # 최소값은 0.1로 설정
        #     criterion.set_lamda(new_lamda)
        #     print(f"Validation loss did not improve. Reducing lambda to {new_lamda:.4f}")

    wandb.finish()


In [None]:
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=num_epochs,
    patience=10,
    device=device,
    start_epoch=start_epoch,
    best_val_loss=best_val_loss,
    best_val_fbeta_score=best_val_fbeta_score,
    calculate_dice_interval=1,
    accumulation_steps = accumulation_steps
     ) 

Epoch 10/4000


  with device_autocast_ctx, torch.cpu.amp.autocast(**cpu_autocast_kwargs), recompute_context:  # type: ignore[attr-defined]
Training: 100%|██████████| 1440/1440 [33:12<00:00,  1.38s/it, loss=0.176]
Validation: 100%|██████████| 12/12 [00:13<00:00,  1.15s/it, loss=0.612]


Validation Dice Score
Class 0: 0.9907, Class 1: 0.1387, Class 2: 0.0000, Class 3: 0.1614, Class 4: 0.4665, Class 5: 0.0742, Class 6: 0.4390, 
Validation F-beta Score
Class 0: 0.9961, Class 1: 0.5946, Class 2: 0.6667, Class 3: 0.2980, Class 4: 0.6693, Class 5: 0.2949, Class 6: 0.7588, 

Overall Mean Dice Score: 0.2560
Overall Mean F-beta Score: 0.5231

Training Loss: 0.4414, Validation Loss: 0.5655, Validation F-beta: 0.5231
Epoch 11/4000


Training: 100%|██████████| 1440/1440 [32:53<00:00,  1.37s/it, loss=0.347]
Validation: 100%|██████████| 12/12 [00:14<00:00,  1.17s/it, loss=0.334]


Validation Dice Score
Class 0: 0.9928, Class 1: 0.3084, Class 2: 0.0000, Class 3: 0.0837, Class 4: 0.2690, Class 5: 0.2669, Class 6: 0.4392, 
Validation F-beta Score
Class 0: 0.9952, Class 1: 0.6951, Class 2: 0.4167, Class 3: 0.3204, Class 4: 0.5958, Class 5: 0.2005, Class 6: 0.7211, 

Overall Mean Dice Score: 0.2735
Overall Mean F-beta Score: 0.5066

Training Loss: 0.4411, Validation Loss: 0.5067, Validation F-beta: 0.5066
Epoch 12/4000


Training: 100%|██████████| 1440/1440 [32:37<00:00,  1.36s/it, loss=0.685]
Validation: 100%|██████████| 12/12 [00:13<00:00,  1.12s/it, loss=1.54] 


Validation Dice Score
Class 0: 0.9917, Class 1: 0.4277, Class 2: 0.0010, Class 3: 0.0412, Class 4: 0.4708, Class 5: 0.1146, Class 6: 0.3016, 
Validation F-beta Score
Class 0: 0.9974, Class 1: 0.5603, Class 2: 0.4172, Class 3: 0.2819, Class 4: 0.6563, Class 5: 0.3441, Class 6: 0.7274, 

Overall Mean Dice Score: 0.2712
Overall Mean F-beta Score: 0.5140

Training Loss: 0.4237, Validation Loss: 0.5509, Validation F-beta: 0.5140
Epoch 13/4000


Training: 100%|██████████| 1440/1440 [32:52<00:00,  1.37s/it, loss=0.418]
Validation: 100%|██████████| 12/12 [00:15<00:00,  1.26s/it, loss=0.279]


Validation Dice Score
Class 0: 0.9907, Class 1: 0.4154, Class 2: 0.0000, Class 3: 0.1531, Class 4: 0.5575, Class 5: 0.1324, Class 6: 0.5161, 
Validation F-beta Score
Class 0: 0.9958, Class 1: 0.4293, Class 2: 0.5833, Class 3: 0.4480, Class 4: 0.7721, Class 5: 0.1884, Class 6: 0.7917, 

Overall Mean Dice Score: 0.3549
Overall Mean F-beta Score: 0.5259

Training Loss: 0.4056, Validation Loss: 0.5895, Validation F-beta: 0.5259
Epoch 14/4000


Training: 100%|██████████| 1440/1440 [32:51<00:00,  1.37s/it, loss=0.183]
Validation: 100%|██████████| 12/12 [00:13<00:00,  1.14s/it, loss=0.217]


Validation Dice Score
Class 0: 0.9918, Class 1: 0.2402, Class 2: 0.0013, Class 3: 0.0509, Class 4: 0.3666, Class 5: 0.1732, Class 6: 0.2850, 
Validation F-beta Score
Class 0: 0.9957, Class 1: 0.8131, Class 2: 0.2507, Class 3: 0.2832, Class 4: 0.5680, Class 5: 0.2280, Class 6: 0.8063, 

Overall Mean Dice Score: 0.2232
Overall Mean F-beta Score: 0.5397

Training Loss: 0.4034, Validation Loss: 0.4822, Validation F-beta: 0.5397
Epoch 15/4000


Training: 100%|██████████| 1440/1440 [32:41<00:00,  1.36s/it, loss=0.383]
Validation: 100%|██████████| 12/12 [00:12<00:00,  1.08s/it, loss=0.338]


Validation Dice Score
Class 0: 0.9931, Class 1: 0.2834, Class 2: 0.0000, Class 3: 0.0901, Class 4: 0.4746, Class 5: 0.1342, Class 6: 0.2758, 
Validation F-beta Score
Class 0: 0.9962, Class 1: 0.5406, Class 2: 0.2500, Class 3: 0.7403, Class 4: 0.6400, Class 5: 0.2755, Class 6: 0.8524, 

Overall Mean Dice Score: 0.2516
Overall Mean F-beta Score: 0.6097

Training Loss: 0.3871, Validation Loss: 0.4531, Validation F-beta: 0.6097
SUPER Best model saved. Loss:0.4531, Score:0.6097
Epoch 16/4000


Training: 100%|██████████| 1440/1440 [32:45<00:00,  1.37s/it, loss=0.259]
Validation: 100%|██████████| 12/12 [00:13<00:00,  1.11s/it, loss=0.246]


Validation Dice Score
Class 0: 0.9937, Class 1: 0.4297, Class 2: 0.0000, Class 3: 0.1436, Class 4: 0.1483, Class 5: 0.3102, Class 6: 0.3127, 
Validation F-beta Score
Class 0: 0.9973, Class 1: 0.7098, Class 2: 0.7500, Class 3: 0.3481, Class 4: 0.4449, Class 5: 0.3522, Class 6: 0.7351, 

Overall Mean Dice Score: 0.2689
Overall Mean F-beta Score: 0.5180

Training Loss: 0.3930, Validation Loss: 0.4748, Validation F-beta: 0.5180
Epoch 17/4000


Training: 100%|██████████| 1440/1440 [32:30<00:00,  1.35s/it, loss=0.257]
Validation: 100%|██████████| 12/12 [00:15<00:00,  1.26s/it, loss=0.453]


Validation Dice Score
Class 0: 0.9943, Class 1: 0.5171, Class 2: 0.0465, Class 3: 0.1624, Class 4: 0.1084, Class 5: 0.2678, Class 6: 0.3917, 
Validation F-beta Score
Class 0: 0.9976, Class 1: 0.5147, Class 2: 0.6124, Class 3: 0.3934, Class 4: 0.5190, Class 5: 0.2928, Class 6: 0.6515, 

Overall Mean Dice Score: 0.2895
Overall Mean F-beta Score: 0.4743

Training Loss: 0.3950, Validation Loss: 0.4768, Validation F-beta: 0.4743
Epoch 18/4000


Training: 100%|██████████| 1440/1440 [32:51<00:00,  1.37s/it, loss=0.373]
Validation: 100%|██████████| 12/12 [00:14<00:00,  1.21s/it, loss=0.239]


Validation Dice Score
Class 0: 0.9922, Class 1: 0.2993, Class 2: 0.0541, Class 3: 0.2667, Class 4: 0.3187, Class 5: 0.3275, Class 6: 0.6447, 
Validation F-beta Score
Class 0: 0.9974, Class 1: 0.7697, Class 2: 0.7913, Class 3: 0.3804, Class 4: 0.6797, Class 5: 0.3562, Class 6: 0.8971, 

Overall Mean Dice Score: 0.3714
Overall Mean F-beta Score: 0.6166

Training Loss: 0.3773, Validation Loss: 0.4866, Validation F-beta: 0.6166
Epoch 19/4000


Training: 100%|██████████| 1440/1440 [32:50<00:00,  1.37s/it, loss=0.399]
Validation: 100%|██████████| 12/12 [00:15<00:00,  1.30s/it, loss=0.501]


Validation Dice Score
Class 0: 0.9900, Class 1: 0.4144, Class 2: 0.0000, Class 3: 0.1278, Class 4: 0.4194, Class 5: 0.3211, Class 6: 0.5143, 
Validation F-beta Score
Class 0: 0.9970, Class 1: 0.6174, Class 2: 0.5000, Class 3: 0.2694, Class 4: 0.4092, Class 5: 0.3624, Class 6: 0.9535, 

Overall Mean Dice Score: 0.3594
Overall Mean F-beta Score: 0.5224

Training Loss: 0.3843, Validation Loss: 0.6721, Validation F-beta: 0.5224
Epoch 20/4000


Training: 100%|██████████| 1440/1440 [32:39<00:00,  1.36s/it, loss=0.201]
Validation: 100%|██████████| 12/12 [00:14<00:00,  1.24s/it, loss=0.248]


Validation Dice Score
Class 0: 0.9947, Class 1: 0.5640, Class 2: 0.0000, Class 3: 0.2545, Class 4: 0.3204, Class 5: 0.3042, Class 6: 0.4384, 
Validation F-beta Score
Class 0: 0.9981, Class 1: 0.8121, Class 2: 0.5000, Class 3: 0.2970, Class 4: 0.6863, Class 5: 0.5091, Class 6: 0.8654, 

Overall Mean Dice Score: 0.3763
Overall Mean F-beta Score: 0.6340

Training Loss: 0.3752, Validation Loss: 0.4367, Validation F-beta: 0.6340
SUPER Best model saved. Loss:0.4367, Score:0.6340
Epoch 21/4000


Training: 100%|██████████| 1440/1440 [32:41<00:00,  1.36s/it, loss=0.824]
Validation: 100%|██████████| 12/12 [00:15<00:00,  1.26s/it, loss=0.715]


Validation Dice Score
Class 0: 0.9930, Class 1: 0.3946, Class 2: 0.0000, Class 3: 0.2378, Class 4: 0.2265, Class 5: 0.3404, Class 6: 0.3604, 
Validation F-beta Score
Class 0: 0.9962, Class 1: 0.6913, Class 2: 0.3333, Class 3: 0.4540, Class 4: 0.5346, Class 5: 0.3234, Class 6: 0.8081, 

Overall Mean Dice Score: 0.3120
Overall Mean F-beta Score: 0.5623

Training Loss: 0.3787, Validation Loss: 0.5091, Validation F-beta: 0.5623
Epoch 22/4000


Training: 100%|██████████| 1440/1440 [33:08<00:00,  1.38s/it, loss=0.544]
Validation: 100%|██████████| 12/12 [00:14<00:00,  1.24s/it, loss=0.358]


Validation Dice Score
Class 0: 0.9936, Class 1: 0.3641, Class 2: 0.0017, Class 3: 0.2342, Class 4: 0.4799, Class 5: 0.2866, Class 6: 0.4361, 
Validation F-beta Score
Class 0: 0.9965, Class 1: 0.7498, Class 2: 0.3342, Class 3: 0.3413, Class 4: 0.6794, Class 5: 0.2681, Class 6: 0.8270, 

Overall Mean Dice Score: 0.3602
Overall Mean F-beta Score: 0.5731

Training Loss: 0.3738, Validation Loss: 0.4465, Validation F-beta: 0.5731
Epoch 23/4000


Training: 100%|██████████| 1440/1440 [33:05<00:00,  1.38s/it, loss=0.634]
Validation: 100%|██████████| 12/12 [00:12<00:00,  1.05s/it, loss=1.4]  


Validation Dice Score
Class 0: 0.9915, Class 1: 0.2356, Class 2: 0.0000, Class 3: 0.0265, Class 4: 0.3726, Class 5: 0.1645, Class 6: 0.0785, 
Validation F-beta Score
Class 0: 0.9944, Class 1: 0.7367, Class 2: 0.5833, Class 3: 0.3488, Class 4: 0.5496, Class 5: 0.1485, Class 6: 0.9129, 

Overall Mean Dice Score: 0.1756
Overall Mean F-beta Score: 0.5393

Training Loss: 0.3625, Validation Loss: 0.5585, Validation F-beta: 0.5393
Epoch 24/4000


Training: 100%|██████████| 1440/1440 [32:45<00:00,  1.36s/it, loss=0.498]
Validation: 100%|██████████| 12/12 [00:14<00:00,  1.19s/it, loss=0.686]


Validation Dice Score
Class 0: 0.9935, Class 1: 0.4627, Class 2: 0.0000, Class 3: 0.1645, Class 4: 0.2815, Class 5: 0.1699, Class 6: 0.5085, 
Validation F-beta Score
Class 0: 0.9980, Class 1: 0.7475, Class 2: 0.3333, Class 3: 0.2339, Class 4: 0.6578, Class 5: 0.3659, Class 6: 0.9392, 

Overall Mean Dice Score: 0.3174
Overall Mean F-beta Score: 0.5889

Training Loss: 0.3657, Validation Loss: 0.4627, Validation F-beta: 0.5889
Epoch 25/4000


Training: 100%|██████████| 1440/1440 [32:59<00:00,  1.37s/it, loss=0.494]
Validation: 100%|██████████| 12/12 [00:14<00:00,  1.19s/it, loss=0.496]


Validation Dice Score
Class 0: 0.9942, Class 1: 0.3063, Class 2: 0.0569, Class 3: 0.2707, Class 4: 0.4012, Class 5: 0.2407, Class 6: 0.2558, 
Validation F-beta Score
Class 0: 0.9970, Class 1: 0.7804, Class 2: 0.2906, Class 3: 0.3935, Class 4: 0.7226, Class 5: 0.3606, Class 6: 0.9449, 

Overall Mean Dice Score: 0.2949
Overall Mean F-beta Score: 0.6404

Training Loss: 0.3530, Validation Loss: 0.4309, Validation F-beta: 0.6404
SUPER Best model saved. Loss:0.4309, Score:0.6404
Epoch 26/4000


Training: 100%|██████████| 1440/1440 [33:30<00:00,  1.40s/it, loss=0.387]
Validation: 100%|██████████| 12/12 [00:13<00:00,  1.11s/it, loss=0.63] 


Validation Dice Score
Class 0: 0.9932, Class 1: 0.3376, Class 2: 0.0006, Class 3: 0.2019, Class 4: 0.3997, Class 5: 0.1707, Class 6: 0.3792, 
Validation F-beta Score
Class 0: 0.9968, Class 1: 0.9249, Class 2: 0.5837, Class 3: 0.3471, Class 4: 0.5988, Class 5: 0.2223, Class 6: 0.7283, 

Overall Mean Dice Score: 0.2978
Overall Mean F-beta Score: 0.5643

Training Loss: 0.3470, Validation Loss: 0.4416, Validation F-beta: 0.5643
Epoch 27/4000


Training: 100%|██████████| 1440/1440 [33:23<00:00,  1.39s/it, loss=0.519]
Validation: 100%|██████████| 12/12 [00:16<00:00,  1.35s/it, loss=0.479]


Validation Dice Score
Class 0: 0.9922, Class 1: 0.7005, Class 2: 0.0000, Class 3: 0.2853, Class 4: 0.3370, Class 5: 0.0912, Class 6: 0.4372, 
Validation F-beta Score
Class 0: 0.9968, Class 1: 0.8755, Class 2: 0.5000, Class 3: 0.3180, Class 4: 0.5390, Class 5: 0.1475, Class 6: 0.7617, 

Overall Mean Dice Score: 0.3703
Overall Mean F-beta Score: 0.5283

Training Loss: 0.3546, Validation Loss: 0.5223, Validation F-beta: 0.5283
Epoch 28/4000


Training: 100%|██████████| 1440/1440 [33:08<00:00,  1.38s/it, loss=0.533]
Validation: 100%|██████████| 12/12 [00:14<00:00,  1.24s/it, loss=1.22] 


Validation Dice Score
Class 0: 0.9917, Class 1: 0.4600, Class 2: 0.0000, Class 3: 0.3296, Class 4: 0.3469, Class 5: 0.1770, Class 6: 0.2984, 
Validation F-beta Score
Class 0: 0.9957, Class 1: 0.7234, Class 2: 0.5833, Class 3: 0.4115, Class 4: 0.4643, Class 5: 0.1590, Class 6: 0.7093, 

Overall Mean Dice Score: 0.3224
Overall Mean F-beta Score: 0.4935

Training Loss: 0.3429, Validation Loss: 0.5312, Validation F-beta: 0.4935
Epoch 29/4000


Training: 100%|██████████| 1440/1440 [33:31<00:00,  1.40s/it, loss=0.48] 
Validation: 100%|██████████| 12/12 [00:15<00:00,  1.28s/it, loss=0.733]


Validation Dice Score
Class 0: 0.9937, Class 1: 0.3916, Class 2: 0.0462, Class 3: 0.2461, Class 4: 0.4295, Class 5: 0.3587, Class 6: 0.2940, 
Validation F-beta Score
Class 0: 0.9949, Class 1: 0.7378, Class 2: 0.4528, Class 3: 0.3862, Class 4: 0.7694, Class 5: 0.3332, Class 6: 0.8862, 

Overall Mean Dice Score: 0.3440
Overall Mean F-beta Score: 0.6226

Training Loss: 0.3493, Validation Loss: 0.4297, Validation F-beta: 0.6226
Epoch 30/4000


Training: 100%|██████████| 1440/1440 [34:14<00:00,  1.43s/it, loss=0.173]
Validation: 100%|██████████| 12/12 [00:15<00:00,  1.29s/it, loss=0.162]


Validation Dice Score
Class 0: 0.9911, Class 1: 0.5719, Class 2: 0.0000, Class 3: 0.2385, Class 4: 0.4707, Class 5: 0.0729, Class 6: 0.6066, 
Validation F-beta Score
Class 0: 0.9946, Class 1: 0.6210, Class 2: 0.4167, Class 3: 0.3835, Class 4: 0.6249, Class 5: 0.3007, Class 6: 0.9572, 

Overall Mean Dice Score: 0.3921
Overall Mean F-beta Score: 0.5775

Training Loss: 0.3399, Validation Loss: 0.6509, Validation F-beta: 0.5775
Epoch 31/4000


Training: 100%|██████████| 1440/1440 [35:31<00:00,  1.48s/it, loss=0.145]
Validation: 100%|██████████| 12/12 [00:15<00:00,  1.27s/it, loss=0.452]


Validation Dice Score
Class 0: 0.9937, Class 1: 0.4310, Class 2: 0.0000, Class 3: 0.2166, Class 4: 0.3585, Class 5: 0.2705, Class 6: 0.5321, 
Validation F-beta Score
Class 0: 0.9977, Class 1: 0.9331, Class 2: 0.4167, Class 3: 0.3556, Class 4: 0.7385, Class 5: 0.2313, Class 6: 0.8384, 

Overall Mean Dice Score: 0.3617
Overall Mean F-beta Score: 0.6194

Training Loss: 0.3369, Validation Loss: 0.4633, Validation F-beta: 0.6194
Epoch 32/4000


Training: 100%|██████████| 1440/1440 [35:28<00:00,  1.48s/it, loss=0.353]
Validation: 100%|██████████| 12/12 [00:16<00:00,  1.36s/it, loss=0.316]


Validation Dice Score
Class 0: 0.9940, Class 1: 0.5915, Class 2: 0.0270, Class 3: 0.1757, Class 4: 0.3730, Class 5: 0.2657, Class 6: 0.6948, 
Validation F-beta Score
Class 0: 0.9974, Class 1: 0.7438, Class 2: 0.8502, Class 3: 0.3959, Class 4: 0.6400, Class 5: 0.3043, Class 6: 0.7861, 

Overall Mean Dice Score: 0.4201
Overall Mean F-beta Score: 0.5740

Training Loss: 0.3327, Validation Loss: 0.4362, Validation F-beta: 0.5740
Epoch 33/4000


Training: 100%|██████████| 1440/1440 [35:28<00:00,  1.48s/it, loss=0.235]
Validation: 100%|██████████| 12/12 [00:16<00:00,  1.41s/it, loss=0.29] 


Validation Dice Score
Class 0: 0.9935, Class 1: 0.5753, Class 2: 0.0071, Class 3: 0.2726, Class 4: 0.3477, Class 5: 0.3427, Class 6: 0.4508, 
Validation F-beta Score
Class 0: 0.9962, Class 1: 0.7506, Class 2: 0.3379, Class 3: 0.3456, Class 4: 0.6585, Class 5: 0.3031, Class 6: 0.8966, 

Overall Mean Dice Score: 0.3978
Overall Mean F-beta Score: 0.5909

Training Loss: 0.3381, Validation Loss: 0.4638, Validation F-beta: 0.5909
Epoch 34/4000


Training: 100%|██████████| 1440/1440 [35:12<00:00,  1.47s/it, loss=0.362]
Validation: 100%|██████████| 12/12 [00:15<00:00,  1.31s/it, loss=0.366]


Validation Dice Score
Class 0: 0.9929, Class 1: 0.2688, Class 2: 0.0550, Class 3: 0.3275, Class 4: 0.5018, Class 5: 0.3845, Class 6: 0.2238, 
Validation F-beta Score
Class 0: 0.9968, Class 1: 0.7011, Class 2: 0.2066, Class 3: 0.6057, Class 4: 0.5638, Class 5: 0.3220, Class 6: 0.9117, 

Overall Mean Dice Score: 0.3413
Overall Mean F-beta Score: 0.6209

Training Loss: 0.3331, Validation Loss: 0.5945, Validation F-beta: 0.6209
Epoch 35/4000


Training: 100%|██████████| 1440/1440 [35:51<00:00,  1.49s/it, loss=0.329]
Validation: 100%|██████████| 12/12 [00:13<00:00,  1.13s/it, loss=0.265]


Validation Dice Score
Class 0: 0.9912, Class 1: 0.0675, Class 2: 0.0667, Class 3: 0.3004, Class 4: 0.5275, Class 5: 0.1568, Class 6: 0.2794, 
Validation F-beta Score
Class 0: 0.9954, Class 1: 0.5619, Class 2: 0.8904, Class 3: 0.5328, Class 4: 0.5947, Class 5: 0.2175, Class 6: 0.8896, 

Overall Mean Dice Score: 0.2664
Overall Mean F-beta Score: 0.5593

Training Loss: 0.3381, Validation Loss: 0.5850, Validation F-beta: 0.5593
Epoch 36/4000


Training: 100%|██████████| 1440/1440 [34:50<00:00,  1.45s/it, loss=0.288]
Validation: 100%|██████████| 12/12 [00:14<00:00,  1.17s/it, loss=1.72] 


Validation Dice Score
Class 0: 0.9933, Class 1: 0.5470, Class 2: 0.0054, Class 3: 0.1829, Class 4: 0.1395, Class 5: 0.1291, Class 6: 0.3431, 
Validation F-beta Score
Class 0: 0.9980, Class 1: 0.7590, Class 2: 0.5030, Class 3: 0.2324, Class 4: 0.5997, Class 5: 0.1793, Class 6: 0.7073, 

Overall Mean Dice Score: 0.2683
Overall Mean F-beta Score: 0.4955

Training Loss: 0.3244, Validation Loss: 0.5911, Validation F-beta: 0.4955
Epoch 37/4000


Training: 100%|██████████| 1440/1440 [33:46<00:00,  1.41s/it, loss=0.354]
Validation: 100%|██████████| 12/12 [00:14<00:00,  1.17s/it, loss=0.515]


Validation Dice Score
Class 0: 0.9944, Class 1: 0.3340, Class 2: 0.0000, Class 3: 0.1287, Class 4: 0.6227, Class 5: 0.1402, Class 6: 0.6165, 
Validation F-beta Score
Class 0: 0.9961, Class 1: 0.8070, Class 2: 0.7500, Class 3: 0.3519, Class 4: 0.6688, Class 5: 0.4646, Class 6: 0.9605, 

Overall Mean Dice Score: 0.3684
Overall Mean F-beta Score: 0.6506

Training Loss: 0.3204, Validation Loss: 0.3994, Validation F-beta: 0.6506
SUPER Best model saved. Loss:0.3994, Score:0.6506
Epoch 38/4000


Training: 100%|██████████| 1440/1440 [33:01<00:00,  1.38s/it, loss=0.169]
Validation: 100%|██████████| 12/12 [00:14<00:00,  1.23s/it, loss=0.631]


Validation Dice Score
Class 0: 0.9939, Class 1: 0.2820, Class 2: 0.0131, Class 3: 0.3221, Class 4: 0.2916, Class 5: 0.2525, Class 6: 0.2817, 
Validation F-beta Score
Class 0: 0.9955, Class 1: 0.5474, Class 2: 0.2578, Class 3: 0.3880, Class 4: 0.7777, Class 5: 0.2361, Class 6: 0.8890, 

Overall Mean Dice Score: 0.2860
Overall Mean F-beta Score: 0.5677

Training Loss: 0.3263, Validation Loss: 0.4437, Validation F-beta: 0.5677
Epoch 39/4000


Training: 100%|██████████| 1440/1440 [33:23<00:00,  1.39s/it, loss=0.619]
Validation: 100%|██████████| 12/12 [00:15<00:00,  1.26s/it, loss=1.17] 


Validation Dice Score
Class 0: 0.9911, Class 1: 0.5296, Class 2: 0.0146, Class 3: 0.2959, Class 4: 0.2237, Class 5: 0.2839, Class 6: 0.5651, 
Validation F-beta Score
Class 0: 0.9961, Class 1: 0.8484, Class 2: 0.5919, Class 3: 0.3675, Class 4: 0.7124, Class 5: 0.2183, Class 6: 0.9254, 

Overall Mean Dice Score: 0.3796
Overall Mean F-beta Score: 0.6144

Training Loss: 0.3205, Validation Loss: 0.5588, Validation F-beta: 0.6144
Epoch 40/4000


Training: 100%|██████████| 1440/1440 [33:04<00:00,  1.38s/it, loss=0.169]
Validation: 100%|██████████| 12/12 [00:16<00:00,  1.35s/it, loss=0.385]


Validation Dice Score
Class 0: 0.9943, Class 1: 0.5372, Class 2: 0.0037, Class 3: 0.2403, Class 4: 0.4686, Class 5: 0.3021, Class 6: 0.5499, 
Validation F-beta Score
Class 0: 0.9952, Class 1: 0.6830, Class 2: 0.3353, Class 3: 0.3942, Class 4: 0.6747, Class 5: 0.3511, Class 6: 0.9560, 

Overall Mean Dice Score: 0.4196
Overall Mean F-beta Score: 0.6118

Training Loss: 0.3137, Validation Loss: 0.3894, Validation F-beta: 0.6118
Epoch 41/4000


Training: 100%|██████████| 1440/1440 [33:02<00:00,  1.38s/it, loss=0.316]
Validation: 100%|██████████| 12/12 [00:13<00:00,  1.13s/it, loss=1.03] 


Validation Dice Score
Class 0: 0.9916, Class 1: 0.3693, Class 2: 0.0154, Class 3: 0.2740, Class 4: 0.3073, Class 5: 0.1384, Class 6: 0.2732, 
Validation F-beta Score
Class 0: 0.9950, Class 1: 0.7131, Class 2: 0.5088, Class 3: 0.5614, Class 4: 0.6912, Class 5: 0.1876, Class 6: 0.6833, 

Overall Mean Dice Score: 0.2724
Overall Mean F-beta Score: 0.5673

Training Loss: 0.3210, Validation Loss: 0.5803, Validation F-beta: 0.5673
Epoch 42/4000


Training:  34%|███▍      | 487/1440 [11:13<20:39,  1.30s/it, loss=0.609]

In [12]:
if:

SyntaxError: invalid syntax (879943805.py, line 1)

# VAl

In [None]:
from monai.data import DataLoader, Dataset, CacheDataset
from monai.transforms import (
    Compose, LoadImaged, EnsureChannelFirstd, NormalizeIntensityd,
    Orientationd, CropForegroundd, GaussianSmoothd, ScaleIntensityd,
    RandSpatialCropd, RandRotate90d, RandFlipd, RandGaussianNoised,
    ToTensord, RandCropByLabelClassesd
)
from monai.metrics import DiceMetric
from monai.networks.nets import UNETR, SwinUNETR
from monai.losses import TverskyLoss
import torch
import numpy as np
from tqdm import tqdm
import wandb
from src.dataset.dataset import make_val_dataloader

val_img_dir = "./datasets/val/images"
val_label_dir = "./datasets/val/labels"
img_depth = 96
img_size = 96  # Match your patch size
n_classes = 7
batch_size = 2 # 13.8GB GPU memory required for 128x128 img size
num_samples = batch_size # 한 이미지에서 뽑을 샘플 수
loader_batch = 1
lamda = 0.52

wandb.init(
    project='czii_SwinUnetR_val',  # 프로젝트 이름 설정
    name='SwinUNETR96_96_lr0.001_lambda0.52_batch2',         # 실행(run) 이름 설정
    config={
        'learning_rate': 0.001,
        'batch_size': batch_size,
        'lambda': lamda,
        'img_size': img_size,
        'device': 'cuda',
        "checkpoint_dir": "./model_checkpoints/SwinUNETR96_96_lr0.001_lambda0.52_batch2",
        
    }
)

non_random_transforms = Compose([
    EnsureChannelFirstd(keys=["image", "label"], channel_dim="no_channel"),
    NormalizeIntensityd(keys="image"),
    Orientationd(keys=["image", "label"], axcodes="RAS"),
    GaussianSmoothd(
        keys=["image"],      # 변환을 적용할 키
        sigma=[1.0, 1.0, 1.0]  # 각 축(x, y, z)의 시그마 값
        ),
])
random_transforms = Compose([
    RandCropByLabelClassesd(
        keys=["image", "label"],
        label_key="label",
        spatial_size=[img_depth, img_size, img_size],
        num_classes=n_classes,
        num_samples=num_samples, 
        ratios=ratios_list,
    ),
    RandRotate90d(keys=["image", "label"], prob=0.5, spatial_axes=[1, 2]),
    RandFlipd(keys=["image", "label"], prob=0.5, spatial_axis=0),
])

val_loader = make_val_dataloader(
    val_img_dir, 
    val_label_dir, 
    non_random_transforms = non_random_transforms, 
    random_transforms = random_transforms, 
    batch_size = loader_batch,
    num_workers=0
)
criterion = TverskyLoss(
    alpha= 1 - lamda,  # FP에 대한 가중치
    beta=lamda,       # FN에 대한 가중치
    include_background=False,  # 배경 클래스 제외
    softmax=True
)
    
    
from monai.metrics import DiceMetric

img_size = 96
img_depth = img_size
n_classes = 7 

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pretrain_path = "./model_checkpoints/SwinUNETR96_96_lr0.001_lambda0.52_batch2/best_model.pt"
model = SwinUNETR(
    img_size=(img_depth, img_size, img_size),
    in_channels=1,
    out_channels=n_classes,
    feature_size=48,
    use_checkpoint=True,
).to(device)
# Pretrained weights 불러오기
checkpoint = torch.load(pretrain_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])

val_loss, overall_mean_fbeta_score = validate_one_epoch(
    model=model, 
    val_loader=val_loader, 
    criterion=criterion, 
    device=device, 
    epoch=0, 
    calculate_dice_interval=1
)

VBox(children=(Label(value='0.009 MB of 0.009 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
class_0_dice_score,▁
class_0_f_beta_score,▁
class_1_dice_score,▁
class_1_f_beta_score,▁
class_2_dice_score,▁
class_2_f_beta_score,▁
class_3_dice_score,▁
class_3_f_beta_score,▁
class_4_dice_score,▁
class_4_f_beta_score,▁

0,1
class_0_dice_score,0.65703
class_0_f_beta_score,0.50748
class_1_dice_score,0.53332
class_1_f_beta_score,0.64703
class_2_dice_score,0.00286
class_2_f_beta_score,0.02334
class_3_dice_score,0.23703
class_3_f_beta_score,0.23033
class_4_dice_score,0.65487
class_4_f_beta_score,0.62525


Loading dataset: 100%|██████████| 4/4 [00:06<00:00,  1.58s/it]
  checkpoint = torch.load(pretrain_path, map_location=device)
Validation: 100%|██████████| 4/4 [00:01<00:00,  2.38it/s, loss=0.865]

Validation Dice Score
Class 0: 0.6570, Class 1: 0.5333, Class 2: 0.0029, Class 3: 0.2370, 
Class 4: 0.6549, Class 5: 0.4790, Class 6: 0.4255, 
Validation F-beta Score
Class 0: 0.5075, Class 1: 0.6470, Class 2: 0.0233, Class 3: 0.2303, 
Class 4: 0.6252, Class 5: 0.5145, Class 6: 0.4720, 
Overall Mean Dice Score: 0.4659
Overall Mean F-beta Score: 0.4978






# Inference

In [None]:
from src.dataset.preprocessing import Preprocessor

In [None]:
from monai.inferers import sliding_window_inference
from monai.transforms import Compose, EnsureChannelFirstd, NormalizeIntensityd, Orientationd, GaussianSmoothd
from monai.data import DataLoader, Dataset, CacheDataset
from monai.networks.nets import SwinUNETR
from pathlib import Path
import numpy as np
import copick

import torch
print("Done.")

Done.


In [None]:
config_blob = """{
    "name": "czii_cryoet_mlchallenge_2024",
    "description": "2024 CZII CryoET ML Challenge training data.",
    "version": "1.0.0",

    "pickable_objects": [
        {
            "name": "apo-ferritin",
            "is_particle": true,
            "pdb_id": "4V1W",
            "label": 1,
            "color": [  0, 117, 220, 128],
            "radius": 60,
            "map_threshold": 0.0418
        },
        {
          "name" : "beta-amylase",
            "is_particle": true,
            "pdb_id": "8ZRZ",
            "label": 2,
            "color": [255, 255, 255, 128],
            "radius": 90,
            "map_threshold": 0.0578  
        },
        {
            "name": "beta-galactosidase",
            "is_particle": true,
            "pdb_id": "6X1Q",
            "label": 3,
            "color": [ 76,   0,  92, 128],
            "radius": 90,
            "map_threshold": 0.0578
        },
        {
            "name": "ribosome",
            "is_particle": true,
            "pdb_id": "6EK0",
            "label": 4,
            "color": [  0,  92,  49, 128],
            "radius": 150,
            "map_threshold": 0.0374
        },
        {
            "name": "thyroglobulin",
            "is_particle": true,
            "pdb_id": "6SCJ",
            "label": 5,
            "color": [ 43, 206,  72, 128],
            "radius": 130,
            "map_threshold": 0.0278
        },
        {
            "name": "virus-like-particle",
            "is_particle": true,
            "label": 6,
            "color": [255, 204, 153, 128],
            "radius": 135,
            "map_threshold": 0.201
        },
        {
            "name": "membrane",
            "is_particle": false,
            "label": 8,
            "color": [100, 100, 100, 128]
        },
        {
            "name": "background",
            "is_particle": false,
            "label": 9,
            "color": [10, 150, 200, 128]
        }
    ],

    "overlay_root": "./kaggle/working/overlay",

    "overlay_fs_args": {
        "auto_mkdir": true
    },

    "static_root": "./kaggle/input/czii-cryo-et-object-identification/test/static"
}"""

copick_config_path = "./kaggle/working/copick.config"
preprocessor = Preprocessor(config_blob,copick_config_path=copick_config_path)
non_random_transforms = Compose([
    EnsureChannelFirstd(keys=["image"], channel_dim="no_channel"),
    NormalizeIntensityd(keys="image"),
    Orientationd(keys=["image"], axcodes="RAS"),
    GaussianSmoothd(
        keys=["image"],      # 변환을 적용할 키
        sigma=[1.0, 1.0, 1.0]  # 각 축(x, y, z)의 시그마 값
        ),
    ])

Config file written to ./kaggle/working/copick.config
file length: 7


In [None]:
img_size = 96
img_depth = img_size
n_classes = 7 

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pretrain_path = "./model_checkpoints/SwinUNETR96_96_lr0.001_lambda0.52_batch2/best_model.pt"
model = SwinUNETR(
    img_size=(img_depth, img_size, img_size),
    in_channels=1,
    out_channels=n_classes,
    feature_size=48,
    use_checkpoint=True,
).to(device)
# Pretrained weights 불러오기
checkpoint = torch.load(pretrain_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])


  checkpoint = torch.load(pretrain_path, map_location=device)


<All keys matched successfully>

In [None]:
val_loss = validate_one_epoch(
            model=model, 
            val_loader=val_loader, 
            criterion=criterion, 
            device=device, 
            epoch=1, 
            calculate_dice_interval=0
        )

Validation:   0%|          | 0/4 [00:03<?, ?it/s, loss=0.764]


ZeroDivisionError: integer modulo by zero

In [None]:
import torch
import numpy as np
from scipy.ndimage import label, center_of_mass
import pandas as pd
from tqdm import tqdm
from monai.data import CacheDataset, DataLoader
from monai.transforms import Compose, NormalizeIntensity
import cc3d

def dict_to_df(coord_dict, experiment_name):
    all_coords = []
    all_labels = []
    
    for label, coords in coord_dict.items():
        all_coords.append(coords)
        all_labels.extend([label] * len(coords))
    
    all_coords = np.vstack(all_coords)
    df = pd.DataFrame({
        'experiment': experiment_name,
        'particle_type': all_labels,
        'x': all_coords[:, 0],
        'y': all_coords[:, 1],
        'z': all_coords[:, 2]
    })
    return df

id_to_name = {1: "apo-ferritin", 
              2: "beta-amylase",
              3: "beta-galactosidase", 
              4: "ribosome", 
              5: "thyroglobulin", 
              6: "virus-like-particle"}
BLOB_THRESHOLD = 200
CERTAINTY_THRESHOLD = 0.05

classes = [1, 2, 3, 4, 5, 6]

model.eval()
with torch.no_grad():
    location_dfs = []  # DataFrame 리스트로 초기화
    
    for vol_idx, run in enumerate(preprocessor.root.runs):
        print(f"Processing volume {vol_idx + 1}/{len(preprocessor.root.runs)}")
        tomogram = preprocessor.processing(run=run, task="task")
        task_files = [{"image": tomogram}]
        task_ds = CacheDataset(data=task_files, transform=non_random_transforms)
        task_loader = DataLoader(task_ds, batch_size=1, num_workers=0)
        
        for task_data in task_loader:
            images = task_data['image'].to("cuda")
            outputs = sliding_window_inference(
                inputs=images,
                roi_size=(96, 96, 96),  # ROI 크기
                sw_batch_size=4,
                predictor=model.forward,
                overlap=0.1,
                sw_device="cuda",
                device="cpu",
                buffer_steps=1,
                buffer_dim=-1
            )
            outputs = outputs.argmax(dim=1).squeeze(0).cpu().numpy()  # 클래스 채널 예측
            location = {}  # 좌표 저장용 딕셔너리
            for c in classes:
                cc = cc3d.connected_components(outputs == c)  # cc3d 라벨링
                stats = cc3d.statistics(cc)
                zyx = stats['centroids'][1:] * 10.012444  # 스케일 변환
                zyx_large = zyx[stats['voxel_counts'][1:] > BLOB_THRESHOLD]  # 크기 필터링
                xyz = np.ascontiguousarray(zyx_large[:, ::-1])  # 좌표 스왑 (z, y, x -> x, y, z)

                location[id_to_name[c]] = xyz  # ID 이름 매칭 저장

            # 데이터프레임 변환
            df = dict_to_df(location, run.name)
            location_dfs.append(df)  # 리스트에 추가
        
        # if vol_idx == 2:
        #     break
    
    # DataFrame 병합
    final_df = pd.concat(location_dfs, ignore_index=True)
    
    # ID 추가 및 CSV 저장
    final_df.insert(loc=0, column='id', value=np.arange(len(final_df)))
    final_df.to_csv("submission.csv", index=False)
    print("Submission saved to: submission.csv")


Processing volume 1/7


Loading dataset: 100%|██████████| 1/1 [00:01<00:00,  1.94s/it]


Processing volume 2/7


Loading dataset: 100%|██████████| 1/1 [00:01<00:00,  1.89s/it]


Processing volume 3/7


Loading dataset: 100%|██████████| 1/1 [00:01<00:00,  1.79s/it]


Submission saved to: submission.csv
