In [1]:
import os
import shutil
import tempfile

import matplotlib.pyplot as plt
from tqdm import tqdm

import random
import numpy as np
import torch


from monai.losses import DiceCELoss
from monai.inferers import sliding_window_inference
from monai.transforms import (
    AsDiscrete,
    EnsureChannelFirstd,
    Compose,
    CropForegroundd,
    LoadImaged,
    Orientationd,
    RandFlipd,
    RandCropByPosNegLabeld,
    RandShiftIntensityd,
    ScaleIntensityRanged,
    Spacingd,
    RandRotate90d,
)

from monai.config import print_config
from monai.metrics import DiceMetric
from src.models.swincspunetr import SwinCSPUNETR

from monai.data import (
    DataLoader,
    CacheDataset,
    load_decathlon_datalist,
    decollate_batch,
)

# 랜덤 시드 고정
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

set_seed(42)


print_config()

MONAI version: 1.4.0
Numpy version: 1.26.3
Pytorch version: 2.4.1+cu121
MONAI flags: HAS_EXT = False, USE_COMPILED = False, USE_META_DICT = False
MONAI rev id: 46a5272196a6c2590ca2589029eed8e4d56ff008
MONAI __file__: c:\ProgramData\anaconda3\envs\ship\Lib\site-packages\monai\__init__.py

Optional dependencies:
Pytorch Ignite version: NOT INSTALLED or UNKNOWN VERSION.
ITK version: NOT INSTALLED or UNKNOWN VERSION.
Nibabel version: 5.3.2
scikit-image version: 0.24.0
scipy version: 1.14.1
Pillow version: 10.2.0
Tensorboard version: NOT INSTALLED or UNKNOWN VERSION.
gdown version: 5.2.0
TorchVision version: 0.19.1+cu121
tqdm version: 4.66.5
lmdb version: NOT INSTALLED or UNKNOWN VERSION.
psutil version: 6.0.0
pandas version: 2.2.3
einops version: 0.8.0
transformers version: NOT INSTALLED or UNKNOWN VERSION.
mlflow version: NOT INSTALLED or UNKNOWN VERSION.
pynrrd version: NOT INSTALLED or UNKNOWN VERSION.
clearml version: NOT INSTALLED or UNKNOWN VERSION.

For details about installing the 

In [2]:
class_info = {
    0: {"name": "background", "weight": 0},  # weight 없음
    1: {"name": "apo-ferritin", "weight": 1000},
    2: {"name": "beta-amylase", "weight": 100}, # 4130
    3: {"name": "beta-galactosidase", "weight": 1500}, #3080
    4: {"name": "ribosome", "weight": 1000},
    5: {"name": "thyroglobulin", "weight": 1500},
    6: {"name": "virus-like-particle", "weight": 1000},
}

# 가중치에 비례한 비율 계산
raw_ratios = {
    k: (v["weight"] if v["weight"] is not None else 0.01)  # 가중치 비례, None일 경우 기본값a
    for k, v in class_info.items()
}
total = sum(raw_ratios.values())
ratios = {k: v / total for k, v in raw_ratios.items()}

# 최종 합계가 1인지 확인
final_total = sum(ratios.values())
print("클래스 비율:", ratios)
print("최종 합계:", final_total)

# 비율을 리스트로 변환
ratios_list = [ratios[k] for k in sorted(ratios.keys())]
print("클래스 비율 리스트:", ratios_list)

클래스 비율: {0: 0.0, 1: 0.16393442622950818, 2: 0.01639344262295082, 3: 0.2459016393442623, 4: 0.16393442622950818, 5: 0.2459016393442623, 6: 0.16393442622950818}
최종 합계: 1.0
클래스 비율 리스트: [0.0, 0.16393442622950818, 0.01639344262295082, 0.2459016393442623, 0.16393442622950818, 0.2459016393442623, 0.16393442622950818]


# 모델 설정

In [3]:
from src.dataset.dataset import create_dataloaders
from monai.transforms import (
    Compose, LoadImaged, EnsureChannelFirstd, NormalizeIntensityd,
    Orientationd, CropForegroundd, GaussianSmoothd, ScaleIntensityd,
    RandSpatialCropd, RandRotate90d, RandFlipd, RandGaussianNoised,
    ToTensord, RandCropByLabelClassesd
)
from monai.transforms import CastToTyped
import numpy as np

train_img_dir = "./datasets/train/images"
train_label_dir = "./datasets/train/labels"
val_img_dir = "./datasets/val/images"
val_label_dir = "./datasets/val/labels"
# DATA CONFIG
img_size =  96 # Match your patch size
img_depth = img_size
n_classes = 7
batch_size = 1 # 13.8GB GPU memory required for 128x128 img size
num_samples = batch_size # 한 이미지에서 뽑을 샘플 수
loader_batch = 1
num_repeat = 60
accumulation_steps = 16

# MODEL CONFIG
feature_size = 48
use_checkpoint = True
use_v2 = True
drop_rate= 0.25
attn_drop_rate = 0.25
num_bottleneck = 2

# TRAINING CONFIG
num_epochs = 4000

lr = 0.0001

# LOSS
warmup_epochs = 5
schedule_epochs = 10
warmup_ce = 8.0
warmup_tv = 0.1
warmup_hd = 0.1
ce_end = 0.2
tv_end = 0.4
hd_end = 0.4
include_background = False
reduction = "mean"
softmax = True
tversky_alpha = 0.52  # Tversky loss의 alpha 값 = lamda
tversky_beta = 1.0 - tversky_alpha
tversky_smooth = 1e-5
tv_boost = 1.2
hd_boost = 1.2


class_weights = None
# class_weights = torch.tensor([0.0001, 1, 0.001, 1.1, 1, 1.1, 1], dtype=torch.float32)  # 클래스별 가중치

# INIT
start_epoch = 0
best_val_loss = float('inf')
best_val_fbeta_score = 0

non_random_transforms = Compose([
    EnsureChannelFirstd(keys=["image", "label"], channel_dim="no_channel"),
    NormalizeIntensityd(keys="image"),
    Orientationd(keys=["image", "label"], axcodes="RAS"),
    CastToTyped(keys=["image"], dtype=np.float16),
    GaussianSmoothd(
        keys=["image"],      # 변환을 적용할 키
        sigma=[1.0, 1.0, 1.0]  # 각 축(x, y, z)의 시그마 값
        ),
])
random_transforms = Compose([
    RandCropByLabelClassesd(
        keys=["image", "label"],
        label_key="label",
        spatial_size=[img_depth, img_size, img_size],
        num_classes=n_classes,
        num_samples=num_samples, 
        ratios=ratios_list,
    ),
    RandRotate90d(keys=["image", "label"], prob=0.5, spatial_axes=[1, 2]),
    RandFlipd(keys=["image", "label"], prob=0.5, spatial_axis=0),
    RandFlipd(keys=["image", "label"], prob=0.5, spatial_axis=1),
    RandFlipd(keys=["image", "label"], prob=0.5, spatial_axis=2),
])

In [4]:
train_loader, val_loader = None, None
train_loader, val_loader = create_dataloaders(
    train_img_dir, 
    train_label_dir, 
    val_img_dir, 
    val_label_dir, 
    non_random_transforms = non_random_transforms, 
    random_transforms = random_transforms, 
    batch_size = loader_batch,
    num_workers=0,train_num_repeat=num_repeat)

Loading dataset: 100%|██████████| 24/24 [00:37<00:00,  1.57s/it]
Loading dataset: 100%|██████████| 4/4 [00:07<00:00,  1.91s/it]


https://monai.io/model-zoo.html

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from monai.losses import TverskyLoss, LogHausdorffDTLoss

class AdaptiveCombinedLoss(nn.Module):
    """
    요구사항:
    1) Warm-up 단계 (0 ~ warmup_epochs):
       - CE 위주로 학습(예: CE=1.0), Tversky/HD는 작게(예: 0.1)
    2) Warm-up 종료 시점:
       - 해당 시점 손실값 참고해 Kendall(\log\sigma) 초기값/가중치 보정
    3) Kendall(2018) 식:
       - (1/(2σ_i^2))*L_i + log(σ_i^2)
       - Tversky/HD는 더 강조(상수 w_i를 크게) + CE 최소 한도 유지
    4) Warm-up 이후 스케줄링:
       - CE 점진 감소(1.0→0.3), Tversky/HD 점진 증가(0.1→1.0)
       - 필요시 Tversky/HD를 추가로 boost
    """

    def __init__(
        self,
        # 1) Warm-up & 스케줄
        warmup_epochs: int = 5,
        schedule_epochs: int = 10,

        # 2) Warm-up 시 고정 가중치
        warmup_ce=1.0,
        warmup_tv=0.1,
        warmup_hd=0.1,

        # 3) 스케줄에서의 최종 가중치
        ce_end=0.3,
        tv_end=1.0,
        hd_end=1.0,

        # 4) Tversky/HD를 추가로 더 키우고 싶다면 boost 사용
        #    (예: 2.0이면 TV/HD가 2배 더 강조)
        tv_boost=1.2,
        hd_boost=1.2,

        # MONAI Loss 설정
        include_background=True,
        reduction="mean",
        softmax=True,

        # Tversky 파라미터
        tversky_alpha=0.52,
        tversky_beta=None,  # None이면 1 - alpha
        tversky_smooth=1e-5,

        # Kendall(2018) 식 사용 여부
        use_uncertainty=True,
    ):
        super().__init__()
        self.warmup_epochs = warmup_epochs
        self.schedule_epochs = schedule_epochs

        # Warm-up 고정 가중치
        self.warmup_ce = warmup_ce
        self.warmup_tv = warmup_tv
        self.warmup_hd = warmup_hd

        # Warm-up 이후 스케줄 start/end
        self.ce_start, self.ce_end = warmup_ce, ce_end
        self.tv_start, self.tv_end = warmup_tv, tv_end
        self.hd_start, self.hd_end = warmup_hd, hd_end

        # Tversky/HD 추가 배율(강조)
        self.tv_boost = tv_boost
        self.hd_boost = hd_boost

        self.use_uncertainty = use_uncertainty

        if tversky_beta is None:
            tversky_beta = 1.0 - tversky_alpha

        # 배경 채널 무시 설정
        if not include_background:
            ignore_index = 0
        else:
            ignore_index = -100

        # (1) 개별 손실 정의
        self.ce_loss = nn.CrossEntropyLoss()
        self.tversky_loss = TverskyLoss(
            alpha=tversky_alpha,
            beta=tversky_beta,
            smooth_nr=tversky_smooth,
            smooth_dr=tversky_smooth,
            softmax=softmax,
            reduction=reduction,
            include_background=include_background
        )
        self.haus_loss = LogHausdorffDTLoss(
            softmax=softmax,
            reduction=reduction,
            include_background=include_background
        )

        # (2) Kendall을 위한 log_sigma 파라미터
        if self.use_uncertainty:
            self.log_sigma_ce   = nn.Parameter(torch.zeros(1))
            self.log_sigma_tv   = nn.Parameter(torch.zeros(1))
            self.log_sigma_haus = nn.Parameter(torch.zeros(1))

        # (3) 현재 epoch, warm-up 손실 기록
        self.current_epoch = 0
        self.last_warmup_ce   = 0.0
        self.last_warmup_tv   = 0.0
        self.last_warmup_haus = 0.0

    def set_epoch(self, epoch: int):
        """학습 루프에서 매 epoch마다 호출하여 현재 epoch 갱신."""
        self.current_epoch = epoch

    def record_warmup_losses(self, ce_val, tv_val, hd_val):
        """
        Warm-up 단계 손실값을 기록 -> 이후 log_sigma 초기값 조정 등 활용 가능
        """
        self.last_warmup_ce   = ce_val
        self.last_warmup_tv   = tv_val
        self.last_warmup_haus = hd_val

    def end_of_warmup_init(self):
        """
        Warm-up → Kendall 전환 시점에서,
        warm-up 손실값 등을 참고해 log_sigma 등 초기 설정
        """
        if self.use_uncertainty:
            with torch.no_grad():
                # 예시: warm-up에서 CE가 안정적, TV/HD가 크면
                # TV/HD 강조 위해 log_sigma를 음수로 조정
                self.log_sigma_ce[0]   = 0.0   # CE
                self.log_sigma_tv[0]   = -0.5  # TV
                self.log_sigma_haus[0] = -0.5  # HD

    def forward(self, preds, targets):
        """
        preds: (B, C, D, H, W) - logit
        targets: (B, D, H, W) - 정수 라벨
        """
        # 1) 개별 손실 계산
        loss_ce   = self.ce_loss(preds, targets)
        loss_tv   = self.tversky_loss(preds, targets)
        loss_haus = self.haus_loss(preds, targets)

        # 2) Warm-up 단계
        if self.current_epoch < self.warmup_epochs:
            total_loss = (
                self.warmup_ce * loss_ce
                + self.warmup_tv * loss_tv
                + self.warmup_hd * loss_haus
            )
        else:
            # Warm-up 이후 → Kendall + 스케줄
            # (a) ratio 계산
            progress = self.current_epoch - self.warmup_epochs
            ratio = float(progress) / float(self.schedule_epochs)
            ratio = max(0.0, min(1.0, ratio))  # 0 ~ 1

            # (b) CE, TV, HAUS 선형 보간 가중치
            w_ce   = self.ce_start + (self.ce_end - self.ce_start)*ratio
            w_tv   = self.tv_start + (self.tv_end - self.tv_start)*ratio
            w_haus = self.hd_start + (self.hd_end - self.hd_start)*ratio

            

            if self.use_uncertainty:
                # (c) Kendall 식
                sigma_ce   = torch.exp(self.log_sigma_ce)
                sigma_tv   = torch.exp(self.log_sigma_tv)
                sigma_haus = torch.exp(self.log_sigma_haus)

                ce_term   = (1.0/(2.0*sigma_ce**2))   * loss_ce   + torch.log(sigma_ce**2)
                tv_term   = (1.0/(2.0*sigma_tv**2))   * loss_tv   + torch.log(sigma_tv**2)
                haus_term = (1.0/(2.0*sigma_haus**2)) * loss_haus + torch.log(sigma_haus**2)

                tv_term   = tv_term   * self.tv_boost
                haus_term = haus_term * self.hd_boost
                
                # (d) 최종 합산 (TV/HD > CE가 되도록 w_tv, w_haus를 크게)
                total_loss = w_ce*ce_term + w_tv*tv_term + w_haus*haus_term
            else:
                # Kendall 미사용 시 단순 가중합
                total_loss = w_ce*loss_ce + w_tv*loss_tv + w_haus*loss_haus

        return total_loss

  """


In [6]:
import torch.optim as optim
from tqdm import tqdm
import numpy as np
import torch
from pathlib import Path
from monai.metrics import DiceMetric

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = SwinCSPUNETR(
    img_size=(img_depth, img_size, img_size),
    in_channels=1,
    out_channels=n_classes,
    feature_size=feature_size,
    use_checkpoint=True,
    drop_rate = drop_rate,
    attn_drop_rate = attn_drop_rate,
    use_v2 = use_v2,
    n=num_bottleneck
).to(device)
# Pretrained weights 불러오기
# if use_checkpoint:
#     pretrain_path = "./swin_unetr_btcv_segmentation/models/model.pt"
#     weight = torch.load(pretrain_path, map_location=device)

#     # 출력 레이어의 키를 제외한 나머지 가중치만 로드
#     filtered_weights = {k: v for k, v in weight.items() if "out.conv.conv" not in k}

#     # strict=False로 로드하여 불일치하는 부분 무시
#     model.load_state_dict(filtered_weights, strict=False)
#     print("Filtered weights loaded successfully. Output layer will be trained from scratch.")


# Loss function
criterion = AdaptiveCombinedLoss(
    warmup_epochs=warmup_epochs,
    schedule_epochs=schedule_epochs,
    warmup_ce=warmup_ce,
    warmup_tv=warmup_tv,
    warmup_hd=warmup_hd,
    ce_end=ce_end,
    tv_end=tv_end,
    hd_end=hd_end,
    include_background=include_background,
    reduction=reduction,
    tversky_alpha=tversky_alpha,
    tversky_beta=tversky_beta,
    tversky_smooth=tversky_smooth,
    tv_boost=tv_boost,
    hd_boost=hd_boost,
)

pretrain_str = "yes" if use_checkpoint else "no"
weight_str = "weighted" if class_weights is not None else ""
if tv_boost == hd_boost == 1.0:
    boost_str = f"b{tv_boost:.2f}"
else:
    boost_str = f"tvb{tv_boost:.2f}_hb{hd_boost:.2f}"
# 체크포인트 디렉토리 및 파일 설정
checkpoint_base_dir = Path("./model_checkpoints")
folder_name = f"SwinCSPUNETr_CETVHF_{weight_str}_f{feature_size}s{img_size}_numb{num_bottleneck}_lr{lr:.0e}_T-a{tversky_alpha:.2f}b{tversky_beta:.2f}Wc{warmup_ce}_Wt{warmup_tv}Wh{warmup_hd}_We{warmup_epochs}_Se{schedule_epochs}_{boost_str}_b{batch_size}_r{num_repeat}"
checkpoint_dir = checkpoint_base_dir / folder_name
optimizer = optim.AdamW(list(model.parameters()) + list(criterion.parameters()), lr=lr, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, factor=0.5)
# 체크포인트 디렉토리 생성
checkpoint_dir.mkdir(parents=True, exist_ok=True)

if checkpoint_dir.exists():
    best_model_path = checkpoint_dir / 'best_model.pt'
    if best_model_path.exists():
        print(f"기존 best model 발견: {best_model_path}")
        try:
            checkpoint = torch.load(best_model_path, map_location=device)
            # 체크포인트 내부 키 검증
            required_keys = ['model_state_dict', 'optimizer_state_dict', 'epoch', 'best_val_loss']
            if all(k in checkpoint for k in required_keys):
                model.load_state_dict(checkpoint['model_state_dict'])
                optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
                start_epoch = checkpoint['epoch']
                best_val_loss = checkpoint['best_val_loss']
                print("기존 학습된 가중치를 성공적으로 로드했습니다.")
                checkpoint= None
            else:
                raise ValueError("체크포인트 파일에 필요한 key가 없습니다.")
        except Exception as e:
            print(f"체크포인트 파일을 로드하는 중 오류 발생: {e}")



기존 best model 발견: model_checkpoints\SwinCSPUNETr_CETVHF__f48s96_numb2_lr1e-04_T-a0.52b0.48Wc8.0_Wt0.1Wh0.1_We5_Se10_tvb1.20_hb1.20_b1_r60\best_model.pt


  checkpoint = torch.load(best_model_path, map_location=device)


기존 학습된 가중치를 성공적으로 로드했습니다.


In [7]:
batch = next(iter(val_loader))
images, labels = batch["image"], batch["label"]
print(images.shape, labels.shape)

torch.Size([1, 1, 96, 96, 96]) torch.Size([1, 1, 96, 96, 96])


In [8]:
torch.backends.cudnn.benchmark = True

In [9]:
import wandb
from datetime import datetime

current_time = datetime.now().strftime('%Y%m%d_%H%M%S')
run_name = folder_name

# wandb 초기화
wandb.init(
    project='czii_SwinUnetR',  # 프로젝트 이름 설정
    name=run_name,         # 실행(run) 이름 설정
    config={
        'num_epochs': num_epochs,
        'learning_rate': lr,
        'batch_size': batch_size,
        'lambda': tversky_alpha,
        "cross_entropy_weight": warmup_ce,
        "tversky_weight": warmup_tv,
        "hausdorff_weight": warmup_hd,
        "cross_entropy_weight_end": ce_end,
        "tversky_weight_end": tv_end,
        "hausdorff_weight_end": hd_end,
        "tversky_weight_boost": tv_boost,
        "hausdorff_weight_boost": hd_boost,
        "include_background": include_background,
        "wramup_epochs": warmup_epochs,
        "schedule_epochs": schedule_epochs,
        "include_background": include_background,
        "reduction": reduction,
        'feature_size': feature_size,
        'img_size': img_size,
        'sampling_ratio': ratios_list,
        'device': device.type,
        "checkpoint_dir": str(folder_name),
        "class_weights": class_weights.tolist() if class_weights is not None else None,
        "use_checkpoint": use_checkpoint,
        "drop_rate": drop_rate,
        "attn_drop_rate": attn_drop_rate,
        "use_v2": use_v2,
        "accumulation_steps": accumulation_steps,
        "num_repeat": num_repeat,
        
        # 필요한 하이퍼파라미터 추가
    }
)
# 모델을 wandb에 연결
wandb.watch(model, log='all')

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mwoow070840[0m ([33mwaooang[0m). Use [1m`wandb login --relogin`[0m to force relogin


[]

# 학습

In [10]:
from monai.metrics import DiceMetric
    
def processing(batch_data, model, criterion, device):
    images = batch_data['image'].to(device)  # Input 이미지 (B, 1, 96, 96, 96)
    labels = batch_data['label'].to(device)  # 라벨 (B, 96, 96, 96)

    labels = labels.squeeze(1)  # (B, 1, 96, 96, 96) → (B, 96, 96, 96)
    labels = labels.long()  # 라벨을 정수형으로 변환

    # 원핫 인코딩 (B, H, W, D) → (B, num_classes, H, W, D)
    
    labels_onehot = torch.nn.functional.one_hot(labels, num_classes=n_classes)
    labels_onehot = labels_onehot.permute(0, 4, 1, 2, 3).float()  # (B, num_classes, H, W, D)

    # 모델 예측
    outputs = model(images)  # outputs: (B, num_classes, H, W, D)

    # Loss 계산
    # loss = criterion(outputs, labels_onehot)
    loss = criterion(outputs, labels_onehot)
    return loss, outputs, labels, outputs.argmax(dim=1)

def train_one_epoch(model, train_loader, criterion, optimizer, device, epoch, accumulation_steps=4):
    model.train()
    epoch_loss = 0
    optimizer.zero_grad()  # 그래디언트 초기화
    with tqdm(train_loader, desc='Training') as pbar:
        for i, batch_data in enumerate(pbar):
            # 손실 계산
            loss, _, _, _ = processing(batch_data, model, criterion, device)

            # 그래디언트를 계산하고 누적
            loss = loss / accumulation_steps  # 그래디언트 누적을 위한 스케일링
            loss.backward()  # 그래디언트 계산 및 누적
            
            # 그래디언트 업데이트 (accumulation_steps마다 한 번)
            if (i + 1) % accumulation_steps == 0 or (i + 1) == len(train_loader):
                optimizer.step()  # 파라미터 업데이트
                optimizer.zero_grad()  # 누적된 그래디언트 초기화
            
            # 손실값 누적 (스케일링 복구)
            epoch_loss += loss.item() * accumulation_steps  # 실제 손실값 반영
            pbar.set_postfix(loss=loss.item() * accumulation_steps)  # 실제 손실값 출력
    avg_loss = epoch_loss / len(train_loader)
    wandb.log({'train_epoch_loss': avg_loss, 'epoch': epoch + 1})
    return avg_loss


def validate_one_epoch(model, val_loader, criterion, device, epoch, calculate_dice_interval):
    model.eval()
    val_loss = 0
    
    class_dice_scores = {i: [] for i in range(n_classes)}
    class_f_beta_scores = {i: [] for i in range(n_classes)}
    with torch.no_grad():
        with tqdm(val_loader, desc='Validation') as pbar:
            for batch_data in pbar:
                loss, _, labels, preds = processing(batch_data, model, criterion, device)
                val_loss += loss.item()
                pbar.set_postfix(loss=loss.item())

                # 각 클래스별 Dice 점수 계산
                if epoch % calculate_dice_interval == 0:
                    for i in range(n_classes):
                        pred_i = (preds == i)
                        label_i = (labels == i)
                        dice_score = (2.0 * torch.sum(pred_i & label_i)) / (torch.sum(pred_i) + torch.sum(label_i) + 1e-8)
                        class_dice_scores[i].append(dice_score.item())
                        precision = (torch.sum(pred_i & label_i) + 1e-8) / (torch.sum(pred_i) + 1e-8)
                        recall = (torch.sum(pred_i & label_i) + 1e-8) / (torch.sum(label_i) + 1e-8)
                        f_beta_score = (1 + 4**2) * (precision * recall) / (4**2 * precision + recall + 1e-8)
                        class_f_beta_scores[i].append(f_beta_score.item())

    avg_loss = val_loss / len(val_loader)
    # 에포크별 평균 손실 로깅
    wandb.log({'val_epoch_loss': avg_loss, 'epoch': epoch + 1})
    
    # 각 클래스별 평균 Dice 점수 출력
    if epoch % calculate_dice_interval == 0:
        print("Validation Dice Score")
        all_classes_dice_scores = []
        for i in range(n_classes):
            mean_dice = np.mean(class_dice_scores[i])
            wandb.log({f'class_{i}_dice_score': mean_dice, 'epoch': epoch + 1})
            print(f"Class {i}: {mean_dice:.4f}", end=", ")
            if i not in [0, 2]:  # 평균에 포함할 클래스만 추가
                all_classes_dice_scores.append(mean_dice)
            
        print()
    if epoch % calculate_dice_interval == 0:
        print("Validation F-beta Score")
        all_classes_fbeta_scores = []
        for i in range(n_classes):
            mean_fbeta = np.mean(class_f_beta_scores[i])
            wandb.log({f'class_{i}_f_beta_score': mean_fbeta, 'epoch': epoch + 1})
            print(f"Class {i}: {mean_fbeta:.4f}", end=", ")
            if i not in [0, 2]:  # 평균에 포함할 클래스만 추가
                all_classes_fbeta_scores.append(mean_fbeta)
        print()
        overall_mean_dice = np.mean(all_classes_dice_scores)
        overall_mean_fbeta = np.mean(all_classes_fbeta_scores)
        wandb.log({'overall_mean_f_beta_score': overall_mean_fbeta, 'overall_mean_dice_score': overall_mean_dice, 'epoch': epoch + 1})
        print(f"\nOverall Mean Dice Score: {overall_mean_dice:.4f}\nOverall Mean F-beta Score: {overall_mean_fbeta:.4f}\n")

    if overall_mean_fbeta is None:
        overall_mean_fbeta = 0

    return val_loss / len(val_loader), overall_mean_fbeta

def train_model(
    model, train_loader, val_loader, criterion, optimizer, num_epochs, patience, 
    device, start_epoch, best_val_loss, best_val_fbeta_score, calculate_dice_interval=1,
    accumulation_steps=4
):
    """
    모델을 학습하고 검증하는 함수
    Args:
        model: 학습할 모델
        train_loader: 학습 데이터 로더
        val_loader: 검증 데이터 로더
        criterion: 손실 함수
        optimizer: 최적화 알고리즘
        num_epochs: 총 학습 epoch 수
        patience: early stopping 기준
        device: GPU/CPU 장치
        start_epoch: 시작 epoch
        best_val_loss: 이전 최적 validation loss
        best_val_fbeta_score: 이전 최적 validation f-beta score
        calculate_dice_interval: Dice 점수 계산 주기
    """
    epochs_no_improve = 0

    for epoch in range(start_epoch, num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")

        # Train One Epoch
        train_loss = train_one_epoch(
            model=model, 
            train_loader=train_loader, 
            criterion=criterion, 
            optimizer=optimizer, 
            device=device,
            epoch=epoch,
            accumulation_steps= accumulation_steps
        )
        
        scheduler.step(train_loss)
        # Validate One Epoch
        val_loss, overall_mean_fbeta_score = validate_one_epoch(
            model=model, 
            val_loader=val_loader, 
            criterion=criterion, 
            device=device, 
            epoch=epoch, 
            calculate_dice_interval=calculate_dice_interval
        )

        
        print(f"Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}, Validation F-beta: {overall_mean_fbeta_score:.4f}")

        if val_loss < best_val_loss and overall_mean_fbeta_score > best_val_fbeta_score:
            best_val_loss = val_loss
            best_val_fbeta_score = overall_mean_fbeta_score
            epochs_no_improve = 0
            checkpoint_path = os.path.join(checkpoint_dir, 'best_model.pt')
            torch.save({
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'best_val_loss': best_val_loss,
                'best_val_fbeta_score': best_val_fbeta_score
            }, checkpoint_path)
            print(f"========================================================")
            print(f"SUPER Best model saved. Loss:{best_val_loss:.4f}, Score:{best_val_fbeta_score:.4f}")
            print(f"========================================================")

        # Early stopping 조건 체크
        if val_loss >= best_val_loss and overall_mean_fbeta_score <= best_val_fbeta_score:
            epochs_no_improve += 1
        else:
            epochs_no_improve = 0

        if epochs_no_improve >= patience:
            print("Early stopping")
            checkpoint_path = os.path.join(checkpoint_dir, 'last.pt')
            torch.save({
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'best_val_loss': best_val_loss,
                'best_val_fbeta_score': best_val_fbeta_score
            }, checkpoint_path)
            break
        # if epochs_no_improve%6 == 0:
        #     # 손실이 개선되지 않았으므로 lambda 감소
        #     new_lamda = max(criterion.lamda - 0.01, 0.1)  # 최소값은 0.1로 설정
        #     criterion.set_lamda(new_lamda)
        #     print(f"Validation loss did not improve. Reducing lambda to {new_lamda:.4f}")

    wandb.finish()


In [None]:
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=num_epochs,
    patience=10,
    device=device,
    start_epoch=start_epoch,
    best_val_loss=best_val_loss,
    best_val_fbeta_score=best_val_fbeta_score,
    calculate_dice_interval=1,
    accumulation_steps = accumulation_steps
     ) 

Epoch 6/4000


  with device_autocast_ctx, torch.cpu.amp.autocast(**cpu_autocast_kwargs), recompute_context:  # type: ignore[attr-defined]
Training: 100%|██████████| 1440/1440 [25:35<00:00,  1.07s/it, loss=0.361]
Validation: 100%|██████████| 12/12 [00:09<00:00,  1.21it/s, loss=1.4]  


Validation Dice Score
Class 0: 0.9836, Class 1: 0.0000, Class 2: 0.0000, Class 3: 0.0023, Class 4: 0.2632, Class 5: 0.0004, Class 6: 0.0037, 
Validation F-beta Score
Class 0: 0.9899, Class 1: 0.5000, Class 2: 0.6667, Class 3: 0.3347, Class 4: 0.2266, Class 5: 0.4169, Class 6: 0.5020, 

Overall Mean Dice Score: 0.0539
Overall Mean F-beta Score: 0.3960

Training Loss: 0.7254, Validation Loss: 1.0338, Validation F-beta: 0.3960
Epoch 7/4000


Training: 100%|██████████| 1440/1440 [25:49<00:00,  1.08s/it, loss=0.568]
Validation: 100%|██████████| 12/12 [00:10<00:00,  1.10it/s, loss=0.519]


Validation Dice Score
Class 0: 0.9903, Class 1: 0.0000, Class 2: 0.0000, Class 3: 0.0000, Class 4: 0.1181, Class 5: 0.0045, Class 6: 0.0000, 
Validation F-beta Score
Class 0: 0.9972, Class 1: 0.5833, Class 2: 0.4167, Class 3: 0.2500, Class 4: 0.2484, Class 5: 0.0857, Class 6: 0.4167, 

Overall Mean Dice Score: 0.0245
Overall Mean F-beta Score: 0.3168

Training Loss: 0.7144, Validation Loss: 0.7634, Validation F-beta: 0.3168
SUPER Best model saved. Loss:0.7634, Score:0.3168
Epoch 8/4000


Training: 100%|██████████| 1440/1440 [25:59<00:00,  1.08s/it, loss=0.975]
Validation: 100%|██████████| 12/12 [00:10<00:00,  1.20it/s, loss=2.02] 


Validation Dice Score
Class 0: 0.9875, Class 1: 0.0000, Class 2: 0.0000, Class 3: 0.0000, Class 4: 0.1575, Class 5: 0.0263, Class 6: 0.0000, 
Validation F-beta Score
Class 0: 0.9975, Class 1: 0.3333, Class 2: 0.4167, Class 3: 0.6667, Class 4: 0.3605, Class 5: 0.2653, Class 6: 0.5833, 

Overall Mean Dice Score: 0.0367
Overall Mean F-beta Score: 0.4418

Training Loss: 0.6975, Validation Loss: 0.9345, Validation F-beta: 0.4418
Epoch 9/4000


Training: 100%|██████████| 1440/1440 [26:28<00:00,  1.10s/it, loss=0.66] 
Validation: 100%|██████████| 12/12 [00:11<00:00,  1.02it/s, loss=0.623]


Validation Dice Score
Class 0: 0.9868, Class 1: 0.0000, Class 2: 0.0000, Class 3: 0.0000, Class 4: 0.2998, Class 5: 0.0395, Class 6: 0.0157, 
Validation F-beta Score
Class 0: 0.9977, Class 1: 0.3333, Class 2: 0.5833, Class 3: 0.3333, Class 4: 0.2998, Class 5: 0.1072, Class 6: 0.3424, 

Overall Mean Dice Score: 0.0710
Overall Mean F-beta Score: 0.2832

Training Loss: 0.6728, Validation Loss: 0.9152, Validation F-beta: 0.2832
Epoch 10/4000


Training: 100%|██████████| 1440/1440 [27:14<00:00,  1.14s/it, loss=0.557]
Validation: 100%|██████████| 12/12 [00:10<00:00,  1.10it/s, loss=0.242]


Validation Dice Score
Class 0: 0.9885, Class 1: 0.0000, Class 2: 0.0000, Class 3: 0.0000, Class 4: 0.2903, Class 5: 0.0000, Class 6: 0.0599, 
Validation F-beta Score
Class 0: 0.9926, Class 1: 0.5833, Class 2: 0.3333, Class 3: 0.2500, Class 4: 0.6125, Class 5: 0.1667, Class 6: 0.4528, 

Overall Mean Dice Score: 0.0700
Overall Mean F-beta Score: 0.4130

Training Loss: 0.6776, Validation Loss: 0.7186, Validation F-beta: 0.4130
SUPER Best model saved. Loss:0.7186, Score:0.4130
Epoch 11/4000


Training: 100%|██████████| 1440/1440 [27:40<00:00,  1.15s/it, loss=0.738]
Validation: 100%|██████████| 12/12 [00:10<00:00,  1.12it/s, loss=0.683]


Validation Dice Score
Class 0: 0.9876, Class 1: 0.0000, Class 2: 0.0000, Class 3: 0.0000, Class 4: 0.1639, Class 5: 0.0015, Class 6: 0.0684, 
Validation F-beta Score
Class 0: 0.9979, Class 1: 0.5833, Class 2: 0.2500, Class 3: 0.6667, Class 4: 0.2712, Class 5: 0.0842, Class 6: 0.4617, 

Overall Mean Dice Score: 0.0467
Overall Mean F-beta Score: 0.4134

Training Loss: 0.6455, Validation Loss: 0.8276, Validation F-beta: 0.4134
Epoch 12/4000


Training: 100%|██████████| 1440/1440 [27:30<00:00,  1.15s/it, loss=0.44] 
Validation: 100%|██████████| 12/12 [00:10<00:00,  1.12it/s, loss=0.39]


Validation Dice Score
Class 0: 0.9913, Class 1: 0.0000, Class 2: 0.0000, Class 3: 0.0000, Class 4: 0.0457, Class 5: 0.1325, Class 6: 0.0539, 
Validation F-beta Score
Class 0: 0.9958, Class 1: 0.3333, Class 2: 0.7500, Class 3: 0.3333, Class 4: 0.3943, Class 5: 0.1700, Class 6: 0.5358, 

Overall Mean Dice Score: 0.0464
Overall Mean F-beta Score: 0.3534

Training Loss: 0.6345, Validation Loss: 0.7106, Validation F-beta: 0.3534
Epoch 13/4000


Training: 100%|██████████| 1440/1440 [27:38<00:00,  1.15s/it, loss=0.518]
Validation: 100%|██████████| 12/12 [00:12<00:00,  1.00s/it, loss=0.738]


Validation Dice Score
Class 0: 0.9913, Class 1: 0.0000, Class 2: 0.0000, Class 3: 0.0108, Class 4: 0.0859, Class 5: 0.0939, Class 6: 0.1312, 
Validation F-beta Score
Class 0: 0.9946, Class 1: 0.1667, Class 2: 0.5833, Class 3: 0.4228, Class 4: 0.3202, Class 5: 0.1480, Class 6: 0.2822, 

Overall Mean Dice Score: 0.0644
Overall Mean F-beta Score: 0.2680

Training Loss: 0.6142, Validation Loss: 0.7319, Validation F-beta: 0.2680
Epoch 14/4000


Training: 100%|██████████| 1440/1440 [28:00<00:00,  1.17s/it, loss=0.566]
Validation: 100%|██████████| 12/12 [00:11<00:00,  1.01it/s, loss=0.401]


Validation Dice Score
Class 0: 0.9902, Class 1: 0.0000, Class 2: 0.0000, Class 3: 0.0419, Class 4: 0.2270, Class 5: 0.2056, Class 6: 0.3802, 
Validation F-beta Score
Class 0: 0.9979, Class 1: 0.5000, Class 2: 0.7500, Class 3: 0.4434, Class 4: 0.5843, Class 5: 0.1455, Class 6: 0.3980, 

Overall Mean Dice Score: 0.1710
Overall Mean F-beta Score: 0.4143

Training Loss: 0.6024, Validation Loss: 0.6439, Validation F-beta: 0.4143
SUPER Best model saved. Loss:0.6439, Score:0.4143
Epoch 15/4000


Training: 100%|██████████| 1440/1440 [27:51<00:00,  1.16s/it, loss=0.623]
Validation: 100%|██████████| 12/12 [00:13<00:00,  1.11s/it, loss=0.757]


Validation Dice Score
Class 0: 0.9883, Class 1: 0.0000, Class 2: 0.0000, Class 3: 0.0365, Class 4: 0.3939, Class 5: 0.1516, Class 6: 0.1733, 
Validation F-beta Score
Class 0: 0.9975, Class 1: 0.2500, Class 2: 0.5000, Class 3: 0.1062, Class 4: 0.3851, Class 5: 0.1925, Class 6: 0.3750, 

Overall Mean Dice Score: 0.1511
Overall Mean F-beta Score: 0.2618

Training Loss: 0.6056, Validation Loss: 0.8331, Validation F-beta: 0.2618
Epoch 16/4000


Training: 100%|██████████| 1440/1440 [28:10<00:00,  1.17s/it, loss=0.414]
Validation: 100%|██████████| 12/12 [00:11<00:00,  1.04it/s, loss=0.225]


Validation Dice Score
Class 0: 0.9920, Class 1: 0.0000, Class 2: 0.0000, Class 3: 0.0042, Class 4: 0.0514, Class 5: 0.0010, Class 6: 0.3009, 
Validation F-beta Score
Class 0: 0.9986, Class 1: 0.2500, Class 2: 0.5000, Class 3: 0.1689, Class 4: 0.3643, Class 5: 0.1672, Class 6: 0.4971, 

Overall Mean Dice Score: 0.0715
Overall Mean F-beta Score: 0.2895

Training Loss: 0.5730, Validation Loss: 0.6257, Validation F-beta: 0.2895
Epoch 17/4000


Training: 100%|██████████| 1440/1440 [28:11<00:00,  1.17s/it, loss=1.07] 
Validation: 100%|██████████| 12/12 [00:11<00:00,  1.01it/s, loss=0.534]


Validation Dice Score
Class 0: 0.9912, Class 1: 0.0000, Class 2: 0.0000, Class 3: 0.0073, Class 4: 0.0694, Class 5: 0.0792, Class 6: 0.2321, 
Validation F-beta Score
Class 0: 0.9985, Class 1: 0.3333, Class 2: 0.3333, Class 3: 0.3373, Class 4: 0.3763, Class 5: 0.0544, Class 6: 0.7064, 

Overall Mean Dice Score: 0.0776
Overall Mean F-beta Score: 0.3615

Training Loss: 0.6013, Validation Loss: 0.7240, Validation F-beta: 0.3615
Epoch 18/4000


Training: 100%|██████████| 1440/1440 [28:25<00:00,  1.18s/it, loss=0.876]
Validation: 100%|██████████| 12/12 [00:12<00:00,  1.03s/it, loss=0.392]


Validation Dice Score
Class 0: 0.9910, Class 1: 0.0000, Class 2: 0.0000, Class 3: 0.0000, Class 4: 0.2767, Class 5: 0.1557, Class 6: 0.1913, 
Validation F-beta Score
Class 0: 0.9974, Class 1: 0.5000, Class 2: 0.3333, Class 3: 0.3333, Class 4: 0.3779, Class 5: 0.1099, Class 6: 0.4947, 

Overall Mean Dice Score: 0.1248
Overall Mean F-beta Score: 0.3632

Training Loss: 0.5819, Validation Loss: 0.6424, Validation F-beta: 0.3632
Epoch 19/4000


Training: 100%|██████████| 1440/1440 [28:33<00:00,  1.19s/it, loss=1.03] 
Validation: 100%|██████████| 12/12 [00:11<00:00,  1.09it/s, loss=2.74] 


Validation Dice Score
Class 0: 0.9900, Class 1: 0.0000, Class 2: 0.0000, Class 3: 0.0000, Class 4: 0.2468, Class 5: 0.0921, Class 6: 0.0647, 
Validation F-beta Score
Class 0: 0.9963, Class 1: 0.6667, Class 2: 0.5833, Class 3: 0.3333, Class 4: 0.3845, Class 5: 0.1488, Class 6: 0.4716, 

Overall Mean Dice Score: 0.0807
Overall Mean F-beta Score: 0.4010

Training Loss: 0.5673, Validation Loss: 0.7504, Validation F-beta: 0.4010
Epoch 20/4000


Training: 100%|██████████| 1440/1440 [28:27<00:00,  1.19s/it, loss=0.678]
Validation: 100%|██████████| 12/12 [00:11<00:00,  1.02it/s, loss=1.21] 


Validation Dice Score
Class 0: 0.9906, Class 1: 0.0000, Class 2: 0.0000, Class 3: 0.0000, Class 4: 0.1431, Class 5: 0.0793, Class 6: 0.1922, 
Validation F-beta Score
Class 0: 0.9973, Class 1: 0.4167, Class 2: 0.3333, Class 3: 0.3333, Class 4: 0.4335, Class 5: 0.2151, Class 6: 0.6023, 

Overall Mean Dice Score: 0.0829
Overall Mean F-beta Score: 0.4002

Training Loss: 0.5663, Validation Loss: 0.7468, Validation F-beta: 0.4002
Epoch 21/4000


Training: 100%|██████████| 1440/1440 [28:44<00:00,  1.20s/it, loss=0.679]
Validation: 100%|██████████| 12/12 [00:11<00:00,  1.07it/s, loss=0.549]


Validation Dice Score
Class 0: 0.9928, Class 1: 0.0000, Class 2: 0.0000, Class 3: 0.0038, Class 4: 0.3334, Class 5: 0.0702, Class 6: 0.1366, 
Validation F-beta Score
Class 0: 0.9972, Class 1: 0.5833, Class 2: 0.5000, Class 3: 0.2520, Class 4: 0.5599, Class 5: 0.1313, Class 6: 0.6944, 

Overall Mean Dice Score: 0.1088
Overall Mean F-beta Score: 0.4442

Training Loss: 0.5477, Validation Loss: 0.5651, Validation F-beta: 0.4442
SUPER Best model saved. Loss:0.5651, Score:0.4442
Epoch 22/4000


Training: 100%|██████████| 1440/1440 [28:32<00:00,  1.19s/it, loss=0.443]
Validation: 100%|██████████| 12/12 [00:11<00:00,  1.08it/s, loss=0.654]


Validation Dice Score
Class 0: 0.9906, Class 1: 0.0000, Class 2: 0.0000, Class 3: 0.0093, Class 4: 0.2193, Class 5: 0.1246, Class 6: 0.2828, 
Validation F-beta Score
Class 0: 0.9976, Class 1: 0.5833, Class 2: 0.5833, Class 3: 0.3385, Class 4: 0.4048, Class 5: 0.2526, Class 6: 0.7519, 

Overall Mean Dice Score: 0.1272
Overall Mean F-beta Score: 0.4663

Training Loss: 0.5486, Validation Loss: 0.5966, Validation F-beta: 0.4663
Epoch 23/4000


Training: 100%|██████████| 1440/1440 [28:29<00:00,  1.19s/it, loss=0.654]
Validation: 100%|██████████| 12/12 [00:13<00:00,  1.10s/it, loss=0.811]


Validation Dice Score
Class 0: 0.9902, Class 1: 0.0019, Class 2: 0.0000, Class 3: 0.0037, Class 4: 0.2141, Class 5: 0.0837, Class 6: 0.3437, 
Validation F-beta Score
Class 0: 0.9960, Class 1: 0.1677, Class 2: 0.5000, Class 3: 0.0020, Class 4: 0.4086, Class 5: 0.1393, Class 6: 0.5393, 

Overall Mean Dice Score: 0.1294
Overall Mean F-beta Score: 0.2514

Training Loss: 0.5483, Validation Loss: 0.7297, Validation F-beta: 0.2514
Epoch 24/4000


Training: 100%|██████████| 1440/1440 [28:38<00:00,  1.19s/it, loss=0.746]
Validation: 100%|██████████| 12/12 [00:11<00:00,  1.01it/s, loss=1.43] 


Validation Dice Score
Class 0: 0.9907, Class 1: 0.0000, Class 2: 0.0000, Class 3: 0.0016, Class 4: 0.3484, Class 5: 0.0757, Class 6: 0.2266, 
Validation F-beta Score
Class 0: 0.9941, Class 1: 0.1667, Class 2: 0.6667, Class 3: 0.3344, Class 4: 0.4911, Class 5: 0.0721, Class 6: 0.5716, 

Overall Mean Dice Score: 0.1305
Overall Mean F-beta Score: 0.3272

Training Loss: 0.5350, Validation Loss: 0.6654, Validation F-beta: 0.3272
Epoch 25/4000


Training: 100%|██████████| 1440/1440 [28:48<00:00,  1.20s/it, loss=0.705]
Validation: 100%|██████████| 12/12 [00:12<00:00,  1.06s/it, loss=0.882]


Validation Dice Score
Class 0: 0.9925, Class 1: 0.0040, Class 2: 0.0000, Class 3: 0.0042, Class 4: 0.3825, Class 5: 0.3149, Class 6: 0.2792, 
Validation F-beta Score
Class 0: 0.9941, Class 1: 0.3355, Class 2: 0.5000, Class 3: 0.2523, Class 4: 0.6571, Class 5: 0.2782, Class 6: 0.6838, 

Overall Mean Dice Score: 0.1969
Overall Mean F-beta Score: 0.4414

Training Loss: 0.5490, Validation Loss: 0.5798, Validation F-beta: 0.4414
Epoch 26/4000


Training: 100%|██████████| 1440/1440 [28:46<00:00,  1.20s/it, loss=0.223]
Validation: 100%|██████████| 12/12 [00:13<00:00,  1.16s/it, loss=0.378]


Validation Dice Score
Class 0: 0.9884, Class 1: 0.0285, Class 2: 0.0000, Class 3: 0.0309, Class 4: 0.3005, Class 5: 0.0312, Class 6: 0.4362, 
Validation F-beta Score
Class 0: 0.9947, Class 1: 0.1882, Class 2: 0.4167, Class 3: 0.2686, Class 4: 0.4230, Class 5: 0.1047, Class 6: 0.4679, 

Overall Mean Dice Score: 0.1655
Overall Mean F-beta Score: 0.2905

Training Loss: 0.5242, Validation Loss: 0.8441, Validation F-beta: 0.2905
Epoch 27/4000


Training: 100%|██████████| 1440/1440 [29:17<00:00,  1.22s/it, loss=0.355]
Validation: 100%|██████████| 12/12 [00:12<00:00,  1.06s/it, loss=0.944]


Validation Dice Score
Class 0: 0.9921, Class 1: 0.0662, Class 2: 0.0000, Class 3: 0.0117, Class 4: 0.2569, Class 5: 0.0875, Class 6: 0.4397, 
Validation F-beta Score
Class 0: 0.9970, Class 1: 0.5529, Class 2: 0.5000, Class 3: 0.0915, Class 4: 0.5804, Class 5: 0.0662, Class 6: 0.6783, 

Overall Mean Dice Score: 0.1724
Overall Mean F-beta Score: 0.3939

Training Loss: 0.5306, Validation Loss: 0.6822, Validation F-beta: 0.3939
Epoch 28/4000


Training: 100%|██████████| 1440/1440 [29:25<00:00,  1.23s/it, loss=0.579]
Validation: 100%|██████████| 12/12 [00:13<00:00,  1.13s/it, loss=0.315]


Validation Dice Score
Class 0: 0.9918, Class 1: 0.0752, Class 2: 0.0000, Class 3: 0.0305, Class 4: 0.3159, Class 5: 0.1469, Class 6: 0.5421, 
Validation F-beta Score
Class 0: 0.9958, Class 1: 0.1560, Class 2: 0.8333, Class 3: 0.3518, Class 4: 0.6481, Class 5: 0.1820, Class 6: 0.5964, 

Overall Mean Dice Score: 0.2221
Overall Mean F-beta Score: 0.3869

Training Loss: 0.5078, Validation Loss: 0.6144, Validation F-beta: 0.3869
Epoch 29/4000


Training: 100%|██████████| 1440/1440 [29:44<00:00,  1.24s/it, loss=0.569]
Validation: 100%|██████████| 12/12 [00:13<00:00,  1.12s/it, loss=0.207]


Validation Dice Score
Class 0: 0.9923, Class 1: 0.0511, Class 2: 0.0000, Class 3: 0.0248, Class 4: 0.2726, Class 5: 0.0730, Class 6: 0.3445, 
Validation F-beta Score
Class 0: 0.9984, Class 1: 0.1175, Class 2: 0.5833, Class 3: 0.2649, Class 4: 0.4677, Class 5: 0.1357, Class 6: 0.5637, 

Overall Mean Dice Score: 0.1532
Overall Mean F-beta Score: 0.3099

Training Loss: 0.5084, Validation Loss: 0.6440, Validation F-beta: 0.3099
Epoch 30/4000


Training: 100%|██████████| 1440/1440 [29:22<00:00,  1.22s/it, loss=0.466]
Validation: 100%|██████████| 12/12 [00:11<00:00,  1.00it/s, loss=0.589]


Validation Dice Score
Class 0: 0.9913, Class 1: 0.0370, Class 2: 0.0000, Class 3: 0.0149, Class 4: 0.4226, Class 5: 0.0615, Class 6: 0.1435, 
Validation F-beta Score
Class 0: 0.9982, Class 1: 0.6048, Class 2: 0.5833, Class 3: 0.3416, Class 4: 0.3435, Class 5: 0.0366, Class 6: 0.6409, 

Overall Mean Dice Score: 0.1359
Overall Mean F-beta Score: 0.3935

Training Loss: 0.5251, Validation Loss: 0.7435, Validation F-beta: 0.3935
Epoch 31/4000


Training:  45%|████▌     | 653/1440 [13:45<17:01,  1.30s/it, loss=0.373]

In [12]:
if:

SyntaxError: invalid syntax (879943805.py, line 1)

# VAl

In [None]:
from monai.data import DataLoader, Dataset, CacheDataset
from monai.transforms import (
    Compose, LoadImaged, EnsureChannelFirstd, NormalizeIntensityd,
    Orientationd, CropForegroundd, GaussianSmoothd, ScaleIntensityd,
    RandSpatialCropd, RandRotate90d, RandFlipd, RandGaussianNoised,
    ToTensord, RandCropByLabelClassesd
)
from monai.metrics import DiceMetric
from monai.networks.nets import UNETR, SwinUNETR
from monai.losses import TverskyLoss
import torch
import numpy as np
from tqdm import tqdm
import wandb
from src.dataset.dataset import make_val_dataloader

val_img_dir = "./datasets/val/images"
val_label_dir = "./datasets/val/labels"
img_depth = 96
img_size = 96  # Match your patch size
n_classes = 7
batch_size = 2 # 13.8GB GPU memory required for 128x128 img size
num_samples = batch_size # 한 이미지에서 뽑을 샘플 수
loader_batch = 1
lamda = 0.52

wandb.init(
    project='czii_SwinUnetR_val',  # 프로젝트 이름 설정
    name='SwinUNETR96_96_lr0.001_lambda0.52_batch2',         # 실행(run) 이름 설정
    config={
        'learning_rate': 0.001,
        'batch_size': batch_size,
        'lambda': lamda,
        'img_size': img_size,
        'device': 'cuda',
        "checkpoint_dir": "./model_checkpoints/SwinUNETR96_96_lr0.001_lambda0.52_batch2",
        
    }
)

non_random_transforms = Compose([
    EnsureChannelFirstd(keys=["image", "label"], channel_dim="no_channel"),
    NormalizeIntensityd(keys="image"),
    Orientationd(keys=["image", "label"], axcodes="RAS"),
    GaussianSmoothd(
        keys=["image"],      # 변환을 적용할 키
        sigma=[1.0, 1.0, 1.0]  # 각 축(x, y, z)의 시그마 값
        ),
])
random_transforms = Compose([
    RandCropByLabelClassesd(
        keys=["image", "label"],
        label_key="label",
        spatial_size=[img_depth, img_size, img_size],
        num_classes=n_classes,
        num_samples=num_samples, 
        ratios=ratios_list,
    ),
    RandRotate90d(keys=["image", "label"], prob=0.5, spatial_axes=[1, 2]),
    RandFlipd(keys=["image", "label"], prob=0.5, spatial_axis=0),
])

val_loader = make_val_dataloader(
    val_img_dir, 
    val_label_dir, 
    non_random_transforms = non_random_transforms, 
    random_transforms = random_transforms, 
    batch_size = loader_batch,
    num_workers=0
)
criterion = TverskyLoss(
    alpha= 1 - lamda,  # FP에 대한 가중치
    beta=lamda,       # FN에 대한 가중치
    include_background=False,  # 배경 클래스 제외
    softmax=True
)
    
    
from monai.metrics import DiceMetric

img_size = 96
img_depth = img_size
n_classes = 7 

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pretrain_path = "./model_checkpoints/SwinUNETR96_96_lr0.001_lambda0.52_batch2/best_model.pt"
model = SwinUNETR(
    img_size=(img_depth, img_size, img_size),
    in_channels=1,
    out_channels=n_classes,
    feature_size=48,
    use_checkpoint=True,
).to(device)
# Pretrained weights 불러오기
checkpoint = torch.load(pretrain_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])

val_loss, overall_mean_fbeta_score = validate_one_epoch(
    model=model, 
    val_loader=val_loader, 
    criterion=criterion, 
    device=device, 
    epoch=0, 
    calculate_dice_interval=1
)

VBox(children=(Label(value='0.009 MB of 0.009 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
class_0_dice_score,▁
class_0_f_beta_score,▁
class_1_dice_score,▁
class_1_f_beta_score,▁
class_2_dice_score,▁
class_2_f_beta_score,▁
class_3_dice_score,▁
class_3_f_beta_score,▁
class_4_dice_score,▁
class_4_f_beta_score,▁

0,1
class_0_dice_score,0.65703
class_0_f_beta_score,0.50748
class_1_dice_score,0.53332
class_1_f_beta_score,0.64703
class_2_dice_score,0.00286
class_2_f_beta_score,0.02334
class_3_dice_score,0.23703
class_3_f_beta_score,0.23033
class_4_dice_score,0.65487
class_4_f_beta_score,0.62525


Loading dataset: 100%|██████████| 4/4 [00:06<00:00,  1.58s/it]
  checkpoint = torch.load(pretrain_path, map_location=device)
Validation: 100%|██████████| 4/4 [00:01<00:00,  2.38it/s, loss=0.865]

Validation Dice Score
Class 0: 0.6570, Class 1: 0.5333, Class 2: 0.0029, Class 3: 0.2370, 
Class 4: 0.6549, Class 5: 0.4790, Class 6: 0.4255, 
Validation F-beta Score
Class 0: 0.5075, Class 1: 0.6470, Class 2: 0.0233, Class 3: 0.2303, 
Class 4: 0.6252, Class 5: 0.5145, Class 6: 0.4720, 
Overall Mean Dice Score: 0.4659
Overall Mean F-beta Score: 0.4978






# Inference

In [None]:
from src.dataset.preprocessing import Preprocessor

In [None]:
from monai.inferers import sliding_window_inference
from monai.transforms import Compose, EnsureChannelFirstd, NormalizeIntensityd, Orientationd, GaussianSmoothd
from monai.data import DataLoader, Dataset, CacheDataset
from monai.networks.nets import SwinUNETR
from pathlib import Path
import numpy as np
import copick

import torch
print("Done.")

Done.


In [None]:
config_blob = """{
    "name": "czii_cryoet_mlchallenge_2024",
    "description": "2024 CZII CryoET ML Challenge training data.",
    "version": "1.0.0",

    "pickable_objects": [
        {
            "name": "apo-ferritin",
            "is_particle": true,
            "pdb_id": "4V1W",
            "label": 1,
            "color": [  0, 117, 220, 128],
            "radius": 60,
            "map_threshold": 0.0418
        },
        {
          "name" : "beta-amylase",
            "is_particle": true,
            "pdb_id": "8ZRZ",
            "label": 2,
            "color": [255, 255, 255, 128],
            "radius": 90,
            "map_threshold": 0.0578  
        },
        {
            "name": "beta-galactosidase",
            "is_particle": true,
            "pdb_id": "6X1Q",
            "label": 3,
            "color": [ 76,   0,  92, 128],
            "radius": 90,
            "map_threshold": 0.0578
        },
        {
            "name": "ribosome",
            "is_particle": true,
            "pdb_id": "6EK0",
            "label": 4,
            "color": [  0,  92,  49, 128],
            "radius": 150,
            "map_threshold": 0.0374
        },
        {
            "name": "thyroglobulin",
            "is_particle": true,
            "pdb_id": "6SCJ",
            "label": 5,
            "color": [ 43, 206,  72, 128],
            "radius": 130,
            "map_threshold": 0.0278
        },
        {
            "name": "virus-like-particle",
            "is_particle": true,
            "label": 6,
            "color": [255, 204, 153, 128],
            "radius": 135,
            "map_threshold": 0.201
        },
        {
            "name": "membrane",
            "is_particle": false,
            "label": 8,
            "color": [100, 100, 100, 128]
        },
        {
            "name": "background",
            "is_particle": false,
            "label": 9,
            "color": [10, 150, 200, 128]
        }
    ],

    "overlay_root": "./kaggle/working/overlay",

    "overlay_fs_args": {
        "auto_mkdir": true
    },

    "static_root": "./kaggle/input/czii-cryo-et-object-identification/test/static"
}"""

copick_config_path = "./kaggle/working/copick.config"
preprocessor = Preprocessor(config_blob,copick_config_path=copick_config_path)
non_random_transforms = Compose([
    EnsureChannelFirstd(keys=["image"], channel_dim="no_channel"),
    NormalizeIntensityd(keys="image"),
    Orientationd(keys=["image"], axcodes="RAS"),
    GaussianSmoothd(
        keys=["image"],      # 변환을 적용할 키
        sigma=[1.0, 1.0, 1.0]  # 각 축(x, y, z)의 시그마 값
        ),
    ])

Config file written to ./kaggle/working/copick.config
file length: 7


In [None]:
img_size = 96
img_depth = img_size
n_classes = 7 

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pretrain_path = "./model_checkpoints/SwinUNETR96_96_lr0.001_lambda0.52_batch2/best_model.pt"
model = SwinUNETR(
    img_size=(img_depth, img_size, img_size),
    in_channels=1,
    out_channels=n_classes,
    feature_size=48,
    use_checkpoint=True,
).to(device)
# Pretrained weights 불러오기
checkpoint = torch.load(pretrain_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])


  checkpoint = torch.load(pretrain_path, map_location=device)


<All keys matched successfully>

In [None]:
val_loss = validate_one_epoch(
            model=model, 
            val_loader=val_loader, 
            criterion=criterion, 
            device=device, 
            epoch=1, 
            calculate_dice_interval=0
        )

Validation:   0%|          | 0/4 [00:03<?, ?it/s, loss=0.764]


ZeroDivisionError: integer modulo by zero

In [None]:
import torch
import numpy as np
from scipy.ndimage import label, center_of_mass
import pandas as pd
from tqdm import tqdm
from monai.data import CacheDataset, DataLoader
from monai.transforms import Compose, NormalizeIntensity
import cc3d

def dict_to_df(coord_dict, experiment_name):
    all_coords = []
    all_labels = []
    
    for label, coords in coord_dict.items():
        all_coords.append(coords)
        all_labels.extend([label] * len(coords))
    
    all_coords = np.vstack(all_coords)
    df = pd.DataFrame({
        'experiment': experiment_name,
        'particle_type': all_labels,
        'x': all_coords[:, 0],
        'y': all_coords[:, 1],
        'z': all_coords[:, 2]
    })
    return df

id_to_name = {1: "apo-ferritin", 
              2: "beta-amylase",
              3: "beta-galactosidase", 
              4: "ribosome", 
              5: "thyroglobulin", 
              6: "virus-like-particle"}
BLOB_THRESHOLD = 200
CERTAINTY_THRESHOLD = 0.05

classes = [1, 2, 3, 4, 5, 6]

model.eval()
with torch.no_grad():
    location_dfs = []  # DataFrame 리스트로 초기화
    
    for vol_idx, run in enumerate(preprocessor.root.runs):
        print(f"Processing volume {vol_idx + 1}/{len(preprocessor.root.runs)}")
        tomogram = preprocessor.processing(run=run, task="task")
        task_files = [{"image": tomogram}]
        task_ds = CacheDataset(data=task_files, transform=non_random_transforms)
        task_loader = DataLoader(task_ds, batch_size=1, num_workers=0)
        
        for task_data in task_loader:
            images = task_data['image'].to("cuda")
            outputs = sliding_window_inference(
                inputs=images,
                roi_size=(96, 96, 96),  # ROI 크기
                sw_batch_size=4,
                predictor=model.forward,
                overlap=0.1,
                sw_device="cuda",
                device="cpu",
                buffer_steps=1,
                buffer_dim=-1
            )
            outputs = outputs.argmax(dim=1).squeeze(0).cpu().numpy()  # 클래스 채널 예측
            location = {}  # 좌표 저장용 딕셔너리
            for c in classes:
                cc = cc3d.connected_components(outputs == c)  # cc3d 라벨링
                stats = cc3d.statistics(cc)
                zyx = stats['centroids'][1:] * 10.012444  # 스케일 변환
                zyx_large = zyx[stats['voxel_counts'][1:] > BLOB_THRESHOLD]  # 크기 필터링
                xyz = np.ascontiguousarray(zyx_large[:, ::-1])  # 좌표 스왑 (z, y, x -> x, y, z)

                location[id_to_name[c]] = xyz  # ID 이름 매칭 저장

            # 데이터프레임 변환
            df = dict_to_df(location, run.name)
            location_dfs.append(df)  # 리스트에 추가
        
        # if vol_idx == 2:
        #     break
    
    # DataFrame 병합
    final_df = pd.concat(location_dfs, ignore_index=True)
    
    # ID 추가 및 CSV 저장
    final_df.insert(loc=0, column='id', value=np.arange(len(final_df)))
    final_df.to_csv("submission.csv", index=False)
    print("Submission saved to: submission.csv")


Processing volume 1/7


Loading dataset: 100%|██████████| 1/1 [00:01<00:00,  1.94s/it]


Processing volume 2/7


Loading dataset: 100%|██████████| 1/1 [00:01<00:00,  1.89s/it]


Processing volume 3/7


Loading dataset: 100%|██████████| 1/1 [00:01<00:00,  1.79s/it]


Submission saved to: submission.csv
