In [1]:
import os
import shutil
import tempfile

import matplotlib.pyplot as plt
from tqdm import tqdm

import random
import numpy as np
import torch

from src.models import UNet_CBAM

from monai.losses import DiceCELoss
from monai.inferers import sliding_window_inference
from monai.transforms import (
    AsDiscrete,
    EnsureChannelFirstd,
    Compose,
    CropForegroundd,
    LoadImaged,
    Orientationd,
    RandFlipd,
    RandCropByPosNegLabeld,
    RandShiftIntensityd,
    ScaleIntensityRanged,
    Spacingd,
    RandRotate90d,
)

from monai.networks.layers.factories import Act, Norm

from monai.config import print_config
from monai.metrics import DiceMetric
# from src.models.swincspunetr import SwinCSPUNETR
# from src.models.swincspunetr_unet import SwinCSPUNETR_unet
# from src.models.swincspunetr3plus import SwinCSPUNETR3plus

from monai.data import (
    DataLoader,
    CacheDataset,
    load_decathlon_datalist,
    decollate_batch,
)

# 랜덤 시드 고정
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

set_seed(42)


print_config()

  from .autonotebook import tqdm as notebook_tqdm


MONAI version: 1.4.0
Numpy version: 1.26.4
Pytorch version: 2.4.1+cu121
MONAI flags: HAS_EXT = False, USE_COMPILED = False, USE_META_DICT = False
MONAI rev id: 46a5272196a6c2590ca2589029eed8e4d56ff008
MONAI __file__: c:\Users\<username>\.conda\envs\UM\Lib\site-packages\monai\__init__.py

Optional dependencies:
Pytorch Ignite version: NOT INSTALLED or UNKNOWN VERSION.
ITK version: NOT INSTALLED or UNKNOWN VERSION.
Nibabel version: 5.3.2
scikit-image version: 0.24.0
scipy version: 1.14.1
Pillow version: 10.2.0
Tensorboard version: 2.18.0
gdown version: 5.2.0
TorchVision version: 0.19.1+cu121
tqdm version: 4.66.5
lmdb version: NOT INSTALLED or UNKNOWN VERSION.
psutil version: 6.0.0
pandas version: 2.2.3
einops version: 0.8.0
transformers version: NOT INSTALLED or UNKNOWN VERSION.
mlflow version: 2.17.2
pynrrd version: NOT INSTALLED or UNKNOWN VERSION.
clearml version: NOT INSTALLED or UNKNOWN VERSION.

For details about installing the optional dependencies, please visit:
    https://docs.

In [2]:
class_info = {
    0: {"name": "background", "weight": 0},  # weight 없음
    1: {"name": "apo-ferritin", "weight": 1000},
    2: {"name": "beta-amylase", "weight": 100}, # 4130
    3: {"name": "beta-galactosidase", "weight": 1500}, #3080
    4: {"name": "ribosome", "weight": 1000},
    5: {"name": "thyroglobulin", "weight": 1500},
    6: {"name": "virus-like-particle", "weight": 1000},
}

# 가중치에 비례한 비율 계산
raw_ratios = {
    k: (v["weight"] if v["weight"] is not None else 0.01)  # 가중치 비례, None일 경우 기본값a
    for k, v in class_info.items()
}
total = sum(raw_ratios.values())
ratios = {k: v / total for k, v in raw_ratios.items()}

# 최종 합계가 1인지 확인
final_total = sum(ratios.values())
print("클래스 비율:", ratios)
print("최종 합계:", final_total)

# 비율을 리스트로 변환
ratios_list = [ratios[k] for k in sorted(ratios.keys())]
print("클래스 비율 리스트:", ratios_list)

클래스 비율: {0: 0.0, 1: 0.16393442622950818, 2: 0.01639344262295082, 3: 0.2459016393442623, 4: 0.16393442622950818, 5: 0.2459016393442623, 6: 0.16393442622950818}
최종 합계: 1.0
클래스 비율 리스트: [0.0, 0.16393442622950818, 0.01639344262295082, 0.2459016393442623, 0.16393442622950818, 0.2459016393442623, 0.16393442622950818]


# 모델 설정

In [None]:
from src.dataset.dataset import create_dataloaders, create_dataloaders_bw
from monai.transforms import (
    Compose, LoadImaged, EnsureChannelFirstd, NormalizeIntensityd,
    Orientationd, CropForegroundd, GaussianSmoothd, ScaleIntensityd,
    RandSpatialCropd, RandRotate90d, RandFlipd, RandGaussianNoised,
    ToTensord, RandCropByLabelClassesd, RandCropd,RandCropByPosNegLabeld, RandGaussianSmoothd
)
from monai.transforms import CastToTyped
import numpy as np

train_img_dir = "./datasets/pretrain_exdata/images"
train_label_dir = "./datasets/pretrain_exdata/labels"
val_img_dir = "./datasets/val/images"
val_label_dir = "./datasets/val/labels"
# DATA CONFIG
img_size =  96 # Match your patch size
img_depth = 32
n_classes = 7
batch_size = 16 # 13.8GB GPU memory required for 128x128 img size
loader_batch = 1
num_samples = batch_size // loader_batch # 한 이미지에서 뽑을 샘플 수
num_repeat = 4
# MODEL CONFIG
num_epochs = 4000
lamda = 0.52
ce_weight = 0.4
lr = 0.001
feature_size = 48
use_checkpoint = True
use_v2 = True
drop_rate= 0.25
attn_drop_rate = 0.25
num_bottleneck = 2
# CLASS_WEIGHTS
class_weights = None
# class_weights = torch.tensor([0.0001, 1, 0.001, 1.1, 1, 1.1, 1], dtype=torch.float32)  # 클래스별 가중치
# class_weights = torch.tensor([0.9,1,0.9,1.1,1,1.1,1], dtype=torch.float32)  # 클래스별 가중치
# class_weights = torch.tensor([1,1,1,1,1,1,1], dtype=torch.float32)  # 클래스별 가중치
sigma = 2.0
accumulation_steps = 1
# INIT
start_epoch = 0
best_val_loss = float('inf')
best_val_fbeta_score = 0

non_random_transforms = Compose([
    EnsureChannelFirstd(keys=["image", "label"], channel_dim="no_channel"),
    NormalizeIntensityd(keys="image"),
    Orientationd(keys=["image", "label"], axcodes="RAS"),
    # GaussianSmoothd(
    #     keys=["image"],      # 변환을 적용할 키
    #     sigma=[sigma, sigma, sigma]  # 각 축(x, y, z)의 시그마 값
    #     ),
])
random_transforms = Compose([
    RandCropByLabelClassesd(
        keys=["image", "label"],
        label_key="label",
        spatial_size=[img_depth, img_size, img_size],
        num_classes=n_classes,
        num_samples=num_samples, 
        ratios=ratios_list,
    ),
    RandRotate90d(keys=["image", "label"], prob=0.5, spatial_axes=[1, 2]),
    RandFlipd(keys=["image", "label"], prob=0.5, spatial_axis=0),
    RandFlipd(keys=["image", "label"], prob=0.5, spatial_axis=1),
    RandFlipd(keys=["image", "label"], prob=0.5, spatial_axis=2),
    RandGaussianSmoothd(
    keys=["image"],      # 변환을 적용할 키
    sigma_x = (0.0, sigma), # 각 축(x, y, z)의 시그마 값
    sigma_y = (0.0, sigma),
    sigma_z = (0.0, sigma),
    prob=1.0,
    ),
])
val_random_transforms = Compose([
    RandCropByLabelClassesd(
        keys=["image", "label"],
        label_key="label",
        spatial_size=[img_depth, img_size, img_size],
        num_classes=n_classes,
        num_samples=num_samples, 
        ratios=ratios_list,
    ),
    RandRotate90d(keys=["image", "label"], prob=0.5, spatial_axes=[1, 2]),
    RandFlipd(keys=["image", "label"], prob=0.5, spatial_axis=0),
    RandFlipd(keys=["image", "label"], prob=0.5, spatial_axis=1),
    RandFlipd(keys=["image", "label"], prob=0.5, spatial_axis=2),
    # RandGaussianSmoothd(
    # keys=["image"],      # 변환을 적용할 키
    # sigma_x = (0.0, sigma), # 각 축(x, y, z)의 시그마 값
    # sigma_y = (0.0, sigma),
    # sigma_z = (0.0, sigma),
    # prob=1.0,
    # ),
])


In [None]:
train_loader, val_loader = None, None
train_loader, val_loader = create_dataloaders_bw(
    train_img_dir, 
    train_label_dir, 
    val_img_dir, 
    val_label_dir, 
    non_random_transforms = non_random_transforms, 
    val_non_random_transforms=non_random_transforms,
    random_transforms = random_transforms, 
    val_random_transforms=val_random_transforms,
    batch_size = loader_batch,
    num_workers=0,
    train_num_repeat=num_repeat,
    val_num_repeat=num_repeat,
    )

Loading dataset: 100%|██████████| 51/51 [00:04<00:00, 10.37it/s]
Loading dataset: 100%|██████████| 1/1 [00:00<00:00,  6.22it/s]


https://monai.io/model-zoo.html

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from monai.losses import TverskyLoss

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# DynamicTverskyLoss 클래스 정의
class DynamicTverskyLoss(TverskyLoss):
    def __init__(self, lamda=0.5, **kwargs):
        super().__init__(alpha=1 - lamda, beta=lamda, **kwargs)
        self.lamda = lamda

    def set_lamda(self, lamda):
        self.lamda = lamda
        self.alpha = 1 - lamda
        self.beta = lamda


# CombinedCETverskyLoss 클래스
class CombinedCETverskyLoss_cl_weight(nn.Module):
    
    
    def __init__(self, lamda=0.5, ce_weight=0.5, n_classes=7, class_weights=None, ignore_index=-1, **kwargs):
        super().__init__()
        self.n_classes = n_classes
        self.ce_weight = ce_weight
        self.ignore_index = ignore_index
        
        # CrossEntropyLoss에서 클래스별 가중치를 적용
        self.ce = nn.CrossEntropyLoss(weight=class_weights, ignore_index=self.ignore_index, reduction='mean', **kwargs)
        
        # TverskyLoss
        self.tversky = DynamicTverskyLoss(lamda=lamda, reduction="none",softmax=True, **kwargs)

    def forward(self, inputs, targets):
        
        # CrossEntropyLoss는 정수형 클래스 인덱스를 사용
        ce_loss = self.ce(inputs, targets)

        # TverskyLoss 계산 (원핫 인코딩된 라벨을 사용)
        
        tversky_loss = self.tversky(inputs, targets)

        # 클래스별 가중치 적용 (Tversky 손실에도 가중치를 곱하기)
        class_weights = torch.tensor(self.ce.weight)  # CrossEntropy의 weight를 사용

        # Tversky 손실이 (B, num_classes) 형태이므로, 가중치를 클래스 차원에 곱합니다.
        tversky_loss = tversky_loss * class_weights.view(1, self.n_classes)

        # 최종 손실 계산
        final_loss = self.ce_weight * ce_loss + (1 - self.ce_weight) * tversky_loss.mean()  # mean()으로 배치에 대해 평균
        return final_loss

    def set_lamda(self, lamda):
        self.tversky.set_lamda(lamda)

    @property
    def lamda(self):
        return self.tversky.lamda

# CombinedCETverskyLoss 클래스
class CombinedCETverskyLoss(nn.Module):
    
    def __init__(self, lamda=0.5, ce_weight=0.5, n_classes=7, class_weights=None, ignore_index=-1, **kwargs):
        super().__init__()
        self.n_classes = n_classes
        self.ce_weight = ce_weight
        self.ignore_index = ignore_index
        
        # CrossEntropyLoss에서 클래스별 가중치를 적용
        self.ce = nn.CrossEntropyLoss(ignore_index=self.ignore_index, reduction='mean', **kwargs)
        
        # TverskyLoss
        self.tversky = DynamicTverskyLoss(lamda=lamda, reduction="mean",softmax=True, **kwargs)

    def forward(self, inputs, targets):
        
        # CrossEntropyLoss는 정수형 클래스 인덱스를 사용
        ce_loss = self.ce(inputs, targets)

        # TverskyLoss 계산 (원핫 인코딩된 라벨을 사용)
        
        tversky_loss = self.tversky(inputs, targets)

        # 최종 손실 계산
        final_loss = self.ce_weight * ce_loss + (1 - self.ce_weight) * tversky_loss  # mean()으로 배치에 대해 평균
        
        return final_loss

    def set_lamda(self, lamda):
        self.tversky.set_lamda(lamda)

    @property
    def lamda(self):
        return self.tversky.lamda

criterion = CombinedCETverskyLoss(
    lamda=lamda,
    ce_weight=ce_weight,
    n_classes=n_classes,
    class_weights=class_weights,
).to(device)

In [6]:
import torch.optim as optim
from tqdm import tqdm
import numpy as np
import torch
from pathlib import Path
from monai.networks.nets import UNet
from src.models import DP_UNet

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


model = UNet(
    spatial_dims=3,
    in_channels=1,
    out_channels=n_classes,
    channels=(32, 64, 128, 256),
    strides=(2, 2, 2),
    dropout = drop_rate,
    norm = Norm.INSTANCE,
    act = Act.PRELU,
).to(device)

pretrain_str = "yes" if use_checkpoint else "no"
weight_str = "weighted" if class_weights is not None else ""

# 체크포인트 디렉토리 및 파일 설정
checkpoint_base_dir = Path("./model_checkpoints")
folder_name = f"UNET_randGaus_511_241_noclswt_prelu_instance_f{feature_size}_lr{lr:.0e}_a{lamda:.2f}_b{batch_size}_r{num_repeat}_ce{ce_weight}_ac{accumulation_steps}"
checkpoint_dir = checkpoint_base_dir / folder_name
optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, factor=0.5)
# 체크포인트 디렉토리 생성
checkpoint_dir.mkdir(parents=True, exist_ok=True)

if checkpoint_dir.exists():
    best_model_path = checkpoint_dir / 'best_model_pretrained.pt'
    if best_model_path.exists():
        print(f"기존 best model 발견: {best_model_path}")
        try:
            checkpoint = torch.load(best_model_path, map_location=device)
            # 체크포인트 내부 키 검증
            required_keys = ['model_state_dict', 'optimizer_state_dict', 'epoch', 'best_val_loss', 'best_val_fbeta_score']
            if all(k in checkpoint for k in required_keys):
                model.load_state_dict(checkpoint['model_state_dict'])
                optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
                start_epoch = checkpoint['epoch']
                best_val_loss = checkpoint['best_val_loss']
                best_val_fbeta_score = checkpoint['best_val_fbeta_score']
                print("기존 학습된 가중치를 성공적으로 로드했습니다.")
                checkpoint= None
            else:
                raise ValueError("체크포인트 파일에 필요한 key가 없습니다.")
        except Exception as e:
            print(f"체크포인트 파일을 로드하는 중 오류 발생: {e}")

In [7]:
batch = next(iter(val_loader))
images, labels = batch["image"], batch["label"]
print(images.shape, labels.shape)

torch.Size([16, 1, 96, 96, 96]) torch.Size([16, 1, 96, 96, 96])


In [8]:
torch.backends.cudnn.benchmark = True

In [9]:
import wandb
from datetime import datetime

current_time = datetime.now().strftime('%Y%m%d_%H%M%S')
run_name = folder_name

# wandb 초기화
wandb.init(
    project='czii_UNet',  # 프로젝트 이름 설정
    name=run_name,         # 실행(run) 이름 설정
    config={
        'num_epochs': num_epochs,
        'learning_rate': lr,
        'batch_size': batch_size,
        'lambda': lamda,
        "cross_entropy_weight": ce_weight,
        'feature_size': feature_size,
        'img_size': img_size,
        'sampling_ratio': ratios_list,
        'device': device.type,
        "checkpoint_dir": str(checkpoint_dir),
        "class_weights": class_weights.tolist() if class_weights is not None else None,
        # "use_checkpoint": use_checkpoint,
        "drop_rate": drop_rate,
        # "attn_drop_rate": attn_drop_rate,
        # "use_v2": use_v2,
        "accumulation_steps": accumulation_steps,
        "num_repeat": num_repeat,
        # "num_bottleneck": num_bottleneck,
        
        # 필요한 하이퍼파라미터 추가
    }
)
# 모델을 wandb에 연결
wandb.watch(model, log='all')

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mpook0612[0m ([33mlimbw[0m). Use [1m`wandb login --relogin`[0m to force relogin


# 학습

In [10]:
from monai.metrics import DiceMetric
    
def processing(batch_data, model, criterion, device):
    images = batch_data['image'].to(device)  # Input 이미지 (B, 1, 96, 96, 96)
    labels = batch_data['label'].to(device)  # 라벨 (B, 96, 96, 96)

    labels = labels.squeeze(1)  # (B, 1, 96, 96, 96) → (B, 96, 96, 96)
    labels = labels.long()  # 라벨을 정수형으로 변환

    # 원핫 인코딩 (B, H, W, D) → (B, num_classes, H, W, D)
    
    labels_onehot = torch.nn.functional.one_hot(labels, num_classes=n_classes)
    labels_onehot = labels_onehot.permute(0, 4, 1, 2, 3).float()  # (B, num_classes, H, W, D)

    # 모델 예측
    outputs = model(images)  # outputs: (B, num_classes, H, W, D)

    # Loss 계산
    loss = criterion(outputs, labels_onehot)
    # loss = loss_fn(criterion(outputs, labels_onehot),class_weights=class_weights, device=device)
    return loss, outputs, labels, outputs.argmax(dim=1)

def train_one_epoch(model, train_loader, criterion, optimizer, device, epoch, accumulation_steps=4):
    model.train()
    epoch_loss = 0
    optimizer.zero_grad()  # 그래디언트 초기화
    with tqdm(train_loader, desc='Training') as pbar:
        for i, batch_data in enumerate(pbar):
            # 손실 계산
            loss, _, _, _ = processing(batch_data, model, criterion, device)

            # 그래디언트를 계산하고 누적
            loss = loss / accumulation_steps  # 그래디언트 누적을 위한 스케일링
            loss.backward()  # 그래디언트 계산 및 누적
            
            # 그래디언트 업데이트 (accumulation_steps마다 한 번)
            if (i + 1) % accumulation_steps == 0 or (i + 1) == len(train_loader):
                optimizer.step()  # 파라미터 업데이트
                optimizer.zero_grad()  # 누적된 그래디언트 초기화
            
            # 손실값 누적 (스케일링 복구)
            epoch_loss += loss.item() * accumulation_steps  # 실제 손실값 반영
            pbar.set_postfix(loss=loss.item() * accumulation_steps)  # 실제 손실값 출력
    avg_loss = epoch_loss / len(train_loader)
    wandb.log({'train_epoch_loss': avg_loss, 'epoch': epoch + 1})
    return avg_loss


def validate_one_epoch(model, val_loader, criterion, device, epoch, calculate_dice_interval):
    model.eval()
    val_loss = 0
    
    class_dice_scores = {i: [] for i in range(n_classes)}
    class_f_beta_scores = {i: [] for i in range(n_classes)}
    with torch.no_grad():
        with tqdm(val_loader, desc='Validation') as pbar:
            for batch_data in pbar:
                loss, _, labels, preds = processing(batch_data, model, criterion, device)
                val_loss += loss.item()
                pbar.set_postfix(loss=loss.item())

                # 각 클래스별 Dice 점수 계산
                if epoch % calculate_dice_interval == 0:
                    for i in range(n_classes):
                        pred_i = (preds == i)
                        label_i = (labels == i)
                        dice_score = (2.0 * torch.sum(pred_i & label_i)) / (torch.sum(pred_i) + torch.sum(label_i) + 1e-8)
                        class_dice_scores[i].append(dice_score.item())
                        precision = (torch.sum(pred_i & label_i) + 1e-8) / (torch.sum(pred_i) + 1e-8)
                        recall = (torch.sum(pred_i & label_i) + 1e-8) / (torch.sum(label_i) + 1e-8)
                        f_beta_score = (1 + 4**2) * (precision * recall) / (4**2 * precision + recall + 1e-8)
                        class_f_beta_scores[i].append(f_beta_score.item())

    avg_loss = val_loss / len(val_loader)
    # 에포크별 평균 손실 로깅
    wandb.log({'val_epoch_loss': avg_loss, 'epoch': epoch + 1})
    
    # 각 클래스별 평균 Dice 점수 출력
    if epoch % calculate_dice_interval == 0:
        print("Validation Dice Score")
        all_classes_dice_scores = []
        for i in range(n_classes):
            mean_dice = np.mean(class_dice_scores[i])
            wandb.log({f'class_{i}_dice_score': mean_dice, 'epoch': epoch + 1})
            print(f"Class {i}: {mean_dice:.4f}", end=", ")
            if i not in [0, 2]:  # 평균에 포함할 클래스만 추가
                all_classes_dice_scores.append(mean_dice)
            
        print()
    if epoch % calculate_dice_interval == 0:
        print("Validation F-beta Score")
        all_classes_fbeta_scores = []
        for i in range(n_classes):
            mean_fbeta = np.mean(class_f_beta_scores[i])
            wandb.log({f'class_{i}_f_beta_score': mean_fbeta, 'epoch': epoch + 1})
            print(f"Class {i}: {mean_fbeta:.4f}", end=", ")
            if i not in [0, 2]:  # 평균에 포함할 클래스만 추가
                all_classes_fbeta_scores.append(mean_fbeta)
                
        print()
        overall_mean_dice = np.mean(all_classes_dice_scores)
        overall_mean_fbeta = np.mean(all_classes_fbeta_scores)
        wandb.log({'overall_mean_f_beta_score': overall_mean_fbeta, 'overall_mean_dice_score': overall_mean_dice, 'epoch': epoch + 1})
        print(f"\nOverall Mean Dice Score: {overall_mean_dice:.4f}\nOverall Mean F-beta Score: {overall_mean_fbeta:.4f}\n")

    if overall_mean_fbeta is None:
        overall_mean_fbeta = 0

    return val_loss / len(val_loader), overall_mean_fbeta

def train_model(
    model, train_loader, val_loader, criterion, optimizer, num_epochs, patience, 
    device, start_epoch, best_val_loss, best_val_fbeta_score, calculate_dice_interval=1,
    accumulation_steps=4, pretrained=False
):
    """
    모델을 학습하고 검증하는 함수
    Args:
        model: 학습할 모델
        train_loader: 학습 데이터 로더
        val_loader: 검증 데이터 로더
        criterion: 손실 함수
        optimizer: 최적화 알고리즘
        num_epochs: 총 학습 epoch 수
        patience: early stopping 기준
        device: GPU/CPU 장치
        start_epoch: 시작 epoch
        best_val_loss: 이전 최적 validation loss
        best_val_fbeta_score: 이전 최적 validation f-beta score
        calculate_dice_interval: Dice 점수 계산 주기
    """
    epochs_no_improve = 0

    for epoch in range(start_epoch, num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")

        # Train One Epoch
        train_loss = train_one_epoch(
            model=model, 
            train_loader=train_loader, 
            criterion=criterion, 
            optimizer=optimizer, 
            device=device,
            epoch=epoch,
            accumulation_steps= accumulation_steps
        )
        
        scheduler.step(train_loss)
        # Validate One Epoch
        val_loss, overall_mean_fbeta_score = validate_one_epoch(
            model=model, 
            val_loader=val_loader, 
            criterion=criterion, 
            device=device, 
            epoch=epoch, 
            calculate_dice_interval=calculate_dice_interval
        )

        
        print(f"Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}, Validation F-beta: {overall_mean_fbeta_score:.4f}")

        if val_loss < best_val_loss and overall_mean_fbeta_score > best_val_fbeta_score:
            best_val_loss = val_loss
            best_val_fbeta_score = overall_mean_fbeta_score
            epochs_no_improve = 0
            if pretrained:
                checkpoint_path = os.path.join(checkpoint_dir, 'best_model_pretrained.pt')
            else:
                checkpoint_path = os.path.join(checkpoint_dir, 'best_model.pt')
            torch.save({
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'best_val_loss': best_val_loss,
                'best_val_fbeta_score': best_val_fbeta_score
            }, checkpoint_path)
            print(f"========================================================")
            print(f"SUPER Best model saved. Loss:{best_val_loss:.4f}, Score:{best_val_fbeta_score:.4f}")
            print(f"========================================================")

        # Early stopping 조건 체크
        if val_loss >= best_val_loss and overall_mean_fbeta_score <= best_val_fbeta_score:
            epochs_no_improve += 1
        else:
            epochs_no_improve = 0

        if epochs_no_improve >= patience:
            print("Early stopping")
            checkpoint_path = os.path.join(checkpoint_dir, 'last.pt')
            torch.save({
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'best_val_loss': best_val_loss,
                'best_val_fbeta_score': best_val_fbeta_score
            }, checkpoint_path)
            break
        # if epochs_no_improve % 6 == 0 & epochs_no_improve != 0:
        #     # 손실이 개선되지 않았으므로 lambda 감소
        #     new_lamda = max(criterion.lamda - 0.01, 0.35)  # 최소값은 0.1로 설정
        #     criterion.set_lamda(new_lamda)
        #     print(f"Validation loss did not improve. Reducing lambda to {new_lamda:.4f}")

    wandb.finish()


In [None]:
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=num_epochs,
    patience=10,
    device=device,
    start_epoch=start_epoch,
    best_val_loss=best_val_loss,
    best_val_fbeta_score=best_val_fbeta_score,
    calculate_dice_interval=1,
    accumulation_steps = accumulation_steps,
    pretrained=True,
     ) 

Epoch 1/4000


Training:   8%|▊         | 16/204 [00:32<03:05,  1.01it/s, loss=0.899]

In [None]:
train_img_dir = './datasets/public_data/images'
train_label_dir = './datasets/public_data/labels'

train_loader, val_loader = None, None
train_loader, val_loader = create_dataloaders_bw(
    train_img_dir, 
    train_label_dir, 
    val_img_dir, 
    val_label_dir, 
    non_random_transforms = non_random_transforms, 
    val_non_random_transforms=non_random_transforms,
    random_transforms = random_transforms, 
    val_random_transforms=val_random_transforms,
    batch_size = loader_batch,
    num_workers=0,
    train_num_repeat=num_repeat
    )

batch = next(iter(val_loader))
images, labels = batch["image"], batch["label"]
print(images.shape, labels.shape)

Loading dataset: 100%|██████████| 24/24 [00:02<00:00, 11.52it/s]
Loading dataset: 100%|██████████| 1/1 [00:00<00:00,  9.66it/s]


torch.Size([16, 1, 96, 96, 96]) torch.Size([16, 1, 96, 96, 96])


In [None]:
torch.backends.cudnn.benchmark = True

In [None]:

if checkpoint_dir.exists():
    best_model_path = checkpoint_dir / 'best_model_pretrained.pt'
    if best_model_path.exists():
        print(f"기존 best model 발견: {best_model_path}")
        try:
            checkpoint = torch.load(best_model_path, map_location=device)
            # 체크포인트 내부 키 검증
            required_keys = ['model_state_dict', 'optimizer_state_dict', 'epoch', 'best_val_loss', 'best_val_fbeta_score']
            if all(k in checkpoint for k in required_keys):
                model.load_state_dict(checkpoint['model_state_dict'])
                optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
                start_epoch = checkpoint['epoch']
                best_val_loss = checkpoint['best_val_loss']
                best_val_fbeta_score = checkpoint['best_val_fbeta_score']
                print("기존 학습된 가중치를 성공적으로 로드했습니다.")
                checkpoint= None
            else:
                raise ValueError("체크포인트 파일에 필요한 key가 없습니다.")
        except Exception as e:
            print(f"체크포인트 파일을 로드하는 중 오류 발생: {e}")
            
lr = lr/2
            
optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, factor=0.5)


기존 best model 발견: model_checkpoints\UNET_randGaus_511_241_noclswt_f48_d96s96_numb2_lr1e-03_a0.52_b0.48_b16_r4_ce0.4_ac1\best_model_pretrained.pt
기존 학습된 가중치를 성공적으로 로드했습니다.


  checkpoint = torch.load(best_model_path, map_location=device)


In [None]:
import wandb
from datetime import datetime

current_time = datetime.now().strftime('%Y%m%d_%H%M%S')
run_name = folder_name

# wandb 초기화
wandb.init(
    project='czii_UNet',  # 프로젝트 이름 설정
    name=run_name,         # 실행(run) 이름 설정
    config={
        'num_epochs': num_epochs,
        'learning_rate': lr,
        'batch_size': batch_size,
        'lambda': lamda,
        "cross_entropy_weight": ce_weight,
        'feature_size': feature_size,
        'img_size': img_size,
        'sampling_ratio': ratios_list,
        'device': device.type,
        "checkpoint_dir": str(checkpoint_dir),
        "class_weights": class_weights.tolist() if class_weights is not None else None,
        # "use_checkpoint": use_checkpoint,
        "drop_rate": drop_rate,
        # "attn_drop_rate": attn_drop_rate,
        # "use_v2": use_v2,
        "accumulation_steps": accumulation_steps,
        "num_repeat": num_repeat,
        # "num_bottleneck": num_bottleneck,
        
        # 필요한 하이퍼파라미터 추가
    }
)
# 모델을 wandb에 연결
wandb.watch(model, log='all')

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mpook0612[0m ([33mlimbw[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=num_epochs,
    patience=10,
    device=device,
    start_epoch=start_epoch,
    best_val_loss=best_val_loss,
    best_val_fbeta_score=best_val_fbeta_score,
    calculate_dice_interval=1,
    accumulation_steps = accumulation_steps,
    pretrained=False,
    ) 

Epoch 33/4000


  class_weights = torch.tensor(self.ce.weight)  # CrossEntropy의 weight를 사용
Training: 100%|██████████| 96/96 [01:54<00:00,  1.19s/it, loss=0.395]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.47it/s, loss=0.383]


Validation Dice Score
Class 0: 0.9857, Class 1: 0.7139, Class 2: 0.1690, Class 3: 0.4226, Class 4: 0.6580, Class 5: 0.4272, Class 6: 0.8507, 
Validation F-beta Score
Class 0: 0.9783, Class 1: 0.8727, Class 2: 0.2351, Class 3: 0.4607, Class 4: 0.8420, Class 5: 0.5677, Class 6: 0.9644, 

Overall Mean Dice Score: 0.6145
Overall Mean F-beta Score: 0.7415

Training Loss: 0.4120, Validation Loss: 0.3826, Validation F-beta: 0.7415
Epoch 34/4000


Training: 100%|██████████| 96/96 [01:37<00:00,  1.02s/it, loss=0.426]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.52it/s, loss=0.361]


Validation Dice Score
Class 0: 0.9904, Class 1: 0.7648, Class 2: 0.3382, Class 3: 0.3725, Class 4: 0.7841, Class 5: 0.4612, Class 6: 0.8681, 
Validation F-beta Score
Class 0: 0.9864, Class 1: 0.8621, Class 2: 0.3455, Class 3: 0.6053, Class 4: 0.8890, Class 5: 0.5687, Class 6: 0.9356, 

Overall Mean Dice Score: 0.6501
Overall Mean F-beta Score: 0.7722

Training Loss: 0.4112, Validation Loss: 0.3614, Validation F-beta: 0.7722
Epoch 35/4000


Training: 100%|██████████| 96/96 [01:32<00:00,  1.04it/s, loss=0.418]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.61it/s, loss=0.398]


Validation Dice Score
Class 0: 0.9891, Class 1: 0.7562, Class 2: 0.3958, Class 3: 0.2927, Class 4: 0.6327, Class 5: 0.4559, Class 6: 0.8529, 
Validation F-beta Score
Class 0: 0.9842, Class 1: 0.8637, Class 2: 0.4967, Class 3: 0.4051, Class 4: 0.7404, Class 5: 0.6271, Class 6: 0.9198, 

Overall Mean Dice Score: 0.5981
Overall Mean F-beta Score: 0.7112

Training Loss: 0.4077, Validation Loss: 0.3983, Validation F-beta: 0.7112
Epoch 36/4000


Training: 100%|██████████| 96/96 [01:34<00:00,  1.02it/s, loss=0.414]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.32it/s, loss=0.401]


Validation Dice Score
Class 0: 0.9873, Class 1: 0.6913, Class 2: 0.2903, Class 3: 0.4181, Class 4: 0.7145, Class 5: 0.4673, Class 6: 0.8062, 
Validation F-beta Score
Class 0: 0.9818, Class 1: 0.7887, Class 2: 0.4280, Class 3: 0.5968, Class 4: 0.8364, Class 5: 0.5448, Class 6: 0.9685, 

Overall Mean Dice Score: 0.6195
Overall Mean F-beta Score: 0.7470

Training Loss: 0.4078, Validation Loss: 0.4014, Validation F-beta: 0.7470
Epoch 37/4000


Training: 100%|██████████| 96/96 [01:36<00:00,  1.01s/it, loss=0.405]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.55it/s, loss=0.418]


Validation Dice Score
Class 0: 0.9871, Class 1: 0.6945, Class 2: 0.1672, Class 3: 0.3453, Class 4: 0.7238, Class 5: 0.4055, Class 6: 0.8320, 
Validation F-beta Score
Class 0: 0.9810, Class 1: 0.8157, Class 2: 0.2271, Class 3: 0.5649, Class 4: 0.8320, Class 5: 0.5494, Class 6: 0.8752, 

Overall Mean Dice Score: 0.6002
Overall Mean F-beta Score: 0.7274

Training Loss: 0.4057, Validation Loss: 0.4176, Validation F-beta: 0.7274
Epoch 38/4000


Training: 100%|██████████| 96/96 [01:34<00:00,  1.02it/s, loss=0.394]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.58it/s, loss=0.373]


Validation Dice Score
Class 0: 0.9891, Class 1: 0.7572, Class 2: 0.3091, Class 3: 0.3170, Class 4: 0.6832, Class 5: 0.5402, Class 6: 0.8935, 
Validation F-beta Score
Class 0: 0.9862, Class 1: 0.8995, Class 2: 0.3444, Class 3: 0.4880, Class 4: 0.7546, Class 5: 0.5563, Class 6: 0.9786, 

Overall Mean Dice Score: 0.6382
Overall Mean F-beta Score: 0.7354

Training Loss: 0.4083, Validation Loss: 0.3727, Validation F-beta: 0.7354
Epoch 39/4000


Training: 100%|██████████| 96/96 [01:37<00:00,  1.02s/it, loss=0.401]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.33it/s, loss=0.383]


Validation Dice Score
Class 0: 0.9867, Class 1: 0.8091, Class 2: 0.3766, Class 3: 0.3582, Class 4: 0.6746, Class 5: 0.4507, Class 6: 0.9039, 
Validation F-beta Score
Class 0: 0.9811, Class 1: 0.8677, Class 2: 0.3471, Class 3: 0.4447, Class 4: 0.8370, Class 5: 0.5283, Class 6: 0.9783, 

Overall Mean Dice Score: 0.6393
Overall Mean F-beta Score: 0.7312

Training Loss: 0.4073, Validation Loss: 0.3825, Validation F-beta: 0.7312
Epoch 40/4000


Training: 100%|██████████| 96/96 [01:32<00:00,  1.04it/s, loss=0.425]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.58it/s, loss=0.378]


Validation Dice Score
Class 0: 0.9893, Class 1: 0.8123, Class 2: 0.2731, Class 3: 0.3342, Class 4: 0.7342, Class 5: 0.4350, Class 6: 0.9146, 
Validation F-beta Score
Class 0: 0.9842, Class 1: 0.8747, Class 2: 0.3927, Class 3: 0.3505, Class 4: 0.8245, Class 5: 0.6353, Class 6: 0.9515, 

Overall Mean Dice Score: 0.6461
Overall Mean F-beta Score: 0.7273

Training Loss: 0.4051, Validation Loss: 0.3777, Validation F-beta: 0.7273
Epoch 41/4000


Training: 100%|██████████| 96/96 [01:37<00:00,  1.01s/it, loss=0.435]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.16it/s, loss=0.364]


Validation Dice Score
Class 0: 0.9913, Class 1: 0.7506, Class 2: 0.2854, Class 3: 0.4010, Class 4: 0.7784, Class 5: 0.4440, Class 6: 0.9039, 
Validation F-beta Score
Class 0: 0.9897, Class 1: 0.8773, Class 2: 0.3367, Class 3: 0.5358, Class 4: 0.7626, Class 5: 0.5207, Class 6: 0.9301, 

Overall Mean Dice Score: 0.6556
Overall Mean F-beta Score: 0.7253

Training Loss: 0.4048, Validation Loss: 0.3636, Validation F-beta: 0.7253
Epoch 42/4000


Training: 100%|██████████| 96/96 [01:40<00:00,  1.04s/it, loss=0.396]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.48it/s, loss=0.374]


Validation Dice Score
Class 0: 0.9905, Class 1: 0.7529, Class 2: 0.2421, Class 3: 0.4988, Class 4: 0.7629, Class 5: 0.4515, Class 6: 0.9051, 
Validation F-beta Score
Class 0: 0.9866, Class 1: 0.9054, Class 2: 0.3614, Class 3: 0.6432, Class 4: 0.8053, Class 5: 0.6065, Class 6: 0.9823, 

Overall Mean Dice Score: 0.6743
Overall Mean F-beta Score: 0.7885

Training Loss: 0.4053, Validation Loss: 0.3739, Validation F-beta: 0.7885
Epoch 43/4000


Training: 100%|██████████| 96/96 [01:36<00:00,  1.01s/it, loss=0.434]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.55it/s, loss=0.415]


Validation Dice Score
Class 0: 0.9868, Class 1: 0.7617, Class 2: 0.2082, Class 3: 0.2798, Class 4: 0.7050, Class 5: 0.4248, Class 6: 0.7222, 
Validation F-beta Score
Class 0: 0.9804, Class 1: 0.8408, Class 2: 0.2665, Class 3: 0.4767, Class 4: 0.8173, Class 5: 0.5870, Class 6: 0.9503, 

Overall Mean Dice Score: 0.5787
Overall Mean F-beta Score: 0.7344

Training Loss: 0.4025, Validation Loss: 0.4145, Validation F-beta: 0.7344
Epoch 44/4000


Training: 100%|██████████| 96/96 [01:38<00:00,  1.03s/it, loss=0.402]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.53it/s, loss=0.387]


Validation Dice Score
Class 0: 0.9863, Class 1: 0.8423, Class 2: 0.1528, Class 3: 0.3626, Class 4: 0.7292, Class 5: 0.4420, Class 6: 0.8825, 
Validation F-beta Score
Class 0: 0.9820, Class 1: 0.8514, Class 2: 0.1948, Class 3: 0.5163, Class 4: 0.7811, Class 5: 0.5552, Class 6: 0.9520, 

Overall Mean Dice Score: 0.6517
Overall Mean F-beta Score: 0.7312

Training Loss: 0.4058, Validation Loss: 0.3870, Validation F-beta: 0.7312
Epoch 45/4000


Training: 100%|██████████| 96/96 [01:34<00:00,  1.01it/s, loss=0.396]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.56it/s, loss=0.383]


Validation Dice Score
Class 0: 0.9882, Class 1: 0.7096, Class 2: 0.0659, Class 3: 0.2038, Class 4: 0.7102, Class 5: 0.4589, Class 6: 0.9020, 
Validation F-beta Score
Class 0: 0.9855, Class 1: 0.8325, Class 2: 0.0619, Class 3: 0.2529, Class 4: 0.7364, Class 5: 0.5412, Class 6: 0.9623, 

Overall Mean Dice Score: 0.5969
Overall Mean F-beta Score: 0.6651

Training Loss: 0.4042, Validation Loss: 0.3835, Validation F-beta: 0.6651
Epoch 46/4000


Training: 100%|██████████| 96/96 [01:32<00:00,  1.04it/s, loss=0.378]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.60it/s, loss=0.385]


Validation Dice Score
Class 0: 0.9891, Class 1: 0.6950, Class 2: 0.2638, Class 3: 0.2935, Class 4: 0.7378, Class 5: 0.3807, Class 6: 0.8212, 
Validation F-beta Score
Class 0: 0.9860, Class 1: 0.8729, Class 2: 0.3001, Class 3: 0.4036, Class 4: 0.7749, Class 5: 0.4660, Class 6: 0.9500, 

Overall Mean Dice Score: 0.5856
Overall Mean F-beta Score: 0.6935

Training Loss: 0.4048, Validation Loss: 0.3853, Validation F-beta: 0.6935
Epoch 47/4000


Training: 100%|██████████| 96/96 [01:36<00:00,  1.00s/it, loss=0.399]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.59it/s, loss=0.385]


Validation Dice Score
Class 0: 0.9888, Class 1: 0.7048, Class 2: 0.2508, Class 3: 0.3333, Class 4: 0.6225, Class 5: 0.4549, Class 6: 0.8385, 
Validation F-beta Score
Class 0: 0.9842, Class 1: 0.8855, Class 2: 0.3075, Class 3: 0.4471, Class 4: 0.7439, Class 5: 0.5595, Class 6: 0.9628, 

Overall Mean Dice Score: 0.5908
Overall Mean F-beta Score: 0.7198

Training Loss: 0.4027, Validation Loss: 0.3846, Validation F-beta: 0.7198
Epoch 48/4000


Training: 100%|██████████| 96/96 [01:35<00:00,  1.00it/s, loss=0.406]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.56it/s, loss=0.4]


Validation Dice Score
Class 0: 0.9893, Class 1: 0.6620, Class 2: 0.1970, Class 3: 0.3344, Class 4: 0.7265, Class 5: 0.4415, Class 6: 0.8931, 
Validation F-beta Score
Class 0: 0.9839, Class 1: 0.8978, Class 2: 0.3642, Class 3: 0.6021, Class 4: 0.8390, Class 5: 0.5727, Class 6: 0.9588, 

Overall Mean Dice Score: 0.6115
Overall Mean F-beta Score: 0.7741

Training Loss: 0.4043, Validation Loss: 0.4001, Validation F-beta: 0.7741
Epoch 49/4000


Training: 100%|██████████| 96/96 [01:35<00:00,  1.01it/s, loss=0.41] 
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.55it/s, loss=0.377]


Validation Dice Score
Class 0: 0.9885, Class 1: 0.7761, Class 2: 0.3023, Class 3: 0.4037, Class 4: 0.6422, Class 5: 0.5331, Class 6: 0.8986, 
Validation F-beta Score
Class 0: 0.9833, Class 1: 0.8835, Class 2: 0.4381, Class 3: 0.5402, Class 4: 0.7844, Class 5: 0.6391, Class 6: 0.9613, 

Overall Mean Dice Score: 0.6508
Overall Mean F-beta Score: 0.7617

Training Loss: 0.4032, Validation Loss: 0.3772, Validation F-beta: 0.7617
Epoch 50/4000


Training: 100%|██████████| 96/96 [01:31<00:00,  1.05it/s, loss=0.422]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.63it/s, loss=0.385]


Validation Dice Score
Class 0: 0.9888, Class 1: 0.8455, Class 2: 0.0000, Class 3: 0.2499, Class 4: 0.7289, Class 5: 0.3679, Class 6: 0.9254, 
Validation F-beta Score
Class 0: 0.9842, Class 1: 0.8598, Class 2: 0.0000, Class 3: 0.4717, Class 4: 0.7956, Class 5: 0.5073, Class 6: 0.9579, 

Overall Mean Dice Score: 0.6235
Overall Mean F-beta Score: 0.7185

Training Loss: 0.4017, Validation Loss: 0.3848, Validation F-beta: 0.7185
Epoch 51/4000


Training: 100%|██████████| 96/96 [01:35<00:00,  1.01it/s, loss=0.404]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.54it/s, loss=0.397]


Validation Dice Score
Class 0: 0.9918, Class 1: 0.7763, Class 2: 0.2794, Class 3: 0.4091, Class 4: 0.7446, Class 5: 0.3753, Class 6: 0.9112, 
Validation F-beta Score
Class 0: 0.9885, Class 1: 0.8458, Class 2: 0.3009, Class 3: 0.5439, Class 4: 0.7968, Class 5: 0.5661, Class 6: 0.9671, 

Overall Mean Dice Score: 0.6433
Overall Mean F-beta Score: 0.7439

Training Loss: 0.4020, Validation Loss: 0.3971, Validation F-beta: 0.7439
Epoch 52/4000


Training: 100%|██████████| 96/96 [01:36<00:00,  1.01s/it, loss=0.417]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.54it/s, loss=0.38]


Validation Dice Score
Class 0: 0.9894, Class 1: 0.7214, Class 2: 0.1491, Class 3: 0.2269, Class 4: 0.7308, Class 5: 0.5395, Class 6: 0.9212, 
Validation F-beta Score
Class 0: 0.9857, Class 1: 0.8522, Class 2: 0.2283, Class 3: 0.3564, Class 4: 0.7771, Class 5: 0.6399, Class 6: 0.9749, 

Overall Mean Dice Score: 0.6279
Overall Mean F-beta Score: 0.7201

Training Loss: 0.4023, Validation Loss: 0.3803, Validation F-beta: 0.7201
Epoch 53/4000


Training: 100%|██████████| 96/96 [01:36<00:00,  1.00s/it, loss=0.405]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.56it/s, loss=0.37]


Validation Dice Score
Class 0: 0.9894, Class 1: 0.7022, Class 2: 0.2367, Class 3: 0.3929, Class 4: 0.6822, Class 5: 0.4985, Class 6: 0.8711, 
Validation F-beta Score
Class 0: 0.9851, Class 1: 0.8832, Class 2: 0.2998, Class 3: 0.4900, Class 4: 0.8155, Class 5: 0.6045, Class 6: 0.9688, 

Overall Mean Dice Score: 0.6294
Overall Mean F-beta Score: 0.7524

Training Loss: 0.4005, Validation Loss: 0.3702, Validation F-beta: 0.7524
Epoch 54/4000


Training: 100%|██████████| 96/96 [01:32<00:00,  1.03it/s, loss=0.408]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.63it/s, loss=0.366]


Validation Dice Score
Class 0: 0.9907, Class 1: 0.8719, Class 2: 0.2630, Class 3: 0.4590, Class 4: 0.7371, Class 5: 0.5135, Class 6: 0.8950, 
Validation F-beta Score
Class 0: 0.9880, Class 1: 0.9234, Class 2: 0.3382, Class 3: 0.4423, Class 4: 0.8032, Class 5: 0.5835, Class 6: 0.9705, 

Overall Mean Dice Score: 0.6953
Overall Mean F-beta Score: 0.7446

Training Loss: 0.4000, Validation Loss: 0.3663, Validation F-beta: 0.7446
Epoch 55/4000


Training: 100%|██████████| 96/96 [01:31<00:00,  1.05it/s, loss=0.394]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.64it/s, loss=0.382]


Validation Dice Score
Class 0: 0.9895, Class 1: 0.7497, Class 2: 0.3504, Class 3: 0.4563, Class 4: 0.7537, Class 5: 0.3585, Class 6: 0.9213, 
Validation F-beta Score
Class 0: 0.9866, Class 1: 0.8162, Class 2: 0.4192, Class 3: 0.6268, Class 4: 0.7873, Class 5: 0.4354, Class 6: 0.9709, 

Overall Mean Dice Score: 0.6479
Overall Mean F-beta Score: 0.7273

Training Loss: 0.4002, Validation Loss: 0.3822, Validation F-beta: 0.7273
Epoch 56/4000


Training: 100%|██████████| 96/96 [01:31<00:00,  1.05it/s, loss=0.404]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.60it/s, loss=0.357]


Validation Dice Score
Class 0: 0.9881, Class 1: 0.8026, Class 2: 0.3226, Class 3: 0.4632, Class 4: 0.7030, Class 5: 0.4158, Class 6: 0.8740, 
Validation F-beta Score
Class 0: 0.9842, Class 1: 0.8787, Class 2: 0.3405, Class 3: 0.7033, Class 4: 0.7842, Class 5: 0.4682, Class 6: 0.9642, 

Overall Mean Dice Score: 0.6517
Overall Mean F-beta Score: 0.7597

Training Loss: 0.4024, Validation Loss: 0.3567, Validation F-beta: 0.7597
Epoch 57/4000


Training: 100%|██████████| 96/96 [01:36<00:00,  1.01s/it, loss=0.379]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.46it/s, loss=0.343]


Validation Dice Score
Class 0: 0.9886, Class 1: 0.7847, Class 2: 0.2856, Class 3: 0.4451, Class 4: 0.7133, Class 5: 0.4771, Class 6: 0.8455, 
Validation F-beta Score
Class 0: 0.9850, Class 1: 0.8854, Class 2: 0.3368, Class 3: 0.4999, Class 4: 0.7746, Class 5: 0.5798, Class 6: 0.9575, 

Overall Mean Dice Score: 0.6531
Overall Mean F-beta Score: 0.7394

Training Loss: 0.4050, Validation Loss: 0.3433, Validation F-beta: 0.7394
Epoch 58/4000


Training: 100%|██████████| 96/96 [01:36<00:00,  1.00s/it, loss=0.407]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.51it/s, loss=0.394]

Validation Dice Score
Class 0: 0.9895, Class 1: 0.7912, Class 2: 0.2461, Class 3: 0.3238, Class 4: 0.7846, Class 5: 0.4903, Class 6: 0.8826, 
Validation F-beta Score
Class 0: 0.9856, Class 1: 0.8812, Class 2: 0.3583, Class 3: 0.5325, Class 4: 0.8302, Class 5: 0.5960, Class 6: 0.9808, 

Overall Mean Dice Score: 0.6545
Overall Mean F-beta Score: 0.7641

Training Loss: 0.3993, Validation Loss: 0.3940, Validation F-beta: 0.7641
Early stopping





0,1
class_0_dice_score,▁▆▅▃▃▅▂▅▇▇▂▂▄▅▅▅▄▅█▅▅▇▅▄▄▅
class_0_f_beta_score,▁▆▅▃▃▆▃▅█▆▂▃▅▆▅▄▄▅▇▆▅▇▆▅▅▆
class_1_dice_score,▃▄▄▂▂▄▆▆▄▄▄▇▃▂▂▁▅▇▅▃▂█▄▆▅▅
class_1_f_beta_score,▅▅▅▁▂▇▅▅▆▇▄▄▃▅▆▇▆▅▄▄▆█▂▆▆▆
class_2_dice_score,▄▇█▆▄▆█▆▆▅▅▄▂▆▅▄▆▁▆▄▅▆▇▇▆▅
class_2_f_beta_score,▄▆█▇▄▆▆▇▆▆▅▄▂▅▅▆▇▁▅▄▅▆▇▆▆▆
class_3_dice_score,▆▅▃▆▄▄▅▄▆█▃▅▁▃▄▄▆▂▆▂▅▇▇▇▇▄
class_3_f_beta_score,▄▆▃▆▆▅▄▃▅▇▄▅▁▃▄▆▅▄▆▃▅▄▇█▅▅
class_4_dice_score,▃█▁▅▅▄▃▆█▇▅▆▅▆▁▅▂▆▆▆▄▆▇▄▅█
class_4_f_beta_score,▆█▁▆▅▂▆▅▂▄▅▃▁▃▁▆▃▄▄▃▅▄▃▃▃▅

0,1
class_0_dice_score,0.98947
class_0_f_beta_score,0.9856
class_1_dice_score,0.79117
class_1_f_beta_score,0.88115
class_2_dice_score,0.24611
class_2_f_beta_score,0.35833
class_3_dice_score,0.32377
class_3_f_beta_score,0.53247
class_4_dice_score,0.78458
class_4_f_beta_score,0.83022


In [None]:
if:

SyntaxError: invalid syntax (879943805.py, line 1)

# VAl

In [None]:
from monai.data import DataLoader, Dataset, CacheDataset
from monai.transforms import (
    Compose, LoadImaged, EnsureChannelFirstd, NormalizeIntensityd,
    Orientationd, CropForegroundd, GaussianSmoothd, ScaleIntensityd,
    RandSpatialCropd, RandRotate90d, RandFlipd, RandGaussianNoised,
    ToTensord, RandCropByLabelClassesd
)
from monai.metrics import DiceMetric
from monai.networks.nets import UNETR, SwinUNETR
from monai.losses import TverskyLoss
import torch
import numpy as np
from tqdm import tqdm
import wandb
from src.dataset.dataset import make_val_dataloader

val_img_dir = "./datasets/val/images"
val_label_dir = "./datasets/val/labels"
img_depth = 96
img_size = 96  # Match your patch size
n_classes = 7
batch_size = 2 # 13.8GB GPU memory required for 128x128 img size
num_samples = batch_size # 한 이미지에서 뽑을 샘플 수
loader_batch = 1
lamda = 0.52

wandb.init(
    project='czii_SwinUnetR_val',  # 프로젝트 이름 설정
    name='SwinUNETR96_96_lr0.001_lambda0.52_batch2',         # 실행(run) 이름 설정
    config={
        'learning_rate': 0.001,
        'batch_size': batch_size,
        'lambda': lamda,
        'img_size': img_size,
        'device': 'cuda',
        "checkpoint_dir": "./model_checkpoints/SwinUNETR96_96_lr0.001_lambda0.52_batch2",
        
    }
)

non_random_transforms = Compose([
    EnsureChannelFirstd(keys=["image", "label"], channel_dim="no_channel"),
    NormalizeIntensityd(keys="image"),
    Orientationd(keys=["image", "label"], axcodes="RAS"),
    GaussianSmoothd(
        keys=["image"],      # 변환을 적용할 키
        sigma=[1.0, 1.0, 1.0]  # 각 축(x, y, z)의 시그마 값
        ),
])
random_transforms = Compose([
    RandCropByLabelClassesd(
        keys=["image", "label"],
        label_key="label",
        spatial_size=[img_depth, img_size, img_size],
        num_classes=n_classes,
        num_samples=num_samples, 
        ratios=ratios_list,
    ),
    RandRotate90d(keys=["image", "label"], prob=0.5, spatial_axes=[1, 2]),
    RandFlipd(keys=["image", "label"], prob=0.5, spatial_axis=0),
])

val_loader = make_val_dataloader(
    val_img_dir, 
    val_label_dir, 
    non_random_transforms = non_random_transforms, 
    random_transforms = random_transforms, 
    batch_size = loader_batch,
    num_workers=0
)
criterion = TverskyLoss(
    alpha= 1 - lamda,  # FP에 대한 가중치
    beta=lamda,       # FN에 대한 가중치
    include_background=False,  # 배경 클래스 제외
    softmax=True
)
    
    
from monai.metrics import DiceMetric

img_size = 96
img_depth = img_size
n_classes = 7 

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pretrain_path = "./model_checkpoints/SwinUNETR96_96_lr0.001_lambda0.52_batch2/best_model.pt"
model = SwinUNETR(
    img_size=(img_depth, img_size, img_size),
    in_channels=1,
    out_channels=n_classes,
    feature_size=48,
    use_checkpoint=True,
).to(device)
# Pretrained weights 불러오기
checkpoint = torch.load(pretrain_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])

val_loss, overall_mean_fbeta_score = validate_one_epoch(
    model=model, 
    val_loader=val_loader, 
    criterion=criterion, 
    device=device, 
    epoch=0, 
    calculate_dice_interval=1
)

VBox(children=(Label(value='0.009 MB of 0.009 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
class_0_dice_score,▁
class_0_f_beta_score,▁
class_1_dice_score,▁
class_1_f_beta_score,▁
class_2_dice_score,▁
class_2_f_beta_score,▁
class_3_dice_score,▁
class_3_f_beta_score,▁
class_4_dice_score,▁
class_4_f_beta_score,▁

0,1
class_0_dice_score,0.65703
class_0_f_beta_score,0.50748
class_1_dice_score,0.53332
class_1_f_beta_score,0.64703
class_2_dice_score,0.00286
class_2_f_beta_score,0.02334
class_3_dice_score,0.23703
class_3_f_beta_score,0.23033
class_4_dice_score,0.65487
class_4_f_beta_score,0.62525


Loading dataset: 100%|██████████| 4/4 [00:06<00:00,  1.58s/it]
  checkpoint = torch.load(pretrain_path, map_location=device)
Validation: 100%|██████████| 4/4 [00:01<00:00,  2.38it/s, loss=0.865]

Validation Dice Score
Class 0: 0.6570, Class 1: 0.5333, Class 2: 0.0029, Class 3: 0.2370, 
Class 4: 0.6549, Class 5: 0.4790, Class 6: 0.4255, 
Validation F-beta Score
Class 0: 0.5075, Class 1: 0.6470, Class 2: 0.0233, Class 3: 0.2303, 
Class 4: 0.6252, Class 5: 0.5145, Class 6: 0.4720, 
Overall Mean Dice Score: 0.4659
Overall Mean F-beta Score: 0.4978






: 

: 

# Inference

In [None]:
from src.dataset.preprocessing import Preprocessor

: 

: 

In [None]:
from monai.inferers import sliding_window_inference
from monai.transforms import Compose, EnsureChannelFirstd, NormalizeIntensityd, Orientationd, GaussianSmoothd
from monai.data import DataLoader, Dataset, CacheDataset
from monai.networks.nets import SwinUNETR
from pathlib import Path
import numpy as np
import copick

import torch
print("Done.")

Done.


: 

: 

In [None]:
config_blob = """{
    "name": "czii_cryoet_mlchallenge_2024",
    "description": "2024 CZII CryoET ML Challenge training data.",
    "version": "1.0.0",

    "pickable_objects": [
        {
            "name": "apo-ferritin",
            "is_particle": true,
            "pdb_id": "4V1W",
            "label": 1,
            "color": [  0, 117, 220, 128],
            "radius": 60,
            "map_threshold": 0.0418
        },
        {
          "name" : "beta-amylase",
            "is_particle": true,
            "pdb_id": "8ZRZ",
            "label": 2,
            "color": [255, 255, 255, 128],
            "radius": 90,
            "map_threshold": 0.0578  
        },
        {
            "name": "beta-galactosidase",
            "is_particle": true,
            "pdb_id": "6X1Q",
            "label": 3,
            "color": [ 76,   0,  92, 128],
            "radius": 90,
            "map_threshold": 0.0578
        },
        {
            "name": "ribosome",
            "is_particle": true,
            "pdb_id": "6EK0",
            "label": 4,
            "color": [  0,  92,  49, 128],
            "radius": 150,
            "map_threshold": 0.0374
        },
        {
            "name": "thyroglobulin",
            "is_particle": true,
            "pdb_id": "6SCJ",
            "label": 5,
            "color": [ 43, 206,  72, 128],
            "radius": 130,
            "map_threshold": 0.0278
        },
        {
            "name": "virus-like-particle",
            "is_particle": true,
            "label": 6,
            "color": [255, 204, 153, 128],
            "radius": 135,
            "map_threshold": 0.201
        },
        {
            "name": "membrane",
            "is_particle": false,
            "label": 8,
            "color": [100, 100, 100, 128]
        },
        {
            "name": "background",
            "is_particle": false,
            "label": 9,
            "color": [10, 150, 200, 128]
        }
    ],

    "overlay_root": "./kaggle/working/overlay",

    "overlay_fs_args": {
        "auto_mkdir": true
    },

    "static_root": "./kaggle/input/czii-cryo-et-object-identification/test/static"
}"""

copick_config_path = "./kaggle/working/copick.config"
preprocessor = Preprocessor(config_blob,copick_config_path=copick_config_path)
non_random_transforms = Compose([
    EnsureChannelFirstd(keys=["image"], channel_dim="no_channel"),
    NormalizeIntensityd(keys="image"),
    Orientationd(keys=["image"], axcodes="RAS"),
    GaussianSmoothd(
        keys=["image"],      # 변환을 적용할 키
        sigma=[1.0, 1.0, 1.0]  # 각 축(x, y, z)의 시그마 값
        ),
    ])

Config file written to ./kaggle/working/copick.config
file length: 7


: 

: 

In [None]:
img_size = 96
img_depth = img_size
n_classes = 7 

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pretrain_path = "./model_checkpoints/SwinUNETR96_96_lr0.001_lambda0.52_batch2/best_model.pt"
model = SwinUNETR(
    img_size=(img_depth, img_size, img_size),
    in_channels=1,
    out_channels=n_classes,
    feature_size=48,
    use_checkpoint=True,
).to(device)
# Pretrained weights 불러오기
checkpoint = torch.load(pretrain_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])


  checkpoint = torch.load(pretrain_path, map_location=device)


<All keys matched successfully>

: 

: 

In [None]:
val_loss = validate_one_epoch(
            model=model, 
            val_loader=val_loader, 
            criterion=criterion, 
            device=device, 
            epoch=1, 
            calculate_dice_interval=0
        )

Validation:   0%|          | 0/4 [00:03<?, ?it/s, loss=0.764]


ZeroDivisionError: integer modulo by zero

: 

: 

In [None]:
import torch
import numpy as np
from scipy.ndimage import label, center_of_mass
import pandas as pd
from tqdm import tqdm
from monai.data import CacheDataset, DataLoader
from monai.transforms import Compose, NormalizeIntensity
import cc3d

def dict_to_df(coord_dict, experiment_name):
    all_coords = []
    all_labels = []
    
    for label, coords in coord_dict.items():
        all_coords.append(coords)
        all_labels.extend([label] * len(coords))
    
    all_coords = np.vstack(all_coords)
    df = pd.DataFrame({
        'experiment': experiment_name,
        'particle_type': all_labels,
        'x': all_coords[:, 0],
        'y': all_coords[:, 1],
        'z': all_coords[:, 2]
    })
    return df

id_to_name = {1: "apo-ferritin", 
              2: "beta-amylase",
              3: "beta-galactosidase", 
              4: "ribosome", 
              5: "thyroglobulin", 
              6: "virus-like-particle"}
BLOB_THRESHOLD = 200
CERTAINTY_THRESHOLD = 0.05

classes = [1, 2, 3, 4, 5, 6]

model.eval()
with torch.no_grad():
    location_dfs = []  # DataFrame 리스트로 초기화
    
    for vol_idx, run in enumerate(preprocessor.root.runs):
        print(f"Processing volume {vol_idx + 1}/{len(preprocessor.root.runs)}")
        tomogram = preprocessor.processing(run=run, task="task")
        task_files = [{"image": tomogram}]
        task_ds = CacheDataset(data=task_files, transform=non_random_transforms)
        task_loader = DataLoader(task_ds, batch_size=1, num_workers=0)
        
        for task_data in task_loader:
            images = task_data['image'].to("cuda")
            outputs = sliding_window_inference(
                inputs=images,
                roi_size=(96, 96, 96),  # ROI 크기
                sw_batch_size=4,
                predictor=model.forward,
                overlap=0.1,
                sw_device="cuda",
                device="cpu",
                buffer_steps=1,
                buffer_dim=-1
            )
            outputs = outputs.argmax(dim=1).squeeze(0).cpu().numpy()  # 클래스 채널 예측
            location = {}  # 좌표 저장용 딕셔너리
            for c in classes:
                cc = cc3d.connected_components(outputs == c)  # cc3d 라벨링
                stats = cc3d.statistics(cc)
                zyx = stats['centroids'][1:] * 10.012444  # 스케일 변환
                zyx_large = zyx[stats['voxel_counts'][1:] > BLOB_THRESHOLD]  # 크기 필터링
                xyz = np.ascontiguousarray(zyx_large[:, ::-1])  # 좌표 스왑 (z, y, x -> x, y, z)

                location[id_to_name[c]] = xyz  # ID 이름 매칭 저장

            # 데이터프레임 변환
            df = dict_to_df(location, run.name)
            location_dfs.append(df)  # 리스트에 추가
        
        # if vol_idx == 2:
        #     break
    
    # DataFrame 병합
    final_df = pd.concat(location_dfs, ignore_index=True)
    
    # ID 추가 및 CSV 저장
    final_df.insert(loc=0, column='id', value=np.arange(len(final_df)))
    final_df.to_csv("submission.csv", index=False)
    print("Submission saved to: submission.csv")


Processing volume 1/7


Loading dataset: 100%|██████████| 1/1 [00:01<00:00,  1.94s/it]


Processing volume 2/7


Loading dataset: 100%|██████████| 1/1 [00:01<00:00,  1.89s/it]


Processing volume 3/7


Loading dataset: 100%|██████████| 1/1 [00:01<00:00,  1.79s/it]


Submission saved to: submission.csv


: 

: 