In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import copy
import math

# from torch.nn.init import constant_, xavier_uniform_

# from ultralytics.nn.modules.block import C3k2, SPPF, C2PSA
# from ultralytics.nn.modules.conv import Conv
# from ultralytics.nn.modules.head import Detect

x = torch.randn(8, 1, 630, 630)

## YOLO Module 불러오기

In [2]:
import torch
import torch.nn as nn
#YOLO's nn module
class SPPF(nn.Module):
    """Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher."""

    def __init__(self, c1, c2, k=5):
        """
        Initializes the SPPF layer with given input/output channels and kernel size.

        This module is equivalent to SPP(k=(5, 9, 13)).
        """
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * 4, c2, 1, 1)
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        """Forward pass through Ghost Convolution block."""
        y = [self.cv1(x)]
        y.extend(self.m(y[-1]) for _ in range(3))
        return self.cv2(torch.cat(y, 1))
    
def autopad(k, p=None, d=1):  # kernel, padding, dilation
    """Pad to 'same' shape outputs."""
    if d > 1:
        k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k]  # actual kernel-size
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p

class Conv(nn.Module):
    """Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)."""

    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
        """Initialize Conv layer with given arguments including activation."""
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()

    def forward(self, x):
        """Apply convolution, batch normalization and activation to input tensor."""
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        """Perform transposed convolution of 2D data."""
        return self.act(self.conv(x))
    
class DWConv(Conv):
    """Depth-wise convolution."""

    def __init__(self, c1, c2, k=1, s=1, d=1, act=True):  # ch_in, ch_out, kernel, stride, dilation, activation
        """Initialize Depth-wise convolution with given parameters."""
        super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)


class DWConvTranspose2d(nn.ConvTranspose2d):
    """Depth-wise transpose convolution."""

    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):  # ch_in, ch_out, kernel, stride, padding, padding_out
        """Initialize DWConvTranspose2d class with given parameters."""
        super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))


class ConvTranspose(nn.Module):
    """Convolution transpose 2d layer."""

    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=2, s=2, p=0, bn=True, act=True):
        """Initialize ConvTranspose2d layer with batch normalization and activation function."""
        super().__init__()
        self.conv_transpose = nn.ConvTranspose2d(c1, c2, k, s, p, bias=not bn)
        self.bn = nn.BatchNorm2d(c2) if bn else nn.Identity()
        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()

    def forward(self, x):
        """Applies transposed convolutions, batch normalization and activation to input."""
        return self.act(self.bn(self.conv_transpose(x)))

    def forward_fuse(self, x):
        """Applies activation and convolution transpose operation to input."""
        return self.act(self.conv_transpose(x))
    
class Bottleneck(nn.Module):
    """Standard bottleneck."""

    def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
        """Initializes a standard bottleneck module with optional shortcut connection and configurable parameters."""
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, k[0], 1)
        self.cv2 = Conv(c_, c2, k[1], 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Applies the YOLO FPN to input data."""
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
    
class Proto(nn.Module):
    """YOLOv8 mask Proto module for segmentation models."""

    def __init__(self, c1, c_=256, c2=32):
        """
        Initializes the YOLOv8 mask Proto module with specified number of protos and masks.

        Input arguments are ch_in, number of protos, number of masks.
        """
        super().__init__()
        self.cv1 = Conv(c1, c_, k=3)
        self.upsample = nn.ConvTranspose2d(c_, c_, 2, 2, 0, bias=True)  # nn.Upsample(scale_factor=2, mode='nearest')
        self.cv2 = Conv(c_, c_, k=3)
        self.cv3 = Conv(c_, c2)

    def forward(self, x):
        """Performs a forward pass through layers using an upsampled input image."""
        return self.cv3(self.cv2(self.upsample(self.cv1(x))))

    
class C2f(nn.Module):
    """Faster Implementation of CSP Bottleneck with 2 convolutions."""

    def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):
        """Initializes a CSP bottleneck with 2 convolutions and n Bottleneck blocks for faster processing."""
        super().__init__()
        self.c = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, 2 * self.c, 1, 1)
        self.cv2 = Conv((2 + n) * self.c, c2, 1)  # optional act=FReLU(c2)
        self.m = nn.ModuleList(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))

    def forward(self, x):
        """Forward pass through C2f layer."""
        y = list(self.cv1(x).chunk(2, 1))
        y.extend(m(y[-1]) for m in self.m)
        return self.cv2(torch.cat(y, 1))

    def forward_split(self, x):
        """Forward pass using split() instead of chunk()."""
        y = self.cv1(x).split((self.c, self.c), 1)
        y = [y[0], y[1]]
        y.extend(m(y[-1]) for m in self.m)
        return self.cv2(torch.cat(y, 1))

class C3(nn.Module):
    """CSP Bottleneck with 3 convolutions."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """Initialize the CSP Bottleneck with given channels, number, shortcut, groups, and expansion values."""
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)  # optional act=FReLU(c2)
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, k=((1, 1), (3, 3)), e=1.0) for _ in range(n)))

    def forward(self, x):
        """Forward pass through the CSP bottleneck with 2 convolutions."""
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
    
class C3k2(C2f):
    """Faster Implementation of CSP Bottleneck with 2 convolutions."""

    def __init__(self, c1, c2, n=1, c3k=False, e=0.5, g=1, shortcut=True):
        """Initializes the C3k2 module, a faster CSP Bottleneck with 2 convolutions and optional C3k blocks."""
        super().__init__(c1, c2, n, shortcut, g, e)
        self.m = nn.ModuleList(
            C3k(self.c, self.c, 2, shortcut, g) if c3k else Bottleneck(self.c, self.c, shortcut, g) for _ in range(n)
        )

class C3k(C3):
    """C3k is a CSP bottleneck module with customizable kernel sizes for feature extraction in neural networks."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, k=3):
        """Initializes the C3k module with specified channels, number of layers, and configurations."""
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)  # hidden channels
        # self.m = nn.Sequential(*(RepBottleneck(c_, c_, shortcut, g, k=(k, k), e=1.0) for _ in range(n)))
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, k=(k, k), e=1.0) for _ in range(n)))
        
class Attention(nn.Module):
    """
    Attention module that performs self-attention on the input tensor.

    Args:
        dim (int): The input tensor dimension.
        num_heads (int): The number of attention heads.
        attn_ratio (float): The ratio of the attention key dimension to the head dimension.

    Attributes:
        num_heads (int): The number of attention heads.
        head_dim (int): The dimension of each attention head.
        key_dim (int): The dimension of the attention key.
        scale (float): The scaling factor for the attention scores.
        qkv (Conv): Convolutional layer for computing the query, key, and value.
        proj (Conv): Convolutional layer for projecting the attended values.
        pe (Conv): Convolutional layer for positional encoding.
    """

    def __init__(self, dim, num_heads=8, attn_ratio=0.5):
        """Initializes multi-head attention module with query, key, and value convolutions and positional encoding."""
        super().__init__()
        self.num_heads = num_heads
        self.head_dim = dim // num_heads
        self.key_dim = int(self.head_dim * attn_ratio)
        self.scale = self.key_dim**-0.5
        nh_kd = self.key_dim * num_heads
        h = dim + nh_kd * 2
        self.qkv = Conv(dim, h, 1, act=False)
        self.proj = Conv(dim, dim, 1, act=False)
        self.pe = Conv(dim, dim, 3, 1, g=dim, act=False)

    def forward(self, x):
        """
        Forward pass of the Attention module.

        Args:
            x (torch.Tensor): The input tensor.

        Returns:
            (torch.Tensor): The output tensor after self-attention.
        """
        B, C, H, W = x.shape
        N = H * W
        qkv = self.qkv(x)
        q, k, v = qkv.view(B, self.num_heads, self.key_dim * 2 + self.head_dim, N).split(
            [self.key_dim, self.key_dim, self.head_dim], dim=2
        )

        attn = (q.transpose(-2, -1) @ k) * self.scale
        attn = attn.softmax(dim=-1)
        x = (v @ attn.transpose(-2, -1)).view(B, C, H, W) + self.pe(v.reshape(B, C, H, W))
        x = self.proj(x)
        return x
    
class PSABlock(nn.Module):
    """
    PSABlock class implementing a Position-Sensitive Attention block for neural networks.

    This class encapsulates the functionality for applying multi-head attention and feed-forward neural network layers
    with optional shortcut connections.

    Attributes:
        attn (Attention): Multi-head attention module.
        ffn (nn.Sequential): Feed-forward neural network module.
        add (bool): Flag indicating whether to add shortcut connections.

    Methods:
        forward: Performs a forward pass through the PSABlock, applying attention and feed-forward layers.

    Examples:
        Create a PSABlock and perform a forward pass
        >>> psablock = PSABlock(c=128, attn_ratio=0.5, num_heads=4, shortcut=True)
        >>> input_tensor = torch.randn(1, 128, 32, 32)
        >>> output_tensor = psablock(input_tensor)
    """

    def __init__(self, c, attn_ratio=0.5, num_heads=4, shortcut=True) -> None:
        """Initializes the PSABlock with attention and feed-forward layers for enhanced feature extraction."""
        super().__init__()

        self.attn = Attention(c, attn_ratio=attn_ratio, num_heads=num_heads)
        self.ffn = nn.Sequential(Conv(c, c * 2, 1), Conv(c * 2, c, 1, act=False))
        self.add = shortcut

    def forward(self, x):
        """Executes a forward pass through PSABlock, applying attention and feed-forward layers to the input tensor."""
        x = x + self.attn(x) if self.add else self.attn(x)
        x = x + self.ffn(x) if self.add else self.ffn(x)
        return x
    
class C2PSA(nn.Module):
    """
    C2PSA module with attention mechanism for enhanced feature extraction and processing.

    This module implements a convolutional block with attention mechanisms to enhance feature extraction and processing
    capabilities. It includes a series of PSABlock modules for self-attention and feed-forward operations.

    Attributes:
        c (int): Number of hidden channels.
        cv1 (Conv): 1x1 convolution layer to reduce the number of input channels to 2*c.
        cv2 (Conv): 1x1 convolution layer to reduce the number of output channels to c.
        m (nn.Sequential): Sequential container of PSABlock modules for attention and feed-forward operations.

    Methods:
        forward: Performs a forward pass through the C2PSA module, applying attention and feed-forward operations.

    Notes:
        This module essentially is the same as PSA module, but refactored to allow stacking more PSABlock modules.

    Examples:
        >>> c2psa = C2PSA(c1=256, c2=256, n=3, e=0.5)
        >>> input_tensor = torch.randn(1, 256, 64, 64)
        >>> output_tensor = c2psa(input_tensor)
    """

    def __init__(self, c1, c2, n=1, e=0.5):
        """Initializes the C2PSA module with specified input/output channels, number of layers, and expansion ratio."""
        super().__init__()
        assert c1 == c2
        self.c = int(c1 * e)
        self.cv1 = Conv(c1, 2 * self.c, 1, 1)
        self.cv2 = Conv(2 * self.c, c1, 1)

        self.m = nn.Sequential(*(PSABlock(self.c, attn_ratio=0.5, num_heads=self.c // 64) for _ in range(n)))

    def forward(self, x):
        """Processes the input tensor 'x' through a series of PSA blocks and returns the transformed tensor."""
        a, b = self.cv1(x).split((self.c, self.c), dim=1)
        b = self.m(b)
        return self.cv2(torch.cat((a, b), 1))
    
def dist2bbox(distance, anchor_points, xywh=True, dim=-1):
    """Transform distance(ltrb) to box(xywh or xyxy)."""
    lt, rb = distance.chunk(2, dim)
    x1y1 = anchor_points - lt
    x2y2 = anchor_points + rb
    if xywh:
        c_xy = (x1y1 + x2y2) / 2
        wh = x2y2 - x1y1
        return torch.cat((c_xy, wh), dim)  # xywh bbox
    return torch.cat((x1y1, x2y2), dim)  # xyxy bbox



## U-Net 구조기반 YOLO Module로 Segmentation 모델 짜기

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from monai.losses import TverskyLoss
from monai.metrics import DiceMetric

# Backbone 정의
import torch
import torch.nn as nn
# from ultralytics.nn.modules import *

class Backbone(nn.Module):
    def __init__(self, nc=1):
        super().__init__()
        # 인코더 (C3K2 blocks 추가)
        self.enc1 = nn.Sequential( 
            C2f(1, 32, 1), # 1, 630, 630 -> 64, 630, 630
            C3k2(32, 32, shortcut=False) # 64, 630, 630 -> 64, 630, 630
        )
        self.pool1 = nn.MaxPool2d(2) # 64, 630, 630 -> 64, 315, 315
        self.enc2 = nn.Sequential( 
            C2f(32, 64, 1), # 64, 315, 315 -> 128, 315, 315
            C3k2(64, 64, shortcut=False) # 128, 315, 315 -> 128, 315, 315
        )
        self.pool2 = nn.MaxPool2d(2) # 128, 315, 315 -> 128, 157, 157
        self.enc3 = nn.Sequential( 
            C2f(64, 128, 1), # 128, 157, 157 -> 256, 157, 157
            C3k2(128, 128, shortcut=False) # 256, 157, 157 -> 256, 157, 157
        )
        self.pool3 = nn.MaxPool2d(2) # 256, 157, 157 -> 256, 78, 78
        self.enc4 = nn.Sequential(
            C2f(128, 256, 1), # 256, 78, 78 -> 512, 78, 78
            C3k2(256, 256, shortcut=False) # 512, 78, 78 -> 512, 78, 78
        )
        self.pool4 = nn.MaxPool2d(2)
        
        # 브릿지 (SPPF + C2PSA)
        self.bridge = nn.Sequential(
            C2f(256, 512, 1), # 512, 39, 39 -> 1024, 39, 39
            SPPF(512, 512),  # Spatial Pyramid Pooling - Fast # 1024, 39, 39 -> 1024, 39, 39
            C2PSA(512, 512)  # CSP with Parallel Spatial Attention # 1024, 39, 39 -> 1024, 39, 39
        )
        
        # 디코더 (C3K2 blocks 추가)
        self.upconv4 = nn.ConvTranspose2d(512, 256, 2, stride=2) # 1024, 39, 39 -> 512, 78, 78
        self.dec4 = nn.Sequential(
            C2f(512, 256, 1), # 1024, 78, 78 -> 512, 78, 78
            C3k2(256, 256, shortcut=False) # 512, 78, 78 -> 512, 78, 78
        )
        self.upconv3 = nn.ConvTranspose2d(256, 128, 2, stride=2) # 512, 78, 78 -> 256, 157, 157
        self.dec3 = nn.Sequential(
            C2f(256, 128, 1), # 512, 157, 157 -> 256, 157, 157
            C3k2(128, 128, shortcut=False) # 256, 157, 157 -> 256, 157, 157
        )
        self.upconv2 = nn.ConvTranspose2d(128, 64, 2, stride=2) # 256, 157, 157 -> 128, 315, 315
        self.dec2 = nn.Sequential(
            C2f(128, 64, 1), # 256, 315, 315 -> 128, 315, 315
            C3k2(64, 64, shortcut=False) # 128, 315, 315 -> 128, 315, 315
        )
        self.upconv1 = nn.ConvTranspose2d(64, 32, 2, stride=2) # 128, 315, 315 -> 64, 630, 630
        self.dec1 = nn.Sequential(
            C2f(64, 32, 1), # 128, 630, 630 -> 64, 630, 630
            C3k2(32, 32, shortcut=False) # 64, 630, 630 -> 64, 630, 630
        )
        
        # self.out = nn.Conv2d(64, nc, 1)

    def center_crop(self, skip, x):
        """스킵 커넥션과 업샘플링된 특징 맵의 크기를 맞춤"""
        if x.shape[2:] != skip.shape[2:]: # 크기가 다를 경우
            x = F.interpolate(x, size=skip.shape[2:], mode='bilinear', align_corners=True) # 업샘플링
        return x

    def forward(self, x):
        # 인코더
        # x : 1, 630, 630
        e1 = self.enc1(x) # 64, 630, 630
        e2 = self.enc2(self.pool1(e1)) # 128, 315, 315
        e3 = self.enc3(self.pool2(e2)) # 256, 157, 157
        e4 = self.enc4(self.pool3(e3)) # 512, 78, 78
        
        # 브릿지
        b = self.bridge(self.pool4(e4)) # 1024, 39, 39
        
        # 디코더 (크기 맞추기 추가)
        x = self.upconv4(b) # 1024, 39, 39 -> 512, 78, 78
        x = self.center_crop(e4, x) # 512, 78, 78
        d4 = self.dec4(torch.cat([x, e4], 1)) # 512, 78, 78
        
        x = self.upconv3(d4) # 512, 78, 78 -> 256, 157, 157
        x = self.center_crop(e3, x) # 256, 157, 157
        d3 = self.dec3(torch.cat([x, e3], 1)) # 256, 157, 157
        
        x = self.upconv2(d3) # 256, 157, 157 -> 128, 315, 315
        x = self.center_crop(e2, x) # 128, 315, 315
        d2 = self.dec2(torch.cat([x, e2], 1)) # 128, 315, 315
        
        x = self.upconv1(d2) # 128, 315, 315 -> 64, 630, 630
        x = self.center_crop(e1, x) # 64, 630, 630
        d1 = self.dec1(torch.cat([x, e1], 1)) # 64, 630, 630
        
        return d1 # 64, 630, 630



class SegmentationHead(nn.Module):
    def __init__(self, in_channels=32, out_channels=6):
        super().__init__()
        
        # 다중 스케일 특징 추출
        self.aspp = nn.ModuleList([ # 64, 630, 630 -> 64, 630, 630
            # 1x1 convolution
            nn.Sequential( # 64, 630, 630 -> 64, 630, 630
                nn.Conv2d(in_channels, 32, 1),
                nn.BatchNorm2d(32),
                nn.ReLU()
            ),
            # 3x3 dilated convolutions
            # Output Size = [Input Size + 2*padding - dilation*(kernel_size - 1) - 1 / stride] + 1

            nn.Sequential( # 64, 630, 630 -> 64, 630, 630
                nn.Conv2d(in_channels, 32, 3, padding=6, dilation=6),
                nn.BatchNorm2d(32),
                nn.ReLU()
            ),
            nn.Sequential( # 64, 630, 630 -> 64, 630, 630
                nn.Conv2d(in_channels, 32, 3, padding=12, dilation=12),
                nn.BatchNorm2d(32),
                nn.ReLU()
            ),
            # Global context
            nn.Sequential( # 64, 630, 630 -> 64, 630, 630
                nn.AdaptiveAvgPool2d((630,630)),
                nn.Conv2d(in_channels, 32, 1),
                nn.BatchNorm2d(32),
                nn.ReLU()
            )
        ])
        
        # 특징 융합
        self.fusion = nn.Sequential( # 64, 630, 630 -> 64, 630, 630
            C3k2(32 * 4, 64, shortcut=False), # 64 * 4, 630, 630 -> 128, 630, 630
            C2f(64, 32, 1), # 128, 630, 630 -> 64, 630, 630
            SPPF(32, 32) # 64, 630, 630 -> 64, 630, 630
        )
        
        # 최종 예측
        self.final = nn.Sequential( # 64, 630, 630 -> 64, 630, 630
            nn.Conv2d(32, 16, 3, padding=1), # 64, 630, 630 -> 32, 630, 630
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Conv2d(16, out_channels, 1) # 32, 630, 630 -> 1, 630, 630
        )

    def forward(self, x):
        # ASPP 특징 추출
        aspp_features = []
        for aspp_module in self.aspp:
            if isinstance(aspp_module[-1], nn.ReLU):
                aspp_features.append(aspp_module(x))
            else:
                # Global context 처리
                global_feature = aspp_module(x)
                aspp_features.append(F.interpolate(
                    global_feature, 
                    size=x.shape[2:], 
                    mode='bilinear', 
                    align_corners=True
                ))
        
        # 특징 연결 및 융합
        fused = torch.cat(aspp_features, dim=1) # 64, 630, 630
        refined = self.fusion(fused) # 64, 630, 630
        
        # 최종 예측
        return self.final(refined) # 1, 630, 630
    
class CoordPredictionHead(nn.Module):
    def __init__(self, in_channels=64):
        super().__init__()
        self.coord_head = nn.Sequential(
            SPPF(in_channels, 32),       # 특징 피라미드 풀링
            C2PSA(32, 16),              # 공간 attention
            nn.Conv2d(16, 2, 1)         # x,y 좌표 예측
        )
    
    def forward(self, x):
        return self.coord_head(x)

# 전체 모델 정의
class ETModel(nn.Module):
    def __init__(self, backbone, detect_head):
        """
        Full model combining backbone and detection head.
        """
        super(ETModel, self).__init__()
        self.backbone = backbone
        self.detect_head = detect_head

    def forward(self, x):
        """
        Forward pass for the full model.
        """
        features = self.backbone(x)  # Extract features
        output = self.detect_head(features)  # Detection/segmentation output
        return output



  from .autonotebook import tqdm as notebook_tqdm


In [4]:


# 손실 함수 및 평가 지표 정의
loss_function = TverskyLoss(include_background=True, to_onehot_y=True, softmax=True)
dice_metric = DiceMetric(include_background=False, reduction="mean", ignore_empty=True)

# 모델 초기화
backbone = Backbone()
detect_head = CoordPredictionHead()
model = ETModel(backbone, detect_head)

# # 학습용 데이터 예시 for segmentation
# x = torch.randn(2, 1, 630, 630)  # Batch size 8, 1-channel ET images
# y = torch.randint(0, 6, (2, 630, 630))  # Multi-class segmentation labels

# 학습용 데이터 예시 for detection
x = torch.randn(2, 1, 630, 630)  # 입력 이미지
y = torch.randn(2, 2, 630, 630)  # x,y 좌표 레이블

# Forward Pass
# outputs = model(x)

from torchinfo import summary

model = model.to('cuda')
x = x.to('cuda')
y = y.to('cuda')
summary(model, input_size=(2, 1, 630, 630))



AssertionError: 

In [None]:
# import torch
# from ultralytics import YOLO
# from ultralytics.nn.tasks import BaseModel

# class CustomYOLOUNet(BaseModel):
#     def __init__(self):
#         super().__init__()
#         self.backbone = Backbone()
#         self.head = SegmentationHead()
#         self.model = ETModel(self.backbone, self.head)
        
#     def forward(self, x):
#         return self.model(x)
        
#     def _predict_once(self, batch):
#         # YOLO 예측 메서드 구현
#         return self.forward(batch['img'])
        
#     def get_state_dict(self):
#         return {
#             'model': self.model.state_dict(),
#             'backbone': self.backbone.state_dict(),
#             'head': self.head.state_dict()
#         }

# # 모델 초기화 및 저장
# model = CustomYOLOUNet()
# ckpt = {
#     'model': model.get_state_dict(),  # state_dict 형태로 저장
#     'epoch': -1,
#     'version': None,
#     'optimizer': None,
#     'model_args': {},
#     'train_args': {},
#     'task': 'segment',
#     'box': None,
#     'names': {0: 'apo-ferritin',
#             1: 'beta-amylase',
#             2: 'beta-galactosidase',
#             3: 'ribosome',
#             4: 'thyroglobulin',
#             5: 'virus-like-particle'}
# }

# torch.save(ckpt, 'custom_yolo_unet.pt')

# # YOLO로 로드
# yolo_model = YOLO('custom_yolo_unet.pt')
# result = yolo_model.train(
#     name="s_640_dropout025_",
#     data="./data.yaml",
#     epochs=300,
#     patience=10,
#     batch=8,
#     imgsz=630,
#     optimizer='AdamW',
#     lr0=0.001,
#     dropout=0.25,
#     exist_ok=True
# )

AttributeError: 'dict' object has no attribute 'to'

In [None]:
# # 손실 계산
# y_onehot = F.one_hot(y, num_classes=6).permute(0, 3, 1, 2).float()  # One-hot encode the target
# # 손실 계산
# loss = loss_function(outputs, y.unsqueeze(1))  # 타겟에 채널 차원 추가 (B, 1, H, W)

# # 평가 지표 계산
# dice_metric(outputs, y_onehot)
# dice_score = dice_metric.aggregate().item()
# dice_metric.reset()

# print("Loss:", loss.item())
# print("Dice Metric:", dice_score)


In [None]:

# # Optimizer (학습 루프에서 활용)
# optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# # 학습 루프 (간단한 예)
# for epoch in range(10):
#     model.train()
#     optimizer.zero_grad()

#     outputs = model(x)
#     loss = loss_function(outputs, y_onehot)
#     loss.backward()
#     optimizer.step()

#     print(f"Epoch {epoch + 1}, Loss: {loss.item()}")

#     model.eval()
#     with torch.no_grad():
#         dice_metric(outputs, y_onehot)
#         dice_score = dice_metric.aggregate().item()
#         dice_metric.reset()
#         print(f"Epoch {epoch + 1}, Dice Metric: {dice_score}")