In [1]:
import torch
print(torch.version.cuda)


12.4


In [1]:
from thop import profile
import torch
from torch.profiler import ProfilerActivity
from torch.profiler import profile as profilee
    

def print_model_summary(model, input_size):
    input_tensor = torch.randn(input_size)
    device = next(model.parameters()).device
    input_tensor = input_tensor.to(device)
    flops, params = profile(model, inputs=(input_tensor,))

    print(f"Model: {model.__class__.__name__}")
    print(f"FLOPs: {flops:,}, GFLOPs: {flops / 1e9:.2f}")
    print(f"Parameters: {params:,}")
    print("-" * 50)

def profile_model(model, input_size, log_dir='./log'):

    input_tensor = torch.randn(input_size)
    device = next(model.parameters()).device
    input_tensor = input_tensor.to(device)
    # 프로파일링
    with profilee(
        activities=[
            ProfilerActivity.CPU, 
            ProfilerActivity.CUDA,
        ],
        on_trace_ready=torch.profiler.tensorboard_trace_handler(log_dir),  # TensorBoard 연동
        record_shapes=True,
        with_stack=True
    ) as prof:
        model(input_tensor)

    # 프로파일링 결과 출력
    print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=10))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
x = (1, 1, 96, 96, 96)

In [2]:
from src.models import *

enc_channels = (32, 64, 128, 256)
enc_strides = (2, 2, 2)
num_layers_enc = (1, 1, 1, 1)

core_channels = 64
dec_channels = (128, 64, 32)
dec_strides = (2, 2, 2)
num_layers_dec = (1, 1, 1)

# 디코더간 스킵 예시:
#   디코더0: ("enc",2), ("dec", ?) -- 가능하지만 보통 dec_? < dec_0
#   디코더1: ("enc",1), ("dec",0)
#   디코더2: ("enc",0), ("dec",1)
# 반드시 "dec", j => j < i 여야함
skip_map = {
0: [("enc", 2)],       # 디코더0 => 인코더2
1: [("enc", 3), ("enc", 1)],  # 디코더1 => 인코더1 + 디코더0
2: [("enc", 3), ("dec", 0), ("enc", 0)]   # 디코더2 => 인코더0 + 디코더1
}


net = FlexibleUNet(
    spatial_dims=3,
    in_channels=1,
    out_channels=7,
    encoder_channels=enc_channels,
    encoder_strides=enc_strides,
    decoder_channels=dec_channels,
    decoder_strides=dec_strides,
    num_layers_encoder=num_layers_enc,
    num_layers_decoder=num_layers_dec,
    skip_connections=skip_map,
    kernel_size=3,
    up_kernel_size=3,
    dropout=0.0,
    bias=True,
)

print(net)
x = torch.randn(1, 1, 96, 96, 96)
with torch.no_grad():
    y = net(x)
print("Output shape:", y.shape)

print_model_summary(net, x.shape)
profile_model(net, x.shape)

FlexibleUNet(
  (encoder_blocks): ModuleList(
    (0): SingleEncoderBlock(
      (stack): Sequential(
        (0): Convolution(
          (conv): Conv3d(1, 32, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1))
          (adn): ADN(
            (N): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (D): Dropout(p=0.0, inplace=False)
            (A): LeakyReLU(negative_slope=0.01)
          )
        )
      )
    )
    (1): SingleEncoderBlock(
      (stack): Sequential(
        (0): Convolution(
          (conv): Conv3d(32, 64, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1))
          (adn): ADN(
            (N): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (D): Dropout(p=0.0, inplace=False)
            (A): LeakyReLU(negative_slope=0.01)
          )
        )
      )
    )
    (2): SingleEncoderBlock(
      (stack): Sequential(
        (0): Convolution(
          (conv): Conv3

  warn("CUDA is not available, disabling CUDA profiling")
STAGE:2025-01-18 21:43:01 86212:2197662 ActivityProfilerController.cpp:314] Completed Stage: Warm Up


--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                            Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                        aten::to         0.00%       0.000us         0.00%       0.000us       0.000us             7  
                    aten::conv3d         0.01%      41.000us        16.59%      51.522ms       3.963ms            13  
               aten::convolution         0.06%     171.000us        81.83%     254.076ms      15.880ms            16  
              aten::_convolution        -0.39%   -1203.000us        81.78%     253.905ms      15.869ms            16  
               aten::slow_conv3d         0.43%       1.344ms        16.51%      51.273ms       3.944ms            13  
       aten::slow_conv3d_forward        13.71%  

STAGE:2025-01-18 21:43:02 86212:2197662 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-01-18 21:43:02 86212:2197662 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
x = (1, 128, 18, 18, 18)
feature = 256

conv3d = nn.Conv3d(1, feature, 3, padding=1, bias=False)

class conv3d_block(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, bias):
        super(conv3d_block, self).__init__()
        self.d3conv = nn.Conv3d(in_channels, out_channels, 3, padding=1, bias=False)
                                    
    def forward(self, x):
        x = self.d3conv(x)
        return x

class conv3d_dp_block(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, bias):
        super(conv3d_dp_block, self).__init__()
        self.depthwise_conv = nn.Conv3d(in_channels, in_channels, kernel_size, stride, padding,
                                      groups=in_channels, bias=bias)
        self.pointwise_conv = nn.Conv3d(in_channels, out_channels, 1, 1, 0, bias=bias)
    
    def forward(self, x):
        depthwise_conv = self.depthwise_conv(x)
        
        x = self.pointwise_conv(depthwise_conv)
        return x 
    
conv3d_block = conv3d_block(128, feature, 3, 2, 1, False)
conv3d_dp_block = conv3d_dp_block(128, feature, 3, 2, 1, False)

print_model_summary(conv3d_block, x)
print_model_summary(conv3d_dp_block, x)
profile_model(conv3d_block, x, './log')
profile_model(conv3d_dp_block, x, './log')

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv3d'>.
Model: conv3d_block
FLOPs: 5,159,780,352.0, GFLOPs: 5.16
Parameters: 884,736.0
--------------------------------------------------
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv3d'>.
Model: conv3d_dp_block
FLOPs: 26,407,296.0, GFLOPs: 0.03
Parameters: 36,224.0
--------------------------------------------------
----------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                        Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
----------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                aten::conv3d         0.25%      38.000us       100.00%      1

In [5]:
from src.models import UNet_CBAM
model = UNet_CBAM(
    spatial_dims=3,
    in_channels=1,
    out_channels=7,
    channels=[32, 64, 128, 256],
    strides=(2,2,2),

).to(device)
print_model_summary(model, x)
profile_model(model, x, './log')

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv3d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.instancenorm.InstanceNorm3d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[INFO] Register count_prelu() for <class 'torch.nn.modules.activation.PReLU'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.ConvTranspose3d'>.
Model: UNet_CBAM
FLOPs: 34,904,951,936.0, GFLOPs: 34.90
Parameters: 1,965,635.0
--------------------------------------------------
-------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                 Name    Self 

In [6]:
from src.models import UNet
model = UNet(
    spatial_dims=3,
    in_channels=1,
    out_channels=7,
    channels=[32, 64, 128, 256],
    strides=(2,2,2),

).to(device)
print_model_summary(model, x)
profile_model(model, x, './log')

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv3d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.instancenorm.InstanceNorm3d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[INFO] Register count_prelu() for <class 'torch.nn.modules.activation.PReLU'>.
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.ConvTranspose3d'>.
Model: UNet
FLOPs: 34,904,825,856.0, GFLOPs: 34.90
Parameters: 1,948,909.0
--------------------------------------------------
-------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                 Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
-------------------------------------  ------------  ------------  ------------  ------------  -------



In [3]:
from src.models import  D_LKA_Net
from src.models.deformer_lka_blocks import TransformerBlock

model = D_LKA_Net(
    in_channels=x[1],
    out_channels=7,
    
    ).to(device)
# Print summaries
print("D_LKA_Net Summary:")
print_model_summary(model, x)

Using transformerblock: <class 'src.models.deformer_lka_blocks.TransformerBlock_3D_single_deform_LKA'>
Using LKA Attention with one deformable layer
Using LKA Attention with one deformable layer
Using LKA Attention with one deformable layer
Using LKA Attention with one deformable layer
Using LKA Attention with one deformable layer
Using LKA Attention with one deformable layer
Using LKA Attention with one deformable layer
Using LKA Attention with one deformable layer
Using LKA Attention with one deformable layer
Using LKA Attention with one deformable layer
Using LKA Attention with one deformable layer
Using LKA Attention with one deformable layer
Using skip connection in decoder: True
Using LKA Attention with one deformable layer
Using LKA Attention with one deformable layer
Using LKA Attention with one deformable layer
Using skip connection in decoder: True
Using LKA Attention with one deformable layer
Using LKA Attention with one deformable layer
Using LKA Attention with one deformab

In [1]:
import torch
print(torch.cuda.get_device_name(0))  # GPU 이름 출력


NVIDIA GeForce RTX 4070 Ti


In [3]:
from src.models import MiTUnet
# (7, 1, 3))
x = (1, 1, 96, 96, 96)
model = MiTUnet(
    img_size=(96, 96, 96),
    feature_size = 16,
    heads=(1, 2, 2, 4, 8),
    ff_expansion=(2, 8, 8, 4, 4),
    reduction_ratio=(16, 8, 4, 2, 1),
    num_layers=(1,1,2,2,2),
    channels=1,
    stage_kernel_stride_pad = ((3,1,1), (3, 2, 1), (3, 2, 1), (3, 2, 1), (3, 2, 1)),
    
    spatial_dims=3,
    out_channels=7,
    norm_name="instance",
    act_name = ("leakyrelu ", {"inplace": True, "negative_slope": 0.01}),
    n=2,
)

# Print summaries
print("SwinCSPUNET3Plus Summary:")
print_model_summary(model, x)

SwinCSPUNET3Plus Summary:
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv3d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.ConvTranspose3d'>.
Model: MiTUnet
FLOPs: 62,496,175,104.0, GFLOPs: 62.50
Parameters: 12,031,811.0
--------------------------------------------------


In [3]:
from src.models import MiTCSPUnet
# (7, 1, 3))
x = (1, 1, 96, 96, 96)
model = MiTCSPUnet(
    img_size=(96, 96, 96),
    feature_size = 16,
    heads=(1, 2, 2, 4, 8),
    ff_expansion=(2, 8, 8, 4, 4),
    reduction_ratio=(16, 8, 4, 2, 1),
    num_layers=(1,1,2,2,2),
    channels=1,
    stage_kernel_stride_pad = ((3,1,1), (3, 2, 1), (3, 2, 1), (3, 2, 1), (3, 2, 1)),
    
    spatial_dims=3,
    out_channels=7,
    norm_name="instance",
    act_name = ("leakyrelu ", {"inplace": True, "negative_slope": 0.01}),
    n=2,
)

# Print summaries
print("SwinCSPUNET3Plus Summary:")
print_model_summary(model, x)

SwinCSPUNET3Plus Summary:
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv3d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.ConvTranspose3d'>.
Model: MiTCSPUnet
FLOPs: 80,926,995,456.0, GFLOPs: 80.93
Parameters: 15,698,243.0
--------------------------------------------------


In [None]:
# from src.models import CSPBlock, UnetResBlock


# in_channels = 64
# out_channels = 128
# imgsz  = 96
# block = CSPBlock(
#         spatial_dims=3,
#         in_channels=in_channels,  # 입력 채널 수정
#         out_channels=out_channels,
#         kernel_size=3,
#         stride=2,
#         norm_name="batch",
#         act_name=("leakyrelu", {"inplace": True, "negative_slope": 0.01}),
#         dropout=None,
#         split_ratio=0.5,
#         n=2
#     )
# x = (1, in_channels, imgsz, imgsz, imgsz)

# # Print summaries
# print("SwinTransformer Summary:")
# print_model_summary(block, x)

# block = UnetResBlock(
#         spatial_dims=3,
#         in_channels=in_channels,
#         out_channels=out_channels,
#         kernel_size=3,
#         stride=2,
#         norm_name="batch",
#         act_name=("leakyrelu", {"inplace": True, "negative_slope": 0.01}),
#         dropout=None,
     
#     )

# # Print summaries
# print("UnetResBlock Summary:")
# print_model_summary(block, x)

SwinTransformer Summary:


NameError: name 'print_model_summary' is not defined

In [6]:
from src.models.swincspunetr_unet import SwinCSPUNETR_unet
x = (1, 1, 96, 96, 96)
swin_unetr = SwinCSPUNETR_unet(
    img_size=(96, 96, 96),
    in_channels=1,
    out_channels=7,
    feature_size=48,
    depths=(2, 2, 2, 2),
    num_heads=(3, 6, 12, 24),
    norm_name="instance",
    drop_rate=0.0,
    attn_drop_rate=0.0,
    dropout_path_rate=0.0,
    normalize=True,
    use_checkpoint=True,
    spatial_dims=3,
    downsample="merging",
    use_v2=True,
    n=2,
)

# Print summaries
print("SwinCSPUNETR_unet Summary:")
print_model_summary(swin_unetr, x)

SwinCSPUNETR_mix Summary:
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv3d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.normalization.LayerNorm'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register count_softmax() for <class 'torch.nn.modules.activation.Softmax'>.
[INFO] Register count_relu() for <class 'torch.nn.modules.activation.LeakyReLU'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.instancenorm.InstanceNorm3d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.ConvTranspose3d'>.
Model: SwinCSPUNETR_unet
FLOPs: 290,186,906,136.0, GFLOPs: 290.19
Parameters: 43,524,919.0
--------------------------------------------------


In [2]:
from src.models.swincspunetr import SwinCSPUNETR
x = (1, 1, 96, 96, 96)
swin_unetr = SwinCSPUNETR(
    img_size=(96, 96, 96),
    in_channels=1,
    out_channels=7,
    feature_size=48,
    depths=(2, 2, 2, 2),
    num_heads=(3, 6, 12, 24),
    norm_name="instance",
    drop_rate=0.0,
    attn_drop_rate=0.0,
    dropout_path_rate=0.0,
    normalize=True,
    use_checkpoint=True,
    spatial_dims=3,
    downsample="merging",
    use_v2=True,
    n=2,
)

# Print summaries
print("SwinCSPUNETR Summary:")
print_model_summary(swin_unetr, x)



SwinCSPUNETR Summary:
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv3d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.normalization.LayerNorm'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register count_softmax() for <class 'torch.nn.modules.activation.Softmax'>.
[INFO] Register count_relu() for <class 'torch.nn.modules.activation.LeakyReLU'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.instancenorm.InstanceNorm3d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.ConvTranspose3d'>.
Model: SwinCSPUNETR
FLOPs: 289,518,045,720.0, GFLOPs: 289.52
Parameters: 62,104,375.0
--------------------------------------------------


In [5]:


# Example model
import torch.nn as nn
from monai.networks.nets import SwinUNETR, SwinTransformer

# SwinTransformer 테스트
swin_transformer = SwinTransformer(
    in_chans=1,
    embed_dim=48,
    window_size=(7, 7, 7),
    patch_size=(2, 2, 2),
    depths=(2, 2, 2, 2),
    num_heads=(3, 6, 12, 24),
    mlp_ratio=4.0,
    qkv_bias=True,
    drop_rate=0.0,
    attn_drop_rate=0.0,
    drop_path_rate=0.0,
    norm_layer=nn.LayerNorm,
    use_checkpoint=True,
    spatial_dims=3,
    downsample="merging",
    use_v2=True
)

# 전체 SwinUNETR 모델
swin_unetr = SwinUNETR(
    img_size=(96, 96, 96),
    in_channels=1,
    out_channels=7,
    feature_size=48,
    depths=(2, 2, 2, 2),
    num_heads=(3, 6, 12, 24),
    norm_name="instance",
    drop_rate=0.0,
    attn_drop_rate=0.0,
    dropout_path_rate=0.0,
    normalize=True,
    use_checkpoint=True,
    spatial_dims=3,
    downsample="merging",
    use_v2=True
)

# Input sizes
swin_transformer_input = (1, 1, 96, 96, 96)
swin_unetr_input = (1, 1, 96, 96, 96)

# # Print summaries
# print("SwinTransformer Summary:")
# print_model_summary(swin_transformer, swin_transformer_input)

print("\nComplete SwinUNETR Summary:")
print_model_summary(swin_unetr, swin_unetr_input)




Complete SwinUNETR Summary:
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv3d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.normalization.LayerNorm'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register count_softmax() for <class 'torch.nn.modules.activation.Softmax'>.
[INFO] Register count_relu() for <class 'torch.nn.modules.activation.LeakyReLU'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.instancenorm.InstanceNorm3d'>.
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.ConvTranspose3d'>.
torch.Size([1, 48, 48, 48, 48])
torch.Size([1, 96, 24, 24, 24])
torch.Size([1, 192, 12, 12, 12])
torch.Size([1, 384, 6, 6, 6])
torch.Size([1, 768, 3, 3, 3])
enc0: torch.Size([1, 48, 96, 96, 96])
enc1 torch.Size([1, 48, 48, 48, 48])
torch.Size([1, 96, 24, 24, 24])
torch.Size([1, 192, 12, 12, 12])
torch.

torch.Size([1, 48, 48, 48, 48])
torch.Size([1, 96, 24, 24, 24])
torch.Size([1, 192, 12, 12, 12])
torch.Size([1, 384, 6, 6, 6])
torch.Size([1, 768, 3, 3, 3])
enc0: torch.Size([1, 48, 96, 96, 96])
enc1 torch.Size([1, 48, 48, 48, 48])
torch.Size([1, 96, 24, 24, 24])
torch.Size([1, 192, 12, 12, 12])
torch.Size([1, 768, 3, 3, 3])
Model: SwinUNETR
FLOPs: 329,543,087,640.0, GFLOPs: 329.54
Parameters: 61,989,223.0