In [1]:
# Model comparison
from pathlib import Path
import sys

sys.path.append(str(Path(sys.path[0]).parent))

from isegm.model.is_hrnet_model import HRNetModel

def params_hrnet_18s(**kwargs):
    model = HRNetModel(width=18, ocr_width=48, small=True, with_aux_output=True, use_rgb_conv=True, 
                       use_leaky_relu=True, use_disks=True, norm_radius=5, with_prev_mask=True)
    return model


def params_hrnet_18(**kwargs):
    model = HRNetModel(width=18, ocr_width=64, with_aux_output=True, use_rgb_conv=True, use_leaky_relu=True,
                       use_disks=True, norm_radius=5, with_prev_mask=True)
    return model


def params_hrnet_32(**kwargs):
    model = HRNetModel(width=32, ocr_width=128, with_aux_output=True, use_disks=True, use_rgb_conv=True, 
                       use_leaky_relu=True, norm_radius=5, with_prev_mask=True)
    return model

In [2]:
import torch
from thop import profile
from thop import clever_format


input = torch.randn(1, 4, 400, 400)
point = torch.randn(1, 2, 3)

models = params_hrnet_18s(), params_hrnet_18(), params_hrnet_32()
for model in models:
    model.eval()
    macs, params = profile(model, inputs=(input, point))
    gflops, params = clever_format([macs*2, params], "%.3f")

    print(gflops, params)


[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_relu() for <class 'torch.nn.modules.activation.LeakyReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


17.938G 4.220M
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_relu() for <class 'torch.nn.modules.activation.LeakyReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
30.985G 10.032M
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_relu() for <class 'torch.nn.modules.activation.LeakyReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
83.118G

In [3]:
# Analysis for Swin-B and Swin-L
from pathlib import Path
import sys

sys.path.append(str(Path(sys.path[0]).parent))

from isegm.model.modeling.transformer_helper.cross_entropy_loss import CrossEntropyLoss
from isegm.model.is_swinformer_model import SwinformerModel

def params_swin_b(**kwargs):
    backbone=dict(in_chans=3, in_coord_chans=3, embed_dim=128, depths=[2, 2, 18, 2], num_heads=[4, 8, 16, 32],
        window_size=12, ape=False, drop_path_rate=0.3, patch_norm=True, use_checkpoint=False,)

    head = dict(in_channels=[128, 256, 512, 1024], in_index=[0, 1, 2, 3], channels=256, dropout_ratio=0.1,
        num_classes=1, loss_decode=CrossEntropyLoss(), align_corners=False,)

    model = SwinformerModel(backbone_params=backbone, head_params=head, use_naive_concat=False, use_rgb_conv=False,
        use_deep_fusion=True, use_disks=True,  norm_radius=5, with_prev_mask=True,)

    return model 


def params_swin_l(**kwargs):
    backbone=dict(in_chans=3, in_coord_chans=3, embed_dim=192, depths=[2, 2, 18, 2], num_heads=[6, 12, 24, 48],
        window_size=12, ape=False, drop_path_rate=0.3, patch_norm=True, use_checkpoint=False,)

    head = dict(in_channels=[192, 384, 768, 1536], in_index=[0, 1, 2, 3], channels=256, dropout_ratio=0.1,
        num_classes=1, loss_decode=CrossEntropyLoss(), align_corners=False,)

    model = SwinformerModel(backbone_params=backbone, head_params=head, use_naive_concat=False,
        use_rgb_conv=False, use_deep_fusion=True, use_disks=True,  norm_radius=5, with_prev_mask=True,)

    return model 


import torch
from thop import profile
from thop import clever_format


input = torch.randn(1, 4, 400, 400)
point = torch.randn(1, 2, 3)

models = params_swin_b(), params_swin_l()
for model in models:
    model.eval()
    macs, params = profile(model, inputs=(input, point))
    gflops, params = clever_format([macs*2, params], "%.3f")

    print(gflops, params)



TypeError: __init__() got an unexpected keyword argument 'use_naive_concat'