No segmentation result after single-class sample training, boundary box support only #23

meowmeowxi · 2021-11-29T09:34:26Z

Hello, I build a new single-class dataset, training based on r101-refinemask-2x, and the modified configs are as follows.

However, after 93 epochs, I was stuck in some trouble. On the one hand, I found the detection results had no segmentation mask, and on the other hand, loss_instance was really difficult to converge. For example, {"mode": "train", "epoch": 94, "iter": 10, "lr": 3e-05, "time": 0.57583, "data_time": 0.2215, "memory": 5419, "loss_rpn_cls": 0.00174, "loss_rpn_bbox": 0.00334, "loss_cls": 0.01584, "acc": 99.6875, "loss_bbox": 0.03414, "loss_instance": 0.26567, "loss_semantic": 0.00546, "loss": 0.3262, "grad_norm": 2.08605}

I have two questions now:
1. Does refineMask support instance segmentation of elongated objects, or is there something wrong with my configuration file?
2. Any recommended epochs of training?
Thanks for your time.

model = dict(
type='MaskRCNN',
pretrained='torchvision://resnet101',
backbone=dict(
type='ResNet',
depth=101,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0.0, 0.0, 0.0, 0.0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
roi_head=dict(
type='RefineRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=1,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0.0, 0.0, 0.0, 0.0],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=2.0),
loss_bbox=dict(type='L1Loss', loss_weight=2.0)),
mask_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
mask_head=dict(
type='RefineMaskHead',
num_convs_instance=2,
num_convs_semantic=4,
conv_in_channels_instance=256,
conv_in_channels_semantic=256,
conv_kernel_size_instance=3,
conv_kernel_size_semantic=3,
conv_out_channels_instance=256,
conv_out_channels_semantic=256,
conv_cfg=None,
norm_cfg=None,
dilations=[1, 3, 5],
semantic_out_stride=4,
mask_use_sigmoid=True,
stage_num_classes=[1, 1, 1, 1],
stage_sup_size=[14, 28, 56, 112],
upsample_cfg=dict(type='bilinear', scale_factor=2),
loss_cfg=dict(
type='RefineCrossEntropyLoss',
stage_instance_loss_weight=[0.25, 0.5, 0.75, 1.0],
semantic_loss_weight=1.0,
boundary_width=2,
start_stage=1))))
train_cfg = dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=-1,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False))
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.5),
max_per_img=100,
mask_thr_binary=0.5))
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile', to_float32=True),
dict(
type='LoadAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='Resize', img_scale=(1024, 1024), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'])
]
test_pipeline = [
dict(type='LoadImageFromFile', to_float32=True),
dict(
type='MultiScaleFlipAug',
img_scale=(1024, 1024),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
])
]
data_root = '../coco'
data = dict(
samples_per_gpu=1,
workers_per_gpu=2,
train=dict(
type='CocoDataset',
ann_file='annotations/instances_train2017.json',
img_prefix='train2017',
pipeline=[
dict(type='LoadImageFromFile', to_float32=True),
dict(
type='LoadAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='Resize', img_scale=(1024, 1024), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(
type='Collect',
keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'])
],
data_root='../coco',
classes=('street', )),
val=dict(
type='CocoDataset',
ann_file='annotations/instances_train2017.json',
img_prefix='train2017',
pipeline=[
dict(type='LoadImageFromFile', to_float32=True),
dict(
type='MultiScaleFlipAug',
img_scale=(1024, 1024),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
])
],
data_root='../coco',
classes=('street', )),
test=dict(
type='CocoDataset',
ann_file='annotations/instances_train2017.json',
img_prefix='train2017',
pipeline=[
dict(type='LoadImageFromFile', to_float32=True),
dict(
type='MultiScaleFlipAug',
img_scale=(1024, 1024),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
])
],
data_root='../coco',
classes=('street', )))
evaluation = dict(metric=['bbox', 'segm'], classwise=True, interval=12)
optimizer = dict(type='SGD', lr=0.003, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
total_epochs = 1000
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[16, 22])
checkpoint_config = dict(interval=1)
log_config = dict(interval=10, hooks=[dict(type='TextLoggerHook')])
dist_params = dict(backend='nccl')
log_level = 'INFO'
workflow = [('train', 1)]
gpu_ids = range(0, 1)
work_dir = 'work_dirs/r101-refinemask-2x/street'
load_from = None
resume_from = 'work_dirs/r101-refinemask-2x/street/latest.pth'
classes = ('street', )

zhanggang001 · 2022-06-29T07:20:27Z

I am very sorry to forget to reply to you. Hope you have solved this problem.

zhanggang001 closed this as completed Jun 29, 2022

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

No segmentation result after single-class sample training, boundary box support only #23

No segmentation result after single-class sample training, boundary box support only #23

meowmeowxi commented Nov 29, 2021

zhanggang001 commented Jun 29, 2022

No segmentation result after single-class sample training, boundary box support only #23

No segmentation result after single-class sample training, boundary box support only #23

Comments

meowmeowxi commented Nov 29, 2021

zhanggang001 commented Jun 29, 2022