Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

No segmentation result after single-class sample training, boundary box support only #23

Closed
meowmeowxi opened this issue Nov 29, 2021 · 1 comment

Comments

@meowmeowxi
Copy link

Hello, I build a new single-class dataset, training based on r101-refinemask-2x, and the modified configs are as follows.

However, after 93 epochs, I was stuck in some trouble. On the one hand, I found the detection results had no segmentation mask, and on the other hand, loss_instance was really difficult to converge. For example, {"mode": "train", "epoch": 94, "iter": 10, "lr": 3e-05, "time": 0.57583, "data_time": 0.2215, "memory": 5419, "loss_rpn_cls": 0.00174, "loss_rpn_bbox": 0.00334, "loss_cls": 0.01584, "acc": 99.6875, "loss_bbox": 0.03414, "loss_instance": 0.26567, "loss_semantic": 0.00546, "loss": 0.3262, "grad_norm": 2.08605}

I have two questions now:
1. Does refineMask support instance segmentation of elongated objects, or is there something wrong with my configuration file?
2. Any recommended epochs of training?
Thanks for your time.

model = dict(
type='MaskRCNN',
pretrained='torchvision://resnet101',
backbone=dict(
type='ResNet',
depth=101,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0.0, 0.0, 0.0, 0.0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
roi_head=dict(
type='RefineRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=1,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0.0, 0.0, 0.0, 0.0],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=2.0),
loss_bbox=dict(type='L1Loss', loss_weight=2.0)),
mask_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
mask_head=dict(
type='RefineMaskHead',
num_convs_instance=2,
num_convs_semantic=4,
conv_in_channels_instance=256,
conv_in_channels_semantic=256,
conv_kernel_size_instance=3,
conv_kernel_size_semantic=3,
conv_out_channels_instance=256,
conv_out_channels_semantic=256,
conv_cfg=None,
norm_cfg=None,
dilations=[1, 3, 5],
semantic_out_stride=4,
mask_use_sigmoid=True,
stage_num_classes=[1, 1, 1, 1],
stage_sup_size=[14, 28, 56, 112],
upsample_cfg=dict(type='bilinear', scale_factor=2),
loss_cfg=dict(
type='RefineCrossEntropyLoss',
stage_instance_loss_weight=[0.25, 0.5, 0.75, 1.0],
semantic_loss_weight=1.0,
boundary_width=2,
start_stage=1))))
train_cfg = dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=-1,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False))
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.5),
max_per_img=100,
mask_thr_binary=0.5))
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile', to_float32=True),
dict(
type='LoadAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='Resize', img_scale=(1024, 1024), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'])
]
test_pipeline = [
dict(type='LoadImageFromFile', to_float32=True),
dict(
type='MultiScaleFlipAug',
img_scale=(1024, 1024),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
])
]
data_root = '../coco'
data = dict(
samples_per_gpu=1,
workers_per_gpu=2,
train=dict(
type='CocoDataset',
ann_file='annotations/instances_train2017.json',
img_prefix='train2017',
pipeline=[
dict(type='LoadImageFromFile', to_float32=True),
dict(
type='LoadAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='Resize', img_scale=(1024, 1024), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(
type='Collect',
keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'])
],
data_root='../coco',
classes=('street', )),
val=dict(
type='CocoDataset',
ann_file='annotations/instances_train2017.json',
img_prefix='train2017',
pipeline=[
dict(type='LoadImageFromFile', to_float32=True),
dict(
type='MultiScaleFlipAug',
img_scale=(1024, 1024),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
])
],
data_root='../coco',
classes=('street', )),
test=dict(
type='CocoDataset',
ann_file='annotations/instances_train2017.json',
img_prefix='train2017',
pipeline=[
dict(type='LoadImageFromFile', to_float32=True),
dict(
type='MultiScaleFlipAug',
img_scale=(1024, 1024),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
])
],
data_root='../coco',
classes=('street', )))
evaluation = dict(metric=['bbox', 'segm'], classwise=True, interval=12)
optimizer = dict(type='SGD', lr=0.003, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
total_epochs = 1000
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[16, 22])
checkpoint_config = dict(interval=1)
log_config = dict(interval=10, hooks=[dict(type='TextLoggerHook')])
dist_params = dict(backend='nccl')
log_level = 'INFO'
workflow = [('train', 1)]
gpu_ids = range(0, 1)
work_dir = 'work_dirs/r101-refinemask-2x/street'
load_from = None
resume_from = 'work_dirs/r101-refinemask-2x/street/latest.pth'
classes = ('street', )

@zhanggang001
Copy link
Owner

I am very sorry to forget to reply to you. Hope you have solved this problem.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants