-
Notifications
You must be signed in to change notification settings - Fork 110
/
detections_to_objects.go
117 lines (109 loc) · 3.24 KB
/
detections_to_objects.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
package segmentation
import (
"context"
"github.com/mitchellh/mapstructure"
"github.com/pkg/errors"
"go.viam.com/rdk/components/camera"
"go.viam.com/rdk/pointcloud"
"go.viam.com/rdk/resource"
"go.viam.com/rdk/rimage"
"go.viam.com/rdk/rimage/transform"
"go.viam.com/rdk/utils"
"go.viam.com/rdk/vision"
"go.viam.com/rdk/vision/objectdetection"
)
// DetectionSegmenterConfig are the optional parameters to turn a detector into a segmenter.
// Each field is decoded from the attribute key given in its "json" tag (see ConvertAttributes).
type DetectionSegmenterConfig struct {
// Embedded validation behavior supplied by the resource package.
resource.TriviallyValidateConfig
// DetectorName is read from "detector_name".
// NOTE(review): presumably the name of the detector to wrap; confirm where the config is consumed.
DetectorName string `json:"detector_name"`
// ConfidenceThresh is read from "confidence_threshold_pct".
// NOTE(review): presumably forwarded as DetectionSegmenter's confidenceThresh, which is compared
// directly against a detection's Score(); whether the value is a fraction or a percentage is not
// established in this file, confirm.
ConfidenceThresh float64 `json:"confidence_threshold_pct"`
// MeanK is read from "mean_k"; it is the "mean_k" parameter of the statistical filter on the point clouds.
MeanK int `json:"mean_k"`
// Sigma is read from "sigma"; it is the "sigma" parameter of the statistical filter on the point clouds.
Sigma float64 `json:"sigma"`
}
// ConvertAttributes decodes the AttributeMap input into the receiver, matching map keys
// against the "json" tags of the DetectionSegmenterConfig fields.
// It returns any error from building the decoder or from the decode itself.
func (dsc *DetectionSegmenterConfig) ConvertAttributes(am utils.AttributeMap) error {
	decoderConfig := &mapstructure.DecoderConfig{
		TagName: "json",
		Result:  dsc,
	}
	attrDecoder, buildErr := mapstructure.NewDecoder(decoderConfig)
	if buildErr != nil {
		return buildErr
	}
	return attrDecoder.Decode(am)
}
// DetectionSegmenter wraps an objectdetection.Detector and returns a Segmenter built on top of it.
// meanK and sigma are the "mean_k" and "sigma" parameters of the statistical filter applied to each
// object's point cloud; the filter is only used when both are positive, otherwise point clouds are
// passed through untouched. Detections scoring below confidenceThresh are skipped.
// An error is returned if detector is nil or if the statistical filter cannot be created.
func DetectionSegmenter(detector objectdetection.Detector, meanK int, sigma, confidenceThresh float64) (Segmenter, error) {
	if detector == nil {
		return nil, errors.New("detector cannot be nil")
	}
	// start with a pass-through filter and swap in the statistical one only when it is configured
	outlierFilter := func(cloud pointcloud.PointCloud) (pointcloud.PointCloud, error) {
		return cloud, nil
	}
	if meanK > 0 && sigma > 0.0 {
		statFilter, err := pointcloud.StatisticalOutlierFilter(meanK, sigma)
		if err != nil {
			return nil, err
		}
		outlierFilter = statFilter
	}

	// the returned segmenter captures detector, outlierFilter and confidenceThresh
	return func(ctx context.Context, src camera.VideoSource) ([]*vision.Object, error) {
		projector, err := src.Projector(ctx)
		if err != nil {
			return nil, err
		}
		// grab the next point cloud and project it into a 2D image plus depth map
		sceneCloud, err := src.NextPointCloud(ctx)
		if err != nil {
			return nil, errors.Wrap(err, "detection segmenter")
		}
		img, dm, err := projector.PointCloudToRGBD(sceneCloud)
		if err != nil {
			return nil, err
		}
		// the detector may modify its input image, so hand it a copy and keep img intact
		detections, err := detector(ctx, rimage.CloneImage(img))
		if err != nil {
			return nil, err
		}

		objects := make([]*vision.Object, 0, len(detections))
		for _, det := range detections {
			if det.Score() < confidenceThresh {
				continue
			}
			// TODO(bhaney): Is there a way to just project the detection boxes themselves?
			objCloud, err := detectionToPointCloud(det, img, dm, projector)
			if err != nil {
				return nil, err
			}
			if objCloud, err = outlierFilter(objCloud); err != nil {
				return nil, err
			}
			// nothing survived the filter, so there is no object to report
			if objCloud.Size() == 0 {
				continue
			}
			labeled, err := vision.NewObjectWithLabel(objCloud, det.Label())
			if err != nil {
				return nil, err
			}
			objects = append(objects, labeled)
		}
		return objects, nil
	}, nil
}
// detectionToPointCloud builds the point cloud for a single detection by passing the image,
// the depth map and the detection's bounding box to the projector's RGBDToPointCloud.
// It returns an error if the detection has no bounding box or if the projection fails.
func detectionToPointCloud(
	d objectdetection.Detection,
	im *rimage.Image, dm *rimage.DepthMap,
	proj transform.Projector,
) (pointcloud.PointCloud, error) {
	if box := d.BoundingBox(); box != nil {
		cloud, projErr := proj.RGBDToPointCloud(im, dm, *box)
		if projErr != nil {
			return nil, projErr
		}
		return cloud, nil
	}
	return nil, errors.New("detection bounding box cannot be nil")
}