-
Notifications
You must be signed in to change notification settings - Fork 110
/
detections_to_objects.go
132 lines (124 loc) · 3.63 KB
/
detections_to_objects.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
package segmentation
import (
"context"
"image"
"github.com/go-viper/mapstructure/v2"
"github.com/pkg/errors"
"go.viam.com/rdk/components/camera"
"go.viam.com/rdk/pointcloud"
"go.viam.com/rdk/resource"
"go.viam.com/rdk/rimage"
"go.viam.com/rdk/rimage/transform"
"go.viam.com/rdk/utils"
"go.viam.com/rdk/vision"
"go.viam.com/rdk/vision/objectdetection"
)
// DetectionSegmenterConfig are the optional parameters to turn a detector into a segmenter.
// Its fields are populated from an attribute map by ConvertAttributes, which matches
// attribute keys against the `json` tags declared below.
type DetectionSegmenterConfig struct {
resource.TriviallyValidateConfig
// DetectorName is decoded from the "detector_name" attribute.
// NOTE(review): presumably the name of the detector to wrap — confirm against the caller.
DetectorName string `json:"detector_name"`
// ConfidenceThresh is decoded from the "confidence_threshold_pct" attribute.
// NOTE(review): presumably forwarded as DetectionSegmenter's confidenceThresh, which is
// compared directly against Detection.Score(); confirm the expected scale of this value.
ConfidenceThresh float64 `json:"confidence_threshold_pct"`
// MeanK is decoded from the "mean_k" attribute.
// NOTE(review): presumably the meanK argument of the statistical point cloud filter — confirm.
MeanK int `json:"mean_k"`
// Sigma is decoded from the "sigma" attribute.
// NOTE(review): presumably the sigma argument of the statistical point cloud filter — confirm.
Sigma float64 `json:"sigma"`
}
// ConvertAttributes decodes the given AttributeMap into the receiver, matching map keys
// against the struct's `json` tags. It returns any error raised while building the
// decoder or while decoding the map.
func (dsc *DetectionSegmenterConfig) ConvertAttributes(am utils.AttributeMap) error {
	// Decode straight into the receiver, keyed by the json tag names.
	decoderCfg := mapstructure.DecoderConfig{
		TagName: "json",
		Result:  dsc,
	}
	dec, err := mapstructure.NewDecoder(&decoderCfg)
	if err != nil {
		return err
	}
	return dec.Decode(am)
}
// DetectionSegmenter takes an objectdetection.Detector and turns it into a Segmenter.
//
// The returned Segmenter reads the "color" and "depth" images from the camera, runs the
// detector on a clone of the color image, skips detections whose score is below
// confidenceThresh, and converts each remaining detection into a point cloud via the
// camera's projector. When both meanK and sigma are positive, each point cloud is passed
// through the filter built by pointcloud.StatisticalOutlierFilter(meanK, sigma); otherwise
// point clouds are left unfiltered. Detections whose point cloud is empty after filtering
// are dropped from the result.
//
// An error is returned if detector is nil or if the outlier filter cannot be constructed.
func DetectionSegmenter(detector objectdetection.Detector, meanK int, sigma, confidenceThresh float64) (Segmenter, error) {
	if detector == nil {
		return nil, errors.New("detector cannot be nil")
	}

	// Start with a pass-through and only swap in the statistical filter when both
	// parameters are positive.
	cloudFilter := func(in pointcloud.PointCloud) (pointcloud.PointCloud, error) {
		return in, nil
	}
	if meanK > 0 && sigma > 0.0 {
		outlierFilter, err := pointcloud.StatisticalOutlierFilter(meanK, sigma)
		if err != nil {
			return nil, err
		}
		cloudFilter = outlierFilter
	}

	segment := func(ctx context.Context, src camera.VideoSource) ([]*vision.Object, error) {
		projector, err := src.Projector(ctx)
		if err != nil {
			return nil, err
		}

		// Pull the color image and the depth image out of the camera's image set.
		namedImgs, _, err := src.Images(ctx)
		if err != nil {
			return nil, errors.Wrapf(err, "detection segmenter")
		}
		var (
			colorImg *rimage.Image
			depthImg image.Image
		)
		for _, named := range namedImgs {
			switch named.SourceName {
			case "color":
				colorImg = rimage.ConvertImage(named.Image)
			case "depth":
				depthImg = named.Image
			}
		}
		if colorImg == nil || depthImg == nil {
			return nil, errors.New("source camera's getImages method did not have 'color' and 'depth' images")
		}

		depthMap, err := rimage.ConvertImageToDepthMap(ctx, depthImg)
		if err != nil {
			return nil, err
		}

		// The detector may modify its input image, so it is given a clone and the
		// original color image is kept for building the point clouds.
		detections, err := detector(ctx, rimage.CloneImage(colorImg))
		if err != nil {
			return nil, err
		}

		objects := make([]*vision.Object, 0, len(detections))
		for _, det := range detections {
			if det.Score() < confidenceThresh {
				continue
			}
			// TODO(bhaney): Is there a way to just project the detection boxes themselves?
			cloud, err := detectionToPointCloud(det, colorImg, depthMap, projector)
			if err != nil {
				return nil, err
			}
			cloud, err = cloudFilter(cloud)
			if err != nil {
				return nil, err
			}
			// Nothing left after filtering: do not report an empty object.
			if cloud.Size() == 0 {
				continue
			}
			object, err := vision.NewObjectWithLabel(cloud, det.Label(), nil)
			if err != nil {
				return nil, err
			}
			objects = append(objects, object)
		}
		return objects, nil
	}
	return segment, nil
}
// detectionToPointCloud builds a point cloud for a single detection by handing the color
// image, the depth map and the detection's bounding box to the projector's
// RGBDToPointCloud method. It returns an error if the detection has no bounding box or if
// the projector fails; on error the returned point cloud is nil.
// NOTE(review): the box is presumably used by the projector as the crop region — confirm
// against the transform.Projector documentation.
func detectionToPointCloud(
	d objectdetection.Detection,
	im *rimage.Image, dm *rimage.DepthMap,
	proj transform.Projector,
) (pointcloud.PointCloud, error) {
	box := d.BoundingBox()
	if box == nil {
		return nil, errors.New("detection bounding box cannot be nil")
	}
	region := *box
	cloud, err := proj.RGBDToPointCloud(im, dm, region)
	if err != nil {
		return nil, err
	}
	return cloud, nil
}