/
SGM.h
366 lines (301 loc) · 15.6 KB
/
SGM.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
#ifndef __SEMI_GLOBAL_MATCHING_H__
#define __SEMI_GLBOAL_MATCHING_H__
#include <vw/Image/ImageView.h>
#include <vw/Image/PixelMask.h>
#include <vw/Stereo/DisparityMap.h>
#include <vw/Stereo/Correlation.h>
#include <vw/InterestPoint/Detector.h> // TODO: REMOVE THIS!
namespace vw {
// Registering the Pixel Disparity type for FileIO
template<> struct PixelFormatID<Vector<uint8,2> > { static const PixelFormatEnum value = VW_PIXEL_GENERIC_2_CHANNEL; };
template<> struct PixelFormatID<Vector<int16,2> > { static const PixelFormatEnum value = VW_PIXEL_GENERIC_2_CHANNEL; };
/* TODO LIST
- Increase speed, it is way too slow!
- Clean up interfaces/ROI handling.
- Handle non-uint8 input images.
- Integrate with disparity code as an option for the low-
resolution disparity search.
NOTES:
- Using a block size in the initial cost calculation does not seem to improve the results.
Instead it gives splotchy output. No real effect on speed.
- All the required time is in the path iteration functions!
*/
namespace stereo {
class SemiGlobalMatcher {
public: // Definitions
// The types are chosen to minimize storage costs
typedef int16 DisparityType; ///< Contains the allowable dx, dy range.
typedef int16 CostType; ///< Used to describe a single disparity cost.
typedef int32 AccumCostType; ///< Used to accumulate CostType values.
typedef ImageView<PixelMask<Vector2i> > DisparityImage; // The usual VW disparity type
public: // Functions
/// Set the parameters to be used for future SGM calls
void set_parameters(int min_disp_x, int min_disp_y,
int max_disp_x, int max_disp_y,
int kernel_size);
/// Compute SGM stereo on the images.
/// - TODO: Make search_buffer a parameter
DisparityImage
semi_global_matching_func( ImageView<uint8> const& left_image,
ImageView<uint8> const& right_image,
DisparityImage const* prev_disparity=0,
int search_buffer = 4);
private: // Variables
// The core parameters
int m_min_disp_x, m_min_disp_y;
int m_max_disp_x, m_max_disp_y;
int m_kernel_size; ///< Must be odd. Use "1" for single pixel.
int m_min_row, m_max_row;
int m_min_col, m_max_col;
int m_num_output_cols, m_num_output_rows;
// Derived parameters for convenience
int m_num_disp_x, m_num_disp_y, m_num_disp;
size_t m_buffer_step_size;
boost::shared_array<CostType > m_cost_buffer;
boost::shared_array<AccumCostType> m_accum_buffer;
/// Image containing the disparity bounds for each pixel.
/// - Stored as min_col, min_row, max_col, max_row.
ImageView<Vector4i> m_disp_bound_image;
/// Lookup table of the adjacent disparities for each disparity
/// - Handles outer boundaries by repitition.
std::vector<DisparityType> m_adjacent_disp_lookup;
private: // Functions
/// Populate the lookup table m_adjacent_disp_lookup
void populate_adjacent_disp_lookup_table();
/// Fill in m_disp_bound_image using image-wide contstants
void populate_constant_disp_bound_image();
/// Fill in m_disp_bound_image using a zones object.
void populate_disp_bound_image(std::vector<stereo::SearchParam> const *zones);
/// Fill in m_disp_bound_image using a half resolution disparity image.
void populate_disp_bound_image(DisparityImage const* prev_disparity,
int search_buffer);
/// Return the index into one of the buffers for a given location
/// - The data is stored row major interleaved format.
size_t get_cost_index(int col, int row, DisparityType disp=0) const {
return row*m_buffer_step_size + col*m_num_disp + disp;
}
/// Add the value of the cost buffer to the accumulated cost vector at a pixel.
void add_cost_vector(int col, int row,
boost::shared_array<AccumCostType> accum_vec) {
size_t start_index = get_cost_index(col, row, 0);
for (int d=0; d<m_num_disp; ++d)
accum_vec[start_index+d] += m_cost_buffer[start_index+d];
}
CostType* get_cost_vector(int col, int row) {
size_t start_index = get_cost_index(col, row);
return &(m_cost_buffer[start_index]);
};
AccumCostType* get_accum_vector(boost::shared_array<AccumCostType> accum_vec,
int col, int row) {
size_t start_index = get_cost_index(col, row);
return &(accum_vec[start_index]);
};
/// Goes across all the viterbi diagrams and extracts out the minimum vector.
DisparityImage
create_disparity_view( boost::shared_array<AccumCostType> const accumulated_costs );
/// Converts from a linear disparity index to the dx, dy values it represents.
/// - This function is too slow to use inside the inner loop!
void disp_to_xy(DisparityType disp, DisparityType &dx, DisparityType &dy) {
dy = (disp / m_num_disp_x) + m_min_disp_y; // 2D implementation
dx = (disp % m_num_disp_x) + m_min_disp_x;
}
// This function is unused to allow more optimization at the inner loop.
///// Compute the physical distance between the disparity values of two adjacent pixels.
//AccumCostType get_disparity_dist(DisparityType d1, DisparityType d2);
/// v1 += v2.
void inplace_sum_views( boost::shared_array<AccumCostType> v1,
boost::shared_array<AccumCostType> const v2) {
size_t count = m_num_output_cols * m_num_output_rows * m_num_disp;
for (size_t i=0; i<count; ++i)
v1[i] = v1[i] + v2[i];
}
/// Get the index if the smallest element in a vector
DisparityType find_min_index( const AccumCostType* const vec ) {
AccumCostType value = std::numeric_limits<AccumCostType>::max();
int min_index = 0;
for (int i=0; i<m_num_disp; ++i) {
if (vec[i] < value) {
value = vec[i];
min_index = i;
}
}
return min_index;
}
/// Get the min disparity of an AccumCost vector
AccumCostType get_accum_vector_min(const AccumCostType* const vec){
AccumCostType value = std::numeric_limits<AccumCostType>::max();
for (int i=0; i<m_num_disp; ++i) {
if (vec[i] < value)
value = vec[i];
}
return value;
}
/// Returns a cost score at a given location
CostType get_cost(ImageView<uint8> const& left_image,
ImageView<uint8> const& right_image,
int left_x, int left_y, int right_x, int right_y, bool debug);
// Print out a disparity vector
template <typename T>
void print_disparity_vector(T* const vec){
std::cout << "V: ";
for (int i=0; i<m_num_disp; ++i)
std::cout << vec[i] << " ";
std::cout << std::endl;
}
/// Get the pixel diff along a line at a specified output location.
int get_path_pixel_diff(ImageView<uint8> const& left_image,
int col, int row, int dir_x, int dir_y) const {
// Take the offset between the output location and the input pixel coordinates.
return abs(left_image(col-m_min_col, row-m_min_row) -
left_image((col-dir_x)-m_min_col, (row-dir_y)-m_min_row));
}
/// Create an updated cost accumulation vector for the next pixel along an SGM evaluation path.
/// - For each disparity in the current pixel, add that disparity's cost with the "cheapest"
/// prior pixel disparity.
void evaluate_path( int col, int row, int col_p, int row_p,
AccumCostType* const prior, // Accumulated costs leading up to this pixel
CostType * const local, // The disparity costs of the current pixel
AccumCostType* output,
int path_intensity_gradient, bool debug=false ); // This variable is the magnitude of intensity change to this pixel
/// Compute the accumulated costs in a pixel direction from the local costs at each pixel.
/// - TODO: This implementation seems inefficient!
template <int DIRX, int DIRY>
void iterate_direction( ImageView<uint8 > const& left_image,
boost::shared_array<AccumCostType> & accumulated_costs ) {
/*
// Init the output costs to the max value.
// - This means that disparity/location pairs we don't update will never be used.
size_t num_cost_elements = m_num_output_cols*m_num_output_rows*m_num_disp;
for (size_t i=0; i<num_cost_elements; ++i)
accumulated_costs[i] = 0;//std::numeric_limits<AccumCostType>::max();
*/
// Walk along the edges in a clockwise fashion
if ( DIRX > 0 ) {
// LEFT MOST EDGE
// Init the edge pixels with just the cost (no accumulation yet)
for ( int32 j = 0; j < m_num_output_rows; j++ ) {
add_cost_vector(0, j, accumulated_costs);
}
// Loop across to the opposite edge
for ( int32 i = 1; i < m_num_output_cols; i++ ) {
// Loop through the pixels in this column, limiting the range according
// to the iteration direction progress.
int32 jstart = std::max( 0, 0 + DIRY * i );
int32 jstop = std::min( m_num_output_rows, m_num_output_rows + DIRY * i );
for ( int32 j = jstart; j < jstop; j++ ) {
int pixel_diff = get_path_pixel_diff(left_image, i, j, DIRX, DIRY);
evaluate_path( i, j, i-DIRX,j-DIRY,
get_accum_vector(accumulated_costs, i-DIRX,j-DIRY), // Previous pixel
get_cost_vector(i,j),
get_accum_vector(accumulated_costs, i,j),
pixel_diff, false ); // Current pixel
}
}
}
if ( DIRY > 0 ) {
// TOP MOST EDGE
// Process every pixel along this edge only if DIRX == 0. Otherwise skip the top left most pixel
for ( int32 i = (DIRX <= 0 ? 0 : 1 ); i < m_num_output_cols; i++ ) {
add_cost_vector(i, 0, accumulated_costs);
}
for ( int32 j = 1; j < m_num_output_rows; j++ ) {
int32 istart = std::max( (DIRX <= 0 ? 0 : 1),
(DIRX <= 0 ? 0 : 1) + DIRX * j );
int32 istop = std::min( m_num_output_cols, m_num_output_cols + DIRX * j );
for ( int32 i = istart; i < istop; i++ ) {
int pixel_diff = get_path_pixel_diff(left_image, i, j, DIRX, DIRY);
evaluate_path( i, j, i-DIRX,j-DIRY,
get_accum_vector(accumulated_costs, i-DIRX,j-DIRY), // Previous pixel
get_cost_vector(i,j),
get_accum_vector(accumulated_costs, i,j),
pixel_diff ); // Current pixel
}
}
}
if ( DIRX < 0 ) {
// RIGHT MOST EDGE
// Process every pixel along this edge only if DIRY == 0. Otherwise skip the top right most pixel
for ( int32 j = (DIRY <= 0 ? 0 : 1); j < m_num_output_rows; j++ ) {
add_cost_vector(m_num_output_cols-1, j, accumulated_costs);
}
for ( int32 i = m_num_output_cols-2; i >= 0; i-- ) {
int32 jstart = std::max( (DIRY <= 0 ? 0 : 1),
(DIRY <= 0 ? 0 : 1) - DIRY * (i - m_num_output_cols + 1) );
int32 jstop = std::min( m_num_output_rows, m_num_output_rows - DIRY * (i - m_num_output_cols + 1) );
for ( int32 j = jstart; j < jstop; j++ ) {
int pixel_diff = get_path_pixel_diff(left_image, i, j, DIRX, DIRY);
evaluate_path( i, j, i-DIRX,j-DIRY,
get_accum_vector(accumulated_costs, i-DIRX,j-DIRY), // Previous pixel
get_cost_vector(i,j),
get_accum_vector(accumulated_costs, i,j),
pixel_diff ); // Current pixel
}
}
}
if ( DIRY < 0 ) {
// BOTTOM MOST EDGE
// Process every pixel along this edge only if DIRX == 0. Otherwise skip the bottom left and bottom right pixel
for ( int32 i = (DIRX <= 0 ? 0 : 1);
i < (DIRX >= 0 ? m_num_output_cols : m_num_output_cols-1); i++ ) {
add_cost_vector(i,m_num_output_rows-1, accumulated_costs);
}
for ( int32 j = m_num_output_rows-2; j >= 0; j-- ) {
int32 istart = std::max( (DIRX <= 0 ? 0 : 1),
(DIRX <= 0 ? 0 : 1) - DIRX * (j - m_num_output_rows + 1) );
int32 istop = std::min( (DIRX >= 0 ? m_num_output_cols : m_num_output_cols - 1),
(DIRX >= 0 ? m_num_output_cols : m_num_output_cols - 1) - DIRX * (j - m_num_output_rows + 1) );
for ( int32 i = istart; i < istop; i++ ) {
int pixel_diff = get_path_pixel_diff(left_image, i, j, DIRX, DIRY);
evaluate_path( i, j, i-DIRX,j-DIRY,
get_accum_vector(accumulated_costs, i-DIRX,j-DIRY), // Previous pixel
get_cost_vector(i,j),
get_accum_vector(accumulated_costs, i,j),
pixel_diff ); // Current pixel
}
}
}
} // End function iterate_direction
}; // end class SemiGlobalMatcher
/// Wrapper function for SGM that handles ROIs.
/// - Merge with the function in Correlation.h!
/// - This function only searches positive disparities. The input images need to be
/// already cropped so that this makes sense.
template <class ImageT1, class ImageT2>
ImageView<PixelMask<Vector2i> >
calc_disparity_sgm(//CostFunctionType cost_type,
ImageViewBase<ImageT1> const& left_in,
ImageViewBase<ImageT2> const& right_in,
BBox2i const& left_region, // Valid region in the left image
Vector2i const& search_volume, // Max disparity to search in right image
Vector2i const& kernel_size, // Only really takes an N by N kernel!
SemiGlobalMatcher::DisparityImage const* prev_disparity=0){
// Sanity check the input:
VW_DEBUG_ASSERT( kernel_size[0] % 2 == 1 && kernel_size[1] % 2 == 1,
ArgumentErr() << "best_of_search_convolution: Kernel input not sized with odd values." );
VW_DEBUG_ASSERT( kernel_size[0] <= left_region.width() &&
kernel_size[1] <= left_region.height(),
ArgumentErr() << "best_of_search_convolution: Kernel size too large of active region." );
VW_DEBUG_ASSERT( search_volume[0] > 0 && search_volume[1] > 0,
ArgumentErr() << "best_of_search_convolution: Search volume must be greater than 0." );
VW_DEBUG_ASSERT( left_region.min().x() >= 0 && left_region.min().y() >= 0 &&
left_region.max().x() <= left_in.impl().cols() &&
left_region.max().y() <= left_in.impl().rows(),
ArgumentErr() << "best_of_search_convolution: Region not inside left image." );
// Rasterize input so that we can do a lot of processing on it.
BBox2i right_region = left_region;
right_region.max() += search_volume - Vector2i(1,1);
ImageView<typename ImageT1::pixel_type> left_crop ( crop(left_in.impl(), left_region) );
ImageView<typename ImageT2::pixel_type> right_crop( crop(right_in.impl(), right_region) );
// TODO: Make scaling optional
// Convert the input image to uint8 with 2%-98% intensity scaling.
ImageView<PixelGray<vw::uint8> > left, right;
ip::percentile_scale_convert(left_crop, left, 0.02, 0.98);
ip::percentile_scale_convert(right_crop, right, 0.02, 0.98);
// TODO: Support different cost types?
SemiGlobalMatcher matcher;
matcher.set_parameters(0, 0, search_volume[0], search_volume[1], kernel_size[0]);
return matcher.semi_global_matching_func(left, right, prev_disparity);
} // End function calc_disparity
} // end namespace stereo
} // end namespace vw
#endif //__SEMI_GLOBAL_MATCHING__