@@ -32,6 +32,81 @@ namespace vapid {
32
32
TupleDumper<std::integral_constant<uint16_t , 0 >, T>::dump (ss, t);
33
33
}
34
34
35
+ struct PermutationAnalysis {
36
+ PermutationAnalysis () {}
37
+ PermutationAnalysis (const std::vector<size_t >& permutation) {
38
+ store_analysis (permutation);
39
+ }
40
+
41
+ void reset (size_t perm_size) {
42
+ element_visited.resize (perm_size);
43
+ for (auto & b : element_visited) {
44
+ b = false ;
45
+ }
46
+
47
+ cycle_sizes.resize (perm_size);
48
+ for (auto & s : cycle_sizes) {
49
+ s = 0 ;
50
+ }
51
+
52
+ cycle_mins.resize (perm_size);
53
+ for (auto & s: cycle_mins) {
54
+ s = 0 ;
55
+ }
56
+ }
57
+
58
+ void store_analysis (const std::vector<size_t >& permutation) {
59
+ reset (permutation.size ());
60
+
61
+ size_t curr = 0 ;
62
+ size_t cycle_min = 0 ;
63
+ size_t cycle_len = 0 ;
64
+ size_t num_visited = 0 ;
65
+ size_t cycle_idx = 0 ;
66
+
67
+ while (num_visited < permutation.size ()) {
68
+ size_t next = permutation[curr];
69
+ cycle_min = std::min (cycle_min, next);
70
+ element_visited[curr] = 1 ;
71
+ ++num_visited;
72
+ ++cycle_len;
73
+
74
+ if (cycle_min == next) {
75
+ cycle_mins[cycle_idx] = cycle_min;
76
+ cycle_sizes[cycle_idx] = cycle_len;
77
+ // we have completely explored this cycle
78
+
79
+ if (num_visited == permutation.size ()) {
80
+ // we have explored every cycle
81
+ break ;
82
+ }
83
+
84
+ // walk up to the next cycle
85
+ curr = cycle_min;
86
+ while (element_visited[curr]) {
87
+ ++curr;
88
+ }
89
+ ++cycle_idx;
90
+ cycle_min = curr;
91
+ cycle_len = 0 ;
92
+ } else {
93
+ curr = next;
94
+ }
95
+ }
96
+ }
97
+
98
+ // work buffer to store which elements
99
+ // of the permutation have been touched
100
+ // only used when storing analysis
101
+ std::vector<bool > element_visited;
102
+
103
+ // each permutation can be decomposed into cycles
104
+ // these arrays store the size of each cycle
105
+ // and also the minimum element of each cycle
106
+ std::vector<size_t > cycle_sizes;
107
+ std::vector<size_t > cycle_mins;
108
+ };
109
+
35
110
template <typename ... Ts>
36
111
class soa {
37
112
public:
@@ -43,6 +118,20 @@ namespace vapid {
43
118
template <size_t col_idx>
44
119
using col_type = typename nth_col_type<col_idx>::value_type;
45
120
121
+
122
+ soa (bool no_double_buffering=false ) : no_double_buffering_(no_double_buffering) {
123
+ /* By default, each column is double-buffered by two std::vectors.
124
+ * When a sort order is determined, the data from the front (unsorted)
125
+ * column is moved, in order, into the back column (which is now sorted),
126
+ * and the two columns swapped.
127
+ *
128
+ * If memory usage is a concern, no_double_buffering=true will change
129
+ * this behavior. When a sort order is determined in this mode,
130
+ * elements are swapped within the single buffer. This is slower
131
+ * than the double-buffered approach, but uses half the memory.
132
+ */
133
+ }
134
+
46
135
        // Read-only access to the col_idx'th column (a std::vector of
        // the col_idx'th type in Ts...).
        template <size_t col_idx>
        const nth_col_type<col_idx>& get_column () const {
            return std::get<col_idx>(data_);
        }
@@ -112,6 +201,10 @@ namespace vapid {
112
201
sort_order_reference_.end (),
113
202
comparator_wrapper);
114
203
204
+ if (no_double_buffering_) {
205
+ sort_order_analysis_.store_analysis (sort_order_reference_);
206
+ }
207
+
115
208
sort_by_reference_impl (std::index_sequence_for<Ts...>{});
116
209
}
117
210
@@ -146,6 +239,10 @@ namespace vapid {
146
239
return resize_impl (std::index_sequence_for<Ts...>{}, data_tmp_, size ());
147
240
}
148
241
242
        // Selects the sorting strategy: ndb=true swaps elements in place
        // within each column's single buffer (half the memory, slower);
        // ndb=false (the default construction state) reorders through the
        // data_tmp_ back buffers. Takes effect on the next sort.
        void set_no_double_buffering (bool ndb = true ) {
            no_double_buffering_ = ndb;
        }
245
+
149
246
private:
150
247
template <typename T, size_t ... I>
151
248
void insert_impl (std::integer_sequence<size_t , I...>, T t) {
@@ -190,22 +287,49 @@ namespace vapid {
190
287
191
288
template <size_t col_idx>
192
289
void sort_col_by_reference (std::integral_constant<size_t , col_idx>) {
193
- auto & src = std::get<col_idx>(data_);
194
- auto & dst = std::get<col_idx>(data_tmp_);
195
- dst.resize (src.size ());
196
- for (size_t idx = 0 ; idx < src.size (); ++idx) {
197
- dst[idx] = std::move (src[sort_order_reference_[idx]]);
290
+ if (no_double_buffering_) {
291
+ auto & src = std::get<col_idx>(data_);
292
+ size_t curr = 0 ;
293
+ for (size_t cycle_idx = 0 ; cycle_idx <= sort_order_analysis_.cycle_sizes .size (); ++cycle_idx) {
294
+ curr = sort_order_analysis_.cycle_mins [cycle_idx];
295
+
296
+ size_t cycle_size = sort_order_analysis_.cycle_sizes [cycle_idx];
297
+ if (cycle_size == 0 ) {
298
+ break ;
299
+ }
300
+ size_t cycle_min = sort_order_analysis_.cycle_mins [cycle_idx];
301
+ for (size_t i = 0 ; i+1 < cycle_size; ++i) {
302
+ size_t next = sort_order_reference_[curr];
303
+ std::swap (src[curr], src[next]);
304
+ curr = next;
305
+ }
306
+ }
307
+ } else {
308
+ auto & src = std::get<col_idx>(data_);
309
+ auto & dst = std::get<col_idx>(data_tmp_);
310
+
311
+ dst.resize (src.size ());
312
+ for (size_t idx = 0 ; idx < src.size (); ++idx) {
313
+ dst[idx] = std::move (src[sort_order_reference_[idx]]);
314
+ }
315
+ std::swap (src, dst);
198
316
}
199
- std::swap (src, dst);
200
317
}
201
318
319
        // When true, sorting swaps elements in place within each column's
        // single buffer instead of reordering through data_tmp_.
        bool no_double_buffering_ = false ;

        // One std::vector per column type: the live data.
        std::tuple<std::vector<Ts>...> data_;

        // tmp buffers for reordering when sorting
        // disable this by setting no_double_buffering=true
        std::tuple<std::vector<Ts>...> data_tmp_;

        // the reference permutation describing sorted order
        std::vector<size_t> sort_order_reference_;

        // permutation analysis used in single buffering mode
        PermutationAnalysis sort_order_analysis_;
332
+
209
333
};
210
334
211
335
template <typename ... Ts>
0 commit comments