-
Notifications
You must be signed in to change notification settings - Fork 4k
/
Copy pathfilesort_utils.cc
453 lines (396 loc) · 15.3 KB
/
filesort_utils.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
/* Copyright (c) 2010, 2025, Oracle and/or its affiliates.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2.0,
as published by the Free Software Foundation.
This program is designed to work with certain software (including
but not limited to OpenSSL) that is licensed under separate terms,
as designated in a particular file or component or in included license
documentation. The authors of MySQL hereby grant you an additional
permission to link the program and your derivative works with the
separately licensed software that they have either included with
the program or referenced in the documentation.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License, version 2.0, for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
#include "sql/filesort_utils.h"
#include <algorithm>
#include <cmath>
#include <cstring>
#include "add_with_saturate.h"
#include "my_dbug.h"
#include "my_io.h"
#include "my_pointer_arithmetic.h"
#include "sql/cmp_varlen_keys.h"
#include "sql/opt_costmodel.h"
#include "sql/sort_param.h"
#include "sql/sql_sort.h"
#include "sql/thr_malloc.h"
PSI_memory_key key_memory_Filesort_buffer_sort_keys;
using std::max;
using std::min;
using std::nth_element;
using std::sort;
using std::stable_sort;
using std::unique;
using std::vector;
namespace {
/*
An inline function which does memcmp().
This one turns out to be pretty fast on all platforms, except sparc.
See the accompanying unit tests, which measure various implementations.
*/
inline bool my_mem_compare(const uchar *s1, const uchar *s2, size_t len) {
assert(s1 != nullptr);
assert(s2 != nullptr);
for (size_t i = 0; i < len; ++i) {
if (s1[i] != s2[i]) return s1[i] < s2[i];
}
return false;
}
#define COMPARE(N) \
if (s1[N] != s2[N]) return s1[N] < s2[N]
inline bool my_mem_compare_longkey(const uchar *s1, const uchar *s2,
size_t len) {
COMPARE(0);
COMPARE(1);
COMPARE(2);
COMPARE(3);
return memcmp(s1 + 4, s2 + 4, len - 4) < 0;
}
class Mem_compare {
public:
explicit Mem_compare(size_t n) : m_size(n) {}
bool operator()(const uchar *s1, const uchar *s2) const {
return my_mem_compare(s1, s2, m_size);
}
private:
size_t m_size;
};
class Mem_compare_longkey {
public:
explicit Mem_compare_longkey(size_t n) : m_size(n) {}
bool operator()(const uchar *s1, const uchar *s2) const {
return my_mem_compare_longkey(s1, s2, m_size);
}
private:
size_t m_size;
};
class Mem_compare_varlen_key {
public:
Mem_compare_varlen_key(const Bounds_checked_array<st_sort_field> sfa,
bool use_hash_arg)
: sort_field_array(sfa.array(), sfa.size()), use_hash(use_hash_arg) {}
bool operator()(const uchar *s1, const uchar *s2) const {
return cmp_varlen_keys(sort_field_array, use_hash, s1, s2);
}
private:
Bounds_checked_array<st_sort_field> sort_field_array;
bool use_hash;
};
template <class Comp>
class Equality_from_less {
public:
explicit Equality_from_less(const Comp &comp) : m_comp(comp) {}
template <class A, class B>
bool operator()(const A &a, const B &b) const {
return !(m_comp(a, b) || m_comp(b, a));
}
private:
const Comp &m_comp;
};
} // namespace
size_t Filesort_buffer::sort_buffer(Sort_param *param, size_t num_input_rows,
size_t max_output_rows) {
param->m_sort_algorithm = Sort_param::FILESORT_ALG_NONE;
if (max_output_rows == 0) return max_output_rows;
if (num_input_rows <= 1) return num_input_rows;
if (param->max_compare_length() == 0) return num_input_rows;
const auto it_begin = begin(m_record_pointers);
auto it_end = begin(m_record_pointers) + num_input_rows;
// Due to LIMIT, we don't need to sort all of the elements, so find out
// which ones that we need and sort only those. This reduces the total
// running time from O(n log n) to O(n + k log k), which is a significant
// win for small k. nth_element() isn't guaranteed to be a stable sort,
// though, so we can only use it if an unstable one is okay.
const bool prefilter_nth_element =
max_output_rows < num_input_rows / 2 && !param->m_remove_duplicates;
if (param->using_varlen_keys()) {
const Mem_compare_varlen_key comp(param->local_sortorder, param->use_hash);
if (prefilter_nth_element) {
nth_element(it_begin, it_begin + max_output_rows - 1, it_end, comp);
it_end = it_begin + max_output_rows;
}
// TODO: Make more elaborate heuristics than just always picking
// std::sort.
param->m_sort_algorithm = Sort_param::FILESORT_ALG_STD_SORT;
sort(it_begin, it_end, comp);
if (param->m_remove_duplicates) {
num_input_rows =
unique(it_begin, it_end,
Equality_from_less<Mem_compare_varlen_key>(comp)) -
it_begin;
}
return std::min(num_input_rows, max_output_rows);
}
// If we don't use addon fields, we'll have the record position appended to
// the end of each record. This disturbs our equality comparisons, so we'll
// have to remove it. (Removing it also makes the comparisons ever so slightly
// cheaper.)
size_t key_len = param->max_compare_length();
if (!param->using_addon_fields()) {
key_len -= param->sum_ref_length;
}
/*
std::stable_sort has some extra overhead in allocating the temp buffer,
which takes some time. The cutover point where it starts to get faster
than quicksort seems to be somewhere around 10 to 40 records.
So we're a bit conservative, and stay with quicksort up to 100 records.
*/
if (num_input_rows <= 100) {
if (key_len < 10) {
param->m_sort_algorithm = Sort_param::FILESORT_ALG_STD_SORT;
if (prefilter_nth_element) {
nth_element(it_begin, it_begin + max_output_rows - 1, it_end,
Mem_compare(key_len));
it_end = it_begin + max_output_rows;
}
sort(it_begin, it_end, Mem_compare(key_len));
if (param->m_remove_duplicates) {
num_input_rows =
unique(it_begin, it_end,
Equality_from_less<Mem_compare>(Mem_compare(key_len))) -
it_begin;
}
return std::min(num_input_rows, max_output_rows);
}
param->m_sort_algorithm = Sort_param::FILESORT_ALG_STD_SORT;
if (prefilter_nth_element) {
nth_element(it_begin, it_begin + max_output_rows - 1, it_end,
Mem_compare_longkey(key_len));
it_end = it_begin + max_output_rows;
}
sort(it_begin, it_end, Mem_compare_longkey(key_len));
if (param->m_remove_duplicates) {
auto new_end = unique(it_begin, it_end,
Equality_from_less<Mem_compare_longkey>(
Mem_compare_longkey(key_len)));
num_input_rows = new_end - it_begin;
}
return std::min(num_input_rows, max_output_rows);
}
param->m_sort_algorithm = Sort_param::FILESORT_ALG_STD_STABLE;
// Heuristics here: avoid function overhead call for short keys.
if (key_len < 10) {
if (prefilter_nth_element) {
nth_element(it_begin, it_begin + max_output_rows - 1, it_end,
Mem_compare(key_len));
it_end = it_begin + max_output_rows;
}
stable_sort(it_begin, it_end, Mem_compare(key_len));
if (param->m_remove_duplicates) {
num_input_rows =
unique(it_begin, it_end,
Equality_from_less<Mem_compare>(Mem_compare(key_len))) -
it_begin;
}
} else {
if (prefilter_nth_element) {
nth_element(it_begin, it_begin + max_output_rows - 1, it_end,
Mem_compare_longkey(key_len));
it_end = it_begin + max_output_rows;
}
stable_sort(it_begin, it_end, Mem_compare_longkey(key_len));
if (param->m_remove_duplicates) {
num_input_rows = unique(it_begin, it_end,
Equality_from_less<Mem_compare_longkey>(
Mem_compare_longkey(key_len))) -
it_begin;
}
}
return std::min(num_input_rows, max_output_rows);
}
void Filesort_buffer::reset() {
update_peak_memory_used();
m_record_pointers.clear();
if (m_blocks.size() >= 2) {
// Free every block but the last.
m_blocks.erase(m_blocks.begin(), m_blocks.end() - 1);
}
/*
m_max_record_length can have changed since last time; if the remaining
(largest) block is not large enough for a single row of the next size,
then clear out that, too.
*/
if (m_max_record_length > m_current_block_size) {
free_sort_buffer();
}
if (m_blocks.empty()) {
assert(m_next_rec_ptr == nullptr);
assert(m_current_block_end == nullptr);
assert(m_current_block_size == 0);
} else {
m_next_rec_ptr = m_blocks[0].get();
assert(m_current_block_end == m_next_rec_ptr + m_current_block_size);
}
m_space_used_other_blocks = 0;
}
bool Filesort_buffer::preallocate_records(size_t num_records) {
if (m_max_record_length == 0xFFFFFFFFU) {
// The rest of the code uses this value for “infinite” and saturates to it,
// so even if we have a large sort buffer (> 4 GB), we we can't know for
// sure there's going to be room.
return true;
}
reset();
const size_t bytes_needed = num_records * m_max_record_length;
if (bytes_needed + num_records * sizeof(m_record_pointers[0]) >
m_max_size_in_bytes) {
return true;
}
/*
If the remaining block can't hold what we need, then it's of no
use to us (it doesn't save us any allocations), so get rid of it
and allocate one that's exactly the right size.
*/
if (m_next_rec_ptr == nullptr ||
m_next_rec_ptr + bytes_needed > m_current_block_end) {
free_sort_buffer();
if (allocate_sized_block(bytes_needed)) {
return true;
}
m_record_pointers.reserve(num_records);
}
while (m_record_pointers.size() < num_records) {
Bounds_checked_array<uchar> const ptr =
get_next_record_pointer(m_max_record_length);
(void)ptr;
assert(ptr.array() != nullptr);
commit_used_memory(m_max_record_length);
}
return false;
}
bool Filesort_buffer::allocate_block(size_t num_bytes) {
DBUG_EXECUTE_IF("alloc_sort_buffer_fail",
DBUG_SET("+d,simulate_out_of_memory"););
size_t next_block_size;
if (m_current_block_size == 0) {
// First block.
next_block_size = MIN_SORT_MEMORY;
} else {
next_block_size = m_current_block_size + m_current_block_size / 2;
}
/*
If our last block isn't used at all, we can safely free it
before we try to allocate a larger one. Note that we do this
after the calculation above, which uses m_current_block_size.
*/
if (!m_blocks.empty() && m_blocks.back().get() == m_next_rec_ptr) {
m_current_block_size = 0;
m_next_rec_ptr = nullptr;
m_current_block_end = nullptr;
m_blocks.pop_back();
}
// Figure out how much space we've used, to see how much is left (if
// anything).
size_t space_used = m_current_block_size + m_space_used_other_blocks;
space_used += m_record_pointers.capacity() * sizeof(m_record_pointers[0]);
size_t space_left;
if (space_used > m_max_size_in_bytes)
space_left = 0;
else
space_left = m_max_size_in_bytes - space_used;
/*
Adjust space_left to take into account that filling this new buffer
with records would necessarily also add pointers to m_record_pointers.
Note that we know how much space record_pointers currently is using,
but not how much it could potentially be using in the future as we add
records; we take a best-case estimate based on maximum-size records.
It's also impossible to say how capacity() will change since this
is an implementation detail, so we don't take that into account.
This means that, for smaller records, we could go above the maximum
permitted total memory usage.
*/
size_t const min_num_rows_capacity =
m_record_pointers.size() +
space_left /
AddWithSaturate(m_max_record_length, sizeof(m_record_pointers[0]));
if (min_num_rows_capacity > m_record_pointers.capacity()) {
space_left -= (min_num_rows_capacity - m_record_pointers.capacity()) *
sizeof(m_record_pointers[0]);
}
next_block_size = min(max(next_block_size, num_bytes), space_left);
if (next_block_size < num_bytes) {
/*
If we're really out of space, but have at least 32 kB unused in
m_record_pointers, try to reclaim some space and try again. This should
only be needed in some very rare cases where we first sort a lot of very
short rows (yielding a huge amount of record pointers) and then need to
sort huge rows that wouldn't fit in the buffer otherwise -- in other
words, nearly never.
*/
size_t const excess_bytes =
(m_record_pointers.capacity() - m_record_pointers.size()) *
sizeof(m_record_pointers[0]);
if (excess_bytes >= 32768) {
size_t const old_capacity = m_record_pointers.capacity();
m_record_pointers.shrink_to_fit();
if (m_record_pointers.capacity() < old_capacity) {
return allocate_block(num_bytes);
}
}
// We're full.
return true;
}
return allocate_sized_block(next_block_size);
}
bool Filesort_buffer::allocate_sized_block(size_t block_size) {
unique_ptr_my_free<uchar[]> new_block((uchar *)my_malloc(
key_memory_Filesort_buffer_sort_keys, block_size, MYF(0)));
if (new_block == nullptr) {
return true;
}
m_space_used_other_blocks += m_current_block_size;
m_current_block_size = block_size;
m_next_rec_ptr = new_block.get();
m_current_block_end = new_block.get() + m_current_block_size;
m_blocks.push_back(std::move(new_block));
return false;
}
void Filesort_buffer::free_sort_buffer() {
update_peak_memory_used();
// std::vector::clear() does not necessarily free all the memory,
// but moving or swapping in an empty vector typically does (and we
// rely on this, even though we really shouldn't). This shouldn't have
// been a problem since they will be cleared in the destructor, but
// there are _many_ places scattered around the code that construct TABLE
// objects (which indirectly contain Filesort_buffer objects) and never
// destroy them properly. (You can find lots of them easily by adding an
// std::unique_ptr<int> to Filesort_buffer and giving it a value in the
// constructor; it will leak all over the place.) We should fix that,
// but for the time being, we have this workaround instead.
m_record_pointers = vector<uchar *>();
m_blocks = vector<unique_ptr_my_free<uchar[]>>();
m_space_used_other_blocks = 0;
m_next_rec_ptr = nullptr;
m_current_block_end = nullptr;
m_current_block_size = 0;
}
Bounds_checked_array<uchar> Filesort_buffer::get_contiguous_buffer() {
if (m_current_block_size != m_max_size_in_bytes) {
free_sort_buffer();
if (allocate_sized_block(m_max_size_in_bytes)) {
return {nullptr, 0};
}
}
return {m_blocks.back().get(), m_max_size_in_bytes};
}
void Filesort_buffer::update_peak_memory_used() const {
m_peak_memory_used =
max(m_peak_memory_used,
m_record_pointers.capacity() * sizeof(m_record_pointers[0]) +
m_current_block_size + m_space_used_other_blocks);
}