-
Notifications
You must be signed in to change notification settings - Fork 4k
/
Copy pathsql_sort.h
300 lines (234 loc) · 9.66 KB
/
sql_sort.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
#ifndef SQL_SORT_INCLUDED
#define SQL_SORT_INCLUDED
/* Copyright (c) 2000, 2024, Oracle and/or its affiliates.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2.0,
as published by the Free Software Foundation.
This program is designed to work with certain software (including
but not limited to OpenSSL) that is licensed under separate terms,
as designated in a particular file or component or in included license
documentation. The authors of MySQL hereby grant you an additional
permission to link the program and your derivative works with the
separately licensed software that they have either included with
the program or referenced in the documentation.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License, version 2.0, for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
#include <assert.h>
#include "map_helpers.h"
#include "my_base.h" // ha_rows
#include "my_sys.h"
#include "sql/filesort_utils.h" // Filesort_buffer
#include "sql/mem_root_array.h"
class Addon_fields;
struct TABLE;
/* Defines used by filesort and uniques */
constexpr size_t MERGEBUFF = 7;
constexpr size_t MERGEBUFF2 = 15;
// Number of bytes used to store varlen key's length
constexpr size_t VARLEN_PREFIX = 4;
/**
Descriptor for a merge chunk to be sort-merged.
A merge chunk is a sequence of pre-sorted records, written to a
temporary file. A Merge_chunk instance describes where this chunk is stored
in the file, and where it is located when it is in memory.
It is a POD because we read/write them from/to files (but note,
only m_file_position and m_rowcount are actually used in that
situation).
We have accessors (getters/setters) for all struct members.
*/
struct Merge_chunk {
public:
my_off_t file_position() const { return m_file_position; }
void set_file_position(my_off_t val) { m_file_position = val; }
void advance_file_position(my_off_t val) { m_file_position += val; }
uchar *buffer_start() { return m_buffer_start; }
const uchar *buffer_end() const { return m_buffer_end; }
const uchar *valid_buffer_end() const { return m_valid_buffer_end; }
void set_buffer(uchar *start, uchar *end) {
m_buffer_start = start;
m_buffer_end = end;
}
void set_buffer_start(uchar *start) { m_buffer_start = start; }
void set_buffer_end(uchar *end) {
assert(m_buffer_end == nullptr || end <= m_buffer_end);
m_buffer_end = end;
}
void set_valid_buffer_end(uchar *end) {
assert(end <= m_buffer_end);
m_valid_buffer_end = end;
}
void init_current_key() { m_current_key = m_buffer_start; }
uchar *current_key() const { return m_current_key; }
void advance_current_key(uint val) { m_current_key += val; }
void decrement_rowcount(ha_rows val) { m_rowcount -= val; }
void set_rowcount(ha_rows val) { m_rowcount = val; }
ha_rows rowcount() const { return m_rowcount; }
ha_rows mem_count() const { return m_mem_count; }
void set_mem_count(ha_rows val) { m_mem_count = val; }
ha_rows decrement_mem_count() { return --m_mem_count; }
ha_rows max_keys() const { return m_max_keys; }
void set_max_keys(ha_rows val) { m_max_keys = val; }
size_t buffer_size() const { return m_buffer_end - m_buffer_start; }
/**
Tries to merge *this with *mc, returns true if successful.
The assumption is that *this is no longer in use,
and the space it has been allocated can be handed over to a
buffer which is adjacent to it.
*/
bool merge_freed_buff(Merge_chunk *mc) const {
if (mc->m_buffer_end == m_buffer_start) {
mc->m_buffer_end = m_buffer_end;
mc->m_max_keys += m_max_keys;
return true;
} else if (mc->m_buffer_start == m_buffer_end) {
mc->m_buffer_start = m_buffer_start;
mc->m_max_keys += m_max_keys;
return true;
}
return false;
}
private:
/// The current key for this chunk.
uchar *m_current_key = nullptr;
/// Current position in the file to be sorted.
my_off_t m_file_position = 0;
/// Start of main-memory buffer for this chunk.
uchar *m_buffer_start = nullptr;
/// End of main-memory buffer for this chunk.
uchar *m_buffer_end = nullptr;
/// End of actual, valid data for this chunk.
uchar *m_valid_buffer_end;
/// Number of unread rows in this chunk.
ha_rows m_rowcount = 0;
/// Number of rows in the main-memory buffer.
ha_rows m_mem_count = 0;
/// If we have fixed-size rows: max number of rows in buffer.
ha_rows m_max_keys = 0;
};
typedef Bounds_checked_array<Merge_chunk> Merge_chunk_array;
/*
The result of Unique or filesort; can either be stored on disk
(in which case io_cache points to the file) or in memory in one
of two ways. See sorted_result_in_fsbuf.
Note if sort_result points into memory, it does _not_ own the sort buffer;
Filesort_info does.
TODO: Clean up so that Filesort / Filesort_info / Filesort_buffer /
Sort_result have less confusing overlap.
*/
class Sort_result {
public:
Sort_result() : sorted_result_in_fsbuf(false), sorted_result_end(nullptr) {}
bool has_result_in_memory() const {
return sorted_result || sorted_result_in_fsbuf;
}
bool has_result() const {
return has_result_in_memory() || (io_cache && my_b_inited(io_cache));
}
IO_CACHE *io_cache{nullptr};
/**
If the entire result fits in memory, we skip the merge phase.
We may leave the result in the parent Filesort_info's filesort_buffer
(indicated by sorted_result_in_fsbuf), or we may strip away
the sort keys, and copy the sorted result into a new buffer.
Unique always uses the latter.
This new buffer is [sorted_result ... sorted_result_end]
@see save_index()
*/
bool sorted_result_in_fsbuf{false};
unique_ptr_my_free<uchar> sorted_result{nullptr};
uchar *sorted_result_end{nullptr};
ha_rows found_records{0}; ///< How many records in sort.
};
/**
A class wrapping misc buffers used for sorting.
*/
class Filesort_info {
/// Buffer for sorting keys.
Filesort_buffer filesort_buffer;
public:
Merge_chunk_array merge_chunks; ///< Array of chunk descriptors
Addon_fields *addon_fields{nullptr}; ///< Addon field descriptors.
bool m_using_varlen_keys{false};
uint m_sort_length{0};
Filesort_info(const Filesort_info &) = delete;
Filesort_info &operator=(const Filesort_info &) = delete;
Filesort_info() : m_using_varlen_keys(false), m_sort_length(0) {}
/** Sort filesort_buffer
@return Number of records, after any deduplication
*/
size_t sort_buffer(Sort_param *param, size_t num_input_rows,
size_t max_output_rows) {
return filesort_buffer.sort_buffer(param, num_input_rows, max_output_rows);
}
/**
Copies (unpacks) values appended to sorted fields from a buffer back to
their regular positions specified by the Field::ptr pointers.
@param tables Tables in the join; for NULL row flags.
@param buff Buffer which to unpack the value from.
*/
template <bool Packed_addon_fields>
inline void unpack_addon_fields(const Mem_root_array<TABLE *> &tables,
uchar *buff);
/**
Reads 'count' number of chunk descriptors into the merge_chunks array.
In case of error, the merge_chunks array will be empty.
@param chunk_file File containing the descriptors.
@param count Number of chunks to read.
*/
void read_chunk_descriptors(IO_CACHE *chunk_file, uint count);
/// Are we using "addon fields"?
bool using_addon_fields() const { return addon_fields != nullptr; }
void reset() { filesort_buffer.reset(); }
void clear_peak_memory_used() { filesort_buffer.clear_peak_memory_used(); }
Bounds_checked_array<uchar> get_next_record_pointer(size_t min_size) {
return filesort_buffer.get_next_record_pointer(min_size);
}
void commit_used_memory(size_t num_bytes) {
filesort_buffer.commit_used_memory(num_bytes);
}
uchar *get_sorted_record(uint idx) {
return filesort_buffer.get_sorted_record(idx);
}
uchar **get_sort_keys() { return filesort_buffer.get_sort_keys(); }
Bounds_checked_array<uchar> get_contiguous_buffer() {
return filesort_buffer.get_contiguous_buffer();
}
void set_max_size(size_t max_size, size_t record_length) {
filesort_buffer.set_max_size(max_size, record_length);
}
void free_sort_buffer() { filesort_buffer.free_sort_buffer(); }
bool preallocate_records(size_t num_records) {
return filesort_buffer.preallocate_records(num_records);
}
size_t peak_memory_used() const { return filesort_buffer.peak_memory_used(); }
size_t max_size_in_bytes() const {
return filesort_buffer.max_size_in_bytes();
}
uint sort_length() const { return m_sort_length; }
bool using_varlen_keys() const { return m_using_varlen_keys; }
void set_sort_length(uint val, bool is_varlen) {
m_sort_length = val;
m_using_varlen_keys = is_varlen;
}
};
typedef Bounds_checked_array<uchar> Sort_buffer;
/**
Put all room used by freed buffer to use in adjacent buffer.
Note, that we can't simply distribute memory evenly between all buffers,
because new areas must not overlap with old ones.
*/
template <typename Heap_type>
void reuse_freed_buff(Merge_chunk *old_top, Heap_type *heap) {
typename Heap_type::iterator it = heap->begin();
typename Heap_type::iterator end = heap->end();
for (; it != end; ++it) {
if (old_top->merge_freed_buff(*it)) return;
}
assert(0);
}
#endif /* SQL_SORT_INCLUDED */