forked from cockroachdb/cockroach
-
Notifications
You must be signed in to change notification settings - Fork 0
/
key_encoding.go
527 lines (494 loc) · 15.7 KB
/
key_encoding.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
// Copyright 2014 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License. See the AUTHORS file
// for names of contributors.
//
// An ordered key encoding scheme based on sqlite4's key encoding:
// http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki
//
// Author: Andrew Bonventre (andybons@gmail.com)
// TODO(andybons): Add get* functions for decoding.
package encoding
import (
"bytes"
"fmt"
"math"
"unicode/utf8"
)
// Maximum sizes for allocating buffers to hold encoded values.
const (
// MaxIntSize is the maximum encoded integer size in bytes.
MaxIntSize = 12
)
// Direct mappings or prefixes of encoded data dependent on the type.
const (
orderedEncodingNil = 0x05
orderedEncodingNaN = 0x06
orderedEncodingNegativeInfinity = 0x07
orderedEncodingZero = 0x15
orderedEncodingInfinity = 0x23
orderedEncodingText = 0x24
orderedEncodingBinary = 0x25
orderedEncodingBinaryNoTermination = 0x26
orderedEncodingTerminator = 0x00
)
// EncodeNil returns a byte slice containing a nil-encoded value.
func EncodeNil() []byte {
return []byte{orderedEncodingNil}
}
// EncodeString returns the resulting byte slice with s encoded
// and appended to b. If b is nil, it is treated as an empty
// byte slice. If s is not a valid utf8-encoded string or
// contains an intervening 0x00 byte, EncodeString will panic.
//
// Each value that is TEXT begins with a single byte of 0x24
// and ends with a single byte of 0x00. There are zero or more
// intervening bytes that encode the text value. The intervening
// bytes are chosen so that the encoding will sort in the desired
// collating order. The default sequence of bytes is simply UTF8.
// The intervening bytes may not contain a 0x00 character; the only
// 0x00 byte allowed in a text encoding is the final byte.
//
// Note that all key-encoded text with the BINARY collating sequence
// is simply UTF8 text. UTF8 not UTF16. Strings must be converted to
// UTF8 so that equivalent strings in different encodings compare the
// same and so that the strings contain no embedded 0x00 bytes. In other
// words, strcmp() should be sufficient for comparing two text keys.
//
// The text encoding ends in 0x00 in order to ensure that when there
// are two strings where one is a prefix of the other that the shorter
// string will sort first.
func EncodeString(b []byte, s string) []byte {
if !utf8.ValidString(s) {
panic("invalid utf8 string passed")
}
b = append(b, orderedEncodingText)
for _, v := range []byte(s) {
if v == 0x00 {
panic("string contains intervening 0x00 byte")
}
b = append(b, v)
}
return append(b, orderedEncodingTerminator)
}
// DecodeString returns the remaining byte slice after decoding and the
// decoded string from b.
func DecodeString(b []byte) ([]byte, string) {
if b[0] != orderedEncodingText {
panic("first byte of encoded string must be 0x24")
}
for i, v := range b[1:] {
if v == orderedEncodingTerminator {
return b[1+i:], string(b[1 : 1+i])
}
}
panic("encoded string must have terminator byte")
}
// EncodeBinary returns the resulting byte slice with i encoded
// and appended to b.
//
// The encoding of binaries fields is different depending
// on whether or not the value to be encoded is the last value
// (the right-most value) in the key.
//
// Each value that is BINARY that is not the last value of the key
// begins with a single byte of 0x25 and ends with a single terminator
// byte of 0x00. There are zero or more intervening bytes that encode
// the binary value. None of the intervening bytes may be zero. Each
// of the intervening bytes contains 7 bits of blob content with a 1
// in the high-order bit (the 0x80 bit). Each encoded byte thereafter
// consists of a high-order bit followed by 7 bits of payload. An
// empty input value is encoded as the header byte immediately
// followed by a termination byte 0x00.
//
// When the very last value of a key is BINARY, then it is encoded
// as a single byte of 0x26 and is followed by a byte-for-byte
// copy of the BINARY value. This alternative encoding is more efficient,
// but it only works if there are no subsequent values in the key,
// since there is no termination mark on the BLOB being encoded.
//
// The initial byte of a binary value, 0x25 or 0x26, is larger than
// the initial byte of a text value, 0x24, ensuring that every binary
// value will sort after every text value.
func EncodeBinary(b []byte, i []byte) []byte {
b = append(b, orderedEncodingBinary)
s := uint(1)
t := byte(0)
for _, v := range i {
b = append(b, byte(0x80|t|((v&0xff)>>s)))
if s < 7 {
t = v << (7 - s)
s++
} else {
b = append(b, byte(0x80|v))
s = 1
t = 0
}
}
if s > 1 {
b = append(b, byte(0x80|t))
}
return append(b, orderedEncodingTerminator)
}
// DecodeBinary decodes the given key-encoded byte slice, returning
// the unencoded byte slice value. (see documentation for EncodeBinary
// for more details). The first return argument is the remainder of
// the input buffer, after decoding the binary value.
func DecodeBinary(buf []byte) ([]byte, []byte) {
if buf[0] != orderedEncodingBinary {
panic(fmt.Sprintf("%q doesn't begin with binary encoding byte", buf))
}
out := new(bytes.Buffer)
s := uint(6)
i := int(1)
if buf[i] == orderedEncodingTerminator {
return buf[2:], out.Bytes()
}
t := (buf[i] << 1) & 0xff
for i = 2; buf[i] != orderedEncodingTerminator; i++ {
if s == 7 {
out.WriteByte(t | (buf[i] & 0x7f))
i++
} else {
out.WriteByte(t | ((buf[i] & 0x7f) >> s))
}
t = (buf[i] << (8 - s)) & 0xff
if buf[i] == orderedEncodingTerminator {
break
}
if s == 1 {
s = 7
} else {
s--
}
}
if t != 0 {
panic("unexpected bits remaining after decoding blob")
}
return buf[i+1:], out.Bytes()
}
// DecodeBinaryFinal decodes a byte slice and returns the
// result presuming that it is the last part of the buffer
// (see documentation for EncodeBinary for more details).
func DecodeBinaryFinal(buf []byte) []byte {
if buf[0] != orderedEncodingBinaryNoTermination {
panic(fmt.Sprintf("%q doesn't begin with binary-no-termination encoding byte", buf))
}
out := make([]byte, len(buf)-1)
copy(buf[1:], out)
return out
}
// EncodeBinaryFinal encodes a byte slice and returns
// the result presuming that the byte slice is the last
// portion of the buffer (see the comment for EncodeBinary).
func EncodeBinaryFinal(b []byte) []byte {
buf := make([]byte, 1+len(b))
buf[0] = orderedEncodingBinaryNoTermination
for i, v := range b {
buf[i+1] = v
}
return buf
}
// EncodeInt returns the resulting byte slice with the encoded int64 and
// appended to b. See the notes for EncodeFloat for a complete description.
func EncodeInt(b []byte, i int64) []byte {
if i == 0 {
return append(b, orderedEncodingZero)
}
e, m := intMandE(i)
if e <= 0 {
panic("integer values should not have negative exponents")
}
buf := make([]byte, len(m)+maxVarintSize+2)
switch {
case e > 0 && e <= 10:
return append(b, encodeMediumNumber(i < 0, e, m, buf)...)
case e >= 11:
return append(b, encodeLargeNumber(i < 0, e, m, buf)...)
}
panic(fmt.Sprintf("unexpected value e: %d", e))
}
// EncodeIntDecreasing returns the resulting byte slice with the
// encoded int64 values in decreasing order and appended to b.
func EncodeIntDecreasing(b []byte, i int64) []byte {
return EncodeInt(b, ^i)
}
// DecodeInt returns the remaining byte slice after decoding and the decoded
// int64 from buf.
func DecodeInt(buf []byte) ([]byte, int64) {
if buf[0] == 0x15 {
return buf[1:], 0
}
if buf[0] == 0x14 || buf[0] == 0x16 {
// Negative small or positive small.
panic("integer values should not have negative exponents")
}
idx := bytes.Index(buf, []byte{orderedEncodingTerminator})
switch {
case buf[0] == 0x08:
// Negative large.
e, m := decodeLargeNumber(true, buf[:idx+1])
return buf[idx+1:], makeIntFromMandE(true, e, m)
case buf[0] > 0x08 && buf[0] <= 0x13:
// Negative medium.
e, m := decodeMediumNumber(true, buf[:idx+1])
return buf[idx+1:], makeIntFromMandE(true, e, m)
case buf[0] == 0x22:
// Positive large.
e, m := decodeLargeNumber(false, buf[:idx+1])
return buf[idx+1:], makeIntFromMandE(false, e, m)
case buf[0] >= 0x17 && buf[0] < 0x22:
// Positive large.
e, m := decodeMediumNumber(false, buf[:idx+1])
return buf[idx+1:], makeIntFromMandE(false, e, m)
default:
panic(fmt.Sprintf("unknown prefix of the encoded byte slice: %q", buf))
}
}
// DecodeIntDecreasing returns the remaining byte slice after decoding and
// the decoded int64 in decreasing order from buf.
func DecodeIntDecreasing(buf []byte) ([]byte, int64) {
b, v := DecodeInt(buf)
return b, ^v
}
// intMandE computes and returns the mantissa M and exponent E for i.
//
// See the comments in floatMandE for more details of the representation
// of M. This method computes E and M in a simpler loop.
func intMandE(i int64) (int, []byte) {
var e int
var buf bytes.Buffer
for v := i; v != 0; v /= 100 {
mod := v % 100
if mod == 0 && buf.Len() == 0 {
// Trailing X==0 digits are omitted.
} else if mod < 0 {
buf.WriteByte(byte(2*-mod + 1))
} else {
buf.WriteByte(byte(2*mod + 1))
}
e++
}
m := buf.Bytes()
// Reverse the mantissa so highest byte sorts first.
for i := range m {
j := len(m) - i - 1
if i >= j {
break
}
m[i], m[j] = m[j], m[i]
}
// The last byte is encoded as 2n+0.
m[len(m)-1]--
return e, m
}
// makeIntFromMandE reconstructs the integer from the mantissa M
// and exponent E.
func makeIntFromMandE(negative bool, e int, m []byte) int64 {
var i int64
for v := 0; v < e; v++ {
var t int64
if v < len(m) {
t = int64(m[v])
} else {
// Trailing X==0 digits were omitted.
t = 0
}
// The last byte was encoded as 2n+0.
if v != len(m)-1 {
t--
}
if negative {
t = -t
}
i = i*100 + t/2
}
return i
}
func removeTrailingZeros(m []byte) []byte {
for i := len(m); i > 0; i-- {
if m[i-1] != 0 {
return m[:i]
}
}
return []byte{}
}
// EncodeFloat returns the resulting byte slice with the encoded
// float64 and appended to b.
//
// Values are classified as large, medium, or small
// according to the value of E. If E is 11 or more,
// the value is large. For E between 0 and 10, the
// value is medium. For E less than zero, the value is small.
//
// Large positive values are encoded as a single byte 0x22
// followed by E as a varint and then M. Medium positive
// values are a single byte of 0x17+E followed by M. Small
// positive values are encoded as a single byte 0x16 followed
// by the ones-complement of the varint for -E followed by M.
//
// Small negative values are encoded as a single byte 0x14
// followed by -E as a varint and then the ones-complement
// of M. Medium negative values are encoded as a byte 0x13-E
// followed by the ones-complement of M. Large negative values
// consist of the single byte 0x08 followed by the ones-complement
// of the varint encoding of E followed by the ones-complement of M.
func EncodeFloat(b []byte, f float64) []byte {
// Handle the simplistic cases first.
switch {
case math.IsNaN(f):
return append(b, orderedEncodingNaN)
case math.IsInf(f, 1):
return append(b, orderedEncodingInfinity)
case math.IsInf(f, -1):
return append(b, orderedEncodingNegativeInfinity)
case f == 0:
return append(b, orderedEncodingZero)
}
e, m := floatMandE(f)
buf := make([]byte, len(m)+maxVarintSize+2)
switch {
case e < 0:
return append(b, encodeSmallNumber(f < 0, e, m, buf)...)
case e >= 0 && e <= 10:
return append(b, encodeMediumNumber(f < 0, e, m, buf)...)
case e >= 11:
return append(b, encodeLargeNumber(f < 0, e, m, buf)...)
}
return nil
}
// floatMandE computes and returns the mantissa M and exponent E for f.
//
// The mantissa is a base-100 representation of the value. The exponent
// E determines where to put the decimal point.
//
// Each centimal digit of the mantissa is stored in a byte. If the value
// of the centimal digit is X (hence X>=0 and X<=99) then the byte value
// will be 2*X+1 for every byte of the mantissa, except for the last byte
// which will be 2*X+0. The mantissa must be the minimum number of bytes
// necessary to represent the value; trailing X==0 digits are omitted.
// This means that the mantissa will never contain a byte with the value 0x00.
//
// If we assume all digits of the mantissa occur to the right of the decimal
// point, then the exponent E is the power of one hundred by which one must
// multiply the mantissa to recover the original value.
func floatMandE(f float64) (int, []byte) {
if f < 0 {
f = -f
}
i := 0
if f >= 1 {
for t := f; t >= 1; t /= 100 {
i++
}
} else {
for t := f; t < 0.01; t *= 100 {
i--
}
}
// Iterate through the centimal digits of the
// mantissa and add them to the buffer.
// For a number like 9999.00001, start with
// 99.9900001, write 99 to the buffer, then
// multiply by 100 until there is no fractional
// portion left.
d := f * math.Pow(100, float64(-i+1))
var buf bytes.Buffer
n, frac := math.Modf(d)
buf.WriteByte(byte(2*n + 1))
for frac != 0 {
// Remove the integral portion and shift to the left.
// So given the above example:
// 99.9900001 -> [-99] -> 00.9900001 -> [*100] -> 99.00001
// 99.00001 -> [-99] -> 00.00001 -> [*100] -> 00.001
// 00.001 -> [-00] -> 00.001 -> [*100] -> 00.1
// 00.1 -> [-00] -> 00.1 -> [*100] -> 10
// Done as frac == 0
d = (d - n) * 100
n, frac = math.Modf(d)
buf.WriteByte(byte(2*n + 1)) // Write the integral to the buf.
}
b := buf.Bytes()
// The last byte is encoded as 2n+0.
b[len(b)-1]--
// Trailing X==0 digits are omitted.
return i, removeTrailingZeros(b)
}
// onesComplement inverts each byte in buf from index start to end.
func onesComplement(buf []byte, start, end int) {
for i := start; i < end; i++ {
buf[i] = ^buf[i]
}
}
func encodeSmallNumber(negative bool, e int, m []byte, buf []byte) []byte {
n := PutUvarint(buf[1:], uint64(-e))
copy(buf[n+1:], m)
l := 1 + n + len(m)
if negative {
buf[0] = 0x14
onesComplement(buf, n, l) // ones complement of mantissa
} else {
buf[0] = 0x16
onesComplement(buf, 1, n) // ones complement of exponent
}
buf[l] = orderedEncodingTerminator
return buf[:l+1]
}
func encodeMediumNumber(negative bool, e int, m []byte, buf []byte) []byte {
copy(buf[1:], m)
l := 1 + len(m)
if negative {
buf[0] = 0x13 - byte(e)
onesComplement(buf, 1, l)
} else {
buf[0] = 0x17 + byte(e)
}
buf[l] = orderedEncodingTerminator
return buf[:l+1]
}
func encodeLargeNumber(negative bool, e int, m []byte, buf []byte) []byte {
n := PutUvarint(buf[1:], uint64(e))
copy(buf[n+1:], m)
l := 1 + n + len(m)
if negative {
buf[0] = 0x08
onesComplement(buf, 1, l)
} else {
buf[0] = 0x22
}
buf[l] = orderedEncodingTerminator
return buf[:l+1]
}
func decodeMediumNumber(negative bool, buf []byte) (int, []byte) {
// We don't need the prefix and last terminator.
m := make([]byte, len(buf)-2)
copy(m, buf[1:len(buf)-1])
var e int
if negative {
e = 0x13 - int(buf[0])
onesComplement(m, 0, len(m))
} else {
e = int(buf[0]) - 0x17
}
return e, m
}
func decodeLargeNumber(negative bool, buf []byte) (int, []byte) {
m := make([]byte, len(buf))
copy(m, buf)
if negative {
onesComplement(m, 1, len(m))
}
e, l := GetUVarint(m[1:])
// We don't need the prefix and last terminator.
return int(e), m[l+1 : len(m)-1]
}