forked from influxdata/influxdb
-
Notifications
You must be signed in to change notification settings - Fork 0
/
tombstone.go
329 lines (277 loc) · 7.21 KB
/
tombstone.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
package tsm1
import (
"bufio"
"encoding/binary"
"io/ioutil"
"math"
"os"
"path/filepath"
"strings"
"sync"
)
const (
v2header = 0x1502
v2headerSize = 4
)
// Tombstoner records tombstones when entries are deleted.
type Tombstoner struct {
mu sync.RWMutex
// Path is the location of the file to record tombstone. This should be the
// full path to a TSM file.
Path string
// cache of the stats for this tombstone
fileStats []FileStat
// indicates that the stats may be out of sync with what is on disk and they
// should be refreshed.
statsLoaded bool
}
// Tombstone represents an individual deletion.
type Tombstone struct {
// Key is the tombstoned series key.
Key string
// Min and Max are the min and max unix nanosecond time ranges of Key that are deleted. If
// the full range is deleted, both values are -1.
Min, Max int64
}
// Add adds the all keys, across all timestamps, to the tombstone.
func (t *Tombstoner) Add(keys []string) error {
return t.AddRange(keys, math.MinInt64, math.MaxInt64)
}
// AddRange adds all keys to the tombstone specifying only the data between min and max to be removed.
func (t *Tombstoner) AddRange(keys []string, min, max int64) error {
if len(keys) == 0 {
return nil
}
t.mu.Lock()
defer t.mu.Unlock()
// If this TSMFile has not been written (mainly in tests), don't write a
// tombstone because the keys will not be written when it's actually saved.
if t.Path == "" {
return nil
}
t.statsLoaded = false
tombstones, err := t.readTombstone()
if err != nil {
return nil
}
for _, k := range keys {
tombstones = append(tombstones, Tombstone{
Key: k,
Min: min,
Max: max,
})
}
return t.writeTombstone(tombstones)
}
// ReadAll returns all the tombstones in the Tombstoner's directory.
func (t *Tombstoner) ReadAll() ([]Tombstone, error) {
return t.readTombstone()
}
// Delete removes all the tombstone files from disk.
func (t *Tombstoner) Delete() error {
t.mu.Lock()
defer t.mu.Unlock()
if err := os.RemoveAll(t.tombstonePath()); err != nil {
return err
}
t.statsLoaded = false
return nil
}
// HasTombstones return true if there are any tombstone entries recorded.
func (t *Tombstoner) HasTombstones() bool {
files := t.TombstoneFiles()
return len(files) > 0 && files[0].Size > 0
}
// TombstoneFiles returns any tombstone files associated with Tombstoner's TSM file.
func (t *Tombstoner) TombstoneFiles() []FileStat {
t.mu.RLock()
if t.statsLoaded {
stats := t.fileStats
t.mu.RUnlock()
return stats
}
t.mu.RUnlock()
stat, err := os.Stat(t.tombstonePath())
if os.IsNotExist(err) || err != nil {
t.mu.Lock()
// The file doesn't exist so record that we tried to load it so
// we don't continue to keep trying. This is the common case.
t.statsLoaded = os.IsNotExist(err)
t.fileStats = t.fileStats[:0]
t.mu.Unlock()
return nil
}
t.mu.Lock()
t.fileStats = append(t.fileStats[:0], FileStat{
Path: t.tombstonePath(),
LastModified: stat.ModTime().UnixNano(),
Size: uint32(stat.Size()),
})
t.statsLoaded = true
stats := t.fileStats
t.mu.Unlock()
return stats
}
// Walk calls fn for every Tombstone under the Tombstoner.
func (t *Tombstoner) Walk(fn func(t Tombstone) error) error {
f, err := os.Open(t.tombstonePath())
if os.IsNotExist(err) {
return nil
} else if err != nil {
return err
}
defer f.Close()
var b [4]byte
if _, err := f.Read(b[:]); err != nil {
// Might be a zero length file which should not exist, but
// an old bug allowed them to occur. Treat it as an empty
// v1 tombstone file so we don't abort loading the TSM file.
return t.readTombstoneV1(f, fn)
}
if _, err := f.Seek(0, os.SEEK_SET); err != nil {
return err
}
if binary.BigEndian.Uint32(b[:]) == v2header {
return t.readTombstoneV2(f, fn)
}
return t.readTombstoneV1(f, fn)
}
func (t *Tombstoner) writeTombstone(tombstones []Tombstone) error {
tmp, err := ioutil.TempFile(filepath.Dir(t.Path), "tombstone")
if err != nil {
return err
}
defer tmp.Close()
var b [8]byte
binary.BigEndian.PutUint32(b[:4], v2header)
if _, err := tmp.Write(b[:4]); err != nil {
return err
}
for _, t := range tombstones {
binary.BigEndian.PutUint32(b[:4], uint32(len(t.Key)))
if _, err := tmp.Write(b[:4]); err != nil {
return err
}
if _, err := tmp.Write([]byte(t.Key)); err != nil {
return err
}
binary.BigEndian.PutUint64(b[:], uint64(t.Min))
if _, err := tmp.Write(b[:]); err != nil {
return err
}
binary.BigEndian.PutUint64(b[:], uint64(t.Max))
if _, err := tmp.Write(b[:]); err != nil {
return err
}
}
// fsync the file to flush the write
if err := tmp.Sync(); err != nil {
return err
}
tmpFilename := tmp.Name()
tmp.Close()
if err := renameFile(tmpFilename, t.tombstonePath()); err != nil {
return err
}
return syncDir(filepath.Dir(t.tombstonePath()))
}
func (t *Tombstoner) readTombstone() ([]Tombstone, error) {
var tombstones []Tombstone
if err := t.Walk(func(t Tombstone) error {
tombstones = append(tombstones, t)
return nil
}); err != nil {
return nil, err
}
return tombstones, nil
}
// readTombstoneV1 reads the first version of tombstone files that were not
// capable of storing a min and max time for a key. This is used for backwards
// compatibility with versions prior to 0.13. This format is a simple newline
// separated text file.
func (t *Tombstoner) readTombstoneV1(f *os.File, fn func(t Tombstone) error) error {
r := bufio.NewScanner(f)
for r.Scan() {
line := r.Text()
if line == "" {
continue
}
if err := fn(Tombstone{
Key: line,
Min: math.MinInt64,
Max: math.MaxInt64,
}); err != nil {
return err
}
}
return r.Err()
}
// readTombstoneV2 reads the second version of tombstone files that are capable
// of storing keys and the range of time for the key that points were deleted. This
// format is binary.
func (t *Tombstoner) readTombstoneV2(f *os.File, fn func(t Tombstone) error) error {
// Skip header, already checked earlier
if _, err := f.Seek(v2headerSize, os.SEEK_SET); err != nil {
return err
}
n := int64(4)
fi, err := f.Stat()
if err != nil {
return err
}
size := fi.Size()
var (
min, max int64
key string
)
b := make([]byte, 4096)
for {
if n >= size {
return nil
}
if _, err = f.Read(b[:4]); err != nil {
return err
}
n += 4
keyLen := int(binary.BigEndian.Uint32(b[:4]))
if keyLen > len(b) {
b = make([]byte, keyLen)
}
if _, err := f.Read(b[:keyLen]); err != nil {
return err
}
key = string(b[:keyLen])
n += int64(keyLen)
if _, err := f.Read(b[:8]); err != nil {
return err
}
n += 8
min = int64(binary.BigEndian.Uint64(b[:8]))
if _, err := f.Read(b[:8]); err != nil {
return err
}
n += 8
max = int64(binary.BigEndian.Uint64(b[:8]))
if err := fn(Tombstone{
Key: key,
Min: min,
Max: max,
}); err != nil {
return err
}
}
}
func (t *Tombstoner) tombstonePath() string {
if strings.HasSuffix(t.Path, "tombstone") {
return t.Path
}
// Filename is 0000001.tsm1
filename := filepath.Base(t.Path)
// Strip off the tsm1
ext := filepath.Ext(filename)
if ext != "" {
filename = strings.TrimSuffix(filename, ext)
}
// Append the "tombstone" suffix to create a 0000001.tombstone file
return filepath.Join(filepath.Dir(t.Path), filename+".tombstone")
}