-
Notifications
You must be signed in to change notification settings - Fork 0
/
storage.go
359 lines (302 loc) · 10.1 KB
/
storage.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
package storage
import (
"context"
"fmt"
"os"
"path/filepath"
"sync"
"github.com/coreos/etcd/pkg/fileutil"
"github.com/coreos/etcd/raft/raftpb"
"github.com/coreos/etcd/snap"
"github.com/coreos/etcd/wal"
"github.com/coreos/etcd/wal/walpb"
"github.com/docker/swarmkit/log"
"github.com/docker/swarmkit/manager/encryption"
"github.com/pkg/errors"
)
// ErrNoWAL is returned if there are no WALs on disk
var ErrNoWAL = errors.New("no WAL present")
type walSnapDirs struct {
wal string
snap string
}
// the wal/snap directories in decreasing order of preference/version
var versionedWALSnapDirs = []walSnapDirs{
{wal: "wal-v3-encrypted", snap: "snap-v3-encrypted"},
{wal: "wal-v3", snap: "snap-v3"},
{wal: "wal", snap: "snap"},
}
// EncryptedRaftLogger saves raft data to disk
type EncryptedRaftLogger struct {
StateDir string
EncryptionKey []byte
// FIPS specifies whether the encryption should be FIPS-compliant
FIPS bool
// mutex is locked for writing only when we need to replace the wal object and snapshotter
// object, not when we're writing snapshots or wals (in which case it's locked for reading)
encoderMu sync.RWMutex
wal WAL
snapshotter Snapshotter
}
// BootstrapFromDisk creates a new snapshotter and wal, and also reads the latest snapshot and WALs from disk
func (e *EncryptedRaftLogger) BootstrapFromDisk(ctx context.Context, oldEncryptionKeys ...[]byte) (*raftpb.Snapshot, WALData, error) {
e.encoderMu.Lock()
defer e.encoderMu.Unlock()
walDir := e.walDir()
snapDir := e.snapDir()
encrypter, decrypter := encryption.Defaults(e.EncryptionKey, e.FIPS)
if oldEncryptionKeys != nil {
decrypters := []encryption.Decrypter{decrypter}
for _, key := range oldEncryptionKeys {
_, d := encryption.Defaults(key, e.FIPS)
decrypters = append(decrypters, d)
}
decrypter = encryption.NewMultiDecrypter(decrypters...)
}
snapFactory := NewSnapFactory(encrypter, decrypter)
if !fileutil.Exist(snapDir) {
// If snapshots created by the etcd-v2 code exist, or by swarmkit development version,
// read the latest snapshot and write it encoded to the new path. The new path
// prevents etc-v2 creating snapshots that are visible to us, but not encoded and
// out of sync with our WALs, after a downgrade.
for _, dirs := range versionedWALSnapDirs[1:] {
legacySnapDir := filepath.Join(e.StateDir, dirs.snap)
if fileutil.Exist(legacySnapDir) {
if err := MigrateSnapshot(legacySnapDir, snapDir, OriginalSnap, snapFactory); err != nil {
return nil, WALData{}, err
}
break
}
}
}
// ensure the new directory exists
if err := os.MkdirAll(snapDir, 0700); err != nil {
return nil, WALData{}, errors.Wrap(err, "failed to create snapshot directory")
}
var (
snapshotter Snapshotter
walObj WAL
err error
)
// Create a snapshotter and load snapshot data
snapshotter = snapFactory.New(snapDir)
snapshot, err := snapshotter.Load()
if err != nil && err != snap.ErrNoSnapshot {
return nil, WALData{}, err
}
walFactory := NewWALFactory(encrypter, decrypter)
var walsnap walpb.Snapshot
if snapshot != nil {
walsnap.Index = snapshot.Metadata.Index
walsnap.Term = snapshot.Metadata.Term
}
if !wal.Exist(walDir) {
var walExists bool
// If wals created by the etcd-v2 wal code exist, read the latest ones based
// on this snapshot and encode them to wals in the new path to avoid adding
// backwards-incompatible entries to those files.
for _, dirs := range versionedWALSnapDirs[1:] {
legacyWALDir := filepath.Join(e.StateDir, dirs.wal)
if !wal.Exist(legacyWALDir) {
continue
}
if err = MigrateWALs(ctx, legacyWALDir, walDir, OriginalWAL, walFactory, walsnap); err != nil {
return nil, WALData{}, err
}
walExists = true
break
}
if !walExists {
return nil, WALData{}, ErrNoWAL
}
}
walObj, waldata, err := ReadRepairWAL(ctx, walDir, walsnap, walFactory)
if err != nil {
return nil, WALData{}, err
}
e.snapshotter = snapshotter
e.wal = walObj
return snapshot, waldata, nil
}
// BootstrapNew creates a new snapshotter and WAL writer, expecting that there is nothing on disk
func (e *EncryptedRaftLogger) BootstrapNew(metadata []byte) error {
e.encoderMu.Lock()
defer e.encoderMu.Unlock()
encrypter, decrypter := encryption.Defaults(e.EncryptionKey, e.FIPS)
walFactory := NewWALFactory(encrypter, decrypter)
for _, dirpath := range []string{filepath.Dir(e.walDir()), e.snapDir()} {
if err := os.MkdirAll(dirpath, 0700); err != nil {
return errors.Wrapf(err, "failed to create %s", dirpath)
}
}
var err error
// the wal directory must not already exist upon creation
e.wal, err = walFactory.Create(e.walDir(), metadata)
if err != nil {
return errors.Wrap(err, "failed to create WAL")
}
e.snapshotter = NewSnapFactory(encrypter, decrypter).New(e.snapDir())
return nil
}
func (e *EncryptedRaftLogger) walDir() string {
return filepath.Join(e.StateDir, versionedWALSnapDirs[0].wal)
}
func (e *EncryptedRaftLogger) snapDir() string {
return filepath.Join(e.StateDir, versionedWALSnapDirs[0].snap)
}
// RotateEncryptionKey swaps out the encoders and decoders used by the wal and snapshotter
func (e *EncryptedRaftLogger) RotateEncryptionKey(newKey []byte) {
e.encoderMu.Lock()
defer e.encoderMu.Unlock()
if e.wal != nil { // if the wal exists, the snapshotter exists
// We don't want to have to close the WAL, because we can't open a new one.
// We need to know the previous snapshot, because when you open a WAL you
// have to read out all the entries from a particular snapshot, or you can't
// write. So just rotate the encoders out from under it. We already
// have a lock on writing to snapshots and WALs.
wrapped, ok := e.wal.(*wrappedWAL)
if !ok {
panic(fmt.Errorf("EncryptedRaftLogger's WAL is not a wrappedWAL"))
}
wrapped.encrypter, wrapped.decrypter = encryption.Defaults(newKey, e.FIPS)
e.snapshotter = NewSnapFactory(wrapped.encrypter, wrapped.decrypter).New(e.snapDir())
}
e.EncryptionKey = newKey
}
// SaveSnapshot actually saves a given snapshot to both the WAL and the snapshot.
func (e *EncryptedRaftLogger) SaveSnapshot(snapshot raftpb.Snapshot) error {
walsnap := walpb.Snapshot{
Index: snapshot.Metadata.Index,
Term: snapshot.Metadata.Term,
}
e.encoderMu.RLock()
if err := e.wal.SaveSnapshot(walsnap); err != nil {
e.encoderMu.RUnlock()
return err
}
snapshotter := e.snapshotter
e.encoderMu.RUnlock()
if err := snapshotter.SaveSnap(snapshot); err != nil {
return err
}
return e.wal.ReleaseLockTo(snapshot.Metadata.Index)
}
// GC garbage collects snapshots and wals older than the provided index and term
func (e *EncryptedRaftLogger) GC(index uint64, term uint64, keepOldSnapshots uint64) error {
// Delete any older snapshots
curSnapshot := fmt.Sprintf("%016x-%016x%s", term, index, ".snap")
snapshots, err := ListSnapshots(e.snapDir())
if err != nil {
return err
}
// Ignore any snapshots that are older than the current snapshot.
// Delete the others. Rather than doing lexical comparisons, we look
// at what exists before/after the current snapshot in the slice.
// This means that if the current snapshot doesn't appear in the
// directory for some strange reason, we won't delete anything, which
// is the safe behavior.
curSnapshotIdx := -1
var (
removeErr error
oldestSnapshot string
)
for i, snapFile := range snapshots {
if curSnapshotIdx >= 0 && i > curSnapshotIdx {
if uint64(i-curSnapshotIdx) > keepOldSnapshots {
err := os.Remove(filepath.Join(e.snapDir(), snapFile))
if err != nil && removeErr == nil {
removeErr = err
}
continue
}
} else if snapFile == curSnapshot {
curSnapshotIdx = i
}
oldestSnapshot = snapFile
}
if removeErr != nil {
return removeErr
}
// Remove any WAL files that only contain data from before the oldest
// remaining snapshot.
if oldestSnapshot == "" {
return nil
}
// Parse index out of oldest snapshot's filename
var snapTerm, snapIndex uint64
_, err = fmt.Sscanf(oldestSnapshot, "%016x-%016x.snap", &snapTerm, &snapIndex)
if err != nil {
return errors.Wrapf(err, "malformed snapshot filename %s", oldestSnapshot)
}
wals, err := ListWALs(e.walDir())
if err != nil {
return err
}
found := false
deleteUntil := -1
for i, walName := range wals {
var walSeq, walIndex uint64
_, err = fmt.Sscanf(walName, "%016x-%016x.wal", &walSeq, &walIndex)
if err != nil {
return errors.Wrapf(err, "could not parse WAL name %s", walName)
}
if walIndex >= snapIndex {
deleteUntil = i - 1
found = true
break
}
}
// If all WAL files started with indices below the oldest snapshot's
// index, we can delete all but the newest WAL file.
if !found && len(wals) != 0 {
deleteUntil = len(wals) - 1
}
for i := 0; i < deleteUntil; i++ {
walPath := filepath.Join(e.walDir(), wals[i])
l, err := fileutil.TryLockFile(walPath, os.O_WRONLY, fileutil.PrivateFileMode)
if err != nil {
return errors.Wrapf(err, "could not lock old WAL file %s for removal", wals[i])
}
err = os.Remove(walPath)
l.Close()
if err != nil {
return errors.Wrapf(err, "error removing old WAL file %s", wals[i])
}
}
return nil
}
// SaveEntries saves only entries to disk
func (e *EncryptedRaftLogger) SaveEntries(st raftpb.HardState, entries []raftpb.Entry) error {
e.encoderMu.RLock()
defer e.encoderMu.RUnlock()
if e.wal == nil {
return fmt.Errorf("raft WAL has either been closed or has never been created")
}
return e.wal.Save(st, entries)
}
// Close closes the logger - it will have to be bootstrapped again to start writing
func (e *EncryptedRaftLogger) Close(ctx context.Context) {
e.encoderMu.Lock()
defer e.encoderMu.Unlock()
if e.wal != nil {
if err := e.wal.Close(); err != nil {
log.G(ctx).WithError(err).Error("error closing raft WAL")
}
}
e.wal = nil
e.snapshotter = nil
}
// Clear closes the existing WAL and removes the WAL and snapshot.
func (e *EncryptedRaftLogger) Clear(ctx context.Context) error {
e.encoderMu.Lock()
defer e.encoderMu.Unlock()
if e.wal != nil {
if err := e.wal.Close(); err != nil {
log.G(ctx).WithError(err).Error("error closing raft WAL")
}
}
e.snapshotter = nil
os.RemoveAll(e.walDir())
os.RemoveAll(e.snapDir())
return nil
}