/
azblob.go
431 lines (369 loc) · 13 KB
/
azblob.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
/*
Copyright 2020 The Vitess Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package azblobbackupstorage implements the BackupStorage interface
// for Azure Blob Storage
package azblobbackupstorage
import (
"context"
"fmt"
"io"
"net/url"
"os"
"strings"
"sync"
"time"
"github.com/Azure/azure-pipeline-go/pipeline"
"github.com/Azure/azure-storage-blob-go/azblob"
"github.com/spf13/pflag"
"vitess.io/vitess/go/vt/concurrency"
"vitess.io/vitess/go/vt/log"
"vitess.io/vitess/go/vt/mysqlctl/backupstorage"
"vitess.io/vitess/go/vt/servenv"
)
var (
	// accountName is the Azure Storage Account name
	// (--azblob_backup_account_name flag; falls back to VT_AZBLOB_ACCOUNT_NAME).
	accountName string
	// accountKeyFile is the path to a file containing the private access key
	// (--azblob_backup_account_key_file flag; if unset, VT_AZBLOB_ACCOUNT_KEY
	// is used as the key itself).
	accountKeyFile string
	// containerName is the name of the container that will store the backups
	// (--azblob_backup_container_name flag).
	containerName string
	// storageRoot is an optional prefix to prepend to all files
	// (--azblob_backup_storage_root flag).
	storageRoot string
	// azBlobParallelism is the upload buffer count per file
	// (--azblob_backup_parallelism flag); more buffers use more memory.
	azBlobParallelism int
)
// registerFlags installs the azblob backup flags on the given FlagSet,
// binding them to the package-level configuration variables above.
func registerFlags(fs *pflag.FlagSet) {
	fs.StringVar(&accountName, "azblob_backup_account_name", "", "Azure Storage Account name for backups; if this flag is unset, the environment variable VT_AZBLOB_ACCOUNT_NAME will be used.")
	fs.StringVar(&accountKeyFile, "azblob_backup_account_key_file", "", "Path to a file containing the Azure Storage account key; if this flag is unset, the environment variable VT_AZBLOB_ACCOUNT_KEY will be used as the key itself (NOT a file path).")
	fs.StringVar(&containerName, "azblob_backup_container_name", "", "Azure Blob Container Name.")
	fs.StringVar(&storageRoot, "azblob_backup_storage_root", "", "Root prefix for all backup-related Azure Blobs; this should exclude both initial and trailing '/' (e.g. just 'a/b' not '/a/b/').")
	fs.IntVar(&azBlobParallelism, "azblob_backup_parallelism", 1, "Azure Blob operation parallelism (requires extra memory when increased).")
}
// init registers the azblob flags with every Vitess binary that performs
// or manages backups.
func init() {
	for _, cmd := range []string{"vtbackup", "vtctl", "vtctld", "vttablet"} {
		servenv.OnParseFor(cmd, registerFlags)
	}
}
const (
	// defaultRetryCount bounds HTTP pipeline retries, download read retries,
	// and the folder-blob delete attempts in RemoveBackup.
	defaultRetryCount = 5
	// delimiter separates the virtual "directory" levels in blob names.
	delimiter = "/"
)
// azInternalCredentials returns the Azure Storage account name and key from
// the available credential sources, in the following order of precedence:
//  1. Command-line flags (azblob_backup_account_name, azblob_backup_account_key_file)
//  2. Environment variables (VT_AZBLOB_ACCOUNT_NAME, VT_AZBLOB_ACCOUNT_KEY)
//
// It returns an error if the key file cannot be read, or if either the name
// or the key ends up empty.
func azInternalCredentials() (string, string, error) {
	actName := accountName
	if actName == "" {
		// Check the Environmental Value
		actName = os.Getenv("VT_AZBLOB_ACCOUNT_NAME")
	}
	var actKey string
	if accountKeyFile != "" {
		log.Infof("Getting Azure Storage Account key from file: %s", accountKeyFile)
		dat, err := os.ReadFile(accountKeyFile)
		if err != nil {
			return "", "", err
		}
		// Trim surrounding whitespace: key files commonly end with a
		// newline, which is not part of the base64-encoded key and would
		// make azblob.NewSharedKeyCredential fail to decode it.
		actKey = strings.TrimSpace(string(dat))
	} else {
		actKey = os.Getenv("VT_AZBLOB_ACCOUNT_KEY")
	}
	if actName == "" || actKey == "" {
		return "", "", fmt.Errorf("Azure Storage Account credentials not found in command-line flags or environment variables")
	}
	return actName, actKey, nil
}
// azCredentials resolves the account name and key (flags or environment)
// and wraps them in a SharedKeyCredential for the azblob pipeline.
func azCredentials() (*azblob.SharedKeyCredential, error) {
	name, key, err := azInternalCredentials()
	if err != nil {
		return nil, err
	}
	return azblob.NewSharedKeyCredential(name, key)
}
// azServiceURL builds the root service URL for the storage account
// (https://<account>.blob.core.windows.net/) with a pipeline configured for
// fixed-interval retries and with its logging forwarded to the Vitess logger.
func azServiceURL(credentials *azblob.SharedKeyCredential) azblob.ServiceURL {
	pipeline := azblob.NewPipeline(credentials, azblob.PipelineOptions{
		Retry: azblob.RetryOptions{
			Policy:   azblob.RetryPolicyFixed,
			MaxTries: defaultRetryCount,
			// Per https://godoc.org/github.com/Azure/azure-storage-blob-go/azblob#RetryOptions
			// this should be set to a very high number (they claim 60s per MB).
			// That could end up being days so we are limiting this to four hours.
			TryTimeout: 4 * time.Hour,
		},
		Log: pipeline.LogOptions{
			// Forward pipeline log messages to the Vitess logger at the
			// matching severity.
			Log: func(level pipeline.LogLevel, message string) {
				switch level {
				case pipeline.LogFatal, pipeline.LogPanic:
					log.Fatal(message)
				case pipeline.LogError:
					log.Error(message)
				case pipeline.LogWarning:
					log.Warning(message)
				case pipeline.LogInfo, pipeline.LogDebug:
					log.Info(message)
				}
			},
			// Gate pipeline logging on the Vitess verbosity level: more
			// severe messages require a lower -v level to be emitted.
			ShouldLog: func(level pipeline.LogLevel) bool {
				switch level {
				case pipeline.LogFatal, pipeline.LogPanic:
					return bool(log.V(3))
				case pipeline.LogError:
					return bool(log.V(3))
				case pipeline.LogWarning:
					return bool(log.V(2))
				case pipeline.LogInfo, pipeline.LogDebug:
					return bool(log.V(1))
				}
				return false
			},
		},
	})
	u := url.URL{
		Scheme: "https",
		Host:   credentials.AccountName() + ".blob.core.windows.net",
		Path:   "/",
	}
	return azblob.NewServiceURL(u, pipeline)
}
// AZBlobBackupHandle implements BackupHandle for Azure Blob service.
type AZBlobBackupHandle struct {
	bs       *AZBlobBackupStorage
	dir      string // backup directory; for read-only handles this already includes the backup name (see ListBackups)
	name     string // backup name
	readOnly bool   // true for handles created by ListBackups, false for StartBackup
	// waitGroup tracks the background upload goroutines started by AddFile;
	// EndBackup waits on it.
	waitGroup sync.WaitGroup
	// errors accumulates upload errors across all AddFile goroutines.
	errors concurrency.AllErrorRecorder
	// ctx/cancel scope the uploads so AbortBackup can cancel them mid-flight.
	ctx    context.Context
	cancel context.CancelFunc
}
// Directory implements BackupHandle. It returns the backup directory.
func (bh *AZBlobBackupHandle) Directory() string {
	return bh.dir
}

// Name implements BackupHandle. It returns the backup name.
func (bh *AZBlobBackupHandle) Name() string {
	return bh.name
}

// RecordError is part of the concurrency.ErrorRecorder interface.
// It is safe to call from the concurrent upload goroutines.
func (bh *AZBlobBackupHandle) RecordError(err error) {
	bh.errors.RecordError(err)
}

// HasErrors is part of the concurrency.ErrorRecorder interface.
func (bh *AZBlobBackupHandle) HasErrors() bool {
	return bh.errors.HasErrors()
}

// Error is part of the concurrency.ErrorRecorder interface.
// It returns the accumulated upload errors, or nil if none were recorded.
func (bh *AZBlobBackupHandle) Error() error {
	return bh.errors.Error()
}
// AddFile implements BackupHandle.
// It returns a WriteCloser backed by a pipe; the upload itself runs in a
// background goroutine, so callers must call EndBackup (which waits on
// bh.waitGroup) to learn whether the upload actually succeeded.
func (bh *AZBlobBackupHandle) AddFile(ctx context.Context, filename string, filesize int64) (io.WriteCloser, error) {
	if bh.readOnly {
		return nil, fmt.Errorf("AddFile cannot be called on read-only backup")
	}
	// Error out if the file size is too large ( ~4.75 TB)
	if filesize > azblob.BlockBlobMaxStageBlockBytes*azblob.BlockBlobMaxBlocks {
		return nil, fmt.Errorf("filesize (%v) is too large to upload to az blob (max size %v)", filesize, azblob.BlockBlobMaxStageBlockBytes*azblob.BlockBlobMaxBlocks)
	}
	obj := objName(bh.dir, bh.name, filename)
	containerURL, err := bh.bs.containerURL()
	if err != nil {
		return nil, err
	}
	blockBlobURL := containerURL.NewBlockBlobURL(obj)
	reader, writer := io.Pipe()
	bh.waitGroup.Add(1)
	go func() {
		defer bh.waitGroup.Done()
		// Uses bh.ctx (not this method's ctx) so AbortBackup can cancel
		// in-flight uploads via bh.cancel.
		_, err := azblob.UploadStreamToBlockBlob(bh.ctx, reader, blockBlobURL, azblob.UploadStreamToBlockBlobOptions{
			BufferSize: azblob.BlockBlobMaxStageBlockBytes,
			MaxBuffers: azBlobParallelism,
		})
		if err != nil {
			// Unblock any pending writer and surface the error to EndBackup.
			reader.CloseWithError(err)
			bh.RecordError(err)
		}
	}()
	return writer, nil
}
// EndBackup implements BackupHandle.
// It blocks until every upload goroutine started by AddFile has finished,
// then reports any error those goroutines recorded.
func (bh *AZBlobBackupHandle) EndBackup(ctx context.Context) error {
	if !bh.readOnly {
		bh.waitGroup.Wait()
		return bh.Error()
	}
	return fmt.Errorf("EndBackup cannot be called on read-only backup")
}
// AbortBackup implements BackupHandle.
// It cancels any in-flight uploads and then removes whatever was already
// written for this backup.
func (bh *AZBlobBackupHandle) AbortBackup(ctx context.Context) error {
	if bh.readOnly {
		return fmt.Errorf("AbortBackup cannot be called on read-only backup")
	}
	bh.cancel() // cancel the context of any uploads started by AddFile
	return bh.bs.RemoveBackup(ctx, bh.dir, bh.name)
}
// ReadFile implements BackupHandle.
// Note: for read-only handles created by ListBackups, bh.dir already ends
// with the backup name (it is "<dir>/<subdir>"), so the object name here is
// built from bh.dir and filename only — unlike AddFile, which also joins
// bh.name.
func (bh *AZBlobBackupHandle) ReadFile(ctx context.Context, filename string) (io.ReadCloser, error) {
	if !bh.readOnly {
		return nil, fmt.Errorf("ReadFile cannot be called on read-write backup")
	}
	obj := objName(bh.dir, filename)
	containerURL, err := bh.bs.containerURL()
	if err != nil {
		return nil, err
	}
	blobURL := containerURL.NewBlobURL(obj)
	resp, err := blobURL.Download(ctx, 0, azblob.CountToEnd, azblob.BlobAccessConditions{}, false)
	if err != nil {
		return nil, err
	}
	// Wrap the response body in a retrying reader so transient read failures
	// are retried up to defaultRetryCount times, logging each failure.
	return resp.Body(azblob.RetryReaderOptions{
		MaxRetryRequests: defaultRetryCount,
		NotifyFailedRead: func(failureCount int, lastError error, offset int64, count int64, willRetry bool) {
			log.Warningf("ReadFile: [azblob] container: %s, directory: %s, filename: %s, error: %v", containerName, objName(bh.dir, ""), filename, lastError)
		},
		TreatEarlyCloseAsError: true,
	}), nil
}
// AZBlobBackupStorage implements the BackupStorage interface for Azure Blob
// Storage. It is stateless: credentials and container configuration are read
// from package-level flags/environment on each operation.
type AZBlobBackupStorage struct {
}
// containerURL builds a ContainerURL for the configured backup container,
// resolving credentials from flags or the environment on every call.
func (bs *AZBlobBackupStorage) containerURL() (*azblob.ContainerURL, error) {
	cred, err := azCredentials()
	if err != nil {
		return nil, err
	}
	containerURL := azServiceURL(cred).NewContainerURL(containerName)
	return &containerURL, nil
}
// ListBackups implements BackupStorage.
// It lists the container one hierarchy level below dir and returns a
// read-only BackupHandle for each sub-directory found, in the sorted order
// the service reports them.
func (bs *AZBlobBackupStorage) ListBackups(ctx context.Context, dir string) ([]backupstorage.BackupHandle, error) {
	searchPrefix := "/"
	if dir != "/" {
		searchPrefix = objName(dir, "")
	}
	log.Infof("ListBackups: [azblob] container: %s, directory: %v", containerName, searchPrefix)
	containerURL, err := bs.containerURL()
	if err != nil {
		return nil, err
	}
	var subdirs []string
	for marker := (azblob.Marker{}); marker.NotDone(); {
		// Blobs come back in sorted order so no second sort is needed.
		resp, err := containerURL.ListBlobsHierarchySegment(ctx, marker, delimiter, azblob.ListBlobsSegmentOptions{
			Prefix:     searchPrefix,
			MaxResults: 0,
		})
		if err != nil {
			return nil, err
		}
		for _, prefix := range resp.Segment.BlobPrefixes {
			// Strip the search prefix and the trailing delimiter to get the
			// bare backup name.
			name := strings.TrimSuffix(strings.TrimPrefix(prefix.Name, searchPrefix), delimiter)
			subdirs = append(subdirs, name)
		}
		marker = resp.NextMarker
	}
	result := make([]backupstorage.BackupHandle, 0, len(subdirs))
	for _, subdir := range subdirs {
		cancelableCtx, cancel := context.WithCancel(ctx)
		result = append(result, &AZBlobBackupHandle{
			bs:       bs,
			dir:      strings.Join([]string{dir, subdir}, "/"),
			name:     subdir,
			readOnly: true,
			ctx:      cancelableCtx,
			cancel:   cancel,
		})
	}
	return result, nil
}
// StartBackup implements BackupStorage.
// It returns a writable handle whose uploads run under a derived context so
// AbortBackup can cancel them.
func (bs *AZBlobBackupStorage) StartBackup(ctx context.Context, dir, name string) (backupstorage.BackupHandle, error) {
	uploadCtx, cancelUploads := context.WithCancel(ctx)
	handle := &AZBlobBackupHandle{
		bs:       bs,
		dir:      dir,
		name:     name,
		readOnly: false,
		ctx:      uploadCtx,
		cancel:   cancelUploads,
	}
	return handle, nil
}
// RemoveBackup implements BackupStorage.
// It deletes every blob under the backup's prefix one by one (this client
// version has no batch delete), then deletes the blob that represents the
// backup "folder" itself, retrying because blob deletion is asynchronous on
// the server side.
func (bs *AZBlobBackupStorage) RemoveBackup(ctx context.Context, dir, name string) error {
	// Fix: this log line previously said "ListBackups:" (copy-paste error).
	log.Infof("RemoveBackup: [azblob] container: %s, directory: %s", containerName, objName(dir, ""))
	containerURL, err := bs.containerURL()
	if err != nil {
		return err
	}
	searchPrefix := objName(dir, name, "")
	for marker := (azblob.Marker{}); marker.NotDone(); {
		resp, err := containerURL.ListBlobsHierarchySegment(ctx, marker, delimiter, azblob.ListBlobsSegmentOptions{
			Prefix:     searchPrefix,
			MaxResults: 0,
		})
		if err != nil {
			return err
		}
		// Right now there is no batch delete so we must iterate over all the blobs to delete them one by one
		// One day we will be able to use this https://docs.microsoft.com/en-us/rest/api/storageservices/blob-batch
		// but currently it is listed as a preview and its not in the go API
		for _, item := range resp.Segment.BlobItems {
			_, err := containerURL.NewBlobURL(item.Name).Delete(ctx, azblob.DeleteSnapshotsOptionInclude, azblob.BlobAccessConditions{})
			if err != nil {
				return err
			}
		}
		marker = resp.NextMarker
	}
	// Delete the blob representing the folder of the backup, remove any trailing slash to signify we want to remove the folder
	// NOTE: you must set DeleteSnapshotsOptionNone or this will error out with a server side error
	for retry := 0; retry < defaultRetryCount; retry++ {
		// Since the deletion of blobs is asynchronous we may need to wait a bit before we delete the folder
		// Also refresh the client just for good measure
		time.Sleep(10 * time.Second)
		containerURL, err = bs.containerURL()
		if err != nil {
			return err
		}
		log.Infof("Removing backup directory: %v", strings.TrimSuffix(searchPrefix, "/"))
		_, err = containerURL.NewBlobURL(strings.TrimSuffix(searchPrefix, "/")).Delete(ctx, azblob.DeleteSnapshotsOptionNone, azblob.BlobAccessConditions{})
		if err == nil {
			break
		}
	}
	// If every retry failed, err holds the last delete error.
	return err
}
// Close implements BackupStorage.
// This is a no-op: the storage holds no persistent connections or other
// resources (clients are created per operation in containerURL).
func (bs *AZBlobBackupStorage) Close() error {
	return nil
}
// objName joins path parts into an object name.
// Unlike path.Join, it doesn't collapse ".." or strip trailing slashes.
// It also adds the value of the -azblob_backup_storage_root flag if set.
func objName(parts ...string) string {
	joined := strings.Join(parts, "/")
	if storageRoot == "" {
		return joined
	}
	return storageRoot + "/" + joined
}
// init registers this implementation under the "azblob" backup engine name
// so it can be selected via the backup_storage_implementation flag.
func init() {
	backupstorage.BackupStorageMap["azblob"] = &AZBlobBackupStorage{}
}