Skip to content

Commit

Permalink
Merge pull request #7576 from planetscale/ds-restore-with-orc
Browse files Browse the repository at this point in the history
Respect -disable_active_reparents in backup/restore
  • Loading branch information
deepthi committed Mar 10, 2021
2 parents 8e55997 + 66450d8 commit ff90ed1
Show file tree
Hide file tree
Showing 5 changed files with 201 additions and 7 deletions.
2 changes: 1 addition & 1 deletion go/vt/mysqlctl/builtinbackupengine.go
Expand Up @@ -152,7 +152,7 @@ func (be *BuiltinBackupEngine) ExecuteBackup(ctx context.Context, params BackupP
replicaStatus, err := params.Mysqld.ReplicationStatus()
switch err {
case nil:
replicaStartRequired = replicaStatus.ReplicationRunning()
replicaStartRequired = replicaStatus.ReplicationRunning() && !*DisableActiveReparents
case mysql.ErrNotReplica:
// keep going if we're the master, might be a degenerate case
sourceIsMaster = true
Expand Down
30 changes: 28 additions & 2 deletions go/vt/vttablet/tabletmanager/restore.go
Expand Up @@ -78,7 +78,28 @@ func (tm *TabletManager) RestoreData(ctx context.Context, logger logutil.Logger,
if tm.Cnf == nil {
return fmt.Errorf("cannot perform restore without my.cnf, please restart vttablet with a my.cnf file specified")
}
return tm.restoreDataLocked(ctx, logger, waitForBackupInterval, deleteBeforeRestore)
// Tell Orchestrator we're stopped on purpose for some Vitess task.
// Do this in the background, as it's best-effort.
go func() {
if tm.orc == nil {
return
}
if err := tm.orc.BeginMaintenance(tm.Tablet(), "vttablet has been told to Restore"); err != nil {
log.Warningf("Orchestrator BeginMaintenance failed: %v", err)
}
}()
err := tm.restoreDataLocked(ctx, logger, waitForBackupInterval, deleteBeforeRestore)
// Tell Orchestrator we're no longer stopped on purpose.
// Do this in the background, as it's best-effort.
go func() {
if tm.orc == nil {
return
}
if err := tm.orc.EndMaintenance(tm.Tablet()); err != nil {
log.Warningf("Orchestrator EndMaintenance failed: %v", err)
}
}()
return err
}

func (tm *TabletManager) restoreDataLocked(ctx context.Context, logger logutil.Logger, waitForBackupInterval time.Duration, deleteBeforeRestore bool) error {
Expand Down Expand Up @@ -470,10 +491,15 @@ func (tm *TabletManager) startReplication(ctx context.Context, pos mysql.Positio
}

// Set master and start replication.
if err := tm.MysqlDaemon.SetMaster(ctx, ti.Tablet.MysqlHostname, int(ti.Tablet.MysqlPort), false /* stopReplicationBefore */, true /* startReplicationAfter */); err != nil {
if err := tm.MysqlDaemon.SetMaster(ctx, ti.Tablet.MysqlHostname, int(ti.Tablet.MysqlPort), false /* stopReplicationBefore */, !*mysqlctl.DisableActiveReparents /* startReplicationAfter */); err != nil {
return vterrors.Wrap(err, "MysqlDaemon.SetMaster failed")
}

// If active reparents are disabled, SetMaster above was called with
// startReplicationAfter=false, so replication is not running and the replica
// will never report progress. Waiting for an update would be pointless, so
// return immediately in that case (and only in that case).
if *mysqlctl.DisableActiveReparents {
	return nil
}
// wait for reliable seconds behind master
// we have pos where we want to resume from
// if MasterPosition is the same, that means no writes
Expand Down
20 changes: 20 additions & 0 deletions go/vt/vttablet/tabletmanager/rpc_backup.go
Expand Up @@ -88,6 +88,16 @@ func (tm *TabletManager) Backup(ctx context.Context, concurrency int, logger log
if err := tm.changeTypeLocked(ctx, topodatapb.TabletType_BACKUP, DBActionNone); err != nil {
return err
}
// Tell Orchestrator we're stopped on purpose for some Vitess task.
// Do this in the background, as it's best-effort.
go func() {
if tm.orc == nil {
return
}
if err := tm.orc.BeginMaintenance(tm.Tablet(), "vttablet has been told to run an offline backup"); err != nil {
logger.Warningf("Orchestrator BeginMaintenance failed: %v", err)
}
}()
}
// create the loggers: tee to console and source
l := logutil.NewTeeLogger(logutil.NewConsoleLogger(), logger)
Expand Down Expand Up @@ -124,6 +134,16 @@ func (tm *TabletManager) Backup(ctx context.Context, concurrency int, logger log
}
returnErr = err
}
// Tell Orchestrator we're no longer stopped on purpose.
// Do this in the background, as it's best-effort.
go func() {
if tm.orc == nil {
return
}
if err := tm.orc.EndMaintenance(tm.Tablet()); err != nil {
logger.Warningf("Orchestrator EndMaintenance failed: %v", err)
}
}()
}

return returnErr
Expand Down
5 changes: 1 addition & 4 deletions go/vt/vttablet/tabletmanager/tm_init.go
Expand Up @@ -597,10 +597,7 @@ func (tm *TabletManager) handleRestore(ctx context.Context) (bool, error) {
return false, fmt.Errorf("you cannot enable -restore_from_backup without a my.cnf file")
}

// two cases then:
// - restoreFromBackup is set: we restore, then initHealthCheck, all
// in the background
// - restoreFromBackup is not set: we initHealthCheck right away
// Restore in the background
if *restoreFromBackup {
go func() {
// Open the state manager after restore is done.
Expand Down
151 changes: 151 additions & 0 deletions go/vt/wrangler/testlib/backup_test.go
Expand Up @@ -394,3 +394,154 @@ func TestRestoreUnreachableMaster(t *testing.T) {
assert.True(t, destTablet.FakeMysqlDaemon.Replicating)
assert.True(t, destTablet.FakeMysqlDaemon.Running)
}

// TestDisableActiveReparents runs a backup on a replica and then a restore on
// a fresh tablet with -disable_active_reparents set, and checks that neither
// operation leaves replication running afterwards: the source tablet must not
// be replicating after the backup, and the destination tablet must not be
// replicating after the restore (no START SLAVE is expected in either list of
// super queries).
func TestDisableActiveReparents(t *testing.T) {
	// When you mess with globals you must remember to reset them. Save the
	// previous values so the test restores exactly what it found.
	prevDisableActiveReparents := *mysqlctl.DisableActiveReparents
	delay := discovery.GetTabletPickerRetryDelay()
	*mysqlctl.DisableActiveReparents = true
	defer func() {
		*mysqlctl.DisableActiveReparents = prevDisableActiveReparents
		discovery.SetTabletPickerRetryDelay(delay)
	}()
	discovery.SetTabletPickerRetryDelay(5 * time.Millisecond)

	// Initialize our environment
	ctx := context.Background()
	db := fakesqldb.New(t)
	defer db.Close()
	ts := memorytopo.NewServer("cell1", "cell2")
	wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient())
	vp := NewVtctlPipe(t, ts)
	defer vp.Close()

	// Set up mock query results.
	db.AddQuery("CREATE DATABASE IF NOT EXISTS _vt", &sqltypes.Result{})
	db.AddQuery("BEGIN", &sqltypes.Result{})
	db.AddQuery("COMMIT", &sqltypes.Result{})
	db.AddQueryPattern(`SET @@session\.sql_log_bin = .*`, &sqltypes.Result{})
	db.AddQueryPattern(`CREATE TABLE IF NOT EXISTS _vt\.shard_metadata .*`, &sqltypes.Result{})
	db.AddQueryPattern(`CREATE TABLE IF NOT EXISTS _vt\.local_metadata .*`, &sqltypes.Result{})
	db.AddQueryPattern(`ALTER TABLE _vt\.local_metadata .*`, &sqltypes.Result{})
	db.AddQueryPattern(`ALTER TABLE _vt\.shard_metadata .*`, &sqltypes.Result{})
	db.AddQueryPattern(`UPDATE _vt\.local_metadata SET db_name=.*`, &sqltypes.Result{})
	db.AddQueryPattern(`UPDATE _vt\.shard_metadata SET db_name=.*`, &sqltypes.Result{})
	db.AddQueryPattern(`INSERT INTO _vt\.local_metadata .*`, &sqltypes.Result{})

	// Initialize our temp dirs
	root, err := ioutil.TempDir("", "backuptest")
	require.NoError(t, err)
	defer os.RemoveAll(root)

	// Initialize BackupStorage. These are package-level globals, so put the
	// previous values back when the test ends; otherwise later tests would
	// inherit a storage root that points at the removed temp dir.
	prevStorageRoot := *filebackupstorage.FileBackupStorageRoot
	prevStorageImpl := *backupstorage.BackupStorageImplementation
	defer func() {
		*filebackupstorage.FileBackupStorageRoot = prevStorageRoot
		*backupstorage.BackupStorageImplementation = prevStorageImpl
	}()
	fbsRoot := path.Join(root, "fbs")
	*filebackupstorage.FileBackupStorageRoot = fbsRoot
	*backupstorage.BackupStorageImplementation = "file"

	// Initialize the fake mysql root directories
	sourceInnodbDataDir := path.Join(root, "source_innodb_data")
	sourceInnodbLogDir := path.Join(root, "source_innodb_log")
	sourceDataDir := path.Join(root, "source_data")
	sourceDataDbDir := path.Join(sourceDataDir, "vt_db")
	for _, s := range []string{sourceInnodbDataDir, sourceInnodbLogDir, sourceDataDbDir} {
		require.NoError(t, os.MkdirAll(s, os.ModePerm))
	}
	require.NoError(t, ioutil.WriteFile(path.Join(sourceInnodbDataDir, "innodb_data_1"), []byte("innodb data 1 contents"), os.ModePerm))
	require.NoError(t, ioutil.WriteFile(path.Join(sourceInnodbLogDir, "innodb_log_1"), []byte("innodb log 1 contents"), os.ModePerm))
	require.NoError(t, ioutil.WriteFile(path.Join(sourceDataDbDir, "db.opt"), []byte("db opt file"), os.ModePerm))

	// All three tablets start at the same replication position. Build a fresh
	// value (and a fresh GTID set map) per tablet so they never share state.
	newPosition := func() mysql.Position {
		return mysql.Position{
			GTIDSet: mysql.MariadbGTIDSet{
				2: mysql.MariadbGTID{
					Domain:   2,
					Server:   123,
					Sequence: 457,
				},
			},
		}
	}

	// create a master tablet, set its master position
	master := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_MASTER, db)
	master.FakeMysqlDaemon.ReadOnly = false
	master.FakeMysqlDaemon.Replicating = false
	master.FakeMysqlDaemon.CurrentMasterPosition = newPosition()

	// start master so that replica can fetch master position from it
	master.StartActionLoop(t, wr)
	defer master.StopActionLoop(t)

	// create a single tablet, set it up so we can do backups
	// set its position same as that of master so that backup doesn't wait for catchup
	sourceTablet := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, db)
	sourceTablet.FakeMysqlDaemon.ReadOnly = true
	sourceTablet.FakeMysqlDaemon.Replicating = true
	sourceTablet.FakeMysqlDaemon.CurrentMasterPosition = newPosition()
	// Only STOP SLAVE is expected: with active reparents disabled the backup
	// must not start replication again when it finishes.
	sourceTablet.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
		"STOP SLAVE",
	}
	sourceTablet.StartActionLoop(t, wr)
	defer sourceTablet.StopActionLoop(t)

	sourceTablet.TM.Cnf = &mysqlctl.Mycnf{
		DataDir:               sourceDataDir,
		InnodbDataHomeDir:     sourceInnodbDataDir,
		InnodbLogGroupHomeDir: sourceInnodbLogDir,
	}

	// run the backup
	require.NoError(t, vp.Run([]string{"Backup", topoproto.TabletAliasString(sourceTablet.Tablet.Alias)}))

	// verify the full status
	require.NoError(t, sourceTablet.FakeMysqlDaemon.CheckSuperQueryList())
	assert.False(t, sourceTablet.FakeMysqlDaemon.Replicating)
	assert.True(t, sourceTablet.FakeMysqlDaemon.Running)

	// create a destination tablet, set it up so we can do restores
	destTablet := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, db)
	destTablet.FakeMysqlDaemon.ReadOnly = true
	destTablet.FakeMysqlDaemon.Replicating = true
	destTablet.FakeMysqlDaemon.CurrentMasterPosition = newPosition()
	// The list ends at FAKE SET MASTER: no START SLAVE is expected because
	// active reparents are disabled.
	destTablet.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
		"STOP SLAVE",
		"RESET SLAVE ALL",
		"FAKE SET SLAVE POSITION",
		"FAKE SET MASTER",
	}
	destTablet.FakeMysqlDaemon.FetchSuperQueryMap = map[string]*sqltypes.Result{
		"SHOW DATABASES": {},
	}
	destTablet.FakeMysqlDaemon.SetReplicationPositionPos = sourceTablet.FakeMysqlDaemon.CurrentMasterPosition
	destTablet.FakeMysqlDaemon.SetMasterInput = topoproto.MysqlAddr(master.Tablet)

	destTablet.StartActionLoop(t, wr)
	defer destTablet.StopActionLoop(t)

	destTablet.TM.Cnf = &mysqlctl.Mycnf{
		DataDir:               sourceDataDir,
		InnodbDataHomeDir:     sourceInnodbDataDir,
		InnodbLogGroupHomeDir: sourceInnodbLogDir,
		BinLogPath:            path.Join(root, "bin-logs/filename_prefix"),
		RelayLogPath:          path.Join(root, "relay-logs/filename_prefix"),
		RelayLogIndexPath:     path.Join(root, "relay-log.index"),
		RelayLogInfoPath:      path.Join(root, "relay-log.info"),
	}

	require.NoError(t, destTablet.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */))
	// verify the full status
	require.NoError(t, destTablet.FakeMysqlDaemon.CheckSuperQueryList(), "destTablet.FakeMysqlDaemon.CheckSuperQueryList failed")
	assert.False(t, destTablet.FakeMysqlDaemon.Replicating)
	assert.True(t, destTablet.FakeMysqlDaemon.Running)
}

0 comments on commit ff90ed1

Please sign in to comment.