-
Notifications
You must be signed in to change notification settings - Fork 2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Respect -disable_active_reparents in backup/restore #7576
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -78,7 +78,28 @@ func (tm *TabletManager) RestoreData(ctx context.Context, logger logutil.Logger, | |
if tm.Cnf == nil { | ||
return fmt.Errorf("cannot perform restore without my.cnf, please restart vttablet with a my.cnf file specified") | ||
} | ||
return tm.restoreDataLocked(ctx, logger, waitForBackupInterval, deleteBeforeRestore) | ||
// Tell Orchestrator we're stopped on purpose for some Vitess task. | ||
// Do this in the background, as it's best-effort. | ||
go func() { | ||
if tm.orc == nil { | ||
return | ||
} | ||
if err := tm.orc.BeginMaintenance(tm.Tablet(), "vttablet has been told to Restore"); err != nil { | ||
log.Warningf("Orchestrator BeginMaintenance failed: %v", err) | ||
} | ||
}() | ||
err := tm.restoreDataLocked(ctx, logger, waitForBackupInterval, deleteBeforeRestore) | ||
// Tell Orchestrator we're no longer stopped on purpose. | ||
// Do this in the background, as it's best-effort. | ||
go func() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we only There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That seems reasonable. |
||
if tm.orc == nil { | ||
return | ||
} | ||
if err := tm.orc.EndMaintenance(tm.Tablet()); err != nil { | ||
log.Warningf("Orchestrator EndMaintenance failed: %v", err) | ||
} | ||
}() | ||
return err | ||
} | ||
|
||
func (tm *TabletManager) restoreDataLocked(ctx context.Context, logger logutil.Logger, waitForBackupInterval time.Duration, deleteBeforeRestore bool) error { | ||
|
@@ -470,10 +491,15 @@ func (tm *TabletManager) startReplication(ctx context.Context, pos mysql.Positio | |
} | ||
|
||
// Set master and start replication. | ||
if err := tm.MysqlDaemon.SetMaster(ctx, ti.Tablet.MysqlHostname, int(ti.Tablet.MysqlPort), false /* stopReplicationBefore */, true /* startReplicationAfter */); err != nil { | ||
if err := tm.MysqlDaemon.SetMaster(ctx, ti.Tablet.MysqlHostname, int(ti.Tablet.MysqlPort), false /* stopReplicationBefore */, !*mysqlctl.DisableActiveReparents /* startReplicationAfter */); err != nil { | ||
return vterrors.Wrap(err, "MysqlDaemon.SetMaster failed") | ||
} | ||
|
||
// If active reparents are disabled, we don't restart replication. So it makes no sense to wait for an update on the replica. | ||
// Return immediately. | ||
if !*mysqlctl.DisableActiveReparents { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. would it be the opposite? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you for pointing this out. It would have been a regression. I have opened #7703 to fix this. |
||
return nil | ||
} | ||
// wait for reliable seconds behind master | ||
// we have pos where we want to resume from | ||
// if MasterPosition is the same, that means no writes | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -88,6 +88,16 @@ func (tm *TabletManager) Backup(ctx context.Context, concurrency int, logger log | |
if err := tm.changeTypeLocked(ctx, topodatapb.TabletType_BACKUP, DBActionNone); err != nil { | ||
return err | ||
} | ||
// Tell Orchestrator we're stopped on purpose for some Vitess task. | ||
// Do this in the background, as it's best-effort. | ||
go func() { | ||
if tm.orc == nil { | ||
return | ||
} | ||
if err := tm.orc.BeginMaintenance(tm.Tablet(), "vttablet has been told to run an offline backup"); err != nil { | ||
logger.Warningf("Orchestrator BeginMaintenance failed: %v", err) | ||
} | ||
}() | ||
} | ||
// create the loggers: tee to console and source | ||
l := logutil.NewTeeLogger(logutil.NewConsoleLogger(), logger) | ||
|
@@ -124,6 +134,16 @@ func (tm *TabletManager) Backup(ctx context.Context, concurrency int, logger log | |
} | ||
returnErr = err | ||
} | ||
// Tell Orchestrator we're no longer stopped on purpose. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @Zhannan what about here? Should this also be called only if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. makes sense! |
||
// Do this in the background, as it's best-effort. | ||
go func() { | ||
if tm.orc == nil { | ||
return | ||
} | ||
if err := tm.orc.EndMaintenance(tm.Tablet()); err != nil { | ||
logger.Warningf("Orchestrator EndMaintenance failed: %v", err) | ||
} | ||
}() | ||
} | ||
|
||
return returnErr | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
From our experiments,
`tm.Tablet()`
seems to return an outdated state, and we are getting a "cannot find mysql port" error,
so we ended up using `tm.tmState.Tablet()`
instead. Let me know if you also experience it.

There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`tm.Tablet()`
simply returns `tm.tmState.Tablet()`,
so they should be equivalent. I do see that it is possible for a race condition to occur between `checkMysql`
and `handleRestore`.