From 635997bb36d6e8f5ef078e001f417deffc1d6016 Mon Sep 17 00:00:00 2001 From: Ilia Shakhov Date: Wed, 26 Nov 2025 19:50:23 +0300 Subject: [PATCH] Fix that disks may become INACTIVE during cluster bootstrap (#28886) --- ydb/core/cms/cms_impl.h | 2 + ydb/core/cms/cms_state.h | 1 + ydb/core/cms/cms_tx_load_state.cpp | 14 ++++++- ydb/core/cms/cms_tx_process_notification.cpp | 2 +- .../cms/cms_tx_store_first_boot_timestamp.cpp | 33 ++++++++++++++++ ydb/core/cms/cms_tx_store_permissions.cpp | 6 ++- ydb/core/cms/cms_tx_update_config.cpp | 2 +- ydb/core/cms/config.h | 6 +++ ydb/core/cms/scheme.h | 5 ++- ydb/core/cms/sentinel.cpp | 39 ++++++++++++++++--- ydb/core/cms/sentinel_impl.h | 15 ++++++- ydb/core/cms/sentinel_ut.cpp | 31 +++++++++++++++ ydb/core/cms/sentinel_ut_helpers.h | 7 +++- ydb/core/cms/ut_sentinel/ya.make | 2 + ydb/core/cms/ya.make | 1 + ydb/core/protos/cms.proto | 1 + ydb/core/protos/counters_cms.proto | 1 + 17 files changed, 153 insertions(+), 15 deletions(-) create mode 100644 ydb/core/cms/cms_tx_store_first_boot_timestamp.cpp diff --git a/ydb/core/cms/cms_impl.h b/ydb/core/cms/cms_impl.h index fdfb67608872..39a2d5709fbb 100644 --- a/ydb/core/cms/cms_impl.h +++ b/ydb/core/cms/cms_impl.h @@ -106,6 +106,7 @@ class TCms : public TActor, public TTabletExecutedFlat { class TTxStoreWalleTask; class TTxUpdateConfig; class TTxUpdateDowntimes; + class TTxStoreFirstBootTimestamp; struct TActionOptions { TDuration PermissionDuration; @@ -150,6 +151,7 @@ class TCms : public TActor, public TTabletExecutedFlat { ITransaction *CreateTxUpdateConfig(TEvCms::TEvSetConfigRequest::TPtr &ev); ITransaction *CreateTxUpdateConfig(TEvConsole::TEvConfigNotificationRequest::TPtr &ev); ITransaction *CreateTxUpdateDowntimes(); + ITransaction *CreateTxStoreFirstBootTimestamp(); static void AuditLog(const TActorContext &ctx, const TString &message) { NCms::AuditLog("CMS tablet", message, ctx); diff --git a/ydb/core/cms/cms_state.h b/ydb/core/cms/cms_state.h index 328ba32719bb..6e2a65546936 100644 --- a/ydb/core/cms/cms_state.h +++ b/ydb/core/cms/cms_state.h @@ -43,6 +43,7 @@ struct TCmsState : public TAtomicRefCount { ui64 NextRequestId = 0; ui64 NextNotificationId = 0; ui64 LastLogRecordTimestamp = 0; + TInstant FirstBootTimestamp; // State of Wall-E tasks. THashMap WalleTasks; diff --git a/ydb/core/cms/cms_tx_load_state.cpp b/ydb/core/cms/cms_tx_load_state.cpp index 62a8ad301997..41ade4a73716 100644 --- a/ydb/core/cms/cms_tx_load_state.cpp +++ b/ydb/core/cms/cms_tx_load_state.cpp @@ -38,7 +38,7 @@ class TCms::TTxLoadState : public TTransactionBase { if (!db.Precharge()) return false; - auto paramRow = db.Table().Key(1).Select(); + auto paramRow = db.Table().Key(Schema::Param::Key).Select(); auto permissionRowset = db.Table().Range().Select(); auto requestRowset = db.Table().Range().Select(); auto walleTaskRowset = db.Table().Range().Select(); @@ -60,17 +60,23 @@ class TCms::TTxLoadState : public TTransactionBase { NKikimrCms::TCmsConfig config; if (paramRow.IsValid()) { + FirstBoot = false; + state->NextPermissionId = paramRow.GetValueOrDefault(1); state->NextRequestId = paramRow.GetValueOrDefault(1); state->NextNotificationId = paramRow.GetValueOrDefault(1); + state->FirstBootTimestamp = TInstant::MicroSeconds(paramRow.GetValueOrDefault(0)); config = paramRow.GetValueOrDefault(NKikimrCms::TCmsConfig()); LOG_DEBUG_S(ctx, NKikimrServices::CMS, "Loaded config: " << config.ShortDebugString()); } else { + FirstBoot = true; + state->NextPermissionId = 1; state->NextRequestId = 1; state->NextNotificationId = 1; + state->FirstBootTimestamp = ctx.Now(); LOG_DEBUG_S(ctx, NKikimrServices::CMS, "Using default config"); @@ -261,7 +267,13 @@ class TCms::TTxLoadState : public TTransactionBase { Self->ScheduleLogCleanup(ctx); Self->ScheduleUpdateClusterInfo(ctx, true); Self->ProcessInitQueue(ctx); + + if (FirstBoot) { + Self->Execute(Self->CreateTxStoreFirstBootTimestamp(), ctx); + } } +private: + bool FirstBoot = false; }; ITransaction *TCms::CreateTxLoadState() { diff --git a/ydb/core/cms/cms_tx_process_notification.cpp b/ydb/core/cms/cms_tx_process_notification.cpp index 5a9bd990139b..0a21503f4449 100644 --- a/ydb/core/cms/cms_tx_process_notification.cpp +++ b/ydb/core/cms/cms_tx_process_notification.cpp @@ -30,7 +30,7 @@ class TCms::TTxProcessNotification : public TTransactionBase { Response->Record.SetNotificationId(id); NIceDb::TNiceDb db(txc.DB); - db.Table().Key(1) + db.Table().Key(Schema::Param::Key) .Update(NIceDb::TUpdate(Self->State->NextNotificationId)); TString notificationStr; diff --git a/ydb/core/cms/cms_tx_store_first_boot_timestamp.cpp b/ydb/core/cms/cms_tx_store_first_boot_timestamp.cpp new file mode 100644 index 000000000000..5b71608451df --- /dev/null +++ b/ydb/core/cms/cms_tx_store_first_boot_timestamp.cpp @@ -0,0 +1,33 @@ +#include "cms_impl.h" +#include "scheme.h" + +namespace NKikimr::NCms { + +class TCms::TTxStoreFirstBootTimestamp : public TTransactionBase { +public: + TTxStoreFirstBootTimestamp(TCms *self) + : TBase(self) + {} + + TTxType GetTxType() const override { return TXTYPE_STORE_FIRST_BOOT_TIMESTAMP; } + + bool Execute(TTransactionContext &txc, const TActorContext &ctx) override { + LOG_DEBUG_S(ctx, NKikimrServices::CMS, "TTxStoreFirstBootTimestamp Execute"); + + NIceDb::TNiceDb db(txc.DB); + db.Table().Key(Schema::Param::Key) + .Update(Self->State->FirstBootTimestamp.MicroSeconds()); + + return true; + } + + void Complete(const TActorContext &ctx) override { + LOG_DEBUG(ctx, NKikimrServices::CMS, "TTxStoreFirstBootTimestamp Complete"); + } +}; + +ITransaction *TCms::CreateTxStoreFirstBootTimestamp() { + return new TTxStoreFirstBootTimestamp(this); +} + +} // namespace NKikimr::NCms diff --git a/ydb/core/cms/cms_tx_store_permissions.cpp b/ydb/core/cms/cms_tx_store_permissions.cpp index fd6086c3d640..be31b8fb6196 100644 --- a/ydb/core/cms/cms_tx_store_permissions.cpp +++ b/ydb/core/cms/cms_tx_store_permissions.cpp @@ -26,8 +26,10 @@ class TCms::TTxStorePermissions : public TTransactionBase { LOG_DEBUG(ctx, NKikimrServices::CMS, "TTxStorePermissions Execute"); NIceDb::TNiceDb db(txc.DB); - db.Table().Key(1).Update(NIceDb::TUpdate(NextPermissionId), - NIceDb::TUpdate(NextRequestId)); + db.Table().Key(Schema::Param::Key).Update( + NIceDb::TUpdate(NextPermissionId), + NIceDb::TUpdate(NextRequestId) + ); const auto &rec = Response->Get()->Record; diff --git a/ydb/core/cms/cms_tx_update_config.cpp b/ydb/core/cms/cms_tx_update_config.cpp index ebd4ae51bbcd..bbd68faf0a5c 100644 --- a/ydb/core/cms/cms_tx_update_config.cpp +++ b/ydb/core/cms/cms_tx_update_config.cpp @@ -24,7 +24,7 @@ class TCms::TTxUpdateConfig : public TTransactionBase { if (!google::protobuf::util::MessageDifferencer::Equals(Config, Self->State->ConfigProto)) { NIceDb::TNiceDb db(txc.DB); - db.Table().Key(1) + db.Table().Key(Schema::Param::Key) .Update(Config); Modify = true; diff --git a/ydb/core/cms/config.h b/ydb/core/cms/config.h index 272294bce22c..a3b9614d3a0d 100644 --- a/ydb/core/cms/config.h +++ b/ydb/core/cms/config.h @@ -78,6 +78,8 @@ struct TCmsSentinelConfig { TStateStorageSelfHealConfig StateStorageSelfHealConfig; + TDuration InitialDeploymentGracePeriod; + void Serialize(NKikimrCms::TCmsConfig::TSentinelConfig &config) const { config.SetEnable(Enable); config.SetDryRun(DryRun); @@ -100,6 +102,8 @@ struct TCmsSentinelConfig { SaveStateLimits(config); SaveEvictVDisksStatus(config); + + config.SetInitialDeploymentGracePeriod(InitialDeploymentGracePeriod.GetValue()); } void Deserialize(const NKikimrCms::TCmsConfig::TSentinelConfig &config) { @@ -125,6 +129,8 @@ struct TCmsSentinelConfig { StateLimits.swap(newStateLimits); EvictVDisksStatus = LoadEvictVDisksStatus(config); + + InitialDeploymentGracePeriod = TDuration::MicroSeconds(config.GetInitialDeploymentGracePeriod()); } void SaveStateLimits(NKikimrCms::TCmsConfig::TSentinelConfig &config) const { diff --git a/ydb/core/cms/scheme.h b/ydb/core/cms/scheme.h index 672bac94fe72..a591b84ffdc3 100644 --- a/ydb/core/cms/scheme.h +++ b/ydb/core/cms/scheme.h @@ -9,16 +9,19 @@ namespace NKikimr::NCms { struct Schema : NIceDb::Schema { struct Param : Table<1> { + static constexpr ui32 Key = 1; + struct ID : Column<1, NScheme::NTypeIds::Uint32> {}; struct NextPermissionID : Column<2, NScheme::NTypeIds::Uint64> {}; struct NextRequestID : Column<3, NScheme::NTypeIds::Uint64> {}; struct NextNotificationID : Column<4, NScheme::NTypeIds::Uint64> {}; struct Config : Column<5, NScheme::NTypeIds::String> { using Type = NKikimrCms::TCmsConfig; }; struct LastLogRecordTimestamp : Column<6, NScheme::NTypeIds::Uint64> {}; + struct FirstBootTimestamp : Column<7, NScheme::NTypeIds::Uint64> {}; using TKey = TableKey; using TColumns = TableColumns; + Config, LastLogRecordTimestamp, FirstBootTimestamp>; }; struct Permission : Table<2> { diff --git a/ydb/core/cms/sentinel.cpp b/ydb/core/cms/sentinel.cpp index a624607d7517..76b3b8e35ca6 100644 --- a/ydb/core/cms/sentinel.cpp +++ b/ydb/core/cms/sentinel.cpp @@ -85,11 +85,14 @@ void TNodeStatusComputer::AddState(ENodeState newState) { /// TPDiskStatusComputer -TPDiskStatusComputer::TPDiskStatusComputer(const ui32& defaultStateLimit, const ui32& goodStateLimit, const TLimitsMap& stateLimits) +TPDiskStatusComputer::TPDiskStatusComputer(const ui32& defaultStateLimit, const ui32& goodStateLimit, const TLimitsMap& stateLimits, + TInstant cmsFirstBootTimestamp, const TDuration& initialDeploymentGracePeriod) : DefaultStateLimit(defaultStateLimit) , GoodStateLimit(goodStateLimit) , StateLimits(stateLimits) , StateCounter(0) + , CMSFirstBootTimestamp(cmsFirstBootTimestamp) + , InitialDeploymentGracePeriod(initialDeploymentGracePeriod) { } @@ -171,7 +174,11 @@ EPDiskStatus TPDiskStatusComputer::Compute(EPDiskStatus current, TString& reason } } - return EPDiskStatus::INACTIVE; + if (IsInitialDeploymentGracePeriod() && State == NKikimrBlobStorage::TPDiskState::Normal) { + return EPDiskStatus::ACTIVE; + } else { + return EPDiskStatus::INACTIVE; + } } reason = TStringBuilder() @@ -219,10 +226,25 @@ void TPDiskStatusComputer::ResetForcedStatus() { ForcedStatus.Clear(); } +bool TPDiskStatusComputer::IsInitialDeploymentGracePeriod() const { + if (TlsActivationContext) { + return CMSFirstBootTimestamp + InitialDeploymentGracePeriod > TActivationContext::Now(); + } else { + return false; // unsupported outside of actorsystem + } +} + /// TPDiskStatus + TPDiskStatus::TPDiskStatus(EPDiskStatus initialStatus, const ui32& defaultStateLimit, const ui32& goodStateLimit, const TLimitsMap& stateLimits) - : TPDiskStatusComputer(defaultStateLimit, goodStateLimit, stateLimits) + : TPDiskStatus(initialStatus, defaultStateLimit, goodStateLimit, stateLimits, TInstant::Zero(), TDuration::Zero()) +{} + +TPDiskStatus::TPDiskStatus(EPDiskStatus initialStatus, const ui32& defaultStateLimit, + const ui32& goodStateLimit, const TLimitsMap& stateLimits, + TInstant cmsFirstBootTimestamp, const TDuration& initialDeploymentGracePeriod) + : TPDiskStatusComputer(defaultStateLimit, goodStateLimit, stateLimits, cmsFirstBootTimestamp, initialDeploymentGracePeriod) , Current(initialStatus) , ChangingAllowed(true) { @@ -284,8 +306,11 @@ void TPDiskStatus::DisallowChanging() { /// TPDiskInfo -TPDiskInfo::TPDiskInfo(EPDiskStatus initialStatus, const ui32& defaultStateLimit, const ui32& goodStateLimit, const TLimitsMap& stateLimits) - : TPDiskStatus(initialStatus, defaultStateLimit, goodStateLimit, stateLimits) +TPDiskInfo::TPDiskInfo(EPDiskStatus initialStatus, const ui32& defaultStateLimit, + const ui32& goodStateLimit, const TLimitsMap& stateLimits, + TInstant cmsFirstBootTimestamp, const TDuration& initialDeploymentGracePeriod) + : TPDiskStatus(initialStatus, defaultStateLimit, goodStateLimit, stateLimits, + cmsFirstBootTimestamp, initialDeploymentGracePeriod) , ActualStatus(initialStatus) { Touch(); @@ -619,7 +644,9 @@ class TConfigUpdater: public TUpdaterBaseFirstBootTimestamp, Config.InitialDeploymentGracePeriod)); } SentinelState->ConfigUpdaterState.GotBSCResponse = true; diff --git a/ydb/core/cms/sentinel_impl.h b/ydb/core/cms/sentinel_impl.h index 829c4261c323..febce2e4bea9 100644 --- a/ydb/core/cms/sentinel_impl.h +++ b/ydb/core/cms/sentinel_impl.h @@ -16,7 +16,8 @@ using TLimitsMap = TMap; class TPDiskStatusComputer { public: - explicit TPDiskStatusComputer(const ui32& defaultStateLimit, const ui32& goodStateLimit, const TLimitsMap& stateLimits); + explicit TPDiskStatusComputer(const ui32& defaultStateLimit, const ui32& goodStateLimit, const TLimitsMap& stateLimits, + TInstant cmsFirstBootTimestamp, const TDuration& initialDeploymentGracePeriod); void AddState(EPDiskState state, bool isNodeLocked); EPDiskStatus Compute(EPDiskStatus current, TString& reason) const; @@ -31,6 +32,8 @@ class TPDiskStatusComputer { bool HasForcedStatus() const; void ResetForcedStatus(); + bool IsInitialDeploymentGracePeriod() const; + private: const ui32& DefaultStateLimit; const ui32& GoodStateLimit; @@ -43,11 +46,17 @@ class TPDiskStatusComputer { mutable bool HadBadStateRecently = false; + TInstant CMSFirstBootTimestamp; + const TDuration& InitialDeploymentGracePeriod; + }; // TPDiskStatusComputer class TPDiskStatus: public TPDiskStatusComputer { public: explicit TPDiskStatus(EPDiskStatus initialStatus, const ui32& defaultStateLimit, const ui32& goodStateLimit, const TLimitsMap& stateLimits); + explicit TPDiskStatus(EPDiskStatus initialStatus, const ui32& defaultStateLimit, + const ui32& goodStateLimit, const TLimitsMap& stateLimits, + TInstant cmsFirstBootTimestamp, const TDuration& initialDeploymentGracePeriod); void AddState(EPDiskState state, bool isNodeLocked); bool IsChanged() const; @@ -96,7 +105,9 @@ struct TPDiskInfo ui32 PrevStatusChangeAttempt = 0; EIgnoreReason IgnoreReason = NKikimrCms::TPDiskInfo::NOT_IGNORED; - explicit TPDiskInfo(EPDiskStatus initialStatus, const ui32& defaultStateLimit, const ui32& goodStateLimit, const TLimitsMap& stateLimits); + explicit TPDiskInfo(EPDiskStatus initialStatus, const ui32& defaultStateLimit, + const ui32& goodStateLimit, const TLimitsMap& stateLimits, + TInstant cmsFirstBootTimestamp, const TDuration& initialDeploymentGracePeriod); bool IsTouched() const { return Touched; } void Touch() { Touched = true; } diff --git a/ydb/core/cms/sentinel_ut.cpp b/ydb/core/cms/sentinel_ut.cpp index 037b78c4bc65..f8d6accfbf88 100644 --- a/ydb/core/cms/sentinel_ut.cpp +++ b/ydb/core/cms/sentinel_ut.cpp @@ -866,6 +866,37 @@ Y_UNIT_TEST_SUITE(TSentinelTests) { UNIT_ASSERT(computer.GetCurrentNodeState() == TNodeStatusComputer::ENodeState::GOOD); } + Y_UNIT_TEST(InitialDeploymentGracePeriod) { + NKikimrCms::TCmsConfig config; + config.MutableSentinelConfig()->SetInitialDeploymentGracePeriod(TDuration::Minutes(10).GetValue()); + TTestEnv env(8, 4, config); + + const TPDiskID id = env.RandomPDiskID(); + + Cerr << "...Initializing" << Endl; + env.SetPDiskState({id}, NKikimrBlobStorage::TPDiskState::Initial, NKikimrBlobStorage::INACTIVE); + + Cerr << "...Working normally" << Endl; + env.SetPDiskState({id}, NKikimrBlobStorage::TPDiskState::Normal, NKikimrBlobStorage::ACTIVE); + + Cerr << "...Disconnected" << Endl; + env.SetPDiskState({id}, NKikimrBlobStorage::TPDiskState::NodeDisconnected, NKikimrBlobStorage::INACTIVE); + + Cerr << "...Working normally again" << Endl; + env.SetPDiskState({id}, NKikimrBlobStorage::TPDiskState::Normal, NKikimrBlobStorage::ACTIVE); + + Cerr << "...Initial deployment grace period is over" << Endl; + env.AdvanceCurrentTime(TDuration::Minutes(15)); + + Cerr << "...Disconnected" << Endl; + env.SetPDiskState({id}, NKikimrBlobStorage::TPDiskState::NodeDisconnected, NKikimrBlobStorage::INACTIVE); + + Cerr << "...Working normally again, but no fast path to ACTIVE" << Endl; + for (ui32 i = 1; i < DefaultStateLimit; ++i) { + env.SetPDiskState({id}, NKikimrBlobStorage::TPDiskState::Normal); + } + env.SetPDiskState({id}, NKikimrBlobStorage::TPDiskState::Normal, EPDiskStatus::ACTIVE); + } } // TSentinelTests } diff --git a/ydb/core/cms/sentinel_ut_helpers.h b/ydb/core/cms/sentinel_ut_helpers.h index 65e355249b0a..eb618340bbf4 100644 --- a/ydb/core/cms/sentinel_ut_helpers.h +++ b/ydb/core/cms/sentinel_ut_helpers.h @@ -83,7 +83,7 @@ class TTestEnv: public TCmsTestEnv { } public: - explicit TTestEnv(ui32 nodeCount, ui32 pdisks, const NKikimrCms::TCmsConfig &config = {}) + explicit TTestEnv(ui32 nodeCount, ui32 pdisks, NKikimrCms::TCmsConfig config = {}) : TCmsTestEnv(nodeCount, pdisks) { SetLogPriority(NKikimrServices::CMS, NLog::PRI_DEBUG); @@ -123,6 +123,11 @@ class TTestEnv: public TCmsTestEnv { }); State = new TCmsState; + + auto* sentinelConfig = config.MutableSentinelConfig(); + if (!sentinelConfig->HasInitialDeploymentGracePeriod()) { + sentinelConfig->SetInitialDeploymentGracePeriod(0); + } State->Config.Deserialize(config); MockClusterInfo(State->ClusterInfo); State->CmsActorId = GetSender(); diff --git a/ydb/core/cms/ut_sentinel/ya.make b/ydb/core/cms/ut_sentinel/ya.make index 160b4adbda8a..866eb861435e 100644 --- a/ydb/core/cms/ut_sentinel/ya.make +++ b/ydb/core/cms/ut_sentinel/ya.make @@ -1,5 +1,7 @@ UNITTEST_FOR(ydb/core/cms) +FORK_SUBTESTS() + SIZE(MEDIUM) PEERDIR( diff --git a/ydb/core/cms/ya.make b/ydb/core/cms/ya.make index 38754c144ea1..e79c5707e8ae 100644 --- a/ydb/core/cms/ya.make +++ b/ydb/core/cms/ya.make @@ -21,6 +21,7 @@ SRCS( cms_tx_remove_permissions.cpp cms_tx_remove_request.cpp cms_tx_remove_task.cpp + cms_tx_store_first_boot_timestamp.cpp cms_tx_store_permissions.cpp cms_tx_store_walle_task.cpp cms_tx_update_config.cpp diff --git a/ydb/core/protos/cms.proto b/ydb/core/protos/cms.proto index 52fe7a2bf32b..7fbed90df24c 100644 --- a/ydb/core/protos/cms.proto +++ b/ydb/core/protos/cms.proto @@ -488,6 +488,7 @@ message TCmsConfig { optional uint32 GoodStateLimit = 16 [default = 5]; optional TStateStorageSelfHealConfig StateStorageSelfHealConfig = 19; + optional uint64 InitialDeploymentGracePeriod = 20 [default = 600000000]; } message TLogConfig { diff --git a/ydb/core/protos/counters_cms.proto b/ydb/core/protos/counters_cms.proto index 331e8e0e3353..aaf3bac01a8a 100644 --- a/ydb/core/protos/counters_cms.proto +++ b/ydb/core/protos/counters_cms.proto @@ -64,4 +64,5 @@ enum ETxTypes { TXTYPE_STORE_WALLE_TASK = 13 [(TxTypeOpts) = {Name: "TxStoreWalleTask"}]; TXTYPE_UPDATE_CONFIG = 14 [(TxTypeOpts) = {Name: "TxUpdateConfig"}]; TXTYPE_UPDATE_DOWNTIMES = 15 [(TxTypeOpts) = {Name: "TxUpdateDowntimes"}]; + TXTYPE_STORE_FIRST_BOOT_TIMESTAMP = 16 [(TxTypeOpts) = {Name: "TxStoreFirstBootTimestamp"}]; }