From 4d6cfca29d56a47fbe1afc4256d533ccd1fbdee9 Mon Sep 17 00:00:00 2001 From: Ilia Shakhov Date: Wed, 26 Nov 2025 19:50:23 +0300 Subject: [PATCH] Fix that disks may become INACTIVE during cluster bootstrap (#28886) --- ydb/core/cms/cms_impl.h | 2 + ydb/core/cms/cms_state.h | 1 + ydb/core/cms/cms_tx_load_state.cpp | 14 ++++++- ydb/core/cms/cms_tx_process_notification.cpp | 2 +- .../cms/cms_tx_store_first_boot_timestamp.cpp | 33 ++++++++++++++++ ydb/core/cms/cms_tx_store_permissions.cpp | 6 ++- ydb/core/cms/cms_tx_update_config.cpp | 2 +- ydb/core/cms/config.h | 6 +++ ydb/core/cms/scheme.h | 5 ++- ydb/core/cms/sentinel.cpp | 39 ++++++++++++++++--- ydb/core/cms/sentinel_impl.h | 15 ++++++- ydb/core/cms/sentinel_ut.cpp | 32 +++++++++++++++ ydb/core/cms/sentinel_ut_helpers.h | 8 +++- ydb/core/cms/ut_sentinel/ya.make | 2 + ydb/core/cms/ya.make | 1 + ydb/core/protos/cms.proto | 1 + ydb/core/protos/counters_cms.proto | 1 + 17 files changed, 155 insertions(+), 15 deletions(-) create mode 100644 ydb/core/cms/cms_tx_store_first_boot_timestamp.cpp diff --git a/ydb/core/cms/cms_impl.h b/ydb/core/cms/cms_impl.h index 85415ab083b8..5a95d00a7536 100644 --- a/ydb/core/cms/cms_impl.h +++ b/ydb/core/cms/cms_impl.h @@ -105,6 +105,7 @@ class TCms : public TActor, public TTabletExecutedFlat { class TTxStoreWalleTask; class TTxUpdateConfig; class TTxUpdateDowntimes; + class TTxStoreFirstBootTimestamp; struct TActionOptions { TDuration PermissionDuration; @@ -149,6 +150,7 @@ class TCms : public TActor, public TTabletExecutedFlat { ITransaction *CreateTxUpdateConfig(TEvCms::TEvSetConfigRequest::TPtr &ev); ITransaction *CreateTxUpdateConfig(TEvConsole::TEvConfigNotificationRequest::TPtr &ev); ITransaction *CreateTxUpdateDowntimes(); + ITransaction *CreateTxStoreFirstBootTimestamp(); static void AuditLog(const TActorContext &ctx, const TString &message) { NCms::AuditLog("CMS tablet", message, ctx); diff --git a/ydb/core/cms/cms_state.h b/ydb/core/cms/cms_state.h index c6b5e5ba4dc6..817137095c1f 100644 --- a/ydb/core/cms/cms_state.h +++ b/ydb/core/cms/cms_state.h @@ -39,6 +39,7 @@ struct TCmsState : public TAtomicRefCount { ui64 NextRequestId = 0; ui64 NextNotificationId = 0; ui64 LastLogRecordTimestamp = 0; + TInstant FirstBootTimestamp; // State of Wall-E tasks. THashMap WalleTasks; diff --git a/ydb/core/cms/cms_tx_load_state.cpp b/ydb/core/cms/cms_tx_load_state.cpp index 5df136c557b7..57a278e669e8 100644 --- a/ydb/core/cms/cms_tx_load_state.cpp +++ b/ydb/core/cms/cms_tx_load_state.cpp @@ -38,7 +38,7 @@ class TCms::TTxLoadState : public TTransactionBase { if (!db.Precharge()) return false; - auto paramRow = db.Table().Key(1).Select(); + auto paramRow = db.Table().Key(Schema::Param::Key).Select(); auto permissionRowset = db.Table().Range().Select(); auto requestRowset = db.Table().Range().Select(); auto walleTaskRowset = db.Table().Range().Select(); @@ -60,17 +60,23 @@ class TCms::TTxLoadState : public TTransactionBase { NKikimrCms::TCmsConfig config; if (paramRow.IsValid()) { + FirstBoot = false; + state->NextPermissionId = paramRow.GetValueOrDefault(1); state->NextRequestId = paramRow.GetValueOrDefault(1); state->NextNotificationId = paramRow.GetValueOrDefault(1); + state->FirstBootTimestamp = TInstant::MicroSeconds(paramRow.GetValueOrDefault(0)); config = paramRow.GetValueOrDefault(NKikimrCms::TCmsConfig()); LOG_DEBUG_S(ctx, NKikimrServices::CMS, "Loaded config: " << config.ShortDebugString()); } else { + FirstBoot = true; + state->NextPermissionId = 1; state->NextRequestId = 1; state->NextNotificationId = 1; + state->FirstBootTimestamp = ctx.Now(); LOG_DEBUG_S(ctx, NKikimrServices::CMS, "Using default config"); @@ -257,7 +263,13 @@ class TCms::TTxLoadState : public TTransactionBase { Self->ScheduleLogCleanup(ctx); Self->ScheduleUpdateClusterInfo(ctx, true); Self->ProcessInitQueue(ctx); + + if (FirstBoot) { + Self->Execute(Self->CreateTxStoreFirstBootTimestamp(), ctx); + } } +private: + bool FirstBoot = false; }; ITransaction *TCms::CreateTxLoadState() { diff --git a/ydb/core/cms/cms_tx_process_notification.cpp b/ydb/core/cms/cms_tx_process_notification.cpp index 5a9bd990139b..0a21503f4449 100644 --- a/ydb/core/cms/cms_tx_process_notification.cpp +++ b/ydb/core/cms/cms_tx_process_notification.cpp @@ -30,7 +30,7 @@ class TCms::TTxProcessNotification : public TTransactionBase { Response->Record.SetNotificationId(id); NIceDb::TNiceDb db(txc.DB); - db.Table().Key(1) + db.Table().Key(Schema::Param::Key) .Update(NIceDb::TUpdate(Self->State->NextNotificationId)); TString notificationStr; diff --git a/ydb/core/cms/cms_tx_store_first_boot_timestamp.cpp b/ydb/core/cms/cms_tx_store_first_boot_timestamp.cpp new file mode 100644 index 000000000000..5b71608451df --- /dev/null +++ b/ydb/core/cms/cms_tx_store_first_boot_timestamp.cpp @@ -0,0 +1,33 @@ +#include "cms_impl.h" +#include "scheme.h" + +namespace NKikimr::NCms { + +class TCms::TTxStoreFirstBootTimestamp : public TTransactionBase { +public: + TTxStoreFirstBootTimestamp(TCms *self) + : TBase(self) + {} + + TTxType GetTxType() const override { return TXTYPE_STORE_FIRST_BOOT_TIMESTAMP; } + + bool Execute(TTransactionContext &txc, const TActorContext &ctx) override { + LOG_DEBUG_S(ctx, NKikimrServices::CMS, "TTxStoreFirstBootTimestamp Execute"); + + NIceDb::TNiceDb db(txc.DB); + db.Table().Key(Schema::Param::Key) + .Update(Self->State->FirstBootTimestamp.MicroSeconds()); + + return true; + } + + void Complete(const TActorContext &ctx) override { + LOG_DEBUG(ctx, NKikimrServices::CMS, "TTxStoreFirstBootTimestamp Complete"); + } +}; + +ITransaction *TCms::CreateTxStoreFirstBootTimestamp() { + return new TTxStoreFirstBootTimestamp(this); +} + +} // namespace NKikimr::NCms diff --git a/ydb/core/cms/cms_tx_store_permissions.cpp b/ydb/core/cms/cms_tx_store_permissions.cpp index b85160bac412..746124c80657 100644 --- a/ydb/core/cms/cms_tx_store_permissions.cpp +++ b/ydb/core/cms/cms_tx_store_permissions.cpp @@ -26,8 +26,10 @@ class TCms::TTxStorePermissions : public TTransactionBase { LOG_DEBUG(ctx, NKikimrServices::CMS, "TTxStorePermissions Execute"); NIceDb::TNiceDb db(txc.DB); - db.Table().Key(1).Update(NIceDb::TUpdate(NextPermissionId), - NIceDb::TUpdate(NextRequestId)); + db.Table().Key(Schema::Param::Key).Update( + NIceDb::TUpdate(NextPermissionId), + NIceDb::TUpdate(NextRequestId) + ); const auto &rec = Response->Get()->Record; diff --git a/ydb/core/cms/cms_tx_update_config.cpp b/ydb/core/cms/cms_tx_update_config.cpp index ebd4ae51bbcd..bbd68faf0a5c 100644 --- a/ydb/core/cms/cms_tx_update_config.cpp +++ b/ydb/core/cms/cms_tx_update_config.cpp @@ -24,7 +24,7 @@ class TCms::TTxUpdateConfig : public TTransactionBase { if (!google::protobuf::util::MessageDifferencer::Equals(Config, Self->State->ConfigProto)) { NIceDb::TNiceDb db(txc.DB); - db.Table().Key(1) + db.Table().Key(Schema::Param::Key) .Update(Config); Modify = true; diff --git a/ydb/core/cms/config.h b/ydb/core/cms/config.h index c2aad9eec45a..a35e9fdb405c 100644 --- a/ydb/core/cms/config.h +++ b/ydb/core/cms/config.h @@ -35,6 +35,8 @@ struct TCmsSentinelConfig { TMaybeFail EvictVDisksStatus; + TDuration InitialDeploymentGracePeriod; + void Serialize(NKikimrCms::TCmsConfig::TSentinelConfig &config) const { config.SetEnable(Enable); config.SetDryRun(DryRun); @@ -52,6 +54,8 @@ struct TCmsSentinelConfig { SaveStateLimits(config); SaveEvictVDisksStatus(config); + + config.SetInitialDeploymentGracePeriod(InitialDeploymentGracePeriod.GetValue()); } void Deserialize(const NKikimrCms::TCmsConfig::TSentinelConfig &config) { @@ -73,6 +77,8 @@ struct TCmsSentinelConfig { StateLimits.swap(newStateLimits); EvictVDisksStatus = LoadEvictVDisksStatus(config); + + InitialDeploymentGracePeriod = TDuration::MicroSeconds(config.GetInitialDeploymentGracePeriod()); } void SaveStateLimits(NKikimrCms::TCmsConfig::TSentinelConfig &config) const { diff --git a/ydb/core/cms/scheme.h b/ydb/core/cms/scheme.h index bd9a4f38f249..811aeaf8ecbe 100644 --- a/ydb/core/cms/scheme.h +++ b/ydb/core/cms/scheme.h @@ -9,16 +9,19 @@ namespace NKikimr::NCms { struct Schema : NIceDb::Schema { struct Param : Table<1> { + static constexpr ui32 Key = 1; + struct ID : Column<1, NScheme::NTypeIds::Uint32> {}; struct NextPermissionID : Column<2, NScheme::NTypeIds::Uint64> {}; struct NextRequestID : Column<3, NScheme::NTypeIds::Uint64> {}; struct NextNotificationID : Column<4, NScheme::NTypeIds::Uint64> {}; struct Config : Column<5, NScheme::NTypeIds::String> { using Type = NKikimrCms::TCmsConfig; }; struct LastLogRecordTimestamp : Column<6, NScheme::NTypeIds::Uint64> {}; + struct FirstBootTimestamp : Column<7, NScheme::NTypeIds::Uint64> {}; using TKey = TableKey; using TColumns = TableColumns; + Config, LastLogRecordTimestamp, FirstBootTimestamp>; }; struct Permission : Table<2> { diff --git a/ydb/core/cms/sentinel.cpp b/ydb/core/cms/sentinel.cpp index e320fa1fba77..c7d35895a7e2 100644 --- a/ydb/core/cms/sentinel.cpp +++ b/ydb/core/cms/sentinel.cpp @@ -43,11 +43,14 @@ namespace NSentinel { /// TPDiskStatusComputer -TPDiskStatusComputer::TPDiskStatusComputer(const ui32& defaultStateLimit, const ui32& goodStateLimit, const TLimitsMap& stateLimits) +TPDiskStatusComputer::TPDiskStatusComputer(const ui32& defaultStateLimit, const ui32& goodStateLimit, const TLimitsMap& stateLimits, + TInstant cmsFirstBootTimestamp, const TDuration& initialDeploymentGracePeriod) : DefaultStateLimit(defaultStateLimit) , GoodStateLimit(goodStateLimit) , StateLimits(stateLimits) , StateCounter(0) + , CMSFirstBootTimestamp(cmsFirstBootTimestamp) + , InitialDeploymentGracePeriod(initialDeploymentGracePeriod) { } @@ -129,7 +132,11 @@ EPDiskStatus TPDiskStatusComputer::Compute(EPDiskStatus current, TString& reason } } - return EPDiskStatus::INACTIVE; + if (IsInitialDeploymentGracePeriod() && State == NKikimrBlobStorage::TPDiskState::Normal) { + return EPDiskStatus::ACTIVE; + } else { + return EPDiskStatus::INACTIVE; + } } reason = TStringBuilder() @@ -177,10 +184,25 @@ void TPDiskStatusComputer::ResetForcedStatus() { ForcedStatus.Clear(); } +bool TPDiskStatusComputer::IsInitialDeploymentGracePeriod() const { + if (TlsActivationContext) { + return CMSFirstBootTimestamp + InitialDeploymentGracePeriod > TActivationContext::Now(); + } else { + return false; // unsupported outside of actorsystem + } +} + /// TPDiskStatus + TPDiskStatus::TPDiskStatus(EPDiskStatus initialStatus, const ui32& defaultStateLimit, const ui32& goodStateLimit, const TLimitsMap& stateLimits) - : TPDiskStatusComputer(defaultStateLimit, goodStateLimit, stateLimits) + : TPDiskStatus(initialStatus, defaultStateLimit, goodStateLimit, stateLimits, TInstant::Zero(), TDuration::Zero()) +{} + +TPDiskStatus::TPDiskStatus(EPDiskStatus initialStatus, const ui32& defaultStateLimit, + const ui32& goodStateLimit, const TLimitsMap& stateLimits, + TInstant cmsFirstBootTimestamp, const TDuration& initialDeploymentGracePeriod) + : TPDiskStatusComputer(defaultStateLimit, goodStateLimit, stateLimits, cmsFirstBootTimestamp, initialDeploymentGracePeriod) , Current(initialStatus) , ChangingAllowed(true) { @@ -242,8 +264,11 @@ void TPDiskStatus::DisallowChanging() { /// TPDiskInfo -TPDiskInfo::TPDiskInfo(EPDiskStatus initialStatus, const ui32& defaultStateLimit, const ui32& goodStateLimit, const TLimitsMap& stateLimits) - : TPDiskStatus(initialStatus, defaultStateLimit, goodStateLimit, stateLimits) +TPDiskInfo::TPDiskInfo(EPDiskStatus initialStatus, const ui32& defaultStateLimit, + const ui32& goodStateLimit, const TLimitsMap& stateLimits, + TInstant cmsFirstBootTimestamp, const TDuration& initialDeploymentGracePeriod) + : TPDiskStatus(initialStatus, defaultStateLimit, goodStateLimit, stateLimits, + cmsFirstBootTimestamp, initialDeploymentGracePeriod) , ActualStatus(initialStatus) { Touch(); @@ -520,7 +545,9 @@ class TConfigUpdater: public TUpdaterBaseFirstBootTimestamp, Config.InitialDeploymentGracePeriod)); } SentinelState->ConfigUpdaterState.GotBSCResponse = true; diff --git a/ydb/core/cms/sentinel_impl.h b/ydb/core/cms/sentinel_impl.h index 697855ae7aee..fc65641b9653 100644 --- a/ydb/core/cms/sentinel_impl.h +++ b/ydb/core/cms/sentinel_impl.h @@ -15,7 +15,8 @@ using TLimitsMap = TMap; class TPDiskStatusComputer { public: - explicit TPDiskStatusComputer(const ui32& defaultStateLimit, const ui32& goodStateLimit, const TLimitsMap& stateLimits); + explicit TPDiskStatusComputer(const ui32& defaultStateLimit, const ui32& goodStateLimit, const TLimitsMap& stateLimits, + TInstant cmsFirstBootTimestamp, const TDuration& initialDeploymentGracePeriod); void AddState(EPDiskState state, bool isNodeLocked); EPDiskStatus Compute(EPDiskStatus current, TString& reason) const; @@ -30,6 +31,8 @@ class TPDiskStatusComputer { bool HasForcedStatus() const; void ResetForcedStatus(); + bool IsInitialDeploymentGracePeriod() const; + private: const ui32& DefaultStateLimit; const ui32& GoodStateLimit; @@ -42,11 +45,17 @@ class TPDiskStatusComputer { mutable bool HadBadStateRecently = false; + TInstant CMSFirstBootTimestamp; + const TDuration& InitialDeploymentGracePeriod; + }; // TPDiskStatusComputer class TPDiskStatus: public TPDiskStatusComputer { public: explicit TPDiskStatus(EPDiskStatus initialStatus, const ui32& defaultStateLimit, const ui32& goodStateLimit, const TLimitsMap& stateLimits); + explicit TPDiskStatus(EPDiskStatus initialStatus, const ui32& defaultStateLimit, + const ui32& goodStateLimit, const TLimitsMap& stateLimits, + TInstant cmsFirstBootTimestamp, const TDuration& initialDeploymentGracePeriod); void AddState(EPDiskState state, bool isNodeLocked); bool IsChanged() const; @@ -95,7 +104,9 @@ struct TPDiskInfo ui32 PrevStatusChangeAttempt = 0; EIgnoreReason IgnoreReason = NKikimrCms::TPDiskInfo::NOT_IGNORED; - explicit TPDiskInfo(EPDiskStatus initialStatus, const ui32& defaultStateLimit, const ui32& goodStateLimit, const TLimitsMap& stateLimits); + explicit TPDiskInfo(EPDiskStatus initialStatus, const ui32& defaultStateLimit, + const ui32& goodStateLimit, const TLimitsMap& stateLimits, + TInstant cmsFirstBootTimestamp, const TDuration& initialDeploymentGracePeriod); bool IsTouched() const { return Touched; } void Touch() { Touched = true; } diff --git a/ydb/core/cms/sentinel_ut.cpp b/ydb/core/cms/sentinel_ut.cpp index c4afd877b7e2..e77ff5ada37f 100644 --- a/ydb/core/cms/sentinel_ut.cpp +++ b/ydb/core/cms/sentinel_ut.cpp @@ -550,6 +550,38 @@ Y_UNIT_TEST_SUITE(TSentinelTests) { env.SetPDiskState({id1, id2, id3}, NKikimrBlobStorage::TPDiskState::Normal, EPDiskStatus::ACTIVE); } } + + Y_UNIT_TEST(InitialDeploymentGracePeriod) { + NKikimrCms::TCmsConfig config; + config.MutableSentinelConfig()->SetInitialDeploymentGracePeriod(TDuration::Minutes(10).GetValue()); + TTestEnv env(8, 4, config); + + const TPDiskID id = env.RandomPDiskID(); + + Cerr << "...Initializing" << Endl; + env.SetPDiskState({id}, NKikimrBlobStorage::TPDiskState::Initial, NKikimrBlobStorage::INACTIVE); + + Cerr << "...Working normally" << Endl; + env.SetPDiskState({id}, NKikimrBlobStorage::TPDiskState::Normal, NKikimrBlobStorage::ACTIVE); + + Cerr << "...Disconnected" << Endl; + env.SetPDiskState({id}, NKikimrBlobStorage::TPDiskState::NodeDisconnected, NKikimrBlobStorage::INACTIVE); + + Cerr << "...Working normally again" << Endl; + env.SetPDiskState({id}, NKikimrBlobStorage::TPDiskState::Normal, NKikimrBlobStorage::ACTIVE); + + Cerr << "...Initial deployment grace period is over" << Endl; + env.AdvanceCurrentTime(TDuration::Minutes(15)); + + Cerr << "...Disconnected" << Endl; + env.SetPDiskState({id}, NKikimrBlobStorage::TPDiskState::NodeDisconnected, NKikimrBlobStorage::INACTIVE); + + Cerr << "...Working normally again, but no fast path to ACTIVE" << Endl; + for (ui32 i = 1; i < DefaultStateLimit; ++i) { + env.SetPDiskState({id}, NKikimrBlobStorage::TPDiskState::Normal); + } + env.SetPDiskState({id}, NKikimrBlobStorage::TPDiskState::Normal, EPDiskStatus::ACTIVE); + } } // TSentinelTests } diff --git a/ydb/core/cms/sentinel_ut_helpers.h b/ydb/core/cms/sentinel_ut_helpers.h index 2f8be68fdf2e..aa24986237c1 100644 --- a/ydb/core/cms/sentinel_ut_helpers.h +++ b/ydb/core/cms/sentinel_ut_helpers.h @@ -83,7 +83,7 @@ class TTestEnv: public TCmsTestEnv { } public: - explicit TTestEnv(ui32 nodeCount, ui32 pdisks) + explicit TTestEnv(ui32 nodeCount, ui32 pdisks, NKikimrCms::TCmsConfig config = {}) : TCmsTestEnv(nodeCount, pdisks) { SetLogPriority(NKikimrServices::CMS, NLog::PRI_DEBUG); @@ -123,6 +123,12 @@ class TTestEnv: public TCmsTestEnv { }); State = new TCmsState; + + auto* sentinelConfig = config.MutableSentinelConfig(); + if (!sentinelConfig->HasInitialDeploymentGracePeriod()) { + sentinelConfig->SetInitialDeploymentGracePeriod(0); + } + State->Config.Deserialize(config); MockClusterInfo(State->ClusterInfo); State->CmsActorId = GetSender(); diff --git a/ydb/core/cms/ut_sentinel/ya.make b/ydb/core/cms/ut_sentinel/ya.make index 160b4adbda8a..866eb861435e 100644 --- a/ydb/core/cms/ut_sentinel/ya.make +++ b/ydb/core/cms/ut_sentinel/ya.make @@ -1,5 +1,7 @@ UNITTEST_FOR(ydb/core/cms) +FORK_SUBTESTS() + SIZE(MEDIUM) PEERDIR( diff --git a/ydb/core/cms/ya.make b/ydb/core/cms/ya.make index 1d4c9845dc5f..3d6a40be8e7f 100644 --- a/ydb/core/cms/ya.make +++ b/ydb/core/cms/ya.make @@ -21,6 +21,7 @@ SRCS( cms_tx_remove_permissions.cpp cms_tx_remove_request.cpp cms_tx_remove_task.cpp + cms_tx_store_first_boot_timestamp.cpp cms_tx_store_permissions.cpp cms_tx_store_walle_task.cpp cms_tx_update_config.cpp diff --git a/ydb/core/protos/cms.proto b/ydb/core/protos/cms.proto index 8375aec1b8be..990032967e46 100644 --- a/ydb/core/protos/cms.proto +++ b/ydb/core/protos/cms.proto @@ -458,6 +458,7 @@ message TCmsConfig { optional EEvictVDisksStatus EvictVDisksStatus = 15; optional uint32 GoodStateLimit = 16 [default = 5]; + optional uint64 InitialDeploymentGracePeriod = 20 [default = 600000000]; } message TLogConfig { diff --git a/ydb/core/protos/counters_cms.proto b/ydb/core/protos/counters_cms.proto index 331e8e0e3353..aaf3bac01a8a 100644 --- a/ydb/core/protos/counters_cms.proto +++ b/ydb/core/protos/counters_cms.proto @@ -64,4 +64,5 @@ enum ETxTypes { TXTYPE_STORE_WALLE_TASK = 13 [(TxTypeOpts) = {Name: "TxStoreWalleTask"}]; TXTYPE_UPDATE_CONFIG = 14 [(TxTypeOpts) = {Name: "TxUpdateConfig"}]; TXTYPE_UPDATE_DOWNTIMES = 15 [(TxTypeOpts) = {Name: "TxUpdateDowntimes"}]; + TXTYPE_STORE_FIRST_BOOT_TIMESTAMP = 16 [(TxTypeOpts) = {Name: "TxStoreFirstBootTimestamp"}]; }