From fa10d601265343a64d9d5b7b32eb133010b24127 Mon Sep 17 00:00:00 2001 From: pechatnov Date: Tue, 6 May 2025 11:09:14 +0300 Subject: [PATCH 01/24] YT: Fix enricher, add from backtrace enricher MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From backtrace enricher можно использовать для захвата бектрейсов при создании ошибок из исключений, а так же для перекладывания атрибутов из сложных исключений других библиотек. commit_hash:76711bd3bb7dbc1e41e43f80d43340d2ce8e4df7 --- library/cpp/yt/error/error.cpp | 42 ++++++++++--- library/cpp/yt/error/error.h | 15 ++++- library/cpp/yt/error/unittests/error_ut.cpp | 55 +++++++++++++++++ yt/yt/core/actions/future-inl.h | 3 +- yt/yt/core/actions/unittests/future_ut.cpp | 68 ++++++++++++++++++++- yt/yt/core/misc/error.cpp | 4 +- 6 files changed, 170 insertions(+), 17 deletions(-) diff --git a/library/cpp/yt/error/error.cpp b/library/cpp/yt/error/error.cpp index ea71a07feed3..c6e806fc58d4 100644 --- a/library/cpp/yt/error/error.cpp +++ b/library/cpp/yt/error/error.cpp @@ -30,6 +30,7 @@ void FormatValue(TStringBuilderBase* builder, TErrorCode code, TStringBuf spec) constexpr TStringBuf ErrorMessageTruncatedSuffix = "..."; TError::TEnricher TError::Enricher_; +TError::TFromExceptionEnricher TError::FromExceptionEnricher_; //////////////////////////////////////////////////////////////////////////////// @@ -248,6 +249,7 @@ TError::TErrorOr(const TErrorException& errorEx) noexcept { *this = errorEx.Error(); // NB: TErrorException verifies that error not IsOK at throwing end. + EnrichFromException(errorEx); } TError::TErrorOr(const std::exception& ex) @@ -277,8 +279,8 @@ TError::TErrorOr(const std::exception& ex) *this = TError(NYT::EErrorCode::Generic, TRuntimeFormat{ex.what()}); *this <<= TErrorAttribute("exception_type", TypeName(ex)); } + EnrichFromException(ex); YT_VERIFY(!IsOK()); - Enrich(); } TError::TErrorOr(std::string message, TDisableFormat) @@ -644,14 +646,31 @@ void TError::RegisterEnricher(TEnricher enricher) { // NB: This daisy-chaining strategy is optimal when there's O(1) callbacks. Convert to a vector // if the number grows. - if (Enricher_) { - Enricher_ = [first = std::move(Enricher_), second = std::move(enricher)] (TError& error) { - first(error); - second(error); - }; - } else { + if (!Enricher_) { Enricher_ = std::move(enricher); + return; + } + Enricher_ = [first = std::move(Enricher_), second = std::move(enricher)] (TError* error) { + first(error); + second(error); + }; +} + +void TError::RegisterFromExceptionEnricher(TFromExceptionEnricher enricher) +{ + // NB: This daisy-chaining strategy is optimal when there's O(1) callbacks. Convert to a vector + // if the number grows. 
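// ---------------------------------------------------------------------------
// Editorial aside, not part of the patch: a minimal usage sketch of the new
// from-exception hook. Per the commit message it is meant for capturing a
// backtrace at the point where an error is built from an exception, or for
// copying attributes out of complex exceptions thrown by other libraries.
// The TThirdPartyError type and the attribute name below are hypothetical;
// the sketch assumes only the declarations added to error.h by this patch.
struct TThirdPartyError
    : public std::runtime_error
{
    using std::runtime_error::runtime_error;
};

void RegisterThirdPartyEnricherSketch()
{
    TError::RegisterFromExceptionEnricher([] (TError* error, const std::exception& ex) {
        if (dynamic_cast<const TThirdPartyError*>(&ex)) {
            *error <<= TErrorAttribute("from_third_party", "true");
        }
    });
}
// Like RegisterEnricher, this is meant to be called once from single-threaded
// bootstrapping code; multiple registered enrichers run in registration order.
// ---------------------------------------------------------------------------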
+ if (!FromExceptionEnricher_) { + FromExceptionEnricher_ = std::move(enricher); + return; } + FromExceptionEnricher_ = [ + first = std::move(FromExceptionEnricher_), + second = std::move(enricher) + ] (TError* error, const std::exception& exception) { + first(error, exception); + second(error, exception); + }; } TError::TErrorOr(std::unique_ptr impl) @@ -668,7 +687,14 @@ void TError::MakeMutable() void TError::Enrich() { if (Enricher_) { - Enricher_(*this); + Enricher_(this); + } +} + +void TError::EnrichFromException(const std::exception& exception) +{ + if (FromExceptionEnricher_) { + FromExceptionEnricher_(this, exception); } } diff --git a/library/cpp/yt/error/error.h b/library/cpp/yt/error/error.h index 92f19bc398a6..4b329105bcb5 100644 --- a/library/cpp/yt/error/error.h +++ b/library/cpp/yt/error/error.h @@ -219,13 +219,20 @@ class [[nodiscard]] TErrorOr template TError operator << (const std::optional& rhs) const &; - // The |enricher| is called during TError construction and before TErrorOr<> construction. Meant - // to enrich the error, e.g. by setting generic attributes. The |RegisterEnricher| method is not + // The |enricher| is called during TError initial construction and before TErrorOr<> construction. Meant + // to enrich the error, e.g. by setting generic attributes. Copying TError from another TError or TErrorException + // doesn't call enrichers. The |RegisterEnricher| method is not // threadsafe and is meant to be called from single-threaded bootstrapping code. Multiple // enrichers are supported and will be called in order of registration. - using TEnricher = std::function; + using TEnricher = std::function; static void RegisterEnricher(TEnricher enricher); + // The |enricher| is called during TError every construction from std::exception (including TErrorException). + // The |RegisterFromExceptionEnricher| method is not threadsafe and is meant to be called from single-threaded + // bootstrapping code. Multiple enrichers are supported and will be called in order of registration. + using TFromExceptionEnricher = std::function; + static void RegisterFromExceptionEnricher(TFromExceptionEnricher enricher); + private: class TImpl; std::unique_ptr Impl_; @@ -234,10 +241,12 @@ class [[nodiscard]] TErrorOr void MakeMutable(); void Enrich(); + void EnrichFromException(const std::exception& exception); friend class TErrorAttributes; static TEnricher Enricher_; + static TFromExceptionEnricher FromExceptionEnricher_; }; //////////////////////////////////////////////////////////////////////////////// diff --git a/library/cpp/yt/error/unittests/error_ut.cpp b/library/cpp/yt/error/unittests/error_ut.cpp index a5576fad58a0..33a8e8fa9654 100644 --- a/library/cpp/yt/error/unittests/error_ut.cpp +++ b/library/cpp/yt/error/unittests/error_ut.cpp @@ -789,6 +789,61 @@ TEST(TErrorTest, MacroStaticAnalysisBrokenFormat) // }); } +TEST(TErrorTest, Enrichers) +{ + static auto getAttribute = [] (const TError& error) { + return error.Attributes().Get("test_attribute", ""); + }; + + { + static thread_local bool testEnricherEnabled = false; + testEnricherEnabled = true; + + TError::RegisterEnricher([](TError* error) { + if (testEnricherEnabled) { + *error <<= TErrorAttribute("test_attribute", getAttribute(*error) + "X"); + } + }); + + // Not from exception. + EXPECT_EQ(getAttribute(TError("E")), "X"); + EXPECT_EQ(getAttribute(TError(NYT::EErrorCode::Generic, "E")), "X"); + + // std::exception. + EXPECT_EQ(getAttribute(TError(std::runtime_error("E"))), "X"); + + // Copying. 
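            // (Editorial note: per the new header comment, copying a TError -- or routing
            // it through a TErrorException -- does not run the enrichers again, which is
            // why the copies below still carry a single "X" rather than accumulating.)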
+ EXPECT_EQ(getAttribute(TError(TError(std::runtime_error("E")))), "X"); + EXPECT_EQ(getAttribute(TError(TErrorException() <<= TError(std::runtime_error("E")))), "X"); + + testEnricherEnabled = false; + } + + { + static thread_local bool testFromExceptionEnricherEnabled = false; + testFromExceptionEnricherEnabled = true; + + TError::RegisterFromExceptionEnricher([](TError* error, const std::exception&) { + if (testFromExceptionEnricherEnabled) { + *error <<= TErrorAttribute("test_attribute", getAttribute(*error) + "X"); + } + }); + + // Not from exception. + EXPECT_EQ(getAttribute(TError("E")), ""); + EXPECT_EQ(getAttribute(TError(NYT::EErrorCode::Generic, "E")), ""); + + // From exception. + EXPECT_EQ(getAttribute(TError(std::runtime_error("E"))), "X"); + EXPECT_EQ(getAttribute(TError(TError(std::runtime_error("E")))), "X"); + + // From exception twice. + EXPECT_EQ(getAttribute(TError(TErrorException() <<= TError(std::runtime_error("E")))), "XX"); + + testFromExceptionEnricherEnabled = false; + } +} + //////////////////////////////////////////////////////////////////////////////// } // namespace diff --git a/yt/yt/core/actions/future-inl.h b/yt/yt/core/actions/future-inl.h index 30254e77d0d3..d5c52ee03a39 100644 --- a/yt/yt/core/actions/future-inl.h +++ b/yt/yt/core/actions/future-inl.h @@ -722,7 +722,8 @@ void InterceptExceptions(const TPromise& promise, const F& func) auto guard = MakeFutureCurrentTokenGuard(promise.Impl_.Get()); func(); } catch (const NYT::TErrorException& ex) { - promise.Set(ex.Error()); + // Create error explicitly from exception so that FromExceptionEnricher is called. + promise.Set(NYT::TError(ex)); } catch (const std::exception& ex) { promise.Set(NYT::TError(ex)); } catch (const NConcurrency::TFiberCanceledException&) { diff --git a/yt/yt/core/actions/unittests/future_ut.cpp b/yt/yt/core/actions/unittests/future_ut.cpp index 7ae6e2bbb5b5..19a6a08e6849 100644 --- a/yt/yt/core/actions/unittests/future_ut.cpp +++ b/yt/yt/core/actions/unittests/future_ut.cpp @@ -1755,7 +1755,7 @@ TEST_F(TFutureTest, AsyncViaCanceledInvoker2) << NYT::ToString(error); } -TEST_F(TFutureTest, YT_12720) +TError CreateFiberCanceledError(TError cancelationError) { auto aqueue = New(); auto invoker = aqueue->GetInvoker(); @@ -1768,8 +1768,13 @@ TEST_F(TFutureTest, YT_12720) WaitFor(taskStarted.ToFuture()).ThrowOnError(); - future.Cancel(NYT::TError(NYT::EErrorCode::Canceled, "Fiber canceled in .Reset() of TFiberGuard")); - auto error = WaitFor(future); + future.Cancel(cancelationError); + return WaitFor(future); +} + +TEST_F(TFutureTest, YT_12720) +{ + auto error = CreateFiberCanceledError(NYT::TError(NYT::EErrorCode::Canceled, "Fiber canceled in .Reset() of TFiberGuard")); EXPECT_FALSE(error.IsOK()); EXPECT_EQ(error.GetCode(), NYT::EErrorCode::Canceled); EXPECT_TRUE(NYT::ToString(error).Contains("Fiber canceled in .Reset() of TFiberGuard")) @@ -1803,6 +1808,63 @@ TEST_F(TFutureTest, DiscardInApply) << NYT::ToString(error); } +TEST_F(TFutureTest, ErrorFromException) +{ + // Creating error from exception whenever possible is important for FromExceptionEnricher. So test it with enricher. 
+ + static thread_local bool testFromExceptionEnricherEnabled = false; + testFromExceptionEnricherEnabled = true; + auto finally = Finally([] { + testFromExceptionEnricherEnabled = false; + }); + + static auto getAttribute = [] (const TError& error) { + return error.Attributes().Get("test_attribute", ""); + }; + + TError::RegisterFromExceptionEnricher([](TError* error, const std::exception&) { + if (testFromExceptionEnricherEnabled) { + *error <<= TErrorAttribute("test_attribute", getAttribute(*error) + "X"); + } + }); + + static auto getError = [] (auto&& func) -> TError { + return BIND(func).AsyncVia(GetSyncInvoker()).Run().Get(); + }; + + // If there is no exception, there is no error. + EXPECT_TRUE(getError([] {}).IsOK()); + + // If there is std::exception, there is error and enricher is called. + { + auto error = getError([] { + throw std::runtime_error("test_std"); + }); + ASSERT_FALSE(error.IsOK()); + EXPECT_TRUE(error.GetMessage().contains("test_std")); + EXPECT_EQ(getAttribute(error), "X"); + } + + // If there is TErrorException, there is an error and enricher is called. + { + auto error = getError([] { + THROW_ERROR_EXCEPTION("test_yt"); + }); + ASSERT_FALSE(error.IsOK()); + EXPECT_TRUE(error.GetMessage().contains("test_yt")); + EXPECT_EQ(getAttribute(error), "X"); + } + + // If there is TFiberCanceledException, there is an error, but enricher is not called. + { + auto error = CreateFiberCanceledError(NYT::TError(NYT::EErrorCode::Canceled, "test_fiber_canceled")); + + ASSERT_FALSE(error.IsOK()); + EXPECT_TRUE(error.GetMessage().contains("test_fiber_canceled")); + EXPECT_EQ(getAttribute(error), ""); + } +} + //////////////////////////////////////////////////////////////////////////////// } // namespace diff --git a/yt/yt/core/misc/error.cpp b/yt/yt/core/misc/error.cpp index 6601358f7533..7d20101a8ff9 100644 --- a/yt/yt/core/misc/error.cpp +++ b/yt/yt/core/misc/error.cpp @@ -699,9 +699,9 @@ void TErrorCodicils::Initialize() Initialized_ = true; ErrorCodicilsSlot(); // Warm up the slot. 
- TError::RegisterEnricher([] (TError& error) { + TError::RegisterEnricher([] (TError* error) { if (auto* codicils = TErrorCodicils::MaybeGet()) { - codicils->Apply(error); + codicils->Apply(*error); } }); } From d3153b6102d6495f4af17e5d35964a0b871f4542 Mon Sep 17 00:00:00 2001 From: mikailbag Date: Tue, 6 May 2025 11:29:22 +0300 Subject: [PATCH 02/24] Avoid recursion in resolve_unit_addrs_overlap commit_hash:06cb3883a9d228b3505ad0a19162922b0a358b0b --- contrib/libs/backtrace/dwarf.c | 145 ++++++------ .../patches/010-avoid-recursion.patch | 215 ++++++++++++++++++ 2 files changed, 292 insertions(+), 68 deletions(-) create mode 100644 contrib/libs/backtrace/patches/010-avoid-recursion.patch diff --git a/contrib/libs/backtrace/dwarf.c b/contrib/libs/backtrace/dwarf.c index 2f323acb61a0..81af2b59057d 100644 --- a/contrib/libs/backtrace/dwarf.c +++ b/contrib/libs/backtrace/dwarf.c @@ -1615,8 +1615,7 @@ unit_addrs_search (const void *vkey, const void *ventry) static int resolve_unit_addrs_overlap_walk (struct backtrace_state *state, - size_t *pfrom, size_t *pto, - struct unit_addrs *enclosing, + struct backtrace_vector *enclosing, struct unit_addrs_vector *old_vec, backtrace_error_callback error_callback, void *data, @@ -1627,25 +1626,36 @@ resolve_unit_addrs_overlap_walk (struct backtrace_state *state, struct unit_addrs *new_addrs; size_t from; size_t to; + size_t enclosing_count; old_addrs = (struct unit_addrs *) old_vec->vec.base; old_count = old_vec->count; new_addrs = (struct unit_addrs *) new_vec->vec.base; - for (from = *pfrom, to = *pto; from < old_count; from++, to++) + enclosing_count = 0; + + to = 0; + for (from = 0; from < old_count; from++) { - /* If we are in the scope of a larger range that can no longer - cover any further ranges, return back to the caller. */ + struct unit_addrs *current_enclosing; + new_addrs[to] = old_addrs[from]; + to++; + + /* While we are in the scope of a larger range that can no longer + cover any further ranges, pop it from the enclosing stack. */ + while (enclosing_count > 0 + && ((struct unit_addrs**)enclosing->base)[enclosing_count-1]->high <= old_addrs[from].low) + { + enclosing_count--; + enclosing->alc += sizeof (struct unit_addrs*); + } + if (enclosing_count > 0) { + current_enclosing = ((struct unit_addrs**)enclosing->base)[enclosing_count-1]; + } else { + current_enclosing = NULL; + } - if (enclosing != NULL - && enclosing->high <= old_addrs[from].low) - { - *pfrom = from; - *pto = to; - return 1; - } - new_addrs[to] = old_addrs[from]; /* If we are in scope of a larger range, fill in any gaps between this entry and the next one. @@ -1653,65 +1663,63 @@ resolve_unit_addrs_overlap_walk (struct backtrace_state *state, There is an extra entry at the end of the vector, so it's always OK to refer to from + 1. 
*/ - if (enclosing != NULL - && enclosing->high > old_addrs[from].high - && old_addrs[from].high < old_addrs[from + 1].low) - { - void *grew; - size_t new_high; + if (current_enclosing != NULL + && current_enclosing->high > old_addrs[from].high + && old_addrs[from].high < old_addrs[from + 1].low) + { + void *grew; + size_t new_high; - grew = backtrace_vector_grow (state, sizeof (struct unit_addrs), + grew = backtrace_vector_grow (state, sizeof (struct unit_addrs), error_callback, data, &new_vec->vec); - if (grew == NULL) - return 0; - new_addrs = (struct unit_addrs *) new_vec->vec.base; - to++; - new_addrs[to].low = old_addrs[from].high; - new_high = old_addrs[from + 1].low; - if (enclosing->high < new_high) - new_high = enclosing->high; - new_addrs[to].high = new_high; - new_addrs[to].u = enclosing->u; - } + if (grew == NULL) + return 0; + new_addrs = (struct unit_addrs *) new_vec->vec.base; + new_addrs[to].low = old_addrs[from].high; + new_high = old_addrs[from + 1].low; + if (current_enclosing->high < new_high) + new_high = current_enclosing->high; + new_addrs[to].high = new_high; + new_addrs[to].u = current_enclosing->u; + to++; + } /* If this range has a larger scope than the next one, use it to - fill in any gaps. */ + fill in any gaps. */ if (old_addrs[from].high > old_addrs[from + 1].high) - { - *pfrom = from + 1; - *pto = to + 1; - if (!resolve_unit_addrs_overlap_walk (state, pfrom, pto, - &old_addrs[from], old_vec, - error_callback, data, new_vec)) - return 0; - from = *pfrom; - to = *pto; - - /* Undo the increment the loop is about to do. */ - from--; - to--; - } + { + void* grew; + struct unit_addrs **enclosing_top; + + grew = backtrace_vector_grow (state, sizeof (struct unit_addrs *), + error_callback, data, enclosing); + if (grew == NULL) + return 0; + enclosing_top = ((struct unit_addrs **) (enclosing->base)) + enclosing_count; + + *enclosing_top = &old_addrs[from]; + } } - if (enclosing == NULL) - { - struct unit_addrs *pa; + + + struct unit_addrs *pa; - /* Add trailing entry. */ + /* Add trailing entry. */ - pa = ((struct unit_addrs *) - backtrace_vector_grow (state, sizeof (struct unit_addrs), - error_callback, data, &new_vec->vec)); - if (pa == NULL) - return 0; - pa->low = 0; - --pa->low; - pa->high = pa->low; - pa->u = NULL; + pa = ((struct unit_addrs *) + backtrace_vector_grow (state, sizeof (struct unit_addrs), + error_callback, data, &new_vec->vec)); + if (pa == NULL) + return 0; + pa->low = 0; + --pa->low; + pa->high = pa->low; + pa->u = NULL; - new_vec->count = to; - } + new_vec->count = to; + return 1; } @@ -1756,8 +1764,8 @@ resolve_unit_addrs_overlap (struct backtrace_state *state, size_t i; struct unit_addrs_vector new_vec; void *grew; - size_t from; - size_t to; + int walk_ok; + struct backtrace_vector enclosing; addrs = (struct unit_addrs *) addrs_vec->vec.base; count = addrs_vec->count; @@ -1787,15 +1795,16 @@ resolve_unit_addrs_overlap (struct backtrace_state *state, error_callback, data, &new_vec.vec); if (grew == NULL) return 0; + memset (&enclosing, 0, sizeof enclosing); - from = 0; - to = 0; - resolve_unit_addrs_overlap_walk (state, &from, &to, NULL, addrs_vec, + walk_ok = resolve_unit_addrs_overlap_walk (state, &enclosing, addrs_vec, error_callback, data, &new_vec); backtrace_vector_free (state, &addrs_vec->vec, error_callback, data); - *addrs_vec = new_vec; + backtrace_vector_free (state, &enclosing, error_callback, data); + if (walk_ok) + *addrs_vec = new_vec; - return 1; + return walk_ok; } /* Sort the line vector by PC. 
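/* ------------------------------------------------------------------------
   Editorial aside, not part of the patch: the general shape of the
   transformation applied above.  The recursive walk descended whenever a
   range enclosed the ranges that follow; the iterative version keeps the
   chain of still-open enclosing ranges in an explicit stack and pops
   entries whose high bound has already been passed.  The toy ranges below
   are hypothetical; the real code stacks pointers into the unit_addrs
   vector and grows a backtrace_vector instead of a fixed-size array.  */

struct toy_range { unsigned low; unsigned high; };

static void toy_walk (const struct toy_range *r, unsigned n)
{
  const struct toy_range *stack[64]; /* innermost enclosing range is last */
  unsigned depth = 0;
  unsigned i;

  for (i = 0; i < n; i++)
    {
      /* Pop enclosing ranges that end before r[i] starts.  */
      while (depth > 0 && stack[depth - 1]->high <= r[i].low)
        depth--;

      /* r[i] is processed here in the context of stack[depth - 1], if any
         (this is where the real code fills gaps with the enclosing unit).  */

      /* If r[i] out-scopes the next range, push it as the new innermost.  */
      if (i + 1 < n && r[i].high > r[i + 1].high && depth < 64)
        stack[depth++] = &r[i];
    }
}
/* --------------------------------------------------------------------- */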
We want a stable sort here to maintain diff --git a/contrib/libs/backtrace/patches/010-avoid-recursion.patch b/contrib/libs/backtrace/patches/010-avoid-recursion.patch new file mode 100644 index 000000000000..7efaf1b660ff --- /dev/null +++ b/contrib/libs/backtrace/patches/010-avoid-recursion.patch @@ -0,0 +1,215 @@ +commit 8d7954369c2f1ef62fee65057da9af4dee17032d (HEAD -> pr-8661314, arcadia/users/mikailbag/submit-ac578483-dd24a51-10986d8a-96572213) +merge: ec02af38d2b9facf29be0e6cb9130bd73b2eccf9 1ba32821f8f7452cb6c8b354d82ffb7e2b2f74aa +author: mikailbag +date: 2025-05-05T19:22:33+03:00 + + submit for PR 8661314 + + arc up from 1ba32821f8 + +diff --git a/dwarf.c b/dwarf.c +--- a/dwarf.c ++++ b/dwarf.c +@@ -1615,8 +1615,7 @@ unit_addrs_search (const void *vkey, const void *ventry) + + static int + resolve_unit_addrs_overlap_walk (struct backtrace_state *state, +- size_t *pfrom, size_t *pto, +- struct unit_addrs *enclosing, ++ struct backtrace_vector *enclosing, + struct unit_addrs_vector *old_vec, + backtrace_error_callback error_callback, + void *data, +@@ -1627,25 +1626,36 @@ resolve_unit_addrs_overlap_walk (struct backtrace_state *state, + struct unit_addrs *new_addrs; + size_t from; + size_t to; ++ size_t enclosing_count; + + old_addrs = (struct unit_addrs *) old_vec->vec.base; + old_count = old_vec->count; + new_addrs = (struct unit_addrs *) new_vec->vec.base; + +- for (from = *pfrom, to = *pto; from < old_count; from++, to++) ++ enclosing_count = 0; ++ ++ to = 0; ++ for (from = 0; from < old_count; from++) + { +- /* If we are in the scope of a larger range that can no longer +- cover any further ranges, return back to the caller. */ ++ struct unit_addrs *current_enclosing; ++ new_addrs[to] = old_addrs[from]; ++ to++; ++ ++ /* While we are in the scope of a larger range that can no longer ++ cover any further ranges, pop it from the enclosing stack. */ ++ while (enclosing_count > 0 ++ && ((struct unit_addrs**)enclosing->base)[enclosing_count-1]->high <= old_addrs[from].low) ++ { ++ enclosing_count--; ++ enclosing->alc += sizeof (struct unit_addrs*); ++ } ++ if (enclosing_count > 0) { ++ current_enclosing = ((struct unit_addrs**)enclosing->base)[enclosing_count-1]; ++ } else { ++ current_enclosing = NULL; ++ } + +- if (enclosing != NULL +- && enclosing->high <= old_addrs[from].low) +- { +- *pfrom = from; +- *pto = to; +- return 1; +- } + +- new_addrs[to] = old_addrs[from]; + + /* If we are in scope of a larger range, fill in any gaps + between this entry and the next one. +@@ -1653,65 +1663,63 @@ resolve_unit_addrs_overlap_walk (struct backtrace_state *state, + There is an extra entry at the end of the vector, so it's + always OK to refer to from + 1. 
*/ + +- if (enclosing != NULL +- && enclosing->high > old_addrs[from].high +- && old_addrs[from].high < old_addrs[from + 1].low) +- { +- void *grew; +- size_t new_high; ++ if (current_enclosing != NULL ++ && current_enclosing->high > old_addrs[from].high ++ && old_addrs[from].high < old_addrs[from + 1].low) ++ { ++ void *grew; ++ size_t new_high; + +- grew = backtrace_vector_grow (state, sizeof (struct unit_addrs), ++ grew = backtrace_vector_grow (state, sizeof (struct unit_addrs), + error_callback, data, &new_vec->vec); +- if (grew == NULL) +- return 0; +- new_addrs = (struct unit_addrs *) new_vec->vec.base; +- to++; +- new_addrs[to].low = old_addrs[from].high; +- new_high = old_addrs[from + 1].low; +- if (enclosing->high < new_high) +- new_high = enclosing->high; +- new_addrs[to].high = new_high; +- new_addrs[to].u = enclosing->u; +- } ++ if (grew == NULL) ++ return 0; ++ new_addrs = (struct unit_addrs *) new_vec->vec.base; ++ new_addrs[to].low = old_addrs[from].high; ++ new_high = old_addrs[from + 1].low; ++ if (current_enclosing->high < new_high) ++ new_high = current_enclosing->high; ++ new_addrs[to].high = new_high; ++ new_addrs[to].u = current_enclosing->u; ++ to++; ++ } + + /* If this range has a larger scope than the next one, use it to +- fill in any gaps. */ ++ fill in any gaps. */ + + if (old_addrs[from].high > old_addrs[from + 1].high) +- { +- *pfrom = from + 1; +- *pto = to + 1; +- if (!resolve_unit_addrs_overlap_walk (state, pfrom, pto, +- &old_addrs[from], old_vec, +- error_callback, data, new_vec)) +- return 0; +- from = *pfrom; +- to = *pto; +- +- /* Undo the increment the loop is about to do. */ +- from--; +- to--; +- } ++ { ++ void* grew; ++ struct unit_addrs **enclosing_top; ++ ++ grew = backtrace_vector_grow (state, sizeof (struct unit_addrs *), ++ error_callback, data, enclosing); ++ if (grew == NULL) ++ return 0; ++ enclosing_top = ((struct unit_addrs **) (enclosing->base)) + enclosing_count; ++ ++ *enclosing_top = &old_addrs[from]; ++ } + } + +- if (enclosing == NULL) +- { +- struct unit_addrs *pa; ++ ++ ++ struct unit_addrs *pa; + +- /* Add trailing entry. */ ++ /* Add trailing entry. 
*/ + +- pa = ((struct unit_addrs *) +- backtrace_vector_grow (state, sizeof (struct unit_addrs), +- error_callback, data, &new_vec->vec)); +- if (pa == NULL) +- return 0; +- pa->low = 0; +- --pa->low; +- pa->high = pa->low; +- pa->u = NULL; ++ pa = ((struct unit_addrs *) ++ backtrace_vector_grow (state, sizeof (struct unit_addrs), ++ error_callback, data, &new_vec->vec)); ++ if (pa == NULL) ++ return 0; ++ pa->low = 0; ++ --pa->low; ++ pa->high = pa->low; ++ pa->u = NULL; + +- new_vec->count = to; +- } ++ new_vec->count = to; ++ + + return 1; + } +@@ -1756,8 +1764,8 @@ resolve_unit_addrs_overlap (struct backtrace_state *state, + size_t i; + struct unit_addrs_vector new_vec; + void *grew; +- size_t from; +- size_t to; ++ int walk_ok; ++ struct backtrace_vector enclosing; + + addrs = (struct unit_addrs *) addrs_vec->vec.base; + count = addrs_vec->count; +@@ -1787,15 +1795,16 @@ resolve_unit_addrs_overlap (struct backtrace_state *state, + error_callback, data, &new_vec.vec); + if (grew == NULL) + return 0; ++ memset (&enclosing, 0, sizeof enclosing); + +- from = 0; +- to = 0; +- resolve_unit_addrs_overlap_walk (state, &from, &to, NULL, addrs_vec, ++ walk_ok = resolve_unit_addrs_overlap_walk (state, &enclosing, addrs_vec, + error_callback, data, &new_vec); + backtrace_vector_free (state, &addrs_vec->vec, error_callback, data); +- *addrs_vec = new_vec; ++ backtrace_vector_free (state, &enclosing, error_callback, data); ++ if (walk_ok) ++ *addrs_vec = new_vec; + +- return 1; ++ return walk_ok; + } + + /* Sort the line vector by PC. We want a stable sort here to maintain From eb9f7c3ff5a0712b5e711cb88e12f1dcb68ad612 Mon Sep 17 00:00:00 2001 From: robot-contrib Date: Tue, 6 May 2025 11:35:11 +0300 Subject: [PATCH 03/24] Update contrib/restricted/boost/array to 1.88.0 commit_hash:0175a76a95ef74c349448549cc756046f4ebec70 --- .../boost/algorithm/searching/boyer_moore.hpp | 1 + .../patches/pr124-add-missing-include.patch | 21 + .../boost/array/.yandex_meta/default.nix | 4 +- .../.yandex_meta/devtools.copyrights.report | 2 +- .../.yandex_meta/devtools.licenses.report | 2 +- .../boost/array/include/boost/array.hpp | 411 ++++++++++-------- contrib/restricted/boost/array/ya.make | 5 +- 7 files changed, 263 insertions(+), 183 deletions(-) create mode 100644 contrib/restricted/boost/algorithm/patches/pr124-add-missing-include.patch diff --git a/contrib/restricted/boost/algorithm/include/boost/algorithm/searching/boyer_moore.hpp b/contrib/restricted/boost/algorithm/include/boost/algorithm/searching/boyer_moore.hpp index 80a5a4474d96..e3c00105c6bd 100644 --- a/contrib/restricted/boost/algorithm/include/boost/algorithm/searching/boyer_moore.hpp +++ b/contrib/restricted/boost/algorithm/include/boost/algorithm/searching/boyer_moore.hpp @@ -10,6 +10,7 @@ #ifndef BOOST_ALGORITHM_BOYER_MOORE_SEARCH_HPP #define BOOST_ALGORITHM_BOYER_MOORE_SEARCH_HPP +#include // for std::reverse_copy #include // for std::iterator_traits #include diff --git a/contrib/restricted/boost/algorithm/patches/pr124-add-missing-include.patch b/contrib/restricted/boost/algorithm/patches/pr124-add-missing-include.patch new file mode 100644 index 000000000000..b651dab0989f --- /dev/null +++ b/contrib/restricted/boost/algorithm/patches/pr124-add-missing-include.patch @@ -0,0 +1,21 @@ +From 7012619c8056b1b30f88bc57dd39c80c0018214f Mon Sep 17 00:00:00 2001 +From: Yuriy Chernyshov +Date: Mon, 5 May 2025 18:01:59 +0200 +Subject: [PATCH] Add missing include + +--- + include/boost/algorithm/searching/boyer_moore.hpp | 1 + + 1 file changed, 1 
insertion(+) + +diff --git a/include/boost/algorithm/searching/boyer_moore.hpp b/include/boost/algorithm/searching/boyer_moore.hpp +index 80a5a4474..e3c00105c 100644 +--- a/include/boost/algorithm/searching/boyer_moore.hpp ++++ b/include/boost/algorithm/searching/boyer_moore.hpp +@@ -10,6 +10,7 @@ + #ifndef BOOST_ALGORITHM_BOYER_MOORE_SEARCH_HPP + #define BOOST_ALGORITHM_BOYER_MOORE_SEARCH_HPP + ++#include // for std::reverse_copy + #include // for std::iterator_traits + + #include diff --git a/contrib/restricted/boost/array/.yandex_meta/default.nix b/contrib/restricted/boost/array/.yandex_meta/default.nix index 31d056ef9718..a627002ef7a6 100644 --- a/contrib/restricted/boost/array/.yandex_meta/default.nix +++ b/contrib/restricted/boost/array/.yandex_meta/default.nix @@ -1,13 +1,13 @@ self: super: with self; { boost_array = stdenv.mkDerivation rec { pname = "boost_array"; - version = "1.87.0"; + version = "1.88.0"; src = fetchFromGitHub { owner = "boostorg"; repo = "array"; rev = "boost-${version}"; - hash = "sha256-KlUpm9POv39C7BBpC2foHzTUCE9t2uEEka9kMdRVgcA="; + hash = "sha256-P7pFHRtIkmDxWv3Oq5xul5l2eh2zX6rAr6/24pk/daY="; }; }; } diff --git a/contrib/restricted/boost/array/.yandex_meta/devtools.copyrights.report b/contrib/restricted/boost/array/.yandex_meta/devtools.copyrights.report index 74ba3a85c532..3b242c7aace5 100644 --- a/contrib/restricted/boost/array/.yandex_meta/devtools.copyrights.report +++ b/contrib/restricted/boost/array/.yandex_meta/devtools.copyrights.report @@ -42,4 +42,4 @@ BELONGS ya.make Score : 100.00 Match type : COPYRIGHT Files with this license: - include/boost/array.hpp [10:10] + include/boost/array.hpp [13:13] diff --git a/contrib/restricted/boost/array/.yandex_meta/devtools.licenses.report b/contrib/restricted/boost/array/.yandex_meta/devtools.licenses.report index 2c6ffdea8bdc..5ed4f38a7b01 100644 --- a/contrib/restricted/boost/array/.yandex_meta/devtools.licenses.report +++ b/contrib/restricted/boost/array/.yandex_meta/devtools.licenses.report @@ -45,4 +45,4 @@ BELONGS ya.make Match type : NOTICE Links : http://www.boost.org/LICENSE_1_0.txt, http://www.boost.org/users/license.html, https://spdx.org/licenses/BSL-1.0 Files with this license: - include/boost/array.hpp [12:14] + include/boost/array.hpp [15:17] diff --git a/contrib/restricted/boost/array/include/boost/array.hpp b/contrib/restricted/boost/array/include/boost/array.hpp index 02bd76db7559..bea6f9819ac1 100644 --- a/contrib/restricted/boost/array/include/boost/array.hpp +++ b/contrib/restricted/boost/array/include/boost/array.hpp @@ -1,3 +1,6 @@ +#ifndef BOOST_ARRAY_HPP_INCLUDED +#define BOOST_ARRAY_HPP_INCLUDED + /* The following code declares class array, * an STL container (as wrapper) for arrays of constant size. 
* @@ -28,31 +31,31 @@ * * Jan 29, 2004 */ -#ifndef BOOST_ARRAY_HPP -#define BOOST_ARRAY_HPP -#include +#include +#include #if BOOST_WORKAROUND(BOOST_MSVC, >= 1400) # pragma warning(push) -# pragma warning(disable:4996) // 'std::equal': Function call with parameters that may be unsafe -# pragma warning(disable:4510) // boost::array' : default constructor could not be generated -# pragma warning(disable:4610) // warning C4610: class 'boost::array' can never be instantiated - user defined constructor required +# pragma warning(disable: 4510) // boost::array' : default constructor could not be generated +# pragma warning(disable: 4512) // boost::array' : assignment operator could not be generated +# pragma warning(disable: 4610) // class 'boost::array' can never be instantiated - user defined constructor required +# pragma warning(disable: 4702) // unreachable code #endif -#include -#include -#include #include -#include #include - #include -#include - -// FIXES for broken compilers -#include +#include +#include +#include +#include +#if defined(__cpp_impl_three_way_comparison) && __cpp_impl_three_way_comparison >= 201907L +# if __has_include() +# include +# endif +#endif namespace boost { @@ -72,62 +75,54 @@ namespace boost { typedef std::ptrdiff_t difference_type; // iterator support - iterator begin() { return elems; } - const_iterator begin() const { return elems; } - const_iterator cbegin() const { return elems; } + BOOST_CXX14_CONSTEXPR iterator begin() BOOST_NOEXCEPT { return elems; } + BOOST_CONSTEXPR const_iterator begin() const BOOST_NOEXCEPT { return elems; } + BOOST_CONSTEXPR const_iterator cbegin() const BOOST_NOEXCEPT { return elems; } - iterator end() { return elems+N; } - const_iterator end() const { return elems+N; } - const_iterator cend() const { return elems+N; } + BOOST_CXX14_CONSTEXPR iterator end() BOOST_NOEXCEPT { return elems+N; } + BOOST_CONSTEXPR const_iterator end() const BOOST_NOEXCEPT { return elems+N; } + BOOST_CONSTEXPR const_iterator cend() const BOOST_NOEXCEPT { return elems+N; } // reverse iterator support -#if !defined(BOOST_MSVC_STD_ITERATOR) && !defined(BOOST_NO_STD_ITERATOR_TRAITS) typedef std::reverse_iterator reverse_iterator; typedef std::reverse_iterator const_reverse_iterator; -#elif defined(_RWSTD_NO_CLASS_PARTIAL_SPEC) - typedef std::reverse_iterator reverse_iterator; - typedef std::reverse_iterator const_reverse_iterator; -#else - // workaround for broken reverse_iterator implementations - typedef std::reverse_iterator reverse_iterator; - typedef std::reverse_iterator const_reverse_iterator; -#endif - reverse_iterator rbegin() { return reverse_iterator(end()); } - const_reverse_iterator rbegin() const { + reverse_iterator rbegin() BOOST_NOEXCEPT { return reverse_iterator(end()); } + const_reverse_iterator rbegin() const BOOST_NOEXCEPT { return const_reverse_iterator(end()); } - const_reverse_iterator crbegin() const { + const_reverse_iterator crbegin() const BOOST_NOEXCEPT { return const_reverse_iterator(end()); } - reverse_iterator rend() { return reverse_iterator(begin()); } - const_reverse_iterator rend() const { + reverse_iterator rend() BOOST_NOEXCEPT { return reverse_iterator(begin()); } + const_reverse_iterator rend() const BOOST_NOEXCEPT { return const_reverse_iterator(begin()); } - const_reverse_iterator crend() const { + const_reverse_iterator crend() const BOOST_NOEXCEPT { return const_reverse_iterator(begin()); } // operator[] - reference operator[](size_type i) + BOOST_CXX14_CONSTEXPR reference operator[](size_type i) { return 
BOOST_ASSERT_MSG( i < N, "out of range" ), elems[i]; } - /*BOOST_CONSTEXPR*/ const_reference operator[](size_type i) const +#if !BOOST_WORKAROUND(BOOST_GCC, < 50000) + BOOST_CONSTEXPR +#endif + const_reference operator[](size_type i) const { return BOOST_ASSERT_MSG( i < N, "out of range" ), elems[i]; } // at() with range check - reference at(size_type i) { return rangecheck(i), elems[i]; } - /*BOOST_CONSTEXPR*/ const_reference at(size_type i) const { return rangecheck(i), elems[i]; } + BOOST_CXX14_CONSTEXPR reference at(size_type i) { return rangecheck(i), elems[i]; } + BOOST_CONSTEXPR const_reference at(size_type i) const { return rangecheck(i), elems[i]; } // front() and back() - reference front() + BOOST_CXX14_CONSTEXPR reference front() { return elems[0]; } @@ -137,7 +132,7 @@ namespace boost { return elems[0]; } - reference back() + BOOST_CXX14_CONSTEXPR reference back() { return elems[N-1]; } @@ -148,38 +143,56 @@ namespace boost { } // size is constant - static BOOST_CONSTEXPR size_type size() { return N; } - static BOOST_CONSTEXPR bool empty() { return false; } - static BOOST_CONSTEXPR size_type max_size() { return N; } + static BOOST_CONSTEXPR size_type size() BOOST_NOEXCEPT { return N; } + static BOOST_CONSTEXPR bool empty() BOOST_NOEXCEPT { return false; } + static BOOST_CONSTEXPR size_type max_size() BOOST_NOEXCEPT { return N; } enum { static_size = N }; // swap (note: linear complexity) - void swap (array& y) { - for (size_type i = 0; i < N; ++i) - boost::core::invoke_swap(elems[i],y.elems[i]); + BOOST_CXX14_CONSTEXPR void swap (array& y) + { + std::swap( elems, y.elems ); } - // direct access to data (read-only) - const T* data() const { return elems; } - T* data() { return elems; } + // direct access to data + BOOST_CONSTEXPR const T* data() const BOOST_NOEXCEPT { return elems; } + BOOST_CXX14_CONSTEXPR T* data() BOOST_NOEXCEPT { return elems; } - // use array as C array (direct read/write access to data) - T* c_array() { return elems; } + // obsolete + BOOST_DEPRECATED( "please use `data()` instead" ) + T* c_array() BOOST_NOEXCEPT { return elems; } // assignment with type conversion template - array& operator= (const array& rhs) { - std::copy(rhs.begin(),rhs.end(), begin()); + array& operator= (const array& rhs) + { + for( std::size_t i = 0; i < N; ++i ) + { + elems[ i ] = rhs.elems[ i ]; + } + return *this; } - // assign one value to all elements - void assign (const T& value) { fill ( value ); } // A synonym for fill - void fill (const T& value) + // fill with one value + BOOST_CXX14_CONSTEXPR void fill (const T& value) { - std::fill_n(begin(),size(),value); + // using elems[ 0 ] as a temporary copy + // avoids the aliasing opportunity betw. + // `value` and `elems` + + elems[ 0 ] = value; + + for( std::size_t i = 1; i < N; ++i ) + { + elems[ i ] = elems[ 0 ]; + } } + // an obsolete synonym for fill + BOOST_DEPRECATED( "please use `fill` instead" ) + void assign (const T& value) { fill ( value ); } + // check range (may be private because it is static) static BOOST_CONSTEXPR bool rangecheck (size_type i) { return i >= size() ? 
boost::throw_exception(std::out_of_range ("array<>: index out of range")), true : true; @@ -189,6 +202,8 @@ namespace boost { template< class T > class array< T, 0 > { + public: + struct {} elems; // enables initialization with = {{}} public: // type definitions @@ -201,42 +216,31 @@ namespace boost { typedef std::ptrdiff_t difference_type; // iterator support - iterator begin() { return iterator( reinterpret_cast< T * >( this ) ); } - const_iterator begin() const { return const_iterator( reinterpret_cast< const T * >( this ) ); } - const_iterator cbegin() const { return const_iterator( reinterpret_cast< const T * >( this ) ); } + BOOST_CXX14_CONSTEXPR iterator begin() BOOST_NOEXCEPT { return data(); } + BOOST_CONSTEXPR const_iterator begin() const BOOST_NOEXCEPT { return data(); } + BOOST_CONSTEXPR const_iterator cbegin() const BOOST_NOEXCEPT { return data(); } - iterator end() { return begin(); } - const_iterator end() const { return begin(); } - const_iterator cend() const { return cbegin(); } + BOOST_CXX14_CONSTEXPR iterator end() BOOST_NOEXCEPT { return begin(); } + BOOST_CONSTEXPR const_iterator end() const BOOST_NOEXCEPT { return begin(); } + BOOST_CONSTEXPR const_iterator cend() const BOOST_NOEXCEPT { return cbegin(); } // reverse iterator support -#if !defined(BOOST_MSVC_STD_ITERATOR) && !defined(BOOST_NO_STD_ITERATOR_TRAITS) typedef std::reverse_iterator reverse_iterator; typedef std::reverse_iterator const_reverse_iterator; -#elif defined(_RWSTD_NO_CLASS_PARTIAL_SPEC) - typedef std::reverse_iterator reverse_iterator; - typedef std::reverse_iterator const_reverse_iterator; -#else - // workaround for broken reverse_iterator implementations - typedef std::reverse_iterator reverse_iterator; - typedef std::reverse_iterator const_reverse_iterator; -#endif - reverse_iterator rbegin() { return reverse_iterator(end()); } - const_reverse_iterator rbegin() const { + reverse_iterator rbegin() BOOST_NOEXCEPT { return reverse_iterator(end()); } + const_reverse_iterator rbegin() const BOOST_NOEXCEPT { return const_reverse_iterator(end()); } - const_reverse_iterator crbegin() const { + const_reverse_iterator crbegin() const BOOST_NOEXCEPT { return const_reverse_iterator(end()); } - reverse_iterator rend() { return reverse_iterator(begin()); } - const_reverse_iterator rend() const { + reverse_iterator rend() BOOST_NOEXCEPT { return reverse_iterator(begin()); } + const_reverse_iterator rend() const BOOST_NOEXCEPT { return const_reverse_iterator(begin()); } - const_reverse_iterator crend() const { + const_reverse_iterator crend() const BOOST_NOEXCEPT { return const_reverse_iterator(begin()); } @@ -246,14 +250,14 @@ namespace boost { return failed_rangecheck(); } - /*BOOST_CONSTEXPR*/ const_reference operator[](size_type /*i*/) const + const_reference operator[](size_type /*i*/) const { return failed_rangecheck(); } // at() with range check - reference at(size_type /*i*/) { return failed_rangecheck(); } - /*BOOST_CONSTEXPR*/ const_reference at(size_type /*i*/) const { return failed_rangecheck(); } + reference at(size_type /*i*/) { return failed_rangecheck(); } + const_reference at(size_type /*i*/) const { return failed_rangecheck(); } // front() and back() reference front() @@ -261,7 +265,7 @@ namespace boost { return failed_rangecheck(); } - BOOST_CONSTEXPR const_reference front() const + const_reference front() const { return failed_rangecheck(); } @@ -271,26 +275,28 @@ namespace boost { return failed_rangecheck(); } - BOOST_CONSTEXPR const_reference back() const + const_reference back() const { 
return failed_rangecheck(); } // size is constant - static BOOST_CONSTEXPR size_type size() { return 0; } - static BOOST_CONSTEXPR bool empty() { return true; } - static BOOST_CONSTEXPR size_type max_size() { return 0; } + static BOOST_CONSTEXPR size_type size() BOOST_NOEXCEPT { return 0; } + static BOOST_CONSTEXPR bool empty() BOOST_NOEXCEPT { return true; } + static BOOST_CONSTEXPR size_type max_size() BOOST_NOEXCEPT { return 0; } enum { static_size = 0 }; - void swap (array& /*y*/) { + BOOST_CXX14_CONSTEXPR void swap (array& /*y*/) + { } - // direct access to data (read-only) - const T* data() const { return 0; } - T* data() { return 0; } + // direct access to data + BOOST_CONSTEXPR const T* data() const BOOST_NOEXCEPT { return 0; } + BOOST_CXX14_CONSTEXPR T* data() BOOST_NOEXCEPT { return 0; } - // use array as C array (direct read/write access to data) - T* c_array() { return 0; } + // obsolete + BOOST_DEPRECATED( "please use `data()` instead" ) + T* c_array() BOOST_NOEXCEPT { return 0; } // assignment with type conversion template @@ -298,87 +304,119 @@ namespace boost { return *this; } - // assign one value to all elements + // an obsolete synonym for fill + BOOST_DEPRECATED( "please use `fill` instead" ) void assign (const T& value) { fill ( value ); } - void fill (const T& ) {} + + // fill with one value + BOOST_CXX14_CONSTEXPR void fill (const T& ) {} // check range (may be private because it is static) - static reference failed_rangecheck () { - std::out_of_range e("attempt to access element of an empty array"); - boost::throw_exception(e); -#if defined(BOOST_NO_EXCEPTIONS) || (!defined(BOOST_MSVC) && !defined(__PATHSCALE__)) - // - // We need to return something here to keep - // some compilers happy: however we will never - // actually get here.... 
- // - static T placeholder; - return placeholder; -#endif - } + static reference failed_rangecheck () + { + boost::throw_exception( std::out_of_range( "attempt to access element of an empty array" ) ); + } }; // comparisons template - bool operator== (const array& x, const array& y) { - return std::equal(x.begin(), x.end(), y.begin()); + BOOST_CXX14_CONSTEXPR bool operator== (const array& x, const array& y) + { + for( std::size_t i = 0; i < N; ++i ) + { + if( !( x[ i ] == y[ i ] ) ) return false; + } + + return true; } - template - bool operator< (const array& x, const array& y) { - return std::lexicographical_compare(x.begin(),x.end(),y.begin(),y.end()); + +#if BOOST_WORKAROUND(BOOST_GCC, < 90000) + + template + BOOST_CXX14_CONSTEXPR bool operator== (const array& /*x*/, const array& /*y*/) + { + return true; } + +#endif + template - bool operator!= (const array& x, const array& y) { + BOOST_CXX14_CONSTEXPR bool operator!= (const array& x, const array& y) { return !(x==y); } + + template + BOOST_CXX14_CONSTEXPR bool operator< (const array& x, const array& y) + { + for( std::size_t i = 0; i < N; ++i ) + { + if( x[ i ] < y[ i ] ) return true; + if( y[ i ] < x[ i ] ) return false; + } + + return false; + } + +#if BOOST_WORKAROUND(BOOST_GCC, < 90000) + + template + BOOST_CXX14_CONSTEXPR bool operator< (const array& /*x*/, const array& /*y*/) + { + return false; + } + +#endif + template - bool operator> (const array& x, const array& y) { + BOOST_CXX14_CONSTEXPR bool operator> (const array& x, const array& y) { return y - bool operator<= (const array& x, const array& y) { + BOOST_CXX14_CONSTEXPR bool operator<= (const array& x, const array& y) { return !(y - bool operator>= (const array& x, const array& y) { + BOOST_CXX14_CONSTEXPR bool operator>= (const array& x, const array& y) { return !(x - inline void swap (array& x, array& y) { + BOOST_CXX14_CONSTEXPR inline void swap (array& x, array& y) { x.swap(y); } -#if defined(__SUNPRO_CC) -// Trac ticket #4757; the Sun Solaris compiler can't handle -// syntax like 'T(&get_c_array(boost::array& arg))[N]' -// -// We can't just use this for all compilers, because the -// borland compilers can't handle this form. - namespace detail { - template struct c_array - { - typedef T type[N]; - }; +#if defined(__cpp_impl_three_way_comparison) && __cpp_impl_three_way_comparison >= 201907L +# if __has_include() + + template + constexpr auto operator<=> (const array& x, const array& y) + -> decltype( x.elems[ 0 ] <=> y.elems[ 0 ] ) + { + for( std::size_t i = 0; i < N; ++i ) + { + auto r = x.elems[ i ] <=> y.elems[ i ]; + if( r != 0 ) return r; + } + + return std::strong_ordering::equal; + } + + template + constexpr auto operator<=> (const array& /*x*/, const array& /*y*/) + -> std::strong_ordering + { + return std::strong_ordering::equal; } - // Specific for boost::array: simply returns its elems data member. - template - typename detail::c_array::type& get_c_array(boost::array& arg) - { - return arg.elems; - } - - // Specific for boost::array: simply returns its elems data member. - template - typename detail::c_array::type const& get_c_array(const boost::array& arg) - { - return arg.elems; - } -#else -// Specific for boost::array: simply returns its elems data member. +# endif +#endif + + // undocumented and obsolete template + BOOST_DEPRECATED( "please use `elems` instead" ) T(&get_c_array(boost::array& arg))[N] { return arg.elems; @@ -386,49 +424,68 @@ namespace boost { // Const version. 
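// ---------------------------------------------------------------------------
// Editorial aside, not part of the diff: a minimal usage sketch of the 1.88
// surface touched above (to_array, boost::get<>, fill), assuming the updated
// <boost/array.hpp> is on the include path.
inline int boost_array_188_sketch()
{
    int raw[3] = {1, 2, 3};
    boost::array<int, 3> a = boost::to_array(raw); // new conversion helper
    a.fill(boost::get<0>(a));                      // assign() and c_array() are now deprecated
    return *a.data();
}
// ---------------------------------------------------------------------------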
template + BOOST_DEPRECATED( "please use `elems` instead" ) const T(&get_c_array(const boost::array& arg))[N] { return arg.elems; } -#endif -#if 0 - // Overload for std::array, assuming that std::array will have - // explicit conversion functions as discussed at the WG21 meeting - // in Summit, March 2009. - template - T(&get_c_array(std::array& arg))[N] + template + BOOST_CXX14_CONSTEXPR T &get(boost::array &arr) BOOST_NOEXCEPT { - return static_cast(arg); + BOOST_STATIC_ASSERT_MSG ( Idx < N, "boost::get<>(boost::array &) index out of range" ); + return arr[Idx]; } - // Const version. - template - const T(&get_c_array(const std::array& arg))[N] + template + BOOST_CONSTEXPR const T &get(const boost::array &arr) BOOST_NOEXCEPT { - return static_cast(arg); + BOOST_STATIC_ASSERT_MSG ( Idx < N, "boost::get<>(const boost::array &) index out of range" ); + return arr[Idx]; } -#endif - template std::size_t hash_range(It, It); + template + BOOST_CXX14_CONSTEXPR array to_array( T const (&a)[ N ] ) + { + array r = {}; + + for( std::size_t i = 0; i < N; ++i ) + { + r[ i ] = a[ i ]; + } + + return r; + } + +#if !defined(BOOST_NO_CXX11_RVALUE_REFERENCES) template - std::size_t hash_value(const array& arr) + BOOST_CXX14_CONSTEXPR array to_array( T (&&a)[ N ] ) { - return boost::hash_range(arr.begin(), arr.end()); + array r = {}; + + for( std::size_t i = 0; i < N; ++i ) + { + r[ i ] = std::move( a[ i ] ); + } + + return r; } - template - T &get(boost::array &arr) BOOST_NOEXCEPT { - BOOST_STATIC_ASSERT_MSG ( Idx < N, "boost::get<>(boost::array &) index out of range" ); - return arr[Idx]; - } + template + BOOST_CXX14_CONSTEXPR array to_array( T const (&&a)[ N ] ) + { + array r = {}; - template - const T &get(const boost::array &arr) BOOST_NOEXCEPT { - BOOST_STATIC_ASSERT_MSG ( Idx < N, "boost::get<>(const boost::array &) index out of range" ); - return arr[Idx]; - } + for( std::size_t i = 0; i < N; ++i ) + { + r[ i ] = a[ i ]; + } + + return r; + } + +#endif } /* namespace boost */ @@ -436,12 +493,14 @@ namespace boost { // If we don't have std::array, I'm assuming that we don't have std::get namespace std { template + BOOST_DEPRECATED( "please use `boost::get` instead" ) T &get(boost::array &arr) BOOST_NOEXCEPT { BOOST_STATIC_ASSERT_MSG ( Idx < N, "std::get<>(boost::array &) index out of range" ); return arr[Idx]; } template + BOOST_DEPRECATED( "please use `boost::get` instead" ) const T &get(const boost::array &arr) BOOST_NOEXCEPT { BOOST_STATIC_ASSERT_MSG ( Idx < N, "std::get<>(const boost::array &) index out of range" ); return arr[Idx]; @@ -453,4 +512,4 @@ namespace std { # pragma warning(pop) #endif -#endif /*BOOST_ARRAY_HPP*/ +#endif // #ifndef BOOST_ARRAY_HPP_INCLUDED diff --git a/contrib/restricted/boost/array/ya.make b/contrib/restricted/boost/array/ya.make index 36c56a138b4d..f09d0a5b3ac9 100644 --- a/contrib/restricted/boost/array/ya.make +++ b/contrib/restricted/boost/array/ya.make @@ -6,14 +6,13 @@ LICENSE(BSL-1.0) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -VERSION(1.87.0) +VERSION(1.88.0) -ORIGINAL_SOURCE(https://github.com/boostorg/array/archive/boost-1.87.0.tar.gz) +ORIGINAL_SOURCE(https://github.com/boostorg/array/archive/boost-1.88.0.tar.gz) PEERDIR( contrib/restricted/boost/assert contrib/restricted/boost/config - contrib/restricted/boost/core contrib/restricted/boost/static_assert contrib/restricted/boost/throw_exception ) From ca377fd4336db2e4e53c1cd32160cca95766d213 Mon Sep 17 00:00:00 2001 From: osidorkin Date: Tue, 6 May 2025 13:01:56 +0300 Subject: [PATCH 04/24] Trivial: Add 
reserves commit_hash:e320e188c0837ac9069763983542f12a7952aa63 --- .../client/chaos_client/replication_card_serialization.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/yt/yt/client/chaos_client/replication_card_serialization.cpp b/yt/yt/client/chaos_client/replication_card_serialization.cpp index ebb1f4975ef7..cd7a4cf6eeae 100644 --- a/yt/yt/client/chaos_client/replication_card_serialization.cpp +++ b/yt/yt/client/chaos_client/replication_card_serialization.cpp @@ -400,17 +400,21 @@ void ToProto( const TReplicationCard& replicationCard, const TReplicationCardFetchOptions& options) { + protoReplicationCard->mutable_replicas()->Reserve(replicationCard.Replicas.size()); for (const auto& [replicaId, replicaInfo] : SortHashMapByKeys(replicationCard.Replicas)) { auto* protoReplicaEntry = protoReplicationCard->add_replicas(); ToProto(protoReplicaEntry->mutable_id(), replicaId); ToProto(protoReplicaEntry->mutable_info(), replicaInfo, options); } + if (options.IncludeCoordinators) { ToProto(protoReplicationCard->mutable_coordinator_cell_ids(), replicationCard.CoordinatorCellIds); } + if (options.IncludeReplicatedTableOptions && replicationCard.ReplicatedTableOptions) { protoReplicationCard->set_replicated_table_options(ConvertToYsonString(replicationCard.ReplicatedTableOptions).ToString()); } + protoReplicationCard->set_era(replicationCard.Era); ToProto(protoReplicationCard->mutable_table_id(), replicationCard.TableId); protoReplicationCard->set_table_path(replicationCard.TablePath); @@ -421,11 +425,13 @@ void ToProto( void FromProto(TReplicationCard* replicationCard, const NChaosClient::NProto::TReplicationCard& protoReplicationCard) { + replicationCard->Replicas.reserve(protoReplicationCard.replicas().size()); for (const auto& protoEntry : protoReplicationCard.replicas()) { auto replicaId = FromProto(protoEntry.id()); auto& replicaInfo = EmplaceOrCrash(replicationCard->Replicas, replicaId, TReplicaInfo())->second; FromProto(&replicaInfo, protoEntry.info()); } + FromProto(&replicationCard->CoordinatorCellIds, protoReplicationCard.coordinator_cell_ids()); replicationCard->Era = protoReplicationCard.era(); replicationCard->TableId = FromProto(protoReplicationCard.table_id()); @@ -435,6 +441,7 @@ void FromProto(TReplicationCard* replicationCard, const NChaosClient::NProto::TR if (protoReplicationCard.has_replicated_table_options()) { replicationCard->ReplicatedTableOptions = ConvertTo(TYsonString(protoReplicationCard.replicated_table_options())); } + if (protoReplicationCard.has_replication_card_collocation_id()) { FromProto(&replicationCard->ReplicationCardCollocationId, protoReplicationCard.replication_card_collocation_id()); } From 0ba803a734b1c0a6c0f79beff16668302c34f3d1 Mon Sep 17 00:00:00 2001 From: atarasov5 Date: Tue, 6 May 2025 15:01:42 +0300 Subject: [PATCH 05/24] YQL-19767: Introduce MKQL allocator address sanitizing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Здесь поддержал только out of bounds access и use after free. 
Отложенное использование памяти и тд буду делать потом commit_hash:2a3fd472b626762ff7c8b7b0bc1285af50c511cf --- yql/essentials/minikql/aligned_page_pool.cpp | 96 ++++----- yql/essentials/minikql/aligned_page_pool.h | 18 +- .../minikql/aligned_page_pool_ut.cpp | 189 ++++++++++-------- yql/essentials/minikql/asan_utils.h | 91 +++++++++ .../minikql/comp_nodes/mkql_grace_join.cpp | 2 + .../comp_nodes/ut/mkql_wide_combine_ut.cpp | 3 + yql/essentials/minikql/compact_hash.h | 9 +- .../minikql/gtest_ut/allocator_ut.cpp | 168 ++++++++++++++++ yql/essentials/minikql/gtest_ut/ya.make | 17 ++ yql/essentials/minikql/mkql_alloc.cpp | 35 +++- yql/essentials/minikql/mkql_alloc.h | 78 ++++++-- yql/essentials/minikql/mkql_alloc_ut.cpp | 8 +- yql/essentials/minikql/mkql_mem_info.cpp | 7 +- yql/essentials/minikql/mkql_string_util.cpp | 74 ++++--- yql/essentials/minikql/ya.make | 1 + .../parser/pg_wrapper/comp_factory.cpp | 11 +- yql/essentials/public/udf/udf_string.h | 21 +- .../udfs/common/datetime2/datetime_udf.cpp | 27 ++- yt/yql/providers/yt/codec/yt_codec.cpp | 2 +- 19 files changed, 633 insertions(+), 224 deletions(-) create mode 100644 yql/essentials/minikql/asan_utils.h create mode 100644 yql/essentials/minikql/gtest_ut/allocator_ut.cpp create mode 100644 yql/essentials/minikql/gtest_ut/ya.make diff --git a/yql/essentials/minikql/aligned_page_pool.cpp b/yql/essentials/minikql/aligned_page_pool.cpp index e061c08574d8..f98065f20e5c 100644 --- a/yql/essentials/minikql/aligned_page_pool.cpp +++ b/yql/essentials/minikql/aligned_page_pool.cpp @@ -10,6 +10,8 @@ #include #include +#include + #if defined(_win_) # include #elif defined(_unix_) @@ -76,6 +78,7 @@ class TGlobalPagePool { void *page = nullptr; if (Pages.Dequeue(&page)) { --Count; + SanitizerMarkInvalid(page, PageSize); return page; } @@ -101,13 +104,14 @@ class TGlobalPagePool { FreePage(addr); return GetPageSize(); } - + SanitizerMarkInvalid(addr, PageSize); ++Count; Pages.Enqueue(addr); return 0; } void FreePage(void* addr) { + SanitizerMarkInvalid(addr, PageSize); auto res = T::Munmap(addr, PageSize); Y_DEBUG_ABORT_UNLESS(0 == res, "Madvise failed: %s", LastSystemErrorText()); } @@ -142,6 +146,7 @@ class TGlobalPools { Y_DEBUG_ABORT_UNLESS(!TAlignedPagePoolImpl::IsDefaultAllocatorUsed(), "No memory maps allowed while using default allocator"); void* res = T::Mmap(size); + SanitizerMarkInvalid(res, size); TotalMmappedBytes += size; return res; } @@ -289,38 +294,15 @@ inline int TSystemMmap::Munmap(void* addr, size_t size) } #endif -std::function TFakeAlignedMmap::OnMmap = {}; -std::function TFakeAlignedMmap::OnMunmap = {}; - -void* TFakeAlignedMmap::Mmap(size_t size) -{ - if (OnMmap) { - OnMmap(size); - } - return reinterpret_cast(TAlignedPagePool::POOL_PAGE_SIZE); -} - -int TFakeAlignedMmap::Munmap(void* addr, size_t size) -{ - if (OnMunmap) { - OnMunmap(addr, size); - } - return 0; -} - -std::function TFakeUnalignedMmap::OnMmap = {}; -std::function TFakeUnalignedMmap::OnMunmap = {}; +std::function TFakeMmap::OnMmap = {}; +std::function TFakeMmap::OnMunmap = {}; -void* TFakeUnalignedMmap::Mmap(size_t size) -{ - if (OnMmap) { - OnMmap(size); - } - return reinterpret_cast(TAlignedPagePool::POOL_PAGE_SIZE+1); +void* TFakeMmap::Mmap(size_t size) { + Y_DEBUG_ABORT_UNLESS(OnMmap, "mmap function must be provided"); + return OnMmap(size); } -int TFakeUnalignedMmap::Munmap(void* addr, size_t size) -{ +int TFakeMmap::Munmap(void* addr, size_t size) { if (OnMunmap) { OnMunmap(addr, size); } @@ -436,7 +418,7 @@ void TAlignedPagePoolImpl::OffloadFree(ui64 
size) noexcept { } template -void* TAlignedPagePoolImpl::GetPage() { +void* TAlignedPagePoolImpl::GetPageImpl() { ++PageAllocCount; if (!FreePages.empty()) { ++PageHitCount; @@ -479,6 +461,13 @@ void* TAlignedPagePoolImpl::GetPage() { return res; } +template +void* TAlignedPagePoolImpl::GetPage() { + auto* page = GetPageImpl(); + SanitizerMarkInvalid(page, POOL_PAGE_SIZE); + return page; +}; + template void TAlignedPagePoolImpl::ReturnPage(void* addr) noexcept { if (Y_UNLIKELY(IsDefaultAllocatorUsed())) { @@ -487,6 +476,7 @@ void TAlignedPagePoolImpl::ReturnPage(void* addr) noexcept { return; } + SanitizerMarkInvalid(addr, POOL_PAGE_SIZE); Y_DEBUG_ABORT_UNLESS(AllPages.find(addr) != AllPages.end()); FreePages.emplace(addr); } @@ -504,14 +494,9 @@ void* TAlignedPagePoolImpl::GetBlock(size_t size) { return ret; } - - if (size == POOL_PAGE_SIZE) { - return GetPage(); - } else { - const auto ptr = Alloc(size); - Y_DEBUG_ABORT_UNLESS(ActiveBlocks.emplace(ptr, size).second); - return ptr; - } + auto* block = GetBlockImpl(size); + SanitizerMarkInvalid(block, size); + return block; } template @@ -681,6 +666,17 @@ bool TAlignedPagePoolImpl::TryIncreaseLimit(ui64 required) { return Limit >= required; } +template +void* TAlignedPagePoolImpl::GetBlockImpl(size_t size) { + if (size == POOL_PAGE_SIZE) { + return GetPage(); + } else { + const auto ptr = Alloc(size); + Y_DEBUG_ABORT_UNLESS(ActiveBlocks.emplace(ptr, size).second); + return ptr; + } +} + template ui64 TAlignedPagePoolImpl::GetGlobalPagePoolSize() { ui64 size = 0; @@ -713,8 +709,7 @@ bool TAlignedPagePoolImpl::IsDefaultAllocatorUsed() { #endif template class TAlignedPagePoolImpl<>; -template class TAlignedPagePoolImpl; -template class TAlignedPagePoolImpl; +template class TAlignedPagePoolImpl; template void* GetAlignedPage(ui64 size) { @@ -809,7 +804,6 @@ void ReleaseAlignedPage(void* mem, ui64 size) { TGlobalPools::Instance().PushPage(level, mem); return; } - TGlobalPools::Instance().DoMunmap(mem, size); } @@ -829,28 +823,22 @@ i64 GetTotalFreeListBytes() { } template i64 GetTotalMmapedBytes<>(); -template i64 GetTotalMmapedBytes(); -template i64 GetTotalMmapedBytes(); +template i64 GetTotalMmapedBytes(); template i64 GetTotalFreeListBytes<>(); -template i64 GetTotalFreeListBytes(); -template i64 GetTotalFreeListBytes(); +template i64 GetTotalFreeListBytes(); template void* GetAlignedPage<>(ui64); -template void* GetAlignedPage(ui64); -template void* GetAlignedPage(ui64); +template void* GetAlignedPage(ui64); template void* GetAlignedPage<>(); -template void* GetAlignedPage(); -template void* GetAlignedPage(); +template void* GetAlignedPage(); template void ReleaseAlignedPage<>(void*,ui64); -template void ReleaseAlignedPage(void*,ui64); -template void ReleaseAlignedPage(void*,ui64); +template void ReleaseAlignedPage(void*,ui64); template void ReleaseAlignedPage<>(void*); -template void ReleaseAlignedPage(void*); -template void ReleaseAlignedPage(void*); +template void ReleaseAlignedPage(void*); size_t GetMemoryMapsCount() { size_t lineCount = 0; diff --git a/yql/essentials/minikql/aligned_page_pool.h b/yql/essentials/minikql/aligned_page_pool.h index bc570528c97b..6028878f5a91 100644 --- a/yql/essentials/minikql/aligned_page_pool.h +++ b/yql/essentials/minikql/aligned_page_pool.h @@ -9,6 +9,8 @@ #include #include +#include + #include #include #include @@ -50,18 +52,9 @@ class TSystemMmap { static int Munmap(void* addr, size_t size); }; -class TFakeAlignedMmap { +class TFakeMmap { public: - static std::function OnMmap; - static std::function 
OnMunmap; - - static void* Mmap(size_t size); - static int Munmap(void* addr, size_t size); -}; - -class TFakeUnalignedMmap { -public: - static std::function OnMmap; + static std::function OnMmap; static std::function OnMunmap; static void* Mmap(size_t size); @@ -261,6 +254,9 @@ class TAlignedPagePoolImpl { bool TryIncreaseLimit(ui64 required); + void* GetBlockImpl(size_t size); + + void* GetPageImpl(); protected: std::stack> FreePages; std::unordered_set AllPages; diff --git a/yql/essentials/minikql/aligned_page_pool_ut.cpp b/yql/essentials/minikql/aligned_page_pool_ut.cpp index b917176dd12d..d586d002e9e0 100644 --- a/yql/essentials/minikql/aligned_page_pool_ut.cpp +++ b/yql/essentials/minikql/aligned_page_pool_ut.cpp @@ -3,139 +3,156 @@ #include #include +#include namespace NKikimr { namespace NMiniKQL { -Y_UNIT_TEST_SUITE(TAlignedPagePoolTest) { - -Y_UNIT_TEST(AlignedMmapPageSize) { - TAlignedPagePool::ResetGlobalsUT(); - TAlignedPagePoolImpl alloc(__LOCATION__); +namespace { +class TScopedMemoryMapper { +public: + static constexpr size_t EXTRA_SPACE_FOR_UNALIGNMENT = 1; - int munmaps = 0; - TFakeAlignedMmap::OnMunmap = [&](void* addr, size_t s) { - Y_UNUSED(addr); - Y_UNUSED(s); - munmaps ++; + struct TUnmapEntry { + void* Addr; + size_t Size; + bool operator==(const TUnmapEntry& rhs) { + return std::tie(Addr, Size) == std::tie(rhs.Addr, rhs.Size); + } }; + TScopedMemoryMapper(bool aligned) { + Aligned_ = aligned; + TFakeMmap::OnMunmap = [this](void* addr, size_t s) { + Munmaps_.push_back({addr, s}); + }; + + TFakeMmap::OnMmap = [this](size_t size) -> void* { + // Allocate more memory to ensure we have enough space for alignment + Storage_ = THolder(new char[AlignUp(size + EXTRA_SPACE_FOR_UNALIGNMENT, TAlignedPagePool::POOL_PAGE_SIZE)]); + UNIT_ASSERT(Storage_.Get()); + + // Force TFakeMmap::Munmap to be called by returning a pointer that will always need adjustment + if (Aligned_) { + return PointerToAlignedMemory(); + } else { + // Ensure the pointer is always unaligned by a fixed amount + void* ptr = PointerToAlignedMemory(); + // Add EXTRA_SPACE_FOR_UNALIGNMENT to ensure it's unaligned + return static_cast(static_cast(ptr) + EXTRA_SPACE_FOR_UNALIGNMENT); + } + }; + } + + ~TScopedMemoryMapper() { + TFakeMmap::OnMunmap = {}; + TFakeMmap::OnMmap = {}; + Storage_.Reset(); + } + + void* PointerToAlignedMemory() { + return AlignUp(Storage_.Get(), TAlignedPagePool::POOL_PAGE_SIZE); + } + + size_t MunmapsSize() { + return Munmaps_.size(); + } + + TUnmapEntry Munmaps(size_t i) { + return Munmaps_[i]; + } + +private: + THolder Storage_; + std::vector Munmaps_; + bool Aligned_; +}; + +}; // namespace + +Y_UNIT_TEST_SUITE(TAlignedPagePoolTest) { + +Y_UNIT_TEST(AlignedMmapPageSize) { + TAlignedPagePoolImpl::ResetGlobalsUT(); + TAlignedPagePoolImpl alloc(__LOCATION__); + TScopedMemoryMapper mmapper(/*aligned=*/true); auto size = TAlignedPagePool::POOL_PAGE_SIZE; auto block = std::shared_ptr(alloc.GetBlock(size), [&](void* addr) { alloc.ReturnBlock(addr, size); }); - TFakeAlignedMmap::OnMunmap = {}; - UNIT_ASSERT_EQUAL(0, munmaps); + UNIT_ASSERT_EQUAL(0u, mmapper.MunmapsSize()); - UNIT_ASSERT_VALUES_EQUAL(reinterpret_cast(block.get()), TAlignedPagePool::POOL_PAGE_SIZE); + UNIT_ASSERT_VALUES_EQUAL(block.get(), mmapper.PointerToAlignedMemory()); - UNIT_ASSERT_VALUES_EQUAL(alloc.GetFreePageCount() - , TAlignedPagePool::ALLOC_AHEAD_PAGES); + UNIT_ASSERT_VALUES_EQUAL(alloc.GetFreePageCount(), TAlignedPagePool::ALLOC_AHEAD_PAGES); - UNIT_ASSERT_VALUES_EQUAL(alloc.GetAllocated() - , 
TAlignedPagePool::POOL_PAGE_SIZE + TAlignedPagePool::ALLOC_AHEAD_PAGES*TAlignedPagePool::POOL_PAGE_SIZE - ); + UNIT_ASSERT_VALUES_EQUAL(alloc.GetAllocated(), TAlignedPagePool::POOL_PAGE_SIZE + TAlignedPagePool::ALLOC_AHEAD_PAGES * TAlignedPagePool::POOL_PAGE_SIZE); } Y_UNIT_TEST(UnalignedMmapPageSize) { - TAlignedPagePool::ResetGlobalsUT(); - TAlignedPagePoolImpl alloc(__LOCATION__); - - int munmaps = 0; - TFakeUnalignedMmap::OnMunmap = [&](void* addr, size_t s) { - Y_UNUSED(addr); - if (munmaps == 0) { - UNIT_ASSERT_VALUES_EQUAL(s, TAlignedPagePool::POOL_PAGE_SIZE - 1); - } else { - UNIT_ASSERT_VALUES_EQUAL(s, 1); - } - munmaps ++; - }; + TAlignedPagePoolImpl::ResetGlobalsUT(); + TAlignedPagePoolImpl alloc(__LOCATION__); + TScopedMemoryMapper mmapper(/*aligned=*/false); auto size = TAlignedPagePool::POOL_PAGE_SIZE; auto block = std::shared_ptr(alloc.GetBlock(size), [&](void* addr) { alloc.ReturnBlock(addr, size); }); - TFakeUnalignedMmap::OnMunmap = {}; - UNIT_ASSERT_EQUAL(2, munmaps); + UNIT_ASSERT_EQUAL(2, mmapper.MunmapsSize()); + UNIT_ASSERT_EQUAL(TAlignedPagePool::POOL_PAGE_SIZE - TScopedMemoryMapper::EXTRA_SPACE_FOR_UNALIGNMENT, mmapper.Munmaps(0).Size); + UNIT_ASSERT_EQUAL(TScopedMemoryMapper::EXTRA_SPACE_FOR_UNALIGNMENT, mmapper.Munmaps(1).Size); - UNIT_ASSERT_VALUES_EQUAL(reinterpret_cast(block.get()), 2 * TAlignedPagePool::POOL_PAGE_SIZE); + UNIT_ASSERT_VALUES_EQUAL(block.get(), (char*)mmapper.PointerToAlignedMemory() + TAlignedPagePool::POOL_PAGE_SIZE); - UNIT_ASSERT_VALUES_EQUAL(alloc.GetFreePageCount() - , TAlignedPagePool::ALLOC_AHEAD_PAGES - 1); + UNIT_ASSERT_VALUES_EQUAL(alloc.GetFreePageCount(), TAlignedPagePool::ALLOC_AHEAD_PAGES - 1); - UNIT_ASSERT_VALUES_EQUAL(alloc.GetAllocated() - , TAlignedPagePool::POOL_PAGE_SIZE + (TAlignedPagePool::ALLOC_AHEAD_PAGES - 1) * TAlignedPagePool::POOL_PAGE_SIZE - ); + UNIT_ASSERT_VALUES_EQUAL(alloc.GetAllocated(), TAlignedPagePool::POOL_PAGE_SIZE + (TAlignedPagePool::ALLOC_AHEAD_PAGES - 1) * TAlignedPagePool::POOL_PAGE_SIZE); } Y_UNIT_TEST(AlignedMmapUnalignedSize) { - TAlignedPagePool::ResetGlobalsUT(); - TAlignedPagePoolImpl alloc(__LOCATION__); + TAlignedPagePoolImpl::ResetGlobalsUT(); + TAlignedPagePoolImpl alloc(__LOCATION__); auto smallSize = NSystemInfo::GetPageSize(); auto size = smallSize + 1024 * TAlignedPagePool::POOL_PAGE_SIZE; - - int munmaps = 0; - TFakeAlignedMmap::OnMunmap = [&](void* addr, size_t s) { - if (munmaps == 0) { - UNIT_ASSERT_VALUES_EQUAL(s, TAlignedPagePool::POOL_PAGE_SIZE - smallSize); - UNIT_ASSERT_VALUES_EQUAL(reinterpret_cast(addr), TAlignedPagePool::POOL_PAGE_SIZE + size); - } else { - UNIT_ASSERT_VALUES_EQUAL(s, smallSize); - UNIT_ASSERT_VALUES_EQUAL(reinterpret_cast(addr), TAlignedPagePool::POOL_PAGE_SIZE + TAlignedPagePool::ALLOC_AHEAD_PAGES * TAlignedPagePool::POOL_PAGE_SIZE + size - smallSize); - } - - munmaps ++; - }; + TScopedMemoryMapper mmapper(/*aligned=*/true); auto block = std::shared_ptr(alloc.GetBlock(size), [&](void* addr) { alloc.ReturnBlock(addr, size); }); - TFakeAlignedMmap::OnMunmap = {}; - UNIT_ASSERT_EQUAL(2, munmaps); + UNIT_ASSERT_EQUAL(2, mmapper.MunmapsSize()); + auto expected0 = (TScopedMemoryMapper::TUnmapEntry{(char*)mmapper.PointerToAlignedMemory() + size, TAlignedPagePool::POOL_PAGE_SIZE - smallSize}); + UNIT_ASSERT_EQUAL(expected0, mmapper.Munmaps(0)); + auto expected1 = TScopedMemoryMapper::TUnmapEntry{ + (char*)mmapper.PointerToAlignedMemory() + TAlignedPagePool::ALLOC_AHEAD_PAGES * TAlignedPagePool::POOL_PAGE_SIZE + size - smallSize, + smallSize}; + 
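// [Editor's illustration, not part of the patch.] The expected TUnmapEntry
// values in these tests encode the trimming the pool performs: whatever lies
// before the first aligned pool page, and whatever it does not keep at the
// end, is handed straight back to the OS via munmap. For the simplest case
// (a page-sized block carved out of an unaligned mapping) the arithmetic is
// sketched below; names are hypothetical, the real logic lives inside
// TAlignedPagePoolImpl.
#include <cstddef>
#include <cstdint>

struct TTrimmedRegion {
    uintptr_t Start;  // first kept, page-aligned byte
    size_t Kept;      // whole pool pages kept by the pool
    size_t HeadTrim;  // bytes munmapped before Start
    size_t TailTrim;  // bytes munmapped after the kept range
};

inline TTrimmedRegion TrimToWholePages(uintptr_t raw, size_t rawSize, size_t pageSize) {
    const uintptr_t start = (raw + pageSize - 1) / pageSize * pageSize; // AlignUp
    const size_t head = static_cast<size_t>(start - raw);
    const size_t kept = (rawSize - head) / pageSize * pageSize;         // AlignDown
    return {start, kept, head, rawSize - head - kept};
}
// With the fake mmap off by EXTRA_SPACE_FOR_UNALIGNMENT and a mapping of
// ALLOC_AHEAD_PAGES + 1 pool pages, HeadTrim comes out as
// POOL_PAGE_SIZE - EXTRA_SPACE_FOR_UNALIGNMENT and TailTrim as
// EXTRA_SPACE_FOR_UNALIGNMENT, which is what UnalignedMmapPageSize asserts
// for Munmaps(0) and Munmaps(1).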
UNIT_ASSERT_EQUAL(expected1, mmapper.Munmaps(1)); - UNIT_ASSERT_VALUES_EQUAL(reinterpret_cast(block.get()), TAlignedPagePool::POOL_PAGE_SIZE); + UNIT_ASSERT_VALUES_EQUAL(block.get(), mmapper.PointerToAlignedMemory()); - UNIT_ASSERT_VALUES_EQUAL(alloc.GetFreePageCount() - , TAlignedPagePool::ALLOC_AHEAD_PAGES - 1); + UNIT_ASSERT_VALUES_EQUAL(alloc.GetFreePageCount(), TAlignedPagePool::ALLOC_AHEAD_PAGES - 1); - UNIT_ASSERT_VALUES_EQUAL(alloc.GetAllocated() - , size + (TAlignedPagePool::ALLOC_AHEAD_PAGES - 1) * TAlignedPagePool::POOL_PAGE_SIZE - ); + UNIT_ASSERT_VALUES_EQUAL(alloc.GetAllocated(), size + (TAlignedPagePool::ALLOC_AHEAD_PAGES - 1) * TAlignedPagePool::POOL_PAGE_SIZE); } Y_UNIT_TEST(UnalignedMmapUnalignedSize) { - TAlignedPagePool::ResetGlobalsUT(); - TAlignedPagePoolImpl alloc(__LOCATION__); + TAlignedPagePoolImpl::ResetGlobalsUT(); + TAlignedPagePoolImpl alloc(__LOCATION__); auto smallSize = NSystemInfo::GetPageSize(); auto size = smallSize + 1024 * TAlignedPagePool::POOL_PAGE_SIZE; - int munmaps = 0; - TFakeUnalignedMmap::OnMunmap = [&](void* addr, size_t s) { - Y_UNUSED(addr); - if (munmaps == 0) { - UNIT_ASSERT_VALUES_EQUAL(s, TAlignedPagePool::POOL_PAGE_SIZE - 1); - } else if (munmaps == 1) { - UNIT_ASSERT_VALUES_EQUAL(s, TAlignedPagePool::POOL_PAGE_SIZE - smallSize); - } else { - UNIT_ASSERT_VALUES_EQUAL(s, smallSize + 1); - } - munmaps ++; - }; - + TScopedMemoryMapper mmapper(/*aligned=*/false); auto block = std::shared_ptr(alloc.GetBlock(size), [&](void* addr) { alloc.ReturnBlock(addr, size); }); - TFakeUnalignedMmap::OnMunmap = {}; - UNIT_ASSERT_EQUAL(3, munmaps); + UNIT_ASSERT_EQUAL(3, mmapper.MunmapsSize()); + UNIT_ASSERT_EQUAL(TAlignedPagePool::POOL_PAGE_SIZE - TScopedMemoryMapper::EXTRA_SPACE_FOR_UNALIGNMENT, mmapper.Munmaps(0).Size); + UNIT_ASSERT_EQUAL(TAlignedPagePool::POOL_PAGE_SIZE - smallSize, mmapper.Munmaps(1).Size); + UNIT_ASSERT_EQUAL(smallSize + 1, mmapper.Munmaps(2).Size); - UNIT_ASSERT_VALUES_EQUAL(reinterpret_cast(block.get()), 2 * TAlignedPagePool::POOL_PAGE_SIZE); + UNIT_ASSERT_VALUES_EQUAL(block.get(), (char*)mmapper.PointerToAlignedMemory() + TAlignedPagePool::POOL_PAGE_SIZE); - UNIT_ASSERT_VALUES_EQUAL(alloc.GetFreePageCount() - , TAlignedPagePool::ALLOC_AHEAD_PAGES - 2); + UNIT_ASSERT_VALUES_EQUAL(alloc.GetFreePageCount(), TAlignedPagePool::ALLOC_AHEAD_PAGES - 2); - UNIT_ASSERT_VALUES_EQUAL(alloc.GetAllocated() - , size + (TAlignedPagePool::ALLOC_AHEAD_PAGES - 2) * TAlignedPagePool::POOL_PAGE_SIZE - ); + UNIT_ASSERT_VALUES_EQUAL(alloc.GetAllocated(), size + (TAlignedPagePool::ALLOC_AHEAD_PAGES - 2) * TAlignedPagePool::POOL_PAGE_SIZE); } Y_UNIT_TEST(YellowZoneSwitchesCorrectlyBlock) { TAlignedPagePool::ResetGlobalsUT(); TAlignedPagePoolImpl alloc(__LOCATION__); - // choose relatively big chunk so ALLOC_AHEAD_PAGES don't affect the correctness of the test + // choose relatively big chunk so ALLOC_AHEAD_PAGES don't affect the correctness of the test auto size = 1024 * TAlignedPagePool::POOL_PAGE_SIZE; alloc.SetLimit(size * 10); diff --git a/yql/essentials/minikql/asan_utils.h b/yql/essentials/minikql/asan_utils.h new file mode 100644 index 000000000000..b404be7b6350 --- /dev/null +++ b/yql/essentials/minikql/asan_utils.h @@ -0,0 +1,91 @@ +#pragma once + +#include + +#include +#include + +#if defined(_asan_enabled_) + #include +#endif + +namespace NKikimr { + +inline constexpr size_t ALLOCATION_REDZONE_SIZE = 16; +inline constexpr size_t ASAN_EXTRA_ALLOCATION_SPACE = ALLOCATION_REDZONE_SIZE * 2; + +constexpr void* SanitizerMarkInvalid(void* addr, 
size_t size) { +#if defined(_asan_enabled_) + if (addr == nullptr) { + return nullptr; + } + __asan_poison_memory_region(addr, size); +#else // defined(_asan_enabled_) + Y_UNUSED(addr, size); +#endif + return addr; +} + +constexpr void* SanitizerMarkValid(void* addr, size_t size) { +#if defined(_asan_enabled_) + if (addr == nullptr) { + return nullptr; + } + __asan_unpoison_memory_region(addr, size); +#else // defined(_asan_enabled_) + Y_UNUSED(addr, size); +#endif + return addr; +} + +constexpr size_t GetSizeToAlloc(size_t size) { +#if defined(_asan_enabled_) + if (size == 0) { + return 0; + } + return size + 2 * ALLOCATION_REDZONE_SIZE; +#else // defined(_asan_enabled_) + return size; +#endif +} + +constexpr const void* GetOriginalAllocatedObject(const void* ptr, size_t size) { +#if defined(_asan_enabled_) + if (size == 0) { + return ptr; + } + return (char*)ptr - ALLOCATION_REDZONE_SIZE; +#else // defined(_asan_enabled_) + Y_UNUSED(size); + return ptr; +#endif +} + +constexpr void* WrapPointerWithRedZones(void* ptr, size_t extendedSizeWithRedzone) { +#if defined(_asan_enabled_) + if (extendedSizeWithRedzone == 0) { + return ptr; + } + SanitizerMarkInvalid(ptr, extendedSizeWithRedzone); + SanitizerMarkValid((char*)ptr + ALLOCATION_REDZONE_SIZE, extendedSizeWithRedzone - 2 * ALLOCATION_REDZONE_SIZE); + return (char*)ptr + ALLOCATION_REDZONE_SIZE; +#else // defined(_asan_enabled_) + Y_UNUSED(extendedSizeWithRedzone); + return ptr; +#endif +} + +constexpr const void* UnwrapPointerWithRedZones(const void* ptr, size_t size) { +#if defined(_asan_enabled_) + if (size == 0) { + return ptr; + } + SanitizerMarkInvalid((char*)ptr - ALLOCATION_REDZONE_SIZE, 2 * ALLOCATION_REDZONE_SIZE + size); + return (char*)ptr - ALLOCATION_REDZONE_SIZE; +#else // defined(_asan_enabled_) + Y_UNUSED(size); + return ptr; +#endif +} + +} // namespace NKikimr diff --git a/yql/essentials/minikql/comp_nodes/mkql_grace_join.cpp b/yql/essentials/minikql/comp_nodes/mkql_grace_join.cpp index bb3199845260..1ed5ff6bb9d2 100644 --- a/yql/essentials/minikql/comp_nodes/mkql_grace_join.cpp +++ b/yql/essentials/minikql/comp_nodes/mkql_grace_join.cpp @@ -202,6 +202,8 @@ void TGraceJoinPacker::Pack() { TuplesPacked++; std::fill(TupleIntVals.begin(), TupleIntVals.end(), 0); + std::fill(TupleStrings.begin(), TupleStrings.end(), nullptr); + std::fill(TupleStrSizes.begin(), TupleStrSizes.end(), 0); for (ui64 i = 0; i < ColumnsPackInfo.size(); i++) { diff --git a/yql/essentials/minikql/comp_nodes/ut/mkql_wide_combine_ut.cpp b/yql/essentials/minikql/comp_nodes/ut/mkql_wide_combine_ut.cpp index 49e292d8eb9d..3affc77478a0 100644 --- a/yql/essentials/minikql/comp_nodes/ut/mkql_wide_combine_ut.cpp +++ b/yql/essentials/minikql/comp_nodes/ut/mkql_wide_combine_ut.cpp @@ -496,6 +496,8 @@ Y_UNIT_TEST_SUITE(TMiniKQLWideCombinerTest) { UNIT_ASSERT_EQUAL(streamVal.Fetch(result), NUdf::EFetchStatus::Finish); } +// Do not run under ASAN since memory limits is hard to track. Every allocation produce and tracks more memory than requested. 
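// [Editor's illustration, not part of the patch.] The extra memory comes from
// the redzone padding added by the asan_utils.h helpers introduced earlier in
// this patch: every sized allocation grows by ASAN_EXTRA_ALLOCATION_SPACE and
// only the payload in the middle stays addressable. A minimal wrapper over
// plain malloc showing the same wrap/unwrap pattern the patch applies to the
// MKQL allocators (malloc is used here purely for illustration; the include
// path follows the file added above).
#include <yql/essentials/minikql/asan_utils.h>

#include <cstdlib>

inline void* AllocWithRedzones(size_t size) {
    const size_t padded = NKikimr::GetSizeToAlloc(size);   // size + 2 * ALLOCATION_REDZONE_SIZE under ASAN
    void* raw = std::malloc(padded);                       // raw block, redzones included
    if (raw == nullptr) {
        return nullptr;
    }
    return NKikimr::WrapPointerWithRedZones(raw, padded);  // poison the redzones, return the payload pointer
}

inline void FreeWithRedzones(void* payload, size_t size) {
    // Unwrap takes the caller-visible size and re-poisons the whole block.
    const void* raw = NKikimr::UnwrapPointerWithRedZones(payload, size);
    std::free(const_cast<void*>(raw));
}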
+#if !defined(_asan_enabled_) Y_UNIT_TEST_LLVM(TestSkipYieldRespectsMemLimit) { TTestStreamParams params; params.StringSize = 50000; @@ -528,6 +530,7 @@ Y_UNIT_TEST_SUITE(TMiniKQLWideCombinerTest) { UNIT_ASSERT_EQUAL(streamVal.Fetch(result), NUdf::EFetchStatus::Finish); UNIT_ASSERT_EQUAL(streamVal.Fetch(result), NUdf::EFetchStatus::Finish); } +#endif // defined(_asan_enabled_) } Y_UNIT_TEST_SUITE(TMiniKQLWideCombinerPerfTest) { diff --git a/yql/essentials/minikql/compact_hash.h b/yql/essentials/minikql/compact_hash.h index ec810efe5bc2..3704907d9c3d 100644 --- a/yql/essentials/minikql/compact_hash.h +++ b/yql/essentials/minikql/compact_hash.h @@ -3,6 +3,7 @@ #include #include "aligned_page_pool.h" +#include "asan_utils.h" #include "primes.h" #include @@ -563,7 +564,7 @@ class TListPool: public TListPoolBase { } ui16 listCount = GetSmallPageCapacity(size); Y_ASSERT(listCount >= 2); - TListHeader* header = new (GetPagePool().GetPage()) TListHeader(SMALL_MARK, size, listCount); + TListHeader* header = new (SanitizerMarkValid(GetPagePool().GetPage(), TAlignedPagePool::POOL_PAGE_SIZE)) TListHeader(SMALL_MARK, size, listCount); pages.PushFront(&header->ListItem); return header; } @@ -580,14 +581,14 @@ class TListPool: public TListPoolBase { ui16 listCapacity = FastClp2(size); ui16 listCount = GetMediumPageCapacity(listCapacity); Y_ASSERT(listCount >= 2); - TListHeader* header = new (GetPagePool().GetPage()) TListHeader(MEDIUM_MARK, listCapacity, listCount); + TListHeader* header = new (SanitizerMarkValid(GetPagePool().GetPage(), TAlignedPagePool::POOL_PAGE_SIZE)) TListHeader(MEDIUM_MARK, listCapacity, listCount); pages.PushFront(&header->ListItem); return header; } template TLargeListHeader* GetLargeListPage() { - TLargeListHeader* const header = new (GetPagePool().GetPage()) TLargeListHeader(GetLargePageCapacity()); + TLargeListHeader* const header = new (SanitizerMarkValid(GetPagePool().GetPage(), TAlignedPagePool::POOL_PAGE_SIZE)) TLargeListHeader(GetLargePageCapacity()); return header; } @@ -1315,7 +1316,7 @@ class TCompactHashBase { void AllocateBuckets(size_t count) { auto bucketsMemory = Max(sizeof(TItemNode) * count, (size_t)TAlignedPagePool::POOL_PAGE_SIZE); - Buckets_ = (TItemNode*)GetPagePool().GetBlock(bucketsMemory); + Buckets_ = (TItemNode*)SanitizerMarkValid(GetPagePool().GetBlock(bucketsMemory), bucketsMemory); BucketsCount_ = count; BucketsMemory_ = bucketsMemory; for (size_t i = 0; i < count; ++i) { diff --git a/yql/essentials/minikql/gtest_ut/allocator_ut.cpp b/yql/essentials/minikql/gtest_ut/allocator_ut.cpp new file mode 100644 index 000000000000..f4ae5a029264 --- /dev/null +++ b/yql/essentials/minikql/gtest_ut/allocator_ut.cpp @@ -0,0 +1,168 @@ +#include + +#include + +#include + +namespace NKikimr::NMiniKQL { + +enum class EAllocatorType { + DefaultAllocator, + ArrowAllocator, + HugeAllocator, +}; + +class MemoryTest: public ::testing::TestWithParam> { +protected: + MemoryTest() + : ScopedAlloc_(__LOCATION__) { + } + + size_t AllocSize() const { + return static_cast(std::get<0>(GetParam())); + } + + EAllocatorType GetAllocatorType() const { + return std::get<1>(GetParam()); + } + + void* AllocateMemory(size_t size) const { + EAllocatorType allocatorType = GetAllocatorType(); + switch (allocatorType) { + case EAllocatorType::DefaultAllocator: + return TWithDefaultMiniKQLAlloc::AllocWithSize(size); + case EAllocatorType::ArrowAllocator: + return MKQLArrowAllocate(size); + case EAllocatorType::HugeAllocator: + return TMKQLHugeAllocator::allocate(size); + default: + return 
nullptr; // Should never reach here + } + } + + void Free(const void* mem, size_t size) const { + EAllocatorType allocatorType = GetAllocatorType(); + switch (allocatorType) { + case EAllocatorType::DefaultAllocator: + TWithDefaultMiniKQLAlloc::FreeWithSize(mem, size); + break; + case EAllocatorType::ArrowAllocator: + MKQLArrowFree(mem, size); + break; + case EAllocatorType::HugeAllocator: + TMKQLHugeAllocator::deallocate(const_cast(static_cast(mem)), size); + break; + default: + break; // Should never reach here + } + } + + void AccessMemory(volatile void* memory, ssize_t offset) const { + volatile char* ptr = static_cast(memory) + offset; + *ptr = 'A'; // Perform a basic write operation + } + +private: + TScopedAlloc ScopedAlloc_; +}; + +// Test naming function +std::string TestNameGenerator(const ::testing::TestParamInfo& info) { + int sizeNumber = std::get<0>(info.param); + EAllocatorType allocatorType = std::get<1>(info.param); + + + std::string allocatorName = [&] () { + switch (allocatorType) { + case EAllocatorType::DefaultAllocator: + return "DefaultAllocator"; + case EAllocatorType::ArrowAllocator: + return "ArrowAllocator"; + case EAllocatorType::HugeAllocator: + return "HugeAllocator"; + } + }(); + + return "Size" + std::to_string(sizeNumber) + "With" + allocatorName + "Allocator"; +} + +// Out of bounds access + use after free can be tested only with +// --sanitize=address. +#if defined(_asan_enabled_) +TEST_P(MemoryTest, AccessOutOfBounds) { + size_t allocationSize = AllocSize(); + + void* memory = AllocateMemory(allocationSize); + ASSERT_NE(memory, nullptr) << "Memory allocation failed."; + // Accessing valid memory. + ASSERT_NO_THROW({ + AccessMemory(memory, 0); + AccessMemory(memory, allocationSize - 1); + }); + + // Accessing invalid left memory. + EXPECT_DEATH({ AccessMemory(memory, -1); }, ""); + EXPECT_DEATH({ AccessMemory(memory, -8); }, ""); + EXPECT_DEATH({ AccessMemory(memory, -16); }, ""); + + // Accessing invalid right memory. + EXPECT_DEATH({ AccessMemory(memory, allocationSize); }, ""); + EXPECT_DEATH({ AccessMemory(memory, allocationSize + 6); }, ""); + EXPECT_DEATH({ AccessMemory(memory, allocationSize + 12); }, ""); + EXPECT_DEATH({ AccessMemory(memory, allocationSize + 15); }, ""); + + Free(memory, allocationSize); +} + +TEST_P(MemoryTest, AccessAfterFree) { + size_t allocationSize = AllocSize(); + void* memory = AllocateMemory(allocationSize); + void* memory2 = AllocateMemory(allocationSize); + ASSERT_NE(memory, nullptr) << "Memory allocation failed."; + Free(memory, allocationSize); + + // Access after free — should crash + EXPECT_DEATH({ AccessMemory(memory, 0); }, ""); + EXPECT_DEATH({ AccessMemory(memory, allocationSize / 2); }, ""); + EXPECT_DEATH({ AccessMemory(memory, allocationSize - 1); }, ""); + + Free(memory2, allocationSize); + // Access after free — should crash + EXPECT_DEATH({ AccessMemory(memory2, 0); }, ""); + EXPECT_DEATH({ AccessMemory(memory2, allocationSize / 2); }, ""); + EXPECT_DEATH({ AccessMemory(memory2, allocationSize - 1); }, ""); +} + +#endif // defined(_asan_enabled_) + +// Double free tracked only in DEBUG mode. 
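// [Editor's illustration, not part of the patch.] The check is debug-only
// because double free is caught by a per-allocation registry that exists only
// with debug checks compiled in, presumably TMemoryUsageInfo (touched further
// down in this patch), which keeps an AllocationsMap_ and aborts via
// Y_DEBUG_ABORT_UNLESS when an address is returned twice. A stripped-down
// model of that idea, with hypothetical names:
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <unordered_map>

class TDebugAllocRegistry {
public:
    void OnAlloc(const void* mem, size_t size) {
        Live_[mem] = size;
    }

    void OnFree(const void* mem) {
        auto it = Live_.find(mem);
        if (it == Live_.end()) {
            std::fprintf(stderr, "Double free or foreign pointer at %p\n", mem);
            std::abort(); // in debug builds this abort is what EXPECT_DEATH observes
        }
        Live_.erase(it);
    }

private:
    std::unordered_map<const void*, size_t> Live_;
};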
+#ifndef NDEBUG +TEST_P(MemoryTest, DoubleFree) { + if (GetAllocatorType() == EAllocatorType::ArrowAllocator || GetAllocatorType() == EAllocatorType::HugeAllocator) { + GTEST_SKIP() << "Arrow and Huge allocators arae not instrumented yet to track double free."; + } + size_t allocationSize = AllocSize(); + + void* memory = AllocateMemory(allocationSize); + ASSERT_NE(memory, nullptr) << "Memory allocation failed."; + + Free(memory, allocationSize); + + // Attempting double free — should crash + EXPECT_DEATH({ Free(memory, allocationSize); }, ""); +} +#endif // NDEBUG + +// Allow empty tests for MSAN and other sanitizers. +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(MemoryTest); + +INSTANTIATE_TEST_SUITE_P(MemoryTests, MemoryTest, + ::testing::Combine( + ::testing::Values(8, 64, 32 * 1024, 64 * 1024, 128 * 1024, 64 * 1024 * 1024), + ::testing::Values( + EAllocatorType::DefaultAllocator, + EAllocatorType::ArrowAllocator, + EAllocatorType::HugeAllocator)), + TestNameGenerator); + +} // namespace NKikimr::NMiniKQL diff --git a/yql/essentials/minikql/gtest_ut/ya.make b/yql/essentials/minikql/gtest_ut/ya.make new file mode 100644 index 000000000000..ed80e31b2084 --- /dev/null +++ b/yql/essentials/minikql/gtest_ut/ya.make @@ -0,0 +1,17 @@ +GTEST() + +PEERDIR( + yql/essentials/minikql + yql/essentials/minikql/invoke_builtins/llvm16 + yql/essentials/public/udf/service/exception_policy + contrib/libs/apache/arrow + yql/essentials/sql/pg_dummy +) + +SRC( + allocator_ut.cpp +) + +YQL_LAST_ABI_VERSION() + +END() diff --git a/yql/essentials/minikql/mkql_alloc.cpp b/yql/essentials/minikql/mkql_alloc.cpp index 27a963380e99..a8933de21494 100644 --- a/yql/essentials/minikql/mkql_alloc.cpp +++ b/yql/essentials/minikql/mkql_alloc.cpp @@ -1,6 +1,10 @@ #include "mkql_alloc.h" -#include + #include + +#include +#include + #include namespace NKikimr { @@ -204,6 +208,7 @@ void* MKQLAllocSlow(size_t sz, TAllocState* state, const EMemorySubPool mPool) { auto roundedSize = AlignUp(sz + sizeof(TAllocPageHeader), MKQL_ALIGNMENT); auto capacity = Max(ui64(TAlignedPagePool::POOL_PAGE_SIZE), roundedSize); auto currPage = (TAllocPageHeader*)state->GetBlock(capacity); + SanitizerMarkValid(currPage, sizeof(TAllocPageHeader)); currPage->Deallocated = 0; currPage->Capacity = capacity; currPage->Offset = roundedSize; @@ -239,6 +244,7 @@ void* TPagedArena::AllocSlow(const size_t sz, const EMemorySubPool mPool) { auto roundedSize = AlignUp(sz + sizeof(TAllocPageHeader), MKQL_ALIGNMENT); auto capacity = Max(ui64(TAlignedPagePool::POOL_PAGE_SIZE), roundedSize); currentPage = (TAllocPageHeader*)PagePool_->GetBlock(capacity); + SanitizerMarkValid(currentPage, sizeof(TAllocPageHeader)); currentPage->Capacity = capacity; void* ret = (char*)currentPage + sizeof(TAllocPageHeader); currentPage->Offset = roundedSize; @@ -267,7 +273,6 @@ void* MKQLArrowAllocateOnArena(ui64 size) { auto alignedSize = AlignUp(size, ArrowAlignment); auto& page = state->CurrentArrowPages; - if (Y_UNLIKELY(!page || page->Offset + alignedSize > page->Size)) { const auto pageSize = TAllocState::POOL_PAGE_SIZE; @@ -280,6 +285,7 @@ void* MKQLArrowAllocateOnArena(ui64 size) { } page = (TMkqlArrowHeader*)GetAlignedPage(); + SanitizerMarkValid(page, sizeof(TMkqlArrowHeader)); page->Offset = sizeof(TMkqlArrowHeader); page->Size = pageSize; page->UseCount = 1; @@ -295,11 +301,11 @@ void* MKQLArrowAllocateOnArena(ui64 size) { void* ptr = (ui8*)page + page->Offset; page->Offset += alignedSize; ++page->UseCount; - return ptr; } -void* MKQLArrowAllocate(ui64 size) { 
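// [Editor's illustration, not part of the patch.] The arena path above is a
// bump allocator: the page header carries the current Offset plus a UseCount
// of live allocations carved from the page, and the page itself is released
// roughly when the last of them is freed. A minimal standalone model of that
// scheme; names are hypothetical, and alignment, oversized requests and
// recovering the page from a payload pointer are all omitted for brevity.
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <new>

struct TBumpPage {
    size_t Offset = sizeof(TBumpPage); // next free byte within the page
    size_t UseCount = 0;               // live allocations carved from this page
    static constexpr size_t Size = 64 * 1024;
};

inline void* BumpAlloc(TBumpPage*& page, size_t bytes) {
    if (page == nullptr || page->Offset + bytes > TBumpPage::Size) {
        void* mem = std::malloc(TBumpPage::Size);
        if (mem == nullptr) {
            return nullptr;
        }
        page = new (mem) TBumpPage(); // start a fresh page
    }
    void* ptr = reinterpret_cast<uint8_t*>(page) + page->Offset;
    page->Offset += bytes;
    ++page->UseCount;
    return ptr;
}

inline void BumpFree(TBumpPage* page) { // the real code recovers the page from the freed pointer
    if (--page->UseCount == 0) {
        std::free(page); // last user gone, drop the whole page
    }
}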
+namespace { +void* MKQLArrowAllocateImpl(ui64 size) { if (Y_LIKELY(!TAllocState::IsDefaultAllocatorUsed())) { if (size <= ArrowSizeForArena) { return MKQLArrowAllocateOnArena(size); @@ -324,6 +330,7 @@ void* MKQLArrowAllocate(ui64 size) { } auto* header = (TMkqlArrowHeader*)ptr; + SanitizerMarkValid(header, sizeof(TMkqlArrowHeader)); header->Offset = 0; header->UseCount = 0; @@ -337,6 +344,13 @@ void* MKQLArrowAllocate(ui64 size) { header->Size = size; return header + 1; } +} // namespace + +void* MKQLArrowAllocate(ui64 size) { + auto sizeWithRedzones = GetSizeToAlloc(size); + void* mem = MKQLArrowAllocateImpl(sizeWithRedzones); + return WrapPointerWithRedZones(mem, sizeWithRedzones); +} void* MKQLArrowReallocate(const void* mem, ui64 prevSize, ui64 size) { auto res = MKQLArrowAllocate(size); @@ -358,14 +372,15 @@ void MKQLArrowFreeOnArena(const void* ptr) { Y_ENSURE(it != state->ArrowBuffers.end()); state->ArrowBuffers.erase(it); } - + SanitizerMarkInvalid(page, sizeof(TMkqlArrowHeader)); ReleaseAlignedPage(page); } return; } -void MKQLArrowFree(const void* mem, ui64 size) { +namespace { +void MKQLArrowFreeImpl(const void* mem, ui64 size) { if (Y_LIKELY(!TAllocState::IsDefaultAllocatorUsed())) { if (size <= ArrowSizeForArena) { return MKQLArrowFreeOnArena(mem); @@ -393,8 +408,16 @@ void MKQLArrowFree(const void* mem, ui64 size) { ReleaseAlignedPage(header, fullSize); } +} // namespace + +void MKQLArrowFree(const void* mem, ui64 size) { + mem = UnwrapPointerWithRedZones(mem, size); + auto sizeWithRedzones = GetSizeToAlloc(size); + return MKQLArrowFreeImpl(mem, sizeWithRedzones); +} void MKQLArrowUntrack(const void* mem, ui64 size) { + mem = GetOriginalAllocatedObject(mem, size); TAllocState* state = TlsAllocState; Y_ENSURE(state); if (!state->EnableArrowTracking) { diff --git a/yql/essentials/minikql/mkql_alloc.h b/yql/essentials/minikql/mkql_alloc.h index 94a77471065b..7a7716dd6590 100644 --- a/yql/essentials/minikql/mkql_alloc.h +++ b/yql/essentials/minikql/mkql_alloc.h @@ -1,23 +1,26 @@ #pragma once + #include "aligned_page_pool.h" #include "mkql_mem_info.h" + #include +#include #include #include #include + #include #include #include #include -#include +#include + #include #include #include #include -namespace NKikimr { - -namespace NMiniKQL { +namespace NKikimr::NMiniKQL { const ui64 MKQL_ALIGNMENT = 16; @@ -116,7 +119,7 @@ struct TAllocState : public TAlignedPagePool explicit TAllocState(const TSourceLocation& location, const TAlignedPagePoolCounters& counters, bool supportsSizedAllocators); void KillAllBoxed(); void InvalidateMemInfo(); - size_t GetDeallocatedInPages() const; + Y_NO_SANITIZE("address") size_t GetDeallocatedInPages() const; static void CleanupPAllocList(TListEntry* root); static void CleanupArrowList(TListEntry* root); @@ -284,17 +287,22 @@ class TPagedArena { Clear(); } - void* Alloc(size_t sz, const EMemorySubPool pagePool = EMemorySubPool::Default) { + void* AllocImpl(size_t sz, const EMemorySubPool pagePool) { auto& currentPage = CurrentPages_[(TMemorySubPoolIdx)pagePool]; if (Y_LIKELY(currentPage->Offset + sz <= currentPage->Capacity)) { void* ret = (char*)currentPage + currentPage->Offset; currentPage->Offset = AlignUp(currentPage->Offset + sz, MKQL_ALIGNMENT); return ret; } - return AllocSlow(sz, pagePool); } + void* Alloc(size_t sz, const EMemorySubPool pagePool = EMemorySubPool::Default) { + sz = GetSizeToAlloc(sz); + void* mem = AllocImpl(sz, pagePool); + return WrapPointerWithRedZones(mem, sz); + } + void Clear() noexcept; private: @@ -344,7 +352,7 @@ 
inline void* MKQLAllocFastDeprecated(size_t sz, TAllocState* state, const EMemor return ret; } -inline void* MKQLAllocFastWithSize(size_t sz, TAllocState* state, const EMemorySubPool mPool, const TAllocLocation& location = TAllocLocation::current()) { +inline void* MKQLAllocFastWithSizeImpl(size_t sz, TAllocState* state, const EMemorySubPool mPool, const TAllocLocation& location) { #ifdef NDEBUG Y_UNUSED(location); #endif @@ -384,6 +392,12 @@ inline void* MKQLAllocFastWithSize(size_t sz, TAllocState* state, const EMemoryS return ret; } +inline void* MKQLAllocFastWithSize(size_t sz, TAllocState* state, const EMemorySubPool mPool, const TAllocLocation& location = TAllocLocation::current()) { + sz = GetSizeToAlloc(sz); + void* mem = MKQLAllocFastWithSizeImpl(sz, state, mPool, location); + return WrapPointerWithRedZones(mem, sz); +} + void MKQLFreeSlow(TAllocPageHeader* header, TAllocState *state, const EMemorySubPool mPool) noexcept; inline void MKQLFreeDeprecated(const void* mem, const EMemorySubPool mPool) noexcept { @@ -415,7 +429,7 @@ inline void MKQLFreeDeprecated(const void* mem, const EMemorySubPool mPool) noex MKQLFreeSlow(header, TlsAllocState, mPool); } -inline void MKQLFreeFastWithSize(const void* mem, size_t sz, TAllocState* state, const EMemorySubPool mPool) noexcept { +inline void MKQLFreeFastWithSizeImpl(const void* mem, size_t sz, TAllocState* state, const EMemorySubPool mPool) noexcept { if (!mem) { return; } @@ -436,18 +450,26 @@ inline void MKQLFreeFastWithSize(const void* mem, size_t sz, TAllocState* state, } TAllocPageHeader* header = (TAllocPageHeader*)TAllocState::GetPageStart(mem); - Y_DEBUG_ABORT_UNLESS(header->MyAlloc == state, "%s", (TStringBuilder() << "wrong allocator was used; " - "allocated with: " << header->MyAlloc->GetDebugInfo() << " freed with: " << TlsAllocState->GetDebugInfo()).data()); - if (Y_LIKELY(--header->UseCount != 0)) { - header->Deallocated += sz; - return; + { + Y_DEBUG_ABORT_UNLESS(header->MyAlloc == state, "Wrong allocator was used. 
Allocated with: %s, freed with: %s", + header->MyAlloc->GetDebugInfo().c_str(), TlsAllocState->GetDebugInfo().c_str()); + if (Y_LIKELY(--header->UseCount != 0)) { + header->Deallocated += sz; + return; + } } MKQLFreeSlow(header, state, mPool); } -inline void* MKQLAllocDeprecated(size_t sz, const EMemorySubPool mPool, const TAllocLocation& location = TAllocLocation::current()) { - return MKQLAllocFastDeprecated(sz, TlsAllocState, mPool, location); +inline void MKQLFreeFastWithSize(const void* mem, size_t sz, TAllocState* state, const EMemorySubPool mPool) noexcept { + mem = UnwrapPointerWithRedZones(mem, sz); + sz = GetSizeToAlloc(sz); + return MKQLFreeFastWithSizeImpl(mem, sz, state, mPool); +} + +inline void* MKQLAllocDeprecated(size_t sz, const EMemorySubPool mPool) { + return MKQLAllocFastDeprecated(sz, TlsAllocState, mPool); } inline void* MKQLAllocWithSize(size_t sz, const EMemorySubPool mPool, const TAllocLocation& location = TAllocLocation::current()) { @@ -568,17 +590,31 @@ struct TMKQLHugeAllocator template bool operator==(const TMKQLHugeAllocator&) const { return true; } template bool operator!=(const TMKQLHugeAllocator&) const { return false; } - static pointer allocate(size_type n, const void* = nullptr) + static pointer allocateImpl(size_type n, const void* = nullptr) { size_t size = Max(n * sizeof(value_type), TAllocState::POOL_PAGE_SIZE); return static_cast(TlsAllocState->GetBlock(size)); } - static void deallocate(const_pointer p, size_type n) noexcept + static pointer allocate(size_type n, const void* = nullptr) + { + n = GetSizeToAlloc(n); + void* mem = allocateImpl(n); + return static_cast(WrapPointerWithRedZones(mem, n)); + } + + static void deallocateImpl(const_pointer p, size_type n) noexcept { size_t size = Max(n * sizeof(value_type), TAllocState::POOL_PAGE_SIZE); TlsAllocState->ReturnBlock(const_cast(p), size); } + + static void deallocate(const_pointer p, size_type n) noexcept + { + p = static_cast(UnwrapPointerWithRedZones(p, n)); + n = GetSizeToAlloc(n); + return deallocateImpl(p, n); + } }; template @@ -611,7 +647,7 @@ class TPagedList return; } - auto ptr = Pool.GetPage(); + auto ptr = SanitizerMarkValid(Pool.GetPage(), TAlignedPagePool::POOL_PAGE_SIZE); IndexInLastPage = 1; Pages.push_back(ptr); new(ptr) T(std::move(value)); @@ -808,6 +844,4 @@ inline void TBoxedValueWithFree::operator delete(void *mem) noexcept { MKQLFreeWithSize(mem, size, EMemorySubPool::Default); } -} // NMiniKQL - -} // NKikimr +} // namespace NKikimr::NMiniKQL diff --git a/yql/essentials/minikql/mkql_alloc_ut.cpp b/yql/essentials/minikql/mkql_alloc_ut.cpp index 9dfd726e7ad6..c1636375c357 100644 --- a/yql/essentials/minikql/mkql_alloc_ut.cpp +++ b/yql/essentials/minikql/mkql_alloc_ut.cpp @@ -31,6 +31,12 @@ Y_UNIT_TEST_SUITE(TMiniKQLAllocTest) { Y_UNIT_TEST(TestDeallocated) { TScopedAlloc alloc(__LOCATION__); +#if defined(_asan_enabled_) + constexpr size_t EXTRA_ALLOCATION_SPACE = ASAN_EXTRA_ALLOCATION_SPACE; +#else // defined(_asan_enabled_) + constexpr size_t EXTRA_ALLOCATION_SPACE = 0; +#endif // defined(_asan_enabled_) + void* p1 = TWithDefaultMiniKQLAlloc::AllocWithSize(10); void* p2 = TWithDefaultMiniKQLAlloc::AllocWithSize(20); UNIT_ASSERT_VALUES_EQUAL(alloc.Ref().GetUsed(), TAlignedPagePool::POOL_PAGE_SIZE); @@ -38,7 +44,7 @@ Y_UNIT_TEST_SUITE(TMiniKQLAllocTest) { UNIT_ASSERT_VALUES_EQUAL(alloc.Ref().GetFreePageCount(), 0); TWithDefaultMiniKQLAlloc::FreeWithSize(p1, 10); UNIT_ASSERT_VALUES_EQUAL(alloc.Ref().GetUsed(), TAlignedPagePool::POOL_PAGE_SIZE); - 
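// [Editor's illustration, not part of the patch.] GetDeallocatedInPages was
// marked Y_NO_SANITIZE("address") earlier in this patch, presumably because it
// sums counters kept in page headers that may sit in memory the pool has
// already poisoned, so the accessor has to run uninstrumented. In plain
// clang/gcc terms the same effect looks like the hypothetical helper below.
#include <cstddef>

#if defined(__clang__) || defined(__GNUC__)
__attribute__((no_sanitize("address")))
#endif
size_t SumCountersInPoisonedPages(const size_t* counters, size_t count) {
    size_t total = 0;
    for (size_t i = 0; i < count; ++i) {
        total += counters[i]; // an instrumented load of poisoned memory would trip ASAN here
    }
    return total;
}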
UNIT_ASSERT_VALUES_EQUAL(alloc.Ref().GetDeallocatedInPages(), 10); + UNIT_ASSERT_VALUES_EQUAL(alloc.Ref().GetDeallocatedInPages(), 10 + EXTRA_ALLOCATION_SPACE); UNIT_ASSERT_VALUES_EQUAL(alloc.Ref().GetFreePageCount(), 0); TWithDefaultMiniKQLAlloc::FreeWithSize(p2, 20); UNIT_ASSERT_VALUES_EQUAL(alloc.Ref().GetUsed(), 0); diff --git a/yql/essentials/minikql/mkql_mem_info.cpp b/yql/essentials/minikql/mkql_mem_info.cpp index a8441d268737..5628531aaa96 100644 --- a/yql/essentials/minikql/mkql_mem_info.cpp +++ b/yql/essentials/minikql/mkql_mem_info.cpp @@ -67,9 +67,8 @@ void TMemoryUsageInfo::Return(const void* mem, ui64 size) { Y_DEBUG_ABORT_UNLESS(it != AllocationsMap_.end(), "Double free at: %p", mem); } - Y_DEBUG_ABORT_UNLESS(size == it->second.Size, - "Deallocating wrong size at: %p, " - "allocated at: %s", mem, (TStringBuilder() << it->second.Location).c_str()); + Y_DEBUG_ABORT_UNLESS(size == it->second.Size, "Deallocating wrong size at: %p, allocated at: %s. Actual size: %zu, but expected size is: %zu", + mem, (TStringBuilder() << it->second.Location).c_str(), size, it->second.Size); if (AllowMissing_) { it->second.IsDeleted = true; } else { @@ -141,4 +140,4 @@ void TMemoryUsageInfo::VerifyDebug() const { } } -} \ No newline at end of file +} diff --git a/yql/essentials/minikql/mkql_string_util.cpp b/yql/essentials/minikql/mkql_string_util.cpp index 78adacc231d1..e9604c069240 100644 --- a/yql/essentials/minikql/mkql_string_util.cpp +++ b/yql/essentials/minikql/mkql_string_util.cpp @@ -1,5 +1,7 @@ #include "mkql_string_util.h" +#include + namespace NKikimr { namespace NMiniKQL { @@ -31,19 +33,26 @@ NUdf::TUnboxedValuePod AppendString(const NUdf::TUnboxedValuePod value, const NU return result; } else { if (value.IsString()) { - auto str = value.AsStringValue(); - const ui32 offset = ref.Data() - str.Data(); - if (str.Size() == valueRef.Size() + offset) { - if (str.TryExpandOn(ref.Size())) { - std::memcpy(str.Data() + offset + valueRef.Size(), ref.Data(), ref.Size()); - return NUdf::TUnboxedValuePod(std::move(str), newSize, offset); + auto str = value.AsRawStringValue(); + const char* strData = str->Data(); + const char* refData = ref.Data(); + // Check if ref.Data() is within the memory range of str + if (refData >= strData && refData < strData + str->Size()) { + const ui32 offset = refData - strData; + if (str->Size() == valueRef.Size() + offset) { + if (str->TryExpandOn(ref.Size())) { + std::memcpy(str->Data() + offset + valueRef.Size(), ref.Data(), ref.Size()); + return NUdf::TUnboxedValuePod(NYql::NUdf::TStringValue(std::move(str)), newSize, offset); + } } } } - auto data = NUdf::TStringValue::AllocateData(newSize, newSize + newSize / 2); NUdf::TStringValue str(data); - data->UnRef(); + Y_DEFER { + data->ReleaseRef(); + value.DeleteUnreferenced(); + }; std::memcpy(str.Data(), valueRef.Data(), valueRef.Size()); std::memcpy(str.Data() + valueRef.Size(), ref.Data(), ref.Size()); return NUdf::TUnboxedValuePod(std::move(str)); @@ -69,10 +78,12 @@ NUdf::TUnboxedValuePod PrependString(const NUdf::TStringRef ref, const NUdf::TUn } else { auto data = NUdf::TStringValue::AllocateData(newSize, newSize + newSize / 2); NUdf::TStringValue str(data); - data->UnRef(); + Y_DEFER { + data->ReleaseRef(); + value.DeleteUnreferenced(); + }; std::memcpy(str.Data(), ref.Data(), ref.Size()); std::memcpy(str.Data() + ref.Size(), valueRef.Data(), valueRef.Size()); - value.DeleteUnreferenced(); return NUdf::TUnboxedValuePod(std::move(str)); } } @@ -96,23 +107,31 @@ NUdf::TUnboxedValuePod ConcatStrings(const 
NUdf::TUnboxedValuePod first, const N return result; } else { if (first.IsString()) { - auto str = first.AsStringValue(); - const ui32 offset = leftRef.Data() - str.Data(); - if (str.Size() == leftRef.Size() + offset) { - if (str.TryExpandOn(rightRef.Size())) { - std::memcpy(str.Data() + offset + leftRef.Size(), rightRef.Data(), rightRef.Size()); - second.DeleteUnreferenced(); - return NUdf::TUnboxedValuePod(std::move(str), newSize, offset); + auto str = first.AsRawStringValue(); + const char* strData = str->Data(); + const char* leftRefData = leftRef.Data(); + // Check if leftRef.Data() is within the memory range of str + if (leftRefData >= strData && leftRefData < strData + str->Size()) { + const ui32 offset = leftRefData - strData; + if (str->Size() == leftRef.Size() + offset) { + if (str->TryExpandOn(rightRef.Size())) { + std::memcpy(str->Data() + offset + leftRef.Size(), rightRef.Data(), rightRef.Size()); + second.DeleteUnreferenced(); + return NUdf::TUnboxedValuePod(NUdf::TStringValue(str), newSize, offset); + } } } } auto data = NUdf::TStringValue::AllocateData(newSize, newSize + newSize / 2); NUdf::TStringValue str(data); - data->UnRef(); + Y_DEFER { + data->ReleaseRef(); + second.DeleteUnreferenced(); + first.DeleteUnreferenced(); + }; std::memcpy(str.Data(), leftRef.Data(), leftRef.Size()); std::memcpy(str.Data() + leftRef.Size(), rightRef.Data(), rightRef.Size()); - second.DeleteUnreferenced(); return NUdf::TUnboxedValuePod(std::move(str)); } } @@ -134,13 +153,22 @@ NUdf::TUnboxedValuePod SubString(const NUdf::TUnboxedValuePod value, ui32 offset value.DeleteUnreferenced(); return result; } else { - auto old = value.AsStringValue(); - if (const auto newOffset = ui32(ref.Data() - old.Data()) + offset; NUdf::TUnboxedValuePod::OffsetLimit > newOffset) - return NUdf::TUnboxedValuePod(std::move(old), newSize, newOffset); + auto old = value.AsRawStringValue(); + const char* oldData = old->Data(); + const char* refData = ref.Data(); + // Check if ref.Data() is within the memory range of old + if (refData >= oldData && refData < oldData + old->Size()) { + if (const auto newOffset = ui32(refData - oldData) + offset; NUdf::TUnboxedValuePod::OffsetLimit > newOffset) { + return NUdf::TUnboxedValuePod(NUdf::TStringValue(old), newSize, newOffset); + } + } auto data = NUdf::TStringValue::AllocateData(newSize, newSize + (newSize >> 1U)); NUdf::TStringValue str(data); - data->UnRef(); + Y_DEFER { + data->ReleaseRef(); + value.DeleteUnreferenced(); + }; std::memcpy(str.Data(), ref.Data() + offset, newSize); return NUdf::TUnboxedValuePod(std::move(str)); } diff --git a/yql/essentials/minikql/ya.make b/yql/essentials/minikql/ya.make index 4c09b76c66f8..fe830919d027 100644 --- a/yql/essentials/minikql/ya.make +++ b/yql/essentials/minikql/ya.make @@ -108,4 +108,5 @@ RECURSE( RECURSE_FOR_TESTS( benchmark ut + gtest_ut ) diff --git a/yql/essentials/parser/pg_wrapper/comp_factory.cpp b/yql/essentials/parser/pg_wrapper/comp_factory.cpp index 8c833132bafa..c992daf84756 100644 --- a/yql/essentials/parser/pg_wrapper/comp_factory.cpp +++ b/yql/essentials/parser/pg_wrapper/comp_factory.cpp @@ -107,10 +107,8 @@ extern void MkqlDelete(MemoryContext context); extern MemoryContext MkqlGetChunkContext(void *pointer); extern Size MkqlGetChunkSpace(void *pointer); extern bool MkqlIsEmpty(MemoryContext context); -extern void MkqlStats(MemoryContext context, - MemoryStatsPrintFunc printfunc, void *passthru, - MemoryContextCounters *totals, - bool print_to_stderr); +extern void MkqlStats(MemoryContext context, 
MemoryStatsPrintFunc printfunc, void* passthru, MemoryContextCounters* totals, + bool print_to_stderr); #ifdef MEMORY_CONTEXT_CHECKING extern void MkqlCheck(MemoryContext context); #endif @@ -1337,6 +1335,11 @@ class TPgResolvedCall : public TPgResolvedCallBase> NUdf::TUnboxedValuePod res; if constexpr (!UseContext) { TPAllocScope call; + Y_DEFER { + // This ensures that there is no dangling pointers references to freed + // |TPAllocScope| pages that can be allocated and stored inside |callInfo.flinfo->fn_extra|. + callInfo.flinfo->fn_extra = nullptr; + }; res = this->DoCall(callInfo); } else { res = this->DoCall(callInfo); diff --git a/yql/essentials/public/udf/udf_string.h b/yql/essentials/public/udf/udf_string.h index 72b2c3476225..96e97a71b517 100644 --- a/yql/essentials/public/udf/udf_string.h +++ b/yql/essentials/public/udf/udf_string.h @@ -94,6 +94,20 @@ class TStringValue Refs_ = prev; } + bool TryExpandOn(ui32 len) { + if (RefCount() < 0) { + return false; + } + + const auto size = Size_ + len; + if (Capacity_ < size) { + return false; + } + + Size_ = size; + return true; + } + private: ui32 Size_; i32 Refs_; @@ -169,12 +183,7 @@ class TStringValue if (RefCount() < 0) return false; - const auto size = Data_->Size_ + len; - if (Data_->Capacity_ < size) - return false; - - Data_->Size_ = size; - return true; + return Data_->TryExpandOn(len); } inline i32 LockRef() noexcept { diff --git a/yql/essentials/udfs/common/datetime2/datetime_udf.cpp b/yql/essentials/udfs/common/datetime2/datetime_udf.cpp index c159f63d5e56..35f0af4150f4 100644 --- a/yql/essentials/udfs/common/datetime2/datetime_udf.cpp +++ b/yql/essentials/udfs/common/datetime2/datetime_udf.cpp @@ -3183,14 +3183,37 @@ class TShift : public TBoxedValue { } SetYear(result, year); } else { - static constexpr size_t size = 6; i64 year = 0LL; i64 negative = 1LL; + if (limit == 0) { + // Year must take at least 1 byte. + return false; + } if (*it == '-') { negative = -1LL; it++; + limit--; } - if (!ParseNDigits::Do(it, year) || !ValidateYear(negative * year)) { + auto parseDigits = [&]() { + switch (limit) { + case 0: + // Year number must take at least 1 byte. 
+ return false; + case 1: + return ParseNDigits<1, true>::Do(it, year); + case 2: + return ParseNDigits<2, true>::Do(it, year); + case 3: + return ParseNDigits<3, true>::Do(it, year); + case 4: + return ParseNDigits<4, true>::Do(it, year); + case 5: + return ParseNDigits<5, true>::Do(it, year); + default: + return ParseNDigits<6, true>::Do(it, year); + } + }; + if (!parseDigits() || !ValidateYear(negative * year)) { return false; } SetYear(result, negative * year); diff --git a/yt/yql/providers/yt/codec/yt_codec.cpp b/yt/yql/providers/yt/codec/yt_codec.cpp index 14e317df97fb..e414aa0f6d19 100644 --- a/yt/yql/providers/yt/codec/yt_codec.cpp +++ b/yt/yql/providers/yt/codec/yt_codec.cpp @@ -1791,7 +1791,7 @@ class TTempBlockWriter : public NCommon::IBlockWriter { void operator=(const TTempBlockWriter&) = delete; std::pair NextEmptyBlock() override { - auto newPage = Pool_.GetPage(); + auto newPage = SanitizerMarkValid(Pool_.GetPage(), TAlignedPagePool::POOL_PAGE_SIZE); auto header = (TPageHeader*)newPage; header->Avail_ = 0; header->Next_ = &Dummy_; From a8066d88a9c0c0c5050c38f6a902b27d64ccf6d3 Mon Sep 17 00:00:00 2001 From: babenko Date: Tue, 6 May 2025 15:21:47 +0300 Subject: [PATCH 06/24] Read cluster name from "just cache" commit_hash:3c75cdc79519b4d360385ac2b57111ddf07780ed --- yt/yt/client/api/client.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt/yt/client/api/client.cpp b/yt/yt/client/api/client.cpp index b7e4e876d1bd..29c9f9fe8e16 100644 --- a/yt/yt/client/api/client.cpp +++ b/yt/yt/client/api/client.cpp @@ -51,7 +51,7 @@ TFuture> TClusterAwareClientBase::GetClusterName(bool TFuture> TClusterAwareClientBase::FetchClusterNameFromMasterCache() { TGetNodeOptions options; - options.ReadFrom = EMasterChannelKind::MasterCache; + options.ReadFrom = EMasterChannelKind::Cache; return GetNode(ClusterNamePath, options).Apply( BIND([] (const TErrorOr& clusterNameYsonOrError) -> std::optional { From c131e959456f9f9a4adada5623ce3bae4097a8c1 Mon Sep 17 00:00:00 2001 From: babenko Date: Tue, 6 May 2025 15:27:22 +0300 Subject: [PATCH 07/24] Avoid implicit cast to bool commit_hash:164cdc4f4c977cf8276af152ce84992f8e7dc81b --- yt/yt/core/threading/thread.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt/yt/core/threading/thread.cpp b/yt/yt/core/threading/thread.cpp index 389cd88237f7..d69a4dad4d68 100644 --- a/yt/yt/core/threading/thread.cpp +++ b/yt/yt/core/threading/thread.cpp @@ -214,7 +214,7 @@ YT_PREVENT_TLS_CACHING void TThread::ThreadMainTrampoline() public: ~TExitInterceptor() { - if (Armed_ && !std::uncaught_exceptions()) { + if (Armed_ && std::uncaught_exceptions() == 0) { if (auto* logFile = TryGetShutdownLogFile()) { ::fprintf(logFile, "%s\tThread exit interceptor triggered (ThreadId: %" PRISZT ")\n", GetInstant().ToString().c_str(), From 9c3fdca51d8ae892c5ad8f6ef92df73fafc09e28 Mon Sep 17 00:00:00 2001 From: vityaman Date: Tue, 6 May 2025 15:49:02 +0300 Subject: [PATCH 08/24] YQL-19747 Complete folder, table and cluster names --- - Related to `YQL-19747` - On top of https://github.com/ytsaurus/ytsaurus/pull/1253 - Related to https://github.com/ydb-platform/ydb/issues/9056 - Related to https://github.com/vityaman/ydb/issues/14 - Related to https://github.com/vityaman/ydb/issues/35 - Related to https://github.com/vityaman/ydb/issues/40 --- Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/1257 commit_hash:0b842abb27184c88b8177beeea29fb1ea86b7a04 --- yql/essentials/sql/v1/complete/antlr4/c3t.h | 40 +- 
yql/essentials/sql/v1/complete/core/input.cpp | 25 ++ yql/essentials/sql/v1/complete/core/input.h | 2 + yql/essentials/sql/v1/complete/core/name.h | 10 + yql/essentials/sql/v1/complete/core/ya.make | 4 + .../sql/v1/complete/name/cluster/discovery.h | 19 + .../name/cluster/static/discovery.cpp | 28 ++ .../complete/name/cluster/static/discovery.h | 9 + .../v1/complete/name/cluster/static/ya.make | 11 + .../sql/v1/complete/name/cluster/ya.make | 11 + .../complete/name/object/dispatch/schema.cpp | 36 ++ .../v1/complete/name/object/dispatch/schema.h | 9 + .../v1/complete/name/object/dispatch/ya.make | 11 + .../object/{schema_gateway.cpp => schema.cpp} | 2 +- .../object/{schema_gateway.h => schema.h} | 6 +- .../simple/{schema_gateway.cpp => schema.cpp} | 20 +- .../simple/{schema_gateway.h => schema.h} | 10 +- .../schema_ut.cpp} | 51 +-- .../static/schema.cpp} | 16 +- .../name/object/simple/static/schema.h | 10 + .../name/object/simple/static/ya.make | 11 + .../v1/complete/name/object/simple/ut/ya.make | 11 + .../v1/complete/name/object/simple/ya.make | 10 +- .../name/object/static/schema_gateway.h | 9 - .../v1/complete/name/object/static/ut/ya.make | 7 - .../v1/complete/name/object/static/ya.make | 16 - .../sql/v1/complete/name/object/ya.make | 4 +- .../name/service/cluster/name_service.cpp | 82 ++++ .../name/service/cluster/name_service.h | 10 + .../v1/complete/name/service/cluster/ya.make | 12 + .../v1/complete/name/service/name_service.cpp | 4 + .../v1/complete/name/service/name_service.h | 46 ++- .../complete/name/service/ranking/ranking.cpp | 14 +- .../name/service/schema/name_service.cpp | 100 +++++ .../name/service/schema/name_service.h | 10 + .../v1/complete/name/service/schema/ya.make | 12 + .../name/service/union/name_service.cpp | 9 + .../sql/v1/complete/name/service/ya.make | 2 + yql/essentials/sql/v1/complete/name/ya.make | 1 + .../sql/v1/complete/sql_complete.cpp | 178 ++++++--- yql/essentials/sql/v1/complete/sql_complete.h | 4 + .../sql/v1/complete/sql_complete_ut.cpp | 369 ++++++++++++++++-- .../complete/syntax/cursor_token_context.cpp | 160 ++++++++ .../v1/complete/syntax/cursor_token_context.h | 50 +++ .../syntax/cursor_token_context_ut.cpp | 50 +++ .../sql/v1/complete/syntax/format.cpp | 13 + .../sql/v1/complete/syntax/format.h | 2 + .../sql/v1/complete/syntax/grammar.cpp | 26 +- .../sql/v1/complete/syntax/local.cpp | 206 ++++++---- yql/essentials/sql/v1/complete/syntax/local.h | 24 +- .../v1/complete/syntax/parser_call_stack.cpp | 47 +-- .../v1/complete/syntax/parser_call_stack.h | 6 + .../sql/v1/complete/syntax/token.cpp | 60 --- yql/essentials/sql/v1/complete/syntax/token.h | 24 -- .../sql/v1/complete/syntax/ut/ya.make | 5 + yql/essentials/sql/v1/complete/syntax/ya.make | 4 +- yql/essentials/sql/v1/complete/ut/ya.make | 7 + .../tools/yql_complete/yql_complete | 1 + 58 files changed, 1541 insertions(+), 395 deletions(-) create mode 100644 yql/essentials/sql/v1/complete/core/input.cpp create mode 100644 yql/essentials/sql/v1/complete/core/name.h create mode 100644 yql/essentials/sql/v1/complete/name/cluster/discovery.h create mode 100644 yql/essentials/sql/v1/complete/name/cluster/static/discovery.cpp create mode 100644 yql/essentials/sql/v1/complete/name/cluster/static/discovery.h create mode 100644 yql/essentials/sql/v1/complete/name/cluster/static/ya.make create mode 100644 yql/essentials/sql/v1/complete/name/cluster/ya.make create mode 100644 yql/essentials/sql/v1/complete/name/object/dispatch/schema.cpp create mode 100644 
yql/essentials/sql/v1/complete/name/object/dispatch/schema.h create mode 100644 yql/essentials/sql/v1/complete/name/object/dispatch/ya.make rename yql/essentials/sql/v1/complete/name/object/{schema_gateway.cpp => schema.cpp} (86%) rename yql/essentials/sql/v1/complete/name/object/{schema_gateway.h => schema.h} (89%) rename yql/essentials/sql/v1/complete/name/object/simple/{schema_gateway.cpp => schema.cpp} (81%) rename yql/essentials/sql/v1/complete/name/object/simple/{schema_gateway.h => schema.h} (51%) rename yql/essentials/sql/v1/complete/name/object/{static/schema_gateway_ut.cpp => simple/schema_ut.cpp} (58%) rename yql/essentials/sql/v1/complete/name/object/{static/schema_gateway.cpp => simple/static/schema.cpp} (69%) create mode 100644 yql/essentials/sql/v1/complete/name/object/simple/static/schema.h create mode 100644 yql/essentials/sql/v1/complete/name/object/simple/static/ya.make create mode 100644 yql/essentials/sql/v1/complete/name/object/simple/ut/ya.make delete mode 100644 yql/essentials/sql/v1/complete/name/object/static/schema_gateway.h delete mode 100644 yql/essentials/sql/v1/complete/name/object/static/ut/ya.make delete mode 100644 yql/essentials/sql/v1/complete/name/object/static/ya.make create mode 100644 yql/essentials/sql/v1/complete/name/service/cluster/name_service.cpp create mode 100644 yql/essentials/sql/v1/complete/name/service/cluster/name_service.h create mode 100644 yql/essentials/sql/v1/complete/name/service/cluster/ya.make create mode 100644 yql/essentials/sql/v1/complete/name/service/schema/name_service.cpp create mode 100644 yql/essentials/sql/v1/complete/name/service/schema/name_service.h create mode 100644 yql/essentials/sql/v1/complete/name/service/schema/ya.make create mode 100644 yql/essentials/sql/v1/complete/syntax/cursor_token_context.cpp create mode 100644 yql/essentials/sql/v1/complete/syntax/cursor_token_context.h create mode 100644 yql/essentials/sql/v1/complete/syntax/cursor_token_context_ut.cpp delete mode 100644 yql/essentials/sql/v1/complete/syntax/token.cpp delete mode 100644 yql/essentials/sql/v1/complete/syntax/token.h create mode 100644 yql/essentials/tools/yql_complete/yql_complete diff --git a/yql/essentials/sql/v1/complete/antlr4/c3t.h b/yql/essentials/sql/v1/complete/antlr4/c3t.h index 35b1f714fa71..aca5ebf92e50 100644 --- a/yql/essentials/sql/v1/complete/antlr4/c3t.h +++ b/yql/essentials/sql/v1/complete/antlr4/c3t.h @@ -27,37 +27,37 @@ namespace NSQLComplete { class TC3Engine: public IC3Engine { public: explicit TC3Engine(TConfig config) - : Chars() - , Lexer(&Chars) - , Tokens(&Lexer) - , Parser(&Tokens) - , CompletionCore(&Parser) + : Chars_() + , Lexer_(&Chars_) + , Tokens_(&Lexer_) + , Parser_(&Tokens_) + , CompletionCore_(&Parser_) { - Lexer.removeErrorListeners(); - Parser.removeErrorListeners(); + Lexer_.removeErrorListeners(); + Parser_.removeErrorListeners(); - CompletionCore.ignoredTokens = std::move(config.IgnoredTokens); - CompletionCore.preferredRules = std::move(config.PreferredRules); + CompletionCore_.ignoredTokens = std::move(config.IgnoredTokens); + CompletionCore_.preferredRules = std::move(config.PreferredRules); } TC3Candidates Complete(TCompletionInput input) override { auto prefix = input.Text.Head(input.CursorPosition); Assign(prefix); const auto caretTokenIndex = CaretTokenIndex(prefix); - auto candidates = CompletionCore.collectCandidates(caretTokenIndex); + auto candidates = CompletionCore_.collectCandidates(caretTokenIndex); return Converted(std::move(candidates)); } private: void Assign(TStringBuf 
prefix) { - Chars.load(prefix.Data(), prefix.Size(), /* lenient = */ false); - Lexer.reset(); - Tokens.setTokenSource(&Lexer); - Tokens.fill(); + Chars_.load(prefix.Data(), prefix.Size(), /* lenient = */ false); + Lexer_.reset(); + Tokens_.setTokenSource(&Lexer_); + Tokens_.fill(); } size_t CaretTokenIndex(TStringBuf prefix) { - const auto tokensCount = Tokens.size(); + const auto tokensCount = Tokens_.size(); if (2 <= tokensCount && !LastWord(prefix).Empty()) { return tokensCount - 2; } @@ -76,11 +76,11 @@ namespace NSQLComplete { return converted; } - antlr4::ANTLRInputStream Chars; - G::TLexer Lexer; - antlr4::BufferedTokenStream Tokens; - G::TParser Parser; - c3::CodeCompletionCore CompletionCore; + antlr4::ANTLRInputStream Chars_; + G::TLexer Lexer_; + antlr4::BufferedTokenStream Tokens_; + G::TParser Parser_; + c3::CodeCompletionCore CompletionCore_; }; } // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/core/input.cpp b/yql/essentials/sql/v1/complete/core/input.cpp new file mode 100644 index 000000000000..8eca3a28ee8f --- /dev/null +++ b/yql/essentials/sql/v1/complete/core/input.cpp @@ -0,0 +1,25 @@ +#include "input.h" + +#include + +namespace NSQLComplete { + + TCompletionInput SharpedInput(TString& text) { + constexpr char delim = '#'; + + size_t pos = text.find_first_of(delim); + if (pos == TString::npos) { + return { + .Text = text, + }; + } + + Y_ENSURE(!TStringBuf(text).Tail(pos + 1).Contains(delim)); + text.erase(std::begin(text) + pos); + return { + .Text = text, + .CursorPosition = pos, + }; + } + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/core/input.h b/yql/essentials/sql/v1/complete/core/input.h index 3bb609cbb22e..d852736cb44e 100644 --- a/yql/essentials/sql/v1/complete/core/input.h +++ b/yql/essentials/sql/v1/complete/core/input.h @@ -9,4 +9,6 @@ namespace NSQLComplete { size_t CursorPosition = Text.length(); }; + TCompletionInput SharpedInput(TString& text Y_LIFETIME_BOUND); + } // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/core/name.h b/yql/essentials/sql/v1/complete/core/name.h new file mode 100644 index 000000000000..02524766de11 --- /dev/null +++ b/yql/essentials/sql/v1/complete/core/name.h @@ -0,0 +1,10 @@ +#pragma once + +namespace NSQLComplete { + + enum class EObjectKind { + Folder, + Table, + }; + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/core/ya.make b/yql/essentials/sql/v1/complete/core/ya.make index 9865d255c8f1..8bc457f8f956 100644 --- a/yql/essentials/sql/v1/complete/core/ya.make +++ b/yql/essentials/sql/v1/complete/core/ya.make @@ -1,3 +1,7 @@ LIBRARY() +SRCS( + input.cpp +) + END() diff --git a/yql/essentials/sql/v1/complete/name/cluster/discovery.h b/yql/essentials/sql/v1/complete/name/cluster/discovery.h new file mode 100644 index 000000000000..6b496f155463 --- /dev/null +++ b/yql/essentials/sql/v1/complete/name/cluster/discovery.h @@ -0,0 +1,19 @@ +#pragma once + +#include + +#include + +namespace NSQLComplete { + + using TClusterList = TVector; + + class IClusterDiscovery: public TThrRefBase { + public: + using TPtr = TIntrusivePtr; + + virtual ~IClusterDiscovery() = default; + virtual NThreading::TFuture Query() const = 0; + }; + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/name/cluster/static/discovery.cpp b/yql/essentials/sql/v1/complete/name/cluster/static/discovery.cpp new file mode 100644 index 000000000000..7caee64c182f --- /dev/null +++ b/yql/essentials/sql/v1/complete/name/cluster/static/discovery.cpp @@ -0,0 
+1,28 @@ +#include "discovery.h" + +namespace NSQLComplete { + + namespace { + + class TClusterDiscovery: public IClusterDiscovery { + public: + explicit TClusterDiscovery(TVector instances) + : ClusterList_(std::move(instances)) + { + } + + NThreading::TFuture Query() const override { + return NThreading::MakeFuture(ClusterList_); + } + + private: + TVector ClusterList_; + }; + + } // namespace + + IClusterDiscovery::TPtr MakeStaticClusterDiscovery(TVector instances) { + return new TClusterDiscovery(std::move(instances)); + } + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/name/cluster/static/discovery.h b/yql/essentials/sql/v1/complete/name/cluster/static/discovery.h new file mode 100644 index 000000000000..bfad0eed62f3 --- /dev/null +++ b/yql/essentials/sql/v1/complete/name/cluster/static/discovery.h @@ -0,0 +1,9 @@ +#pragma once + +#include + +namespace NSQLComplete { + + IClusterDiscovery::TPtr MakeStaticClusterDiscovery(TVector instances); + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/name/cluster/static/ya.make b/yql/essentials/sql/v1/complete/name/cluster/static/ya.make new file mode 100644 index 000000000000..567130a2ff0a --- /dev/null +++ b/yql/essentials/sql/v1/complete/name/cluster/static/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +SRCS( + discovery.cpp +) + +PEERDIR( + yql/essentials/sql/v1/complete/name/cluster +) + +END() diff --git a/yql/essentials/sql/v1/complete/name/cluster/ya.make b/yql/essentials/sql/v1/complete/name/cluster/ya.make new file mode 100644 index 000000000000..5ea880aeaddf --- /dev/null +++ b/yql/essentials/sql/v1/complete/name/cluster/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +PEERDIR( + library/cpp/threading/future +) + +END() + +RECURSE( + static +) diff --git a/yql/essentials/sql/v1/complete/name/object/dispatch/schema.cpp b/yql/essentials/sql/v1/complete/name/object/dispatch/schema.cpp new file mode 100644 index 000000000000..f6d79b280a0e --- /dev/null +++ b/yql/essentials/sql/v1/complete/name/object/dispatch/schema.cpp @@ -0,0 +1,36 @@ +#include "schema.h" + +namespace NSQLComplete { + + namespace { + + class TSchema: public ISchema { + public: + explicit TSchema(THashMap mapping) + : Mapping_(std::move(mapping)) + { + } + + NThreading::TFuture List(const TListRequest& request) const override { + auto iter = Mapping_.find(request.Cluster); + if (iter == std::end(Mapping_)) { + yexception e; + e << "unknown cluster '" << request.Cluster << "'"; + std::exception_ptr p = std::make_exception_ptr(e); + return NThreading::MakeErrorFuture(p); + } + + return iter->second->List(request); + } + + private: + THashMap Mapping_; + }; + + } // namespace + + ISchema::TPtr MakeDispatchSchema(THashMap mapping) { + return new TSchema(std::move(mapping)); + } + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/name/object/dispatch/schema.h b/yql/essentials/sql/v1/complete/name/object/dispatch/schema.h new file mode 100644 index 000000000000..517a3ad0af78 --- /dev/null +++ b/yql/essentials/sql/v1/complete/name/object/dispatch/schema.h @@ -0,0 +1,9 @@ +#pragma once + +#include + +namespace NSQLComplete { + + ISchema::TPtr MakeDispatchSchema(THashMap mapping); + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/name/object/dispatch/ya.make b/yql/essentials/sql/v1/complete/name/object/dispatch/ya.make new file mode 100644 index 000000000000..071bf5dff7d2 --- /dev/null +++ b/yql/essentials/sql/v1/complete/name/object/dispatch/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +SRCS( + schema.cpp 
+) + +PEERDIR( + yql/essentials/sql/v1/complete/name/object +) + +END() diff --git a/yql/essentials/sql/v1/complete/name/object/schema_gateway.cpp b/yql/essentials/sql/v1/complete/name/object/schema.cpp similarity index 86% rename from yql/essentials/sql/v1/complete/name/object/schema_gateway.cpp rename to yql/essentials/sql/v1/complete/name/object/schema.cpp index c802ddcb7a10..ba9e09b2ee71 100644 --- a/yql/essentials/sql/v1/complete/name/object/schema_gateway.cpp +++ b/yql/essentials/sql/v1/complete/name/object/schema.cpp @@ -1,4 +1,4 @@ -#include "schema_gateway.h" +#include "schema.h" template <> void Out(IOutputStream& out, const NSQLComplete::TFolderEntry& entry) { diff --git a/yql/essentials/sql/v1/complete/name/object/schema_gateway.h b/yql/essentials/sql/v1/complete/name/object/schema.h similarity index 89% rename from yql/essentials/sql/v1/complete/name/object/schema_gateway.h rename to yql/essentials/sql/v1/complete/name/object/schema.h index f9307bf495db..687f92d7e8a5 100644 --- a/yql/essentials/sql/v1/complete/name/object/schema_gateway.h +++ b/yql/essentials/sql/v1/complete/name/object/schema.h @@ -41,11 +41,11 @@ namespace NSQLComplete { TVector Entries; }; - class ISchemaGateway: public TThrRefBase { + class ISchema: public TThrRefBase { public: - using TPtr = TIntrusivePtr; + using TPtr = TIntrusivePtr; - virtual ~ISchemaGateway() = default; + virtual ~ISchema() = default; virtual NThreading::TFuture List(const TListRequest& request) const = 0; }; diff --git a/yql/essentials/sql/v1/complete/name/object/simple/schema_gateway.cpp b/yql/essentials/sql/v1/complete/name/object/simple/schema.cpp similarity index 81% rename from yql/essentials/sql/v1/complete/name/object/simple/schema_gateway.cpp rename to yql/essentials/sql/v1/complete/name/object/simple/schema.cpp index e8e7bf3ccd97..c7b62946f640 100644 --- a/yql/essentials/sql/v1/complete/name/object/simple/schema_gateway.cpp +++ b/yql/essentials/sql/v1/complete/name/object/simple/schema.cpp @@ -1,4 +1,4 @@ -#include "schema_gateway.h" +#include "schema.h" #include @@ -6,7 +6,7 @@ namespace NSQLComplete { namespace { - class TSimpleSchemaGateway: public ISchemaGateway { + class TSimpleSchema: public ISchema { private: static auto FilterByName(TString name) { return [name = std::move(name)](auto f) { @@ -47,14 +47,20 @@ namespace NSQLComplete { } public: - explicit TSimpleSchemaGateway(ISimpleSchemaGateway::TPtr simple) + explicit TSimpleSchema(ISimpleSchema::TPtr simple) : Simple_(std::move(simple)) { } NThreading::TFuture List(const TListRequest& request) const override { auto [path, name] = Simple_->Split(request.Path); - return Simple_->List(TString(path)) + + TString pathStr(path); + if (!pathStr.StartsWith('/')) { + pathStr.prepend('/'); + } + + return Simple_->List(std::move(pathStr)) .Apply(FilterByName(TString(name))) .Apply(FilterByTypes(std::move(request.Filter.Types))) .Apply(Crop(request.Limit)) @@ -62,13 +68,13 @@ namespace NSQLComplete { } private: - ISimpleSchemaGateway::TPtr Simple_; + ISimpleSchema::TPtr Simple_; }; } // namespace - ISchemaGateway::TPtr MakeSimpleSchemaGateway(ISimpleSchemaGateway::TPtr simple) { - return ISchemaGateway::TPtr(new TSimpleSchemaGateway(std::move(simple))); + ISchema::TPtr MakeSimpleSchema(ISimpleSchema::TPtr simple) { + return ISchema::TPtr(new TSimpleSchema(std::move(simple))); } } // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/name/object/simple/schema_gateway.h b/yql/essentials/sql/v1/complete/name/object/simple/schema.h similarity index 51% rename from 
yql/essentials/sql/v1/complete/name/object/simple/schema_gateway.h rename to yql/essentials/sql/v1/complete/name/object/simple/schema.h index 4b4671f1ccac..67def573a73b 100644 --- a/yql/essentials/sql/v1/complete/name/object/simple/schema_gateway.h +++ b/yql/essentials/sql/v1/complete/name/object/simple/schema.h @@ -1,6 +1,6 @@ #pragma once -#include +#include namespace NSQLComplete { @@ -9,15 +9,15 @@ namespace NSQLComplete { TStringBuf NameHint; }; - class ISimpleSchemaGateway: public TThrRefBase { + class ISimpleSchema: public TThrRefBase { public: - using TPtr = TIntrusivePtr; + using TPtr = TIntrusivePtr; - virtual ~ISimpleSchemaGateway() = default; + virtual ~ISimpleSchema() = default; virtual TSplittedPath Split(TStringBuf path) const = 0; virtual NThreading::TFuture> List(TString folder) const = 0; }; - ISchemaGateway::TPtr MakeSimpleSchemaGateway(ISimpleSchemaGateway::TPtr simple); + ISchema::TPtr MakeSimpleSchema(ISimpleSchema::TPtr simple); } // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/name/object/static/schema_gateway_ut.cpp b/yql/essentials/sql/v1/complete/name/object/simple/schema_ut.cpp similarity index 58% rename from yql/essentials/sql/v1/complete/name/object/static/schema_gateway_ut.cpp rename to yql/essentials/sql/v1/complete/name/object/simple/schema_ut.cpp index 86c8118f197e..954ecc4da756 100644 --- a/yql/essentials/sql/v1/complete/name/object/static/schema_gateway_ut.cpp +++ b/yql/essentials/sql/v1/complete/name/object/simple/schema_ut.cpp @@ -1,12 +1,14 @@ -#include "schema_gateway.h" +#include "schema.h" + +#include #include using namespace NSQLComplete; -Y_UNIT_TEST_SUITE(StaticSchemaGatewayTests) { +Y_UNIT_TEST_SUITE(StaticSchemaTests) { - ISchemaGateway::TPtr MakeStaticSchemaGatewayUT() { + ISchema::TPtr MakeStaticSchemaUT() { THashMap> fs = { {"/", {{"Folder", "local"}, {"Folder", "test"}, @@ -18,18 +20,19 @@ Y_UNIT_TEST_SUITE(StaticSchemaGatewayTests) { {"Table", "meta"}}}, {"/test/service/", {{"Table", "example"}}}, }; - return MakeStaticSchemaGateway(std::move(fs)); + return MakeSimpleSchema( + MakeStaticSimpleSchema(std::move(fs))); } Y_UNIT_TEST(ListFolderBasic) { - auto gateway = MakeStaticSchemaGatewayUT(); + auto schema = MakeStaticSchemaUT(); { TVector expected = { {"Folder", "local"}, {"Folder", "test"}, {"Folder", "prod"}, }; - UNIT_ASSERT_VALUES_EQUAL(gateway->List({.Path = "/"}).GetValueSync().Entries, expected); + UNIT_ASSERT_VALUES_EQUAL(schema->List({.Path = "/"}).GetValueSync().Entries, expected); } { TVector expected = { @@ -37,29 +40,29 @@ Y_UNIT_TEST_SUITE(StaticSchemaGatewayTests) { {"Table", "account"}, {"Table", "abacaba"}, }; - UNIT_ASSERT_VALUES_EQUAL(gateway->List({.Path = "/local/"}).GetValueSync().Entries, expected); + UNIT_ASSERT_VALUES_EQUAL(schema->List({.Path = "/local/"}).GetValueSync().Entries, expected); } { TVector expected = { {"Folder", "service"}, {"Table", "meta"}, }; - UNIT_ASSERT_VALUES_EQUAL(gateway->List({.Path = "/test/"}).GetValueSync().Entries, expected); + UNIT_ASSERT_VALUES_EQUAL(schema->List({.Path = "/test/"}).GetValueSync().Entries, expected); } { TVector expected = { {"Table", "example"}}; - UNIT_ASSERT_VALUES_EQUAL(gateway->List({.Path = "/test/service/"}).GetValueSync().Entries, expected); + UNIT_ASSERT_VALUES_EQUAL(schema->List({.Path = "/test/service/"}).GetValueSync().Entries, expected); } } Y_UNIT_TEST(ListFolderHint) { - auto gateway = MakeStaticSchemaGatewayUT(); + auto schema = MakeStaticSchemaUT(); { TVector expected = { {"Folder", "local"}, }; - auto actual = 
gateway->List({.Path = "/l"}).GetValueSync(); + auto actual = schema->List({.Path = "/l"}).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL(actual.Entries, expected); UNIT_ASSERT_VALUES_EQUAL(actual.NameHintLength, 1); } @@ -68,7 +71,7 @@ Y_UNIT_TEST_SUITE(StaticSchemaGatewayTests) { {"Table", "account"}, {"Table", "abacaba"}, }; - auto actual = gateway->List({.Path = "/local/a"}).GetValueSync(); + auto actual = schema->List({.Path = "/local/a"}).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL(actual.Entries, expected); UNIT_ASSERT_VALUES_EQUAL(actual.NameHintLength, 1); } @@ -76,14 +79,14 @@ Y_UNIT_TEST_SUITE(StaticSchemaGatewayTests) { TVector expected = { {"Folder", "service"}, }; - auto actual = gateway->List({.Path = "/test/service"}).GetValueSync(); + auto actual = schema->List({.Path = "/test/service"}).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL(actual.Entries, expected); UNIT_ASSERT_VALUES_EQUAL(actual.NameHintLength, 7); } } Y_UNIT_TEST(ListFolderFilterByType) { - auto gateway = MakeStaticSchemaGatewayUT(); + auto schema = MakeStaticSchemaUT(); { TVector expected = { {"Folder", "service"}, @@ -94,7 +97,7 @@ Y_UNIT_TEST_SUITE(StaticSchemaGatewayTests) { .Types = THashSet{"Folder"}, }, }; - UNIT_ASSERT_VALUES_EQUAL(gateway->List(request).GetValueSync().Entries, expected); + UNIT_ASSERT_VALUES_EQUAL(schema->List(request).GetValueSync().Entries, expected); } { TVector expected = { @@ -106,18 +109,18 @@ Y_UNIT_TEST_SUITE(StaticSchemaGatewayTests) { .Types = THashSet{"Table"}, }, }; - UNIT_ASSERT_VALUES_EQUAL(gateway->List(request).GetValueSync().Entries, expected); + UNIT_ASSERT_VALUES_EQUAL(schema->List(request).GetValueSync().Entries, expected); } } Y_UNIT_TEST(ListFolderLimit) { - auto gateway = MakeStaticSchemaGatewayUT(); - UNIT_ASSERT_VALUES_EQUAL(gateway->List({.Path = "/", .Limit = 0}).GetValueSync().Entries.size(), 0); - UNIT_ASSERT_VALUES_EQUAL(gateway->List({.Path = "/", .Limit = 1}).GetValueSync().Entries.size(), 1); - UNIT_ASSERT_VALUES_EQUAL(gateway->List({.Path = "/", .Limit = 2}).GetValueSync().Entries.size(), 2); - UNIT_ASSERT_VALUES_EQUAL(gateway->List({.Path = "/", .Limit = 3}).GetValueSync().Entries.size(), 3); - UNIT_ASSERT_VALUES_EQUAL(gateway->List({.Path = "/", .Limit = 4}).GetValueSync().Entries.size(), 3); - UNIT_ASSERT_VALUES_EQUAL(gateway->List({.Path = "/", .Limit = 5}).GetValueSync().Entries.size(), 3); + auto schema = MakeStaticSchemaUT(); + UNIT_ASSERT_VALUES_EQUAL(schema->List({.Path = "/", .Limit = 0}).GetValueSync().Entries.size(), 0); + UNIT_ASSERT_VALUES_EQUAL(schema->List({.Path = "/", .Limit = 1}).GetValueSync().Entries.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(schema->List({.Path = "/", .Limit = 2}).GetValueSync().Entries.size(), 2); + UNIT_ASSERT_VALUES_EQUAL(schema->List({.Path = "/", .Limit = 3}).GetValueSync().Entries.size(), 3); + UNIT_ASSERT_VALUES_EQUAL(schema->List({.Path = "/", .Limit = 4}).GetValueSync().Entries.size(), 3); + UNIT_ASSERT_VALUES_EQUAL(schema->List({.Path = "/", .Limit = 5}).GetValueSync().Entries.size(), 3); } -} // Y_UNIT_TEST_SUITE(StaticSchemaGatewayTests) +} // Y_UNIT_TEST_SUITE(StaticSchemaTests) diff --git a/yql/essentials/sql/v1/complete/name/object/static/schema_gateway.cpp b/yql/essentials/sql/v1/complete/name/object/simple/static/schema.cpp similarity index 69% rename from yql/essentials/sql/v1/complete/name/object/static/schema_gateway.cpp rename to yql/essentials/sql/v1/complete/name/object/simple/static/schema.cpp index f43af57c752a..0af3ff0ef96b 100644 --- a/yql/essentials/sql/v1/complete/name/object/static/schema_gateway.cpp 
+++ b/yql/essentials/sql/v1/complete/name/object/simple/static/schema.cpp @@ -1,16 +1,12 @@ -#include "schema_gateway.h" - -#include - -#include +#include "schema.h" namespace NSQLComplete { namespace { - class TSimpleSchemaGateway: public ISimpleSchemaGateway { + class TSimpleSchema: public ISimpleSchema { public: - explicit TSimpleSchemaGateway(THashMap> data) + explicit TSimpleSchema(THashMap> data) : Data_(std::move(data)) { for (const auto& [k, _] : Data_) { @@ -44,10 +40,8 @@ namespace NSQLComplete { } // namespace - ISchemaGateway::TPtr MakeStaticSchemaGateway(THashMap> fs) { - return MakeSimpleSchemaGateway( - ISimpleSchemaGateway::TPtr( - new TSimpleSchemaGateway(std::move(fs)))); + ISimpleSchema::TPtr MakeStaticSimpleSchema(THashMap> fs) { + return new TSimpleSchema(std::move(fs)); } } // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/name/object/simple/static/schema.h b/yql/essentials/sql/v1/complete/name/object/simple/static/schema.h new file mode 100644 index 000000000000..f04c89f0b23b --- /dev/null +++ b/yql/essentials/sql/v1/complete/name/object/simple/static/schema.h @@ -0,0 +1,10 @@ +#pragma once + +#include + +namespace NSQLComplete { + + ISimpleSchema::TPtr MakeStaticSimpleSchema( + THashMap> fs); + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/name/object/simple/static/ya.make b/yql/essentials/sql/v1/complete/name/object/simple/static/ya.make new file mode 100644 index 000000000000..8e7918ed0115 --- /dev/null +++ b/yql/essentials/sql/v1/complete/name/object/simple/static/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +SRCS( + schema.cpp +) + +PEERDIR( + yql/essentials/sql/v1/complete/name/object/simple +) + +END() diff --git a/yql/essentials/sql/v1/complete/name/object/simple/ut/ya.make b/yql/essentials/sql/v1/complete/name/object/simple/ut/ya.make new file mode 100644 index 000000000000..048dc38d7d85 --- /dev/null +++ b/yql/essentials/sql/v1/complete/name/object/simple/ut/ya.make @@ -0,0 +1,11 @@ +UNITTEST_FOR(yql/essentials/sql/v1/complete/name/object/simple) + +SRCS( + schema_ut.cpp +) + +PEERDIR( + yql/essentials/sql/v1/complete/name/object/simple/static +) + +END() diff --git a/yql/essentials/sql/v1/complete/name/object/simple/ya.make b/yql/essentials/sql/v1/complete/name/object/simple/ya.make index d3668fdb1fcb..56eafc1b848a 100644 --- a/yql/essentials/sql/v1/complete/name/object/simple/ya.make +++ b/yql/essentials/sql/v1/complete/name/object/simple/ya.make @@ -1,7 +1,7 @@ LIBRARY() SRCS( - schema_gateway.cpp + schema.cpp ) PEERDIR( @@ -9,3 +9,11 @@ PEERDIR( ) END() + +RECURSE( + static +) + +RECURSE_FOR_TESTS( + ut +) diff --git a/yql/essentials/sql/v1/complete/name/object/static/schema_gateway.h b/yql/essentials/sql/v1/complete/name/object/static/schema_gateway.h deleted file mode 100644 index fd23a956d252..000000000000 --- a/yql/essentials/sql/v1/complete/name/object/static/schema_gateway.h +++ /dev/null @@ -1,9 +0,0 @@ -#pragma once - -#include - -namespace NSQLComplete { - - ISchemaGateway::TPtr MakeStaticSchemaGateway(THashMap> fs); - -} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/name/object/static/ut/ya.make b/yql/essentials/sql/v1/complete/name/object/static/ut/ya.make deleted file mode 100644 index 877f015d806f..000000000000 --- a/yql/essentials/sql/v1/complete/name/object/static/ut/ya.make +++ /dev/null @@ -1,7 +0,0 @@ -UNITTEST_FOR(yql/essentials/sql/v1/complete/name/object/static) - -SRCS( - schema_gateway_ut.cpp -) - -END() diff --git 
a/yql/essentials/sql/v1/complete/name/object/static/ya.make b/yql/essentials/sql/v1/complete/name/object/static/ya.make deleted file mode 100644 index d37495f0700d..000000000000 --- a/yql/essentials/sql/v1/complete/name/object/static/ya.make +++ /dev/null @@ -1,16 +0,0 @@ -LIBRARY() - -SRCS( - schema_gateway.cpp -) - -PEERDIR( - yql/essentials/sql/v1/complete/name/object - yql/essentials/sql/v1/complete/name/object/simple -) - -END() - -RECURSE_FOR_TESTS( - ut -) diff --git a/yql/essentials/sql/v1/complete/name/object/ya.make b/yql/essentials/sql/v1/complete/name/object/ya.make index 483f11c9a59f..2561c0182927 100644 --- a/yql/essentials/sql/v1/complete/name/object/ya.make +++ b/yql/essentials/sql/v1/complete/name/object/ya.make @@ -1,7 +1,7 @@ LIBRARY() SRCS( - schema_gateway.cpp + schema.cpp ) PEERDIR( @@ -11,6 +11,6 @@ PEERDIR( END() RECURSE( + dispatch simple - static ) diff --git a/yql/essentials/sql/v1/complete/name/service/cluster/name_service.cpp b/yql/essentials/sql/v1/complete/name/service/cluster/name_service.cpp new file mode 100644 index 000000000000..db0ba00b6673 --- /dev/null +++ b/yql/essentials/sql/v1/complete/name/service/cluster/name_service.cpp @@ -0,0 +1,82 @@ +#include "name_service.h" + +#include + +namespace NSQLComplete { + + namespace { + + class TNameService: public INameService { + private: + static auto FilterByName(TString name) { + return [name = std::move(name)](auto f) { + TClusterList clusters = f.ExtractValue(); + EraseIf(clusters, [prefix = ToLowerUTF8(name)](const TString& instance) { + return !instance.StartsWith(prefix); + }); + return clusters; + }; + } + + static auto Crop(size_t limit) { + return [limit](auto f) { + TClusterList clusters = f.ExtractValue(); + clusters.crop(limit); + return clusters; + }; + } + + static auto ToResponse(TNameConstraints constraints) { + return [constraints = std::move(constraints)](auto f) { + TClusterList clusters = f.ExtractValue(); + + TNameResponse response; + response.RankedNames.reserve(clusters.size()); + + for (auto& cluster : clusters) { + TClusterName name; + name.Indentifier = std::move(cluster); + response.RankedNames.emplace_back(std::move(name)); + } + + response.RankedNames = constraints.Unqualified(std::move(response.RankedNames)); + return response; + }; + } + + public: + explicit TNameService(IClusterDiscovery::TPtr discovery) + : Discovery_(std::move(discovery)) + { + } + + NThreading::TFuture Lookup(TNameRequest request) const override { + if (!request.Constraints.Cluster) { + return NThreading::MakeFuture({}); + } + + return Discovery_->Query() + .Apply(FilterByName(QualifiedClusterName(request))) + .Apply(Crop(request.Limit)) + .Apply(ToResponse(request.Constraints)); + } + + private: + static TString QualifiedClusterName(const TNameRequest& request) { + TClusterName cluster; + cluster.Indentifier = request.Prefix; + + TGenericName generic = request.Constraints.Qualified(cluster); + return std::get(std::move(generic)).Indentifier; + } + + IClusterDiscovery::TPtr Discovery_; + }; + + } // namespace + + INameService::TPtr MakeClusterNameService(IClusterDiscovery::TPtr discovery) { + return new TNameService(std::move(discovery)); + } + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/name/service/cluster/name_service.h b/yql/essentials/sql/v1/complete/name/service/cluster/name_service.h new file mode 100644 index 000000000000..a57eabc0d2b7 --- /dev/null +++ b/yql/essentials/sql/v1/complete/name/service/cluster/name_service.h @@ -0,0 +1,10 @@ +#pragma once + +#include 
+#include + +namespace NSQLComplete { + + INameService::TPtr MakeClusterNameService(IClusterDiscovery::TPtr discovery); + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/name/service/cluster/ya.make b/yql/essentials/sql/v1/complete/name/service/cluster/ya.make new file mode 100644 index 000000000000..4849690a6f25 --- /dev/null +++ b/yql/essentials/sql/v1/complete/name/service/cluster/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +SRCS( + name_service.cpp +) + +PEERDIR( + yql/essentials/sql/v1/complete/name/cluster + yql/essentials/sql/v1/complete/name/service +) + +END() diff --git a/yql/essentials/sql/v1/complete/name/service/name_service.cpp b/yql/essentials/sql/v1/complete/name/service/name_service.cpp index 88473a60bff4..92d8b50e98f7 100644 --- a/yql/essentials/sql/v1/complete/name/service/name_service.cpp +++ b/yql/essentials/sql/v1/complete/name/service/name_service.cpp @@ -34,6 +34,8 @@ namespace NSQLComplete { SetPrefix(name.Indentifier, ".", *Pragma); } else if constexpr (std::is_same_v) { SetPrefix(name.Indentifier, "::", *Function); + } else if constexpr (std::is_same_v) { + SetPrefix(name.Indentifier, ":", *Cluster); } return name; }, std::move(unqualified)); @@ -46,6 +48,8 @@ namespace NSQLComplete { FixPrefix(name.Indentifier, ".", *Pragma); } else if constexpr (std::is_same_v) { FixPrefix(name.Indentifier, "::", *Function); + } else if constexpr (std::is_same_v) { + FixPrefix(name.Indentifier, ":", *Cluster); } return name; }, std::move(qualified)); diff --git a/yql/essentials/sql/v1/complete/name/service/name_service.h b/yql/essentials/sql/v1/complete/name/service/name_service.h index 7d773582b614..cdcfc3a49829 100644 --- a/yql/essentials/sql/v1/complete/name/service/name_service.h +++ b/yql/essentials/sql/v1/complete/name/service/name_service.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -7,11 +8,10 @@ #include #include #include +#include namespace NSQLComplete { - using NThreading::TFuture; // TODO(YQL-19747): remove - struct TIndentifier { TString Indentifier; }; @@ -29,7 +29,7 @@ namespace NSQLComplete { }; struct TTypeName: TIndentifier { - using TConstraints = std::monostate; + struct TConstraints {}; }; struct TFunctionName: TIndentifier { @@ -42,18 +42,45 @@ namespace NSQLComplete { }; }; + struct TObjectNameConstraints { + TString Provider; + TString Cluster; + THashSet Kinds; + }; + + struct TFolderName: TIndentifier { + }; + + struct TTableName: TIndentifier { + }; + + struct TClusterName: TIndentifier { + struct TConstraints: TNamespaced {}; + }; + + struct TUnkownName { + TString Content; + TString Type; + }; + using TGenericName = std::variant< TKeyword, TPragmaName, TTypeName, TFunctionName, - THintName>; + THintName, + TFolderName, + TTableName, + TClusterName, + TUnkownName>; struct TNameConstraints { TMaybe Pragma; TMaybe Type; TMaybe Function; TMaybe Hint; + TMaybe Object; + TMaybe Cluster; TGenericName Qualified(TGenericName unqualified) const; TGenericName Unqualified(TGenericName qualified) const; @@ -72,19 +99,26 @@ namespace NSQLComplete { !Constraints.Pragma && !Constraints.Type && !Constraints.Function && - !Constraints.Hint; + !Constraints.Hint && + !Constraints.Object && + !Constraints.Cluster; } }; struct TNameResponse { TVector RankedNames; + TMaybe NameHintLength; + + bool IsEmpty() const { + return RankedNames.empty(); + } }; class INameService: public TThrRefBase { public: using TPtr = TIntrusivePtr; - virtual TFuture Lookup(TNameRequest request) const = 0; + virtual NThreading::TFuture Lookup(TNameRequest 
request) const = 0; virtual ~INameService() = default; }; diff --git a/yql/essentials/sql/v1/complete/name/service/ranking/ranking.cpp b/yql/essentials/sql/v1/complete/name/service/ranking/ranking.cpp index 3e2dd322522a..6b8aa42bc5e6 100644 --- a/yql/essentials/sql/v1/complete/name/service/ranking/ranking.cpp +++ b/yql/essentials/sql/v1/complete/name/service/ranking/ranking.cpp @@ -23,7 +23,7 @@ namespace NSQLComplete { TVector& names, const TNameConstraints& constraints, size_t limit) const override { - limit = std::min(limit, names.size()); + limit = Min(limit, names.size()); TVector rows; rows.reserve(names.size()); @@ -91,6 +91,15 @@ namespace NSQLComplete { } } + if constexpr (std::is_same_v || + std::is_same_v) { + return std::numeric_limits::max(); + } + + if constexpr (std::is_same_v) { + return std::numeric_limits::max() - 8; + } + return 0; }, name); } @@ -108,6 +117,9 @@ namespace NSQLComplete { if constexpr (std::is_base_of_v) { return name.Indentifier; } + if constexpr (std::is_base_of_v) { + return name.Content; + } }, name); } diff --git a/yql/essentials/sql/v1/complete/name/service/schema/name_service.cpp b/yql/essentials/sql/v1/complete/name/service/schema/name_service.cpp new file mode 100644 index 000000000000..de8e8db65acc --- /dev/null +++ b/yql/essentials/sql/v1/complete/name/service/schema/name_service.cpp @@ -0,0 +1,100 @@ +#include "name_service.h" + +namespace NSQLComplete { + + namespace { + + class TNameService: public INameService { + public: + explicit TNameService(ISchema::TPtr schema) + : Schema_(std::move(schema)) + { + } + + NThreading::TFuture Lookup(TNameRequest request) const override { + if (!request.Constraints.Object) { + return NThreading::MakeFuture({}); + } + + return Schema_ + ->List(ToListRequest(std::move(request))) + .Apply(ToNameResponse); + } + + private: + static TListRequest ToListRequest(TNameRequest request) { + return { + .Cluster = ClusterName(*request.Constraints.Object), + .Path = request.Prefix, + .Filter = ToListFilter(request.Constraints), + .Limit = request.Limit, + }; + } + + static TString ClusterName(const TObjectNameConstraints& constraints) { + TString name = constraints.Cluster; + if (!constraints.Provider.empty()) { + name.prepend(":"); + name.prepend(constraints.Provider); + } + return name; + } + + static TListFilter ToListFilter(const TNameConstraints& constraints) { + TListFilter filter; + filter.Types = THashSet(); + for (auto kind : constraints.Object->Kinds) { + filter.Types->emplace(ToFolderEntry(kind)); + } + return filter; + } + + static TString ToFolderEntry(EObjectKind kind) { + switch (kind) { + case EObjectKind::Folder: + return TFolderEntry::Folder; + case EObjectKind::Table: + return TFolderEntry::Table; + } + } + + static TNameResponse ToNameResponse(NThreading::TFuture f) { + TListResponse list = f.ExtractValue(); + + TNameResponse response; + for (auto& entry : list.Entries) { + response.RankedNames.emplace_back(ToGenericName(std::move(entry))); + } + response.NameHintLength = list.NameHintLength; + return response; + } + + static TGenericName ToGenericName(TFolderEntry entry) { + TGenericName name; + if (entry.Type == TFolderEntry::Folder) { + TFolderName local; + local.Indentifier = std::move(entry.Name); + name = std::move(local); + } else if (entry.Type == TFolderEntry::Table) { + TTableName local; + local.Indentifier = std::move(entry.Name); + name = std::move(local); + } else { + TUnkownName local; + local.Content = std::move(entry.Name); + local.Type = std::move(entry.Type); + name = 
std::move(local); + } + return name; + } + + ISchema::TPtr Schema_; + }; + + } // namespace + + INameService::TPtr MakeSchemaNameService(ISchema::TPtr schema) { + return new TNameService(std::move(schema)); + } + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/name/service/schema/name_service.h b/yql/essentials/sql/v1/complete/name/service/schema/name_service.h new file mode 100644 index 000000000000..aa2d7eb7f319 --- /dev/null +++ b/yql/essentials/sql/v1/complete/name/service/schema/name_service.h @@ -0,0 +1,10 @@ +#pragma once + +#include +#include + +namespace NSQLComplete { + + INameService::TPtr MakeSchemaNameService(ISchema::TPtr schema); + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/name/service/schema/ya.make b/yql/essentials/sql/v1/complete/name/service/schema/ya.make new file mode 100644 index 000000000000..9cdd3aad356f --- /dev/null +++ b/yql/essentials/sql/v1/complete/name/service/schema/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +SRCS( + name_service.cpp +) + +PEERDIR( + yql/essentials/sql/v1/complete/name/object + yql/essentials/sql/v1/complete/name/service +) + +END() diff --git a/yql/essentials/sql/v1/complete/name/service/union/name_service.cpp b/yql/essentials/sql/v1/complete/name/service/union/name_service.cpp index c2373822f6f9..0eadf446545a 100644 --- a/yql/essentials/sql/v1/complete/name/service/union/name_service.cpp +++ b/yql/essentials/sql/v1/complete/name/service/union/name_service.cpp @@ -21,6 +21,7 @@ namespace NSQLComplete { for (const auto& c : Children_) { fs.emplace_back(c->Lookup(request)); } + return NThreading::WaitAll(fs) .Apply([fs, this, request = std::move(request)](auto) { return Union(fs, request.Constraints, request.Limit); @@ -35,9 +36,17 @@ namespace NSQLComplete { TNameResponse united; for (auto f : fs) { TNameResponse response = f.ExtractValue(); + std::ranges::move( response.RankedNames, std::back_inserter(united.RankedNames)); + + if (!response.IsEmpty() && response.NameHintLength) { + Y_ENSURE( + united.NameHintLength.Empty() || + united.NameHintLength == response.NameHintLength); + united.NameHintLength = response.NameHintLength; + } } Ranking_->CropToSortedPrefix(united.RankedNames, constraints, limit); return united; diff --git a/yql/essentials/sql/v1/complete/name/service/ya.make b/yql/essentials/sql/v1/complete/name/service/ya.make index 1f1af9055ae2..ec4de4d5e108 100644 --- a/yql/essentials/sql/v1/complete/name/service/ya.make +++ b/yql/essentials/sql/v1/complete/name/service/ya.make @@ -12,7 +12,9 @@ PEERDIR( END() RECURSE( + cluster ranking + schema static union ) diff --git a/yql/essentials/sql/v1/complete/name/ya.make b/yql/essentials/sql/v1/complete/name/ya.make index 8eb198ffa3d9..0dcc75aabcc0 100644 --- a/yql/essentials/sql/v1/complete/name/ya.make +++ b/yql/essentials/sql/v1/complete/name/ya.make @@ -8,6 +8,7 @@ PEERDIR( END() RECURSE( + cluster object service ) diff --git a/yql/essentials/sql/v1/complete/sql_complete.cpp b/yql/essentials/sql/v1/complete/sql_complete.cpp index 0ec34e212db3..d3941661e44f 100644 --- a/yql/essentials/sql/v1/complete/sql_complete.cpp +++ b/yql/essentials/sql/v1/complete/sql_complete.cpp @@ -16,9 +16,9 @@ namespace NSQLComplete { TLexerSupplier lexer, INameService::TPtr names, ISqlCompletionEngine::TConfiguration configuration) - : Configuration(std::move(configuration)) - , SyntaxAnalysis(MakeLocalSyntaxAnalysis(lexer)) - , Names(std::move(names)) + : Configuration_(std::move(configuration)) + , SyntaxAnalysis_(MakeLocalSyntaxAnalysis(lexer)) + , 
Names_(std::move(names)) { } @@ -36,32 +36,35 @@ namespace NSQLComplete { << " for input size " << input.Text.size(); } - TLocalSyntaxContext context = SyntaxAnalysis->Analyze(input); + TLocalSyntaxContext context = SyntaxAnalysis_->Analyze(input); + auto keywords = context.Keywords; - TStringBuf prefix = input.Text.Head(input.CursorPosition); - TCompletedToken completedToken = GetCompletedToken(prefix); + TNameRequest request = NameRequestFrom(input, context); + if (request.IsEmpty()) { + return NThreading::MakeFuture({ + .CompletedToken = GetCompletedToken(input, context.EditRange), + .Candidates = {}, + }); + } - return GetCandidates(std::move(context), completedToken) - .Apply([completedToken](NThreading::TFuture> f) { - return TCompletion{ - .CompletedToken = std::move(completedToken), - .Candidates = f.ExtractValue(), - }; + return Names_->Lookup(std::move(request)) + .Apply([this, input, context = std::move(context)](auto f) { + return ToCompletion(input, context, f.ExtractValue()); }); } private: - TCompletedToken GetCompletedToken(TStringBuf prefix) const { + TCompletedToken GetCompletedToken(TCompletionInput input, TEditRange editRange) const { return { - .Content = LastWord(prefix), - .SourcePosition = LastWordIndex(prefix), + .Content = input.Text.SubStr(editRange.Begin, editRange.Length), + .SourcePosition = editRange.Begin, }; } - NThreading::TFuture> GetCandidates(TLocalSyntaxContext context, const TCompletedToken& prefix) const { + TNameRequest NameRequestFrom(TCompletionInput input, const TLocalSyntaxContext& context) const { TNameRequest request = { - .Prefix = TString(prefix.Content), - .Limit = Configuration.Limit, + .Prefix = TString(GetCompletedToken(input, context.EditRange).Content), + .Limit = Configuration_.Limit, }; for (const auto& [first, _] : context.Keywords) { @@ -74,7 +77,7 @@ namespace NSQLComplete { request.Constraints.Pragma = std::move(constraints); } - if (context.IsTypeName) { + if (context.Type) { request.Constraints.Type = TTypeName::TConstraints(); } @@ -90,48 +93,109 @@ namespace NSQLComplete { request.Constraints.Hint = std::move(constraints); } - if (request.IsEmpty()) { - return NThreading::MakeFuture>({}); + if (context.Object) { + request.Constraints.Object = TObjectNameConstraints{ + .Provider = context.Object->Provider, + .Cluster = context.Object->Cluster, + .Kinds = context.Object->Kinds, + }; + request.Prefix = context.Object->Path; } - return Names->Lookup(std::move(request)) - .Apply([keywords = std::move(context.Keywords)](NThreading::TFuture f) { - TNameResponse response = f.ExtractValue(); - return Convert(std::move(response.RankedNames), std::move(keywords)); - }); + if (context.Cluster) { + TClusterName::TConstraints constraints; + constraints.Namespace = context.Cluster->Provider; + request.Constraints.Cluster = std::move(constraints); + } + + return request; } - static TVector Convert(TVector names, TLocalSyntaxContext::TKeywords keywords) { + TCompletion ToCompletion( + TCompletionInput input, + TLocalSyntaxContext context, + TNameResponse response) const { + TCompletion completion = { + .CompletedToken = GetCompletedToken(input, context.EditRange), + .Candidates = Convert(std::move(response.RankedNames), std::move(context)), + }; + + if (response.NameHintLength) { + const auto length = *response.NameHintLength; + TEditRange editRange = { + .Begin = input.CursorPosition - length, + .Length = length, + }; + completion.CompletedToken = GetCompletedToken(input, editRange); + } + + return completion; + } + + static TVector 
Convert(TVector names, TLocalSyntaxContext context) { TVector candidates; + candidates.reserve(names.size()); for (auto& name : names) { - candidates.emplace_back(std::visit([&](auto&& name) -> TCandidate { - using T = std::decay_t; - if constexpr (std::is_base_of_v) { - TVector& seq = keywords[name.Content]; - seq.insert(std::begin(seq), name.Content); - return {ECandidateKind::Keyword, FormatKeywords(seq)}; - } - if constexpr (std::is_base_of_v) { - return {ECandidateKind::PragmaName, std::move(name.Indentifier)}; - } - if constexpr (std::is_base_of_v) { - return {ECandidateKind::TypeName, std::move(name.Indentifier)}; - } - if constexpr (std::is_base_of_v) { - name.Indentifier += "("; - return {ECandidateKind::FunctionName, std::move(name.Indentifier)}; - } - if constexpr (std::is_base_of_v) { - return {ECandidateKind::HintName, std::move(name.Indentifier)}; - } - }, std::move(name))); + candidates.emplace_back(Convert(std::move(name), context)); } return candidates; } - TConfiguration Configuration; - ILocalSyntaxAnalysis::TPtr SyntaxAnalysis; - INameService::TPtr Names; + static TCandidate Convert(TGenericName name, TLocalSyntaxContext& context) { + return std::visit([&](auto&& name) -> TCandidate { + using T = std::decay_t; + + if constexpr (std::is_base_of_v) { + TVector& seq = context.Keywords[name.Content]; + seq.insert(std::begin(seq), name.Content); + return {ECandidateKind::Keyword, FormatKeywords(seq)}; + } + + if constexpr (std::is_base_of_v) { + return {ECandidateKind::PragmaName, std::move(name.Indentifier)}; + } + + if constexpr (std::is_base_of_v) { + return {ECandidateKind::TypeName, std::move(name.Indentifier)}; + } + + if constexpr (std::is_base_of_v) { + name.Indentifier += "("; + return {ECandidateKind::FunctionName, std::move(name.Indentifier)}; + } + + if constexpr (std::is_base_of_v) { + return {ECandidateKind::HintName, std::move(name.Indentifier)}; + } + + if constexpr (std::is_base_of_v) { + name.Indentifier.append('/'); + if (!context.Object->IsEnclosed) { + name.Indentifier = Quoted(std::move(name.Indentifier)); + } + return {ECandidateKind::FolderName, std::move(name.Indentifier)}; + } + + if constexpr (std::is_base_of_v) { + if (!context.Object->IsEnclosed) { + name.Indentifier = Quoted(std::move(name.Indentifier)); + } + return {ECandidateKind::TableName, std::move(name.Indentifier)}; + } + + if constexpr (std::is_base_of_v) { + return {ECandidateKind::ClusterName, std::move(name.Indentifier)}; + } + + if constexpr (std::is_base_of_v) { + return {ECandidateKind::UnknownName, std::move(name.Content)}; + } + }, std::move(name)); + } + + TConfiguration Configuration_; + ILocalSyntaxAnalysis::TPtr SyntaxAnalysis_; + INameService::TPtr Names_; }; ISqlCompletionEngine::TPtr MakeSqlCompletionEngine( @@ -162,6 +226,18 @@ void Out(IOutputStream& out, NSQLComplete::ECandid case NSQLComplete::ECandidateKind::HintName: out << "HintName"; break; + case NSQLComplete::ECandidateKind::FolderName: + out << "FolderName"; + break; + case NSQLComplete::ECandidateKind::TableName: + out << "TableName"; + break; + case NSQLComplete::ECandidateKind::ClusterName: + out << "ClusterName"; + break; + case NSQLComplete::ECandidateKind::UnknownName: + out << "UnknownName"; + break; } } diff --git a/yql/essentials/sql/v1/complete/sql_complete.h b/yql/essentials/sql/v1/complete/sql_complete.h index e74f3646ba91..1bc2c0ecf4e5 100644 --- a/yql/essentials/sql/v1/complete/sql_complete.h +++ b/yql/essentials/sql/v1/complete/sql_complete.h @@ -22,6 +22,10 @@ namespace NSQLComplete { TypeName, 
FunctionName, HintName, + FolderName, + TableName, + ClusterName, + UnknownName, }; struct TCandidate { diff --git a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp index 060dfd42addc..a72446779cf9 100644 --- a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp +++ b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp @@ -1,8 +1,15 @@ #include "sql_complete.h" +#include +#include +#include +#include #include #include +#include +#include #include +#include #include #include @@ -14,26 +21,29 @@ using namespace NSQLComplete; -class TDummyException: public std::runtime_error { +class TDummyException: public yexception { public: - TDummyException() - : std::runtime_error("T_T") { + TDummyException() { + Append("T_T"); } }; class TFailingNameService: public INameService { public: - TFuture Lookup(TNameRequest) const override { + NThreading::TFuture Lookup(TNameRequest) const override { auto e = std::make_exception_ptr(TDummyException()); return NThreading::MakeErrorFuture(e); } }; Y_UNIT_TEST_SUITE(SqlCompleteTests) { + using ECandidateKind::ClusterName; + using ECandidateKind::FolderName; using ECandidateKind::FunctionName; using ECandidateKind::HintName; using ECandidateKind::Keyword; using ECandidateKind::PragmaName; + using ECandidateKind::TableName; using ECandidateKind::TypeName; TLexerSupplier MakePureLexerSupplier() { @@ -49,8 +59,13 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { ISqlCompletionEngine::TPtr MakeSqlCompletionEngineUT() { TLexerSupplier lexer = MakePureLexerSupplier(); + TNameSet names = { - .Pragmas = {"yson.CastToString"}, + .Pragmas = { + "yson.CastToString", + "yt.RuntimeCluster", + "yt.RuntimeClusterSelection", + }, .Types = {"Uint64"}, .Functions = { "StartsWith", @@ -62,27 +77,51 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {EStatementKind::Insert, {"EXPIRATION"}}, }, }; - TFrequencyData frequency = {}; - INameService::TPtr service = MakeStaticNameService(std::move(names), std::move(frequency)); - return MakeSqlCompletionEngine(std::move(lexer), std::move(service)); - } - TCompletionInput SharpedInput(TString& text) { - constexpr char delim = '#'; + THashMap>> fss = { + {"", {{"/", {{"Folder", "local"}, + {"Folder", "test"}, + {"Folder", "prod"}, + {"Folder", ".sys"}}}, + {"/local/", {{"Table", "example"}, + {"Table", "account"}, + {"Table", "abacaba"}}}, + {"/test/", {{"Folder", "service"}, + {"Table", "meta"}}}, + {"/test/service/", {{"Table", "example"}}}, + {"/.sys/", {{"Table", "status"}}}}}, + {"example", + {{"/", {{"Table", "people"}}}}}, + {"yt:saurus", + {{"/", {{"Table", "maxim"}}}}}, + }; - size_t pos = text.find_first_of(delim); - if (pos == TString::npos) { - return { - .Text = text, - }; + TVector clusters; + for (const auto& [cluster, _] : fss) { + clusters.emplace_back(cluster); + } + EraseIf(clusters, [](const auto& s) { return s.empty(); }); + + TFrequencyData frequency; + + IRanking::TPtr ranking = MakeDefaultRanking(frequency); + + THashMap schemasByCluster; + for (auto& [cluster, fs] : fss) { + schemasByCluster[std::move(cluster)] = + MakeSimpleSchema( + MakeStaticSimpleSchema(std::move(fs))); } - Y_ENSURE(!TStringBuf(text).Tail(pos + 1).Contains(delim)); - text.erase(std::begin(text) + pos); - return { - .Text = text, - .CursorPosition = pos, + TVector children = { + MakeStaticNameService(std::move(names), frequency), + MakeSchemaNameService(MakeDispatchSchema(std::move(schemasByCluster))), + MakeClusterNameService(MakeStaticClusterDiscovery(std::move(clusters))), }; + + INameService::TPtr service = 
MakeUnionNameService(std::move(children), ranking); + + return MakeSqlCompletionEngine(std::move(lexer), std::move(service)); } TVector Complete(ISqlCompletionEngine::TPtr& engine, TString sharped) { @@ -141,6 +180,17 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { UNIT_ASSERT_VALUES_EQUAL(Complete(engine, ";"), expected); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "; "), expected); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, " ; "), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "#SELECT"), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "#SELECT * FROM"), expected); + } + + Y_UNIT_TEST(Use) { + TVector expected = { + {ClusterName, "example"}, + {ClusterName, "yt:saurus"}, + }; + auto engine = MakeSqlCompletionEngineUT(); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "USE "), expected); } Y_UNIT_TEST(Alter) { @@ -187,6 +237,28 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "CREATE "), expected); } + Y_UNIT_TEST(CreateTable) { + auto engine = MakeSqlCompletionEngineUT(); + { + TVector expected = { + {FolderName, "`.sys/`"}, + {FolderName, "`local/`"}, + {FolderName, "`prod/`"}, + {FolderName, "`test/`"}, + {ClusterName, "example"}, + {ClusterName, "yt:saurus"}, + {Keyword, "IF NOT EXISTS"}, + }; + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "CREATE TABLE #"), expected); + } + { + TVector expected = { + {FolderName, "service/"}, + }; + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "CREATE TABLE `test/#`"), expected); + } + } + Y_UNIT_TEST(Delete) { TVector expected = { {Keyword, "FROM"}, @@ -216,6 +288,21 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "DROP "), expected); } + Y_UNIT_TEST(DropObject) { + TVector expected = { + {FolderName, "`.sys/`"}, + {FolderName, "`local/`"}, + {FolderName, "`prod/`"}, + {FolderName, "`test/`"}, + {ClusterName, "example"}, + {ClusterName, "yt:saurus"}, + {Keyword, "IF EXISTS"}, + }; + auto engine = MakeSqlCompletionEngineUT(); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "DROP TABLE "), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "DROP VIEW "), expected); + } + Y_UNIT_TEST(Explain) { TVector expected = { {Keyword, "ALTER"}, @@ -299,7 +386,9 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { { TVector expected = { {Keyword, "ANSI"}, - {PragmaName, "yson.CastToString"}}; + {PragmaName, "yson.CastToString"}, + {PragmaName, "yt.RuntimeCluster"}, + {PragmaName, "yt.RuntimeClusterSelection"}}; auto completion = engine->CompleteAsync({"PRAGMA "}).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL(completion.Candidates, expected); UNIT_ASSERT_VALUES_EQUAL(completion.CompletedToken.Content, ""); @@ -332,6 +421,23 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { UNIT_ASSERT_VALUES_EQUAL(completion.Candidates, expected); UNIT_ASSERT_VALUES_EQUAL(completion.CompletedToken.Content, "cast"); } + { + TVector expected = { + {PragmaName, "RuntimeCluster"}, + {PragmaName, "RuntimeClusterSelection"}}; + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "pragma yt."), expected); + UNIT_ASSERT_VALUES_EQUAL( + Complete(engine, "pragma yt.RuntimeClusterSelection='force';\npragma yt.Ru"), + expected); + } + { + TVector expected = { + {PragmaName, "RuntimeCluster"}, + {PragmaName, "RuntimeClusterSelection"}}; + UNIT_ASSERT_VALUES_EQUAL( + Complete(engine, "pragma yt.Ru#\n"), + expected); + } } Y_UNIT_TEST(Select) { @@ -377,23 +483,132 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { } Y_UNIT_TEST(SelectFrom) { - TVector expected = { - {Keyword, "ANY"}, - {Keyword, "CALLABLE"}, - {Keyword, "DICT"}, - {Keyword, "ENUM"}, - {Keyword, "FLOW"}, - 
{Keyword, "LIST"}, - {Keyword, "OPTIONAL"}, - {Keyword, "RESOURCE"}, - {Keyword, "SET"}, - {Keyword, "STRUCT"}, - {Keyword, "TAGGED"}, - {Keyword, "TUPLE"}, - {Keyword, "VARIANT"}, - }; auto engine = MakeSqlCompletionEngineUT(); - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SELECT * FROM "), expected); + { + TVector expected = { + {FolderName, "`.sys/`"}, + {FolderName, "`local/`"}, + {FolderName, "`prod/`"}, + {FolderName, "`test/`"}, + {ClusterName, "example"}, + {ClusterName, "yt:saurus"}, + {Keyword, "ANY"}, + {Keyword, "CALLABLE"}, + {Keyword, "DICT"}, + {Keyword, "ENUM"}, + {Keyword, "FLOW"}, + {Keyword, "LIST"}, + {Keyword, "OPTIONAL"}, + {Keyword, "RESOURCE"}, + {Keyword, "SET"}, + {Keyword, "STRUCT"}, + {Keyword, "TAGGED"}, + {Keyword, "TUPLE"}, + {Keyword, "VARIANT"}, + }; + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SELECT * FROM "), expected); + } + { + TVector expected = {}; + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SELECT * FROM `#"), expected); + } + { + TString input = "SELECT * FROM `#`"; + TVector expected = { + {FolderName, ".sys/"}, + {FolderName, "local/"}, + {FolderName, "prod/"}, + {FolderName, "test/"}, + }; + TCompletion actual = engine->Complete(SharpedInput(input)); + UNIT_ASSERT_VALUES_EQUAL(actual.Candidates, expected); + UNIT_ASSERT_VALUES_EQUAL(actual.CompletedToken.Content, ""); + } + { + TString input = "SELECT * FROM `local/#`"; + TVector expected = { + {TableName, "abacaba"}, + {TableName, "account"}, + {TableName, "example"}, + }; + TCompletion actual = engine->Complete(SharpedInput(input)); + UNIT_ASSERT_VALUES_EQUAL(actual.Candidates, expected); + UNIT_ASSERT_VALUES_EQUAL(actual.CompletedToken.Content, ""); + } + { + TString input = "SELECT * FROM `local/a#`"; + TVector expected = { + {TableName, "abacaba"}, + {TableName, "account"}, + }; + TCompletion actual = engine->Complete(SharpedInput(input)); + UNIT_ASSERT_VALUES_EQUAL(actual.Candidates, expected); + UNIT_ASSERT_VALUES_EQUAL(actual.CompletedToken.Content, "a"); + } + { + TString input = "SELECT * FROM `.sy#`"; + TVector expected = { + {FolderName, ".sys/"}, + }; + TCompletion actual = engine->Complete(SharpedInput(input)); + UNIT_ASSERT_VALUES_EQUAL(actual.Candidates, expected); + UNIT_ASSERT_VALUES_EQUAL(actual.CompletedToken.Content, ".sy"); + } + { + TString input = "SELECT * FROM `/test/ser#vice/`"; + TVector expected = { + {FolderName, "service/"}, + }; + TCompletion actual = engine->Complete(SharpedInput(input)); + UNIT_ASSERT_VALUES_EQUAL(actual.Candidates, expected); + UNIT_ASSERT_VALUES_EQUAL(actual.CompletedToken.Content, "ser"); + } + } + + Y_UNIT_TEST(SelectFromCluster) { + auto engine = MakeSqlCompletionEngineUT(); + { + TVector expected = { + {ClusterName, "yt:saurus"}, + }; + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SELECT * FROM yt#"), expected); + } + { + TVector expected = { + {ClusterName, "saurus"}, + }; + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SELECT * FROM yt:"), expected); + } + { + TVector expected = { + {ClusterName, "saurus"}, + }; + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SELECT * FROM yt:saurus#"), expected); + } + { + TVector expected = { + {TableName, "`maxim`"}, + {Keyword, "CALLABLE"}, + {Keyword, "DICT"}, + {Keyword, "ENUM"}, + {Keyword, "FLOW"}, + {Keyword, "LIST"}, + {Keyword, "OPTIONAL"}, + {Keyword, "RESOURCE"}, + {Keyword, "SET"}, + {Keyword, "STRUCT"}, + {Keyword, "TAGGED"}, + {Keyword, "TUPLE"}, + {Keyword, "VARIANT"}, + }; + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SELECT * FROM yt:saurus."), expected); + } + { + TVector expected = { + 
{TableName, "`people`"}, + }; + UNIT_ASSERT_VALUES_EQUAL(CompleteTop(1, engine, "SELECT * FROM example."), expected); + } } Y_UNIT_TEST(SelectWhere) { @@ -445,6 +660,28 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "UPSERT "), expected); } + Y_UNIT_TEST(UpsertInto) { + auto engine = MakeSqlCompletionEngineUT(); + { + TVector expected = { + {FolderName, "`.sys/`"}, + {FolderName, "`local/`"}, + {FolderName, "`prod/`"}, + {FolderName, "`test/`"}, + {ClusterName, "example"}, + {ClusterName, "yt:saurus"}, + }; + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "UPSERT INTO "), expected); + } + { + TVector expected = { + {TableName, "meta"}, + {FolderName, "service/"}, + }; + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "UPSERT INTO `test/#`"), expected); + } + } + Y_UNIT_TEST(TypeName) { TVector expected = { {Keyword, "CALLABLE<("}, @@ -559,6 +796,52 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "INSERT INTO my_table WITH "), expected); } + Y_UNIT_TEST(CursorPosition) { + auto engine = MakeSqlCompletionEngineUT(); + { + TVector expected = { + {Keyword, "AND"}, + {Keyword, "AS"}, + {Keyword, "ASSUME"}, + {Keyword, "BETWEEN"}, + {Keyword, "COLLATE"}, + {Keyword, "EXCEPT"}, + {Keyword, "FROM"}, + {Keyword, "GLOB"}, + {Keyword, "GROUP"}, + {Keyword, "HAVING"}, + {Keyword, "ILIKE"}, + {Keyword, "IN"}, + {Keyword, "INTERSECT"}, + {Keyword, "INTO RESULT"}, + {Keyword, "IS"}, + {Keyword, "ISNULL"}, + {Keyword, "LIKE"}, + {Keyword, "LIMIT"}, + {Keyword, "MATCH"}, + {Keyword, "NOT"}, + {Keyword, "NOTNULL"}, + {Keyword, "OR"}, + {Keyword, "ORDER BY"}, + {Keyword, "REGEXP"}, + {Keyword, "RLIKE"}, + {Keyword, "UNION"}, + {Keyword, "WHERE"}, + {Keyword, "WINDOW"}, + {Keyword, "WITHOUT"}, + {Keyword, "XOR"}, + }; + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SELECT `a`"), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SELECT `a`#FROM"), expected); + } + { + TVector expected = { + {Keyword, "FROM"}, + }; + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SELECT * FROM# "), expected); + } + } + Y_UNIT_TEST(Enclosed) { TVector empty = {}; @@ -634,11 +917,11 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { wchar32 rune; while (ptr < end) { Y_ENSURE(ReadUTF8CharAndAdvance(rune, ptr, end) == RECODE_OK); - TCompletion completion = engine->CompleteAsync({ - .Text = query, - .CursorPosition = static_cast(std::distance(begin, ptr)), - }) - .GetValueSync(); + TCompletionInput input = { + .Text = query, + .CursorPosition = static_cast(std::distance(begin, ptr)), + }; + TCompletion completion = engine->CompleteAsync(input).GetValueSync(); Y_DO_NOT_OPTIMIZE_AWAY(completion); } } diff --git a/yql/essentials/sql/v1/complete/syntax/cursor_token_context.cpp b/yql/essentials/sql/v1/complete/syntax/cursor_token_context.cpp new file mode 100644 index 000000000000..33aef36847a4 --- /dev/null +++ b/yql/essentials/sql/v1/complete/syntax/cursor_token_context.cpp @@ -0,0 +1,160 @@ +#include "cursor_token_context.h" + +#include +#include + +namespace NSQLComplete { + + namespace { + + bool Tokenize(ILexer::TPtr& lexer, TCompletionInput input, TParsedTokenList& tokens) { + NYql::TIssues issues; + if (!NSQLTranslation::Tokenize( + *lexer, TString(input.Text), /* queryName = */ "", + tokens, issues, /* maxErrors = */ 1)) { + return false; + } + return true; + } + + TCursor GetCursor(const TParsedTokenList& tokens, size_t cursorPosition) { + size_t current = 0; + for (size_t i = 0; i < tokens.size() && current < cursorPosition; ++i) { + const auto& content = tokens[i].Content; + + 
current += content.size(); + if (current < cursorPosition) { + continue; + } + + TCursor cursor = { + .PrevTokenIndex = i, + .NextTokenIndex = i, + .Position = cursorPosition, + }; + + if (current == cursorPosition) { + cursor.NextTokenIndex += 1; + } + + return cursor; + } + + return { + .PrevTokenIndex = Nothing(), + .NextTokenIndex = 0, + .Position = cursorPosition, + }; + } + + TVector GetTokenPositions(const TParsedTokenList& tokens) { + TVector positions; + positions.reserve(tokens.size()); + size_t pos = 0; + for (const auto& token : tokens) { + positions.emplace_back(pos); + pos += token.Content.size(); + } + return positions; + } + + } // namespace + + bool TRichParsedToken::IsLiteral() const { + return Base->Name == "STRING_VALUE" || + Base->Name == "DIGIGTS" || + Base->Name == "INTEGER_VALUE" || + Base->Name == "REAL"; + } + + TRichParsedToken TokenAt(const TCursorTokenContext& context, size_t index) { + return { + .Base = &context.Tokens.at(index), + .Index = index, + .Position = context.TokenPositions.at(index), + }; + } + + TMaybe TCursorTokenContext::Enclosing() const { + if (Tokens.size() == 1) { + Y_ENSURE(Tokens[0].Name == "EOF"); + return Nothing(); + } + + if (Cursor.PrevTokenIndex.Empty()) { + return Nothing(); + } + + auto token = TokenAt(*this, *Cursor.PrevTokenIndex); + if (Cursor.PrevTokenIndex == Cursor.NextTokenIndex || + !IsWordBoundary(token.Base->Content.back())) { + return token; + } + + return Nothing(); + } + + TMaybe TCursorTokenContext::MatchCursorPrefix(const TVector& pattern) const { + const auto prefix = std::span{Tokens.begin(), Cursor.NextTokenIndex}; + if (prefix.size() < pattern.size()) { + return Nothing(); + } + + ssize_t i = static_cast(prefix.size()) - 1; + ssize_t j = static_cast(pattern.size()) - 1; + for (; 0 <= j; --i, --j) { + if (!pattern[j].empty() && prefix[i].Name != pattern[j]) { + return Nothing(); + } + } + return TokenAt(*this, prefix.size() - pattern.size()); + } + + bool GetStatement( + ILexer::TPtr& lexer, + TCompletionInput input, + TCompletionInput& output, + size_t& output_position) { + TVector statements; + NYql::TIssues issues; + if (!NSQLTranslationV1::SplitQueryToStatements( + TString(input.Text) + ";", lexer, + statements, issues, /* file = */ "", + /* areBlankSkipped = */ false)) { + return false; + } + + size_t& cursor = output_position; + cursor = 0; + for (const auto& statement : statements) { + if (input.CursorPosition < cursor + statement.size()) { + output = { + .Text = input.Text.SubStr(cursor, statement.size()), + .CursorPosition = input.CursorPosition - cursor, + }; + return true; + } + cursor += statement.size(); + } + + output = input; + return true; + } + + bool GetCursorTokenContext(ILexer::TPtr& lexer, TCompletionInput input, TCursorTokenContext& context) { + TParsedTokenList tokens; + if (!Tokenize(lexer, input, tokens)) { + return false; + } + + TVector positions = GetTokenPositions(tokens); + TCursor cursor = GetCursor(tokens, input.CursorPosition); + context = { + .Tokens = std::move(tokens), + .TokenPositions = std::move(positions), + .Cursor = cursor, + }; + return true; + } + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/syntax/cursor_token_context.h b/yql/essentials/sql/v1/complete/syntax/cursor_token_context.h new file mode 100644 index 000000000000..35d22231e350 --- /dev/null +++ b/yql/essentials/sql/v1/complete/syntax/cursor_token_context.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include + +#include + +#include + +namespace NSQLComplete { + + using 
NSQLTranslation::ILexer; + using NSQLTranslation::TParsedToken; + using NSQLTranslation::TParsedTokenList; + + struct TCursor { + TMaybe PrevTokenIndex = Nothing(); + size_t NextTokenIndex = PrevTokenIndex ? *PrevTokenIndex : 0; + size_t Position = 0; + }; + + struct TRichParsedToken { + const TParsedToken* Base = nullptr; + size_t Index = 0; + size_t Position = 0; + + bool IsLiteral() const; + }; + + struct TCursorTokenContext { + TParsedTokenList Tokens; + TVector TokenPositions; + TCursor Cursor; + + TMaybe Enclosing() const; + TMaybe MatchCursorPrefix(const TVector& pattern) const; + }; + + bool GetStatement( + ILexer::TPtr& lexer, + TCompletionInput input, + TCompletionInput& output, + size_t& output_position); + + bool GetCursorTokenContext( + ILexer::TPtr& lexer, + TCompletionInput input, + TCursorTokenContext& context); + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/syntax/cursor_token_context_ut.cpp b/yql/essentials/sql/v1/complete/syntax/cursor_token_context_ut.cpp new file mode 100644 index 000000000000..0e275cca3b8c --- /dev/null +++ b/yql/essentials/sql/v1/complete/syntax/cursor_token_context_ut.cpp @@ -0,0 +1,50 @@ +#include "cursor_token_context.h" + +#include + +#include +#include + +using namespace NSQLComplete; + +Y_UNIT_TEST_SUITE(CursorTokenContextTests) { + + NSQLTranslation::ILexer::TPtr MakeLexer() { + NSQLTranslationV1::TLexers lexers; + lexers.Antlr4Pure = NSQLTranslationV1::MakeAntlr4PureLexerFactory(); + return NSQLTranslationV1::MakeLexer( + lexers, /* ansi = */ false, /* antlr4 = */ true, + NSQLTranslationV1::ELexerFlavor::Pure); + } + + TCursorTokenContext Context(TString input) { + auto lexer = MakeLexer(); + TCursorTokenContext context; + UNIT_ASSERT(GetCursorTokenContext(lexer, SharpedInput(input), context)); + return context; + } + + Y_UNIT_TEST(Empty) { + auto context = Context(""); + UNIT_ASSERT(context.Cursor.PrevTokenIndex.Empty()); + UNIT_ASSERT_VALUES_EQUAL(context.Cursor.NextTokenIndex, 0); + UNIT_ASSERT_VALUES_EQUAL(context.Cursor.Position, 0); + UNIT_ASSERT(context.Enclosing().Empty()); + } + + Y_UNIT_TEST(Blank) { + UNIT_ASSERT(Context("# ").Enclosing().Empty()); + UNIT_ASSERT(Context(" #").Enclosing().Empty()); + UNIT_ASSERT(Context(" # ").Enclosing().Empty()); + } + + Y_UNIT_TEST(Enclosing) { + UNIT_ASSERT(Context("se#").Enclosing().Defined()); + UNIT_ASSERT(Context("#se").Enclosing().Empty()); + UNIT_ASSERT(Context("`se`#").Enclosing().Empty()); + UNIT_ASSERT(Context("#`se`").Enclosing().Empty()); + UNIT_ASSERT(Context("`se`#`se`").Enclosing().Defined()); + UNIT_ASSERT(Context("\"se\"#\"se\"").Enclosing().Empty()); + } + +} // Y_UNIT_TEST_SUITE(CursorTokenContextTests) diff --git a/yql/essentials/sql/v1/complete/syntax/format.cpp b/yql/essentials/sql/v1/complete/syntax/format.cpp index 1c9f146c923d..43c36aea9dd5 100644 --- a/yql/essentials/sql/v1/complete/syntax/format.cpp +++ b/yql/essentials/sql/v1/complete/syntax/format.cpp @@ -35,4 +35,17 @@ namespace NSQLComplete { return text; } + TString Quoted(TString content) { + content.prepend('`'); + content.append('`'); + return content; + } + + TString Unquoted(TString content) { + Y_ENSURE(2 <= content.size() && content.front() == '`' && content.back() == '`'); + content.erase(0, 1); + content.pop_back(); + return content; + } + } // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/syntax/format.h b/yql/essentials/sql/v1/complete/syntax/format.h index 6c2f1b72ac21..58e5d1f1e4a3 100644 --- a/yql/essentials/sql/v1/complete/syntax/format.h +++ 
b/yql/essentials/sql/v1/complete/syntax/format.h @@ -6,5 +6,7 @@ namespace NSQLComplete { TString FormatKeywords(const TVector& seq); + TString Quoted(TString content); + TString Unquoted(TString content); } // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/syntax/grammar.cpp b/yql/essentials/sql/v1/complete/syntax/grammar.cpp index 252deaf682cb..c080fae5ae47 100644 --- a/yql/essentials/sql/v1/complete/syntax/grammar.cpp +++ b/yql/essentials/sql/v1/complete/syntax/grammar.cpp @@ -7,31 +7,31 @@ namespace NSQLComplete { class TSqlGrammar: public ISqlGrammar { public: TSqlGrammar(const NSQLReflect::TLexerGrammar& grammar) - : Parser(MakeDummyParser()) - , AllTokens(ComputeAllTokens()) - , KeywordTokens(ComputeKeywordTokens(grammar)) - , PunctuationTokens(ComputePunctuationTokens(grammar)) + : Parser_(MakeDummyParser()) + , AllTokens_(ComputeAllTokens()) + , KeywordTokens_(ComputeKeywordTokens(grammar)) + , PunctuationTokens_(ComputePunctuationTokens(grammar)) { } const antlr4::dfa::Vocabulary& GetVocabulary() const override { - return Parser->getVocabulary(); + return Parser_->getVocabulary(); } const std::unordered_set& GetAllTokens() const override { - return AllTokens; + return AllTokens_; } const std::unordered_set& GetKeywordTokens() const override { - return KeywordTokens; + return KeywordTokens_; } const std::unordered_set& GetPunctuationTokens() const override { - return PunctuationTokens; + return PunctuationTokens_; } const std::string& SymbolizedRule(TRuleId rule) const override { - return Parser->getRuleNames().at(rule); + return Parser_->getRuleNames().at(rule); } private: @@ -76,10 +76,10 @@ namespace NSQLComplete { return punctuationTokens; } - const THolder Parser; - const std::unordered_set AllTokens; - const std::unordered_set KeywordTokens; - const std::unordered_set PunctuationTokens; + const THolder Parser_; + const std::unordered_set AllTokens_; + const std::unordered_set KeywordTokens_; + const std::unordered_set PunctuationTokens_; }; const ISqlGrammar& GetSqlGrammar() { diff --git a/yql/essentials/sql/v1/complete/syntax/local.cpp b/yql/essentials/sql/v1/complete/syntax/local.cpp index c434fa28daf8..549208d4cabc 100644 --- a/yql/essentials/sql/v1/complete/syntax/local.cpp +++ b/yql/essentials/sql/v1/complete/syntax/local.cpp @@ -1,9 +1,10 @@ #include "local.h" #include "ansi.h" +#include "cursor_token_context.h" +#include "format.h" #include "grammar.h" #include "parser_call_stack.h" -#include "token.h" #include #include @@ -49,65 +50,77 @@ namespace NSQLComplete { public: explicit TSpecializedLocalSyntaxAnalysis(TLexerSupplier lexer) - : Grammar(&GetSqlGrammar()) + : Grammar_(&GetSqlGrammar()) , Lexer_(lexer(/* ansi = */ IsAnsiLexer)) - , C3(ComputeC3Config()) + , C3_(ComputeC3Config()) { } TLocalSyntaxContext Analyze(TCompletionInput input) override { TCompletionInput statement; - if (!GetStatement(Lexer_, input, statement)) { + size_t statement_position; + if (!GetStatement(Lexer_, input, statement, statement_position)) { return {}; } - auto candidates = C3.Complete(statement); - - TParsedTokenList tokens; - TCaretTokenPosition caret; - if (!TokenizePrefix(statement, tokens, caret)) { + TCursorTokenContext context; + if (!GetCursorTokenContext(Lexer_, statement, context)) { return {}; } - if (IsCaretEnslosed(tokens, caret)) { - return {}; + TC3Candidates candidates = C3_.Complete(statement); + + TLocalSyntaxContext result; + + result.EditRange = EditRange(context); + result.EditRange.Begin += statement_position; + + if (auto enclosing = 
context.Enclosing()) { + if (enclosing->IsLiteral()) { + return result; + } else if (enclosing->Base->Name == "ID_QUOTED") { + result.Object = ObjectMatch(context, candidates); + return result; + } } - return { - .Keywords = SiftedKeywords(candidates), - .Pragma = PragmaMatch(tokens, candidates), - .IsTypeName = IsTypeNameMatched(candidates), - .Function = FunctionMatch(tokens, candidates), - .Hint = HintMatch(candidates), - }; + result.Keywords = SiftedKeywords(candidates); + result.Pragma = PragmaMatch(context, candidates); + result.Type = TypeMatch(candidates); + result.Function = FunctionMatch(context, candidates); + result.Hint = HintMatch(candidates); + result.Object = ObjectMatch(context, candidates); + result.Cluster = ClusterMatch(context, candidates); + + return result; } private: - IC3Engine::TConfig ComputeC3Config() { + IC3Engine::TConfig ComputeC3Config() const { return { .IgnoredTokens = ComputeIgnoredTokens(), .PreferredRules = ComputePreferredRules(), }; } - std::unordered_set ComputeIgnoredTokens() { - auto ignoredTokens = Grammar->GetAllTokens(); - for (auto keywordToken : Grammar->GetKeywordTokens()) { + std::unordered_set ComputeIgnoredTokens() const { + auto ignoredTokens = Grammar_->GetAllTokens(); + for (auto keywordToken : Grammar_->GetKeywordTokens()) { ignoredTokens.erase(keywordToken); } - for (auto punctuationToken : Grammar->GetPunctuationTokens()) { + for (auto punctuationToken : Grammar_->GetPunctuationTokens()) { ignoredTokens.erase(punctuationToken); } return ignoredTokens; } - std::unordered_set ComputePreferredRules() { + std::unordered_set ComputePreferredRules() const { return GetC3PreferredRules(); } - TLocalSyntaxContext::TKeywords SiftedKeywords(const TC3Candidates& candidates) { - const auto& vocabulary = Grammar->GetVocabulary(); - const auto& keywordTokens = Grammar->GetKeywordTokens(); + TLocalSyntaxContext::TKeywords SiftedKeywords(const TC3Candidates& candidates) const { + const auto& vocabulary = Grammar_->GetVocabulary(); + const auto& keywordTokens = Grammar_->GetKeywordTokens(); TLocalSyntaxContext::TKeywords keywords; for (const auto& token : candidates.Tokens) { @@ -122,40 +135,41 @@ namespace NSQLComplete { } TMaybe PragmaMatch( - const TParsedTokenList& tokens, const TC3Candidates& candidates) { + const TCursorTokenContext& context, const TC3Candidates& candidates) const { if (!AnyOf(candidates.Rules, RuleAdapted(IsLikelyPragmaStack))) { return Nothing(); } TLocalSyntaxContext::TPragma pragma; - if (EndsWith(tokens, {"ID_PLAIN", "DOT"})) { - pragma.Namespace = tokens[tokens.size() - 2].Content; - } else if (EndsWith(tokens, {"ID_PLAIN", "DOT", ""})) { - pragma.Namespace = tokens[tokens.size() - 3].Content; + + if (TMaybe begin; + (begin = context.MatchCursorPrefix({"ID_PLAIN", "DOT"})) || + (begin = context.MatchCursorPrefix({"ID_PLAIN", "DOT", ""}))) { + pragma.Namespace = begin->Base->Content; } return pragma; } - bool IsTypeNameMatched(const TC3Candidates& candidates) { + bool TypeMatch(const TC3Candidates& candidates) const { return AnyOf(candidates.Rules, RuleAdapted(IsLikelyTypeStack)); } TMaybe FunctionMatch( - const TParsedTokenList& tokens, const TC3Candidates& candidates) { + const TCursorTokenContext& context, const TC3Candidates& candidates) const { if (!AnyOf(candidates.Rules, RuleAdapted(IsLikelyFunctionStack))) { return Nothing(); } TLocalSyntaxContext::TFunction function; - if (EndsWith(tokens, {"ID_PLAIN", "NAMESPACE"})) { - function.Namespace = tokens[tokens.size() - 2].Content; - } else if (EndsWith(tokens, 
{"ID_PLAIN", "NAMESPACE", ""})) { - function.Namespace = tokens[tokens.size() - 3].Content; + if (TMaybe begin; + (begin = context.MatchCursorPrefix({"ID_PLAIN", "NAMESPACE"})) || + (begin = context.MatchCursorPrefix({"ID_PLAIN", "NAMESPACE", ""}))) { + function.Namespace = begin->Base->Content; } return function; } - TMaybe HintMatch(const TC3Candidates& candidates) { + TMaybe HintMatch(const TC3Candidates& candidates) const { // TODO(YQL-19747): detect local contexts with a single iteration through the candidates.Rules auto rule = FindIf(candidates.Rules, RuleAdapted(IsLikelyHintStack)); if (rule == std::end(candidates.Rules)) { @@ -172,45 +186,103 @@ namespace NSQLComplete { }; } - bool TokenizePrefix(TCompletionInput input, TParsedTokenList& tokens, TCaretTokenPosition& caret) { - NYql::TIssues issues; - if (!NSQLTranslation::Tokenize( - *Lexer_, TString(input.Text), /* queryName = */ "", - tokens, issues, /* maxErrors = */ 1)) { - return false; + TMaybe ObjectMatch( + const TCursorTokenContext& context, const TC3Candidates& candidates) const { + TLocalSyntaxContext::TObject object; + + if (AnyOf(candidates.Rules, RuleAdapted(IsLikelyObjectRefStack))) { + object.Kinds.emplace(EObjectKind::Folder); + } + + if (AnyOf(candidates.Rules, RuleAdapted(IsLikelyExistingTableStack))) { + object.Kinds.emplace(EObjectKind::Folder); + object.Kinds.emplace(EObjectKind::Table); + } + + if (object.Kinds.empty()) { + return Nothing(); } - Y_ENSURE(!tokens.empty() && tokens.back().Name == "EOF"); - tokens.pop_back(); + if (TMaybe begin; + (begin = context.MatchCursorPrefix({"ID_PLAIN", "DOT"})) || + (begin = context.MatchCursorPrefix({"ID_PLAIN", "DOT", ""}))) { + object.Cluster = begin->Base->Content; + } - caret = CaretTokenPosition(tokens, input.CursorPosition); - tokens.crop(caret.NextTokenIndex + 1); - return true; + if (TMaybe begin; + (begin = context.MatchCursorPrefix({"ID_PLAIN", "COLON", "ID_PLAIN", "DOT"})) || + (begin = context.MatchCursorPrefix({"ID_PLAIN", "COLON", "ID_PLAIN", "DOT", ""}))) { + object.Provider = begin->Base->Content; + } + + if (auto path = ObjectPath(context)) { + object.Path = *path; + object.IsEnclosed = true; + } + + return object; + } + + TMaybe ObjectPath(const TCursorTokenContext& context) const { + if (auto enclosing = context.Enclosing()) { + TString path = enclosing->Base->Content; + if (enclosing->Base->Name == "ID_QUOTED") { + path = Unquoted(std::move(path)); + } + path.resize(context.Cursor.Position - enclosing->Position - 1); + return path; + } + return Nothing(); } - bool IsCaretEnslosed(const TParsedTokenList& tokens, TCaretTokenPosition caret) { - if (tokens.empty() || caret.PrevTokenIndex != caret.NextTokenIndex) { - return false; + TMaybe ClusterMatch( + const TCursorTokenContext& context, const TC3Candidates& candidates) const { + if (!AnyOf(candidates.Rules, RuleAdapted(IsLikelyClusterStack))) { + return Nothing(); } - const auto& token = tokens.back(); - return token.Name == "STRING_VALUE" || - token.Name == "ID_QUOTED" || - token.Name == "DIGIGTS" || - token.Name == "INTEGER_VALUE" || - token.Name == "REAL"; + TLocalSyntaxContext::TCluster cluster; + if (TMaybe begin; + (begin = context.MatchCursorPrefix({"ID_PLAIN", "COLON"})) || + (begin = context.MatchCursorPrefix({"ID_PLAIN", "COLON", ""}))) { + cluster.Provider = begin->Base->Content; + } + return cluster; + } + + TEditRange EditRange(const TCursorTokenContext& context) const { + if (auto enclosing = context.Enclosing()) { + return EditRange(*enclosing, context.Cursor); + } + + return { + 
.Begin = context.Cursor.Position, + .Length = 0, + }; + } + + TEditRange EditRange(const TRichParsedToken& token, const TCursor& cursor) const { + size_t begin = token.Position; + if (token.Base->Name == "NOT_EQUALS2") { + begin += 1; + } + + return { + .Begin = begin, + .Length = cursor.Position - begin, + }; } - const ISqlGrammar* Grammar; + const ISqlGrammar* Grammar_; NSQLTranslation::ILexer::TPtr Lexer_; - TC3Engine C3; + TC3Engine C3_; }; class TLocalSyntaxAnalysis: public ILocalSyntaxAnalysis { public: explicit TLocalSyntaxAnalysis(TLexerSupplier lexer) - : DefaultEngine(lexer) - , AnsiEngine(lexer) + : DefaultEngine_(lexer) + , AnsiEngine_(lexer) { } @@ -223,13 +295,13 @@ namespace NSQLComplete { private: ILocalSyntaxAnalysis& GetSpecializedEngine(bool isAnsiLexer) { if (isAnsiLexer) { - return AnsiEngine; + return AnsiEngine_; } - return DefaultEngine; + return DefaultEngine_; } - TSpecializedLocalSyntaxAnalysis DefaultEngine; - TSpecializedLocalSyntaxAnalysis AnsiEngine; + TSpecializedLocalSyntaxAnalysis DefaultEngine_; + TSpecializedLocalSyntaxAnalysis AnsiEngine_; }; ILocalSyntaxAnalysis::TPtr MakeLocalSyntaxAnalysis(TLexerSupplier lexer) { diff --git a/yql/essentials/sql/v1/complete/syntax/local.h b/yql/essentials/sql/v1/complete/syntax/local.h index d58b62c62cd5..8f88d5aa71c5 100644 --- a/yql/essentials/sql/v1/complete/syntax/local.h +++ b/yql/essentials/sql/v1/complete/syntax/local.h @@ -1,15 +1,22 @@ #pragma once +#include #include #include #include #include +#include #include namespace NSQLComplete { + struct TEditRange { + size_t Begin = 0; + size_t Length = 0; + }; + struct TLocalSyntaxContext { using TKeywords = THashMap>; @@ -25,11 +32,26 @@ namespace NSQLComplete { EStatementKind StatementKind; }; + struct TCluster { + TString Provider; + }; + + struct TObject { + TString Provider; + TString Cluster; + TString Path; + THashSet Kinds; + bool IsEnclosed = false; + }; + TKeywords Keywords; TMaybe Pragma; - bool IsTypeName = false; + bool Type = false; TMaybe Function; TMaybe Hint; + TMaybe Object; + TMaybe Cluster; + TEditRange EditRange; }; class ILocalSyntaxAnalysis { diff --git a/yql/essentials/sql/v1/complete/syntax/parser_call_stack.cpp b/yql/essentials/sql/v1/complete/syntax/parser_call_stack.cpp index 938483438b1c..ce6c94306d49 100644 --- a/yql/essentials/sql/v1/complete/syntax/parser_call_stack.cpp +++ b/yql/essentials/sql/v1/complete/syntax/parser_call_stack.cpp @@ -13,7 +13,7 @@ namespace NSQLComplete { - const TVector KeywordRules = { + const TVector PreferredRules = { RULE(Keyword), RULE(Keyword_expr_uncompat), RULE(Keyword_table_uncompat), @@ -24,27 +24,13 @@ namespace NSQLComplete { RULE(Keyword_hint_uncompat), RULE(Keyword_as_compat), RULE(Keyword_compat), - }; - - const TVector PragmaNameRules = { - RULE(Opt_id_prefix_or_type), - RULE(An_id), - }; - - const TVector TypeNameRules = { - RULE(Type_name_simple), RULE(An_id_or_type), - }; - - const TVector FunctionNameRules = { + RULE(An_id), RULE(Id_expr), - RULE(An_id_or_type), RULE(Id_or_type), - }; - - const TVector HintNameRules = { RULE(Id_hint), - RULE(An_id), + RULE(Opt_id_prefix_or_type), + RULE(Type_name_simple), }; TVector Symbolized(const TParserCallStack& stack) { @@ -101,6 +87,26 @@ namespace NSQLComplete { Contains({RULE(External_call_param), RULE(An_id)}, stack); } + bool IsLikelyObjectRefStack(const TParserCallStack& stack) { + return Contains({RULE(Object_ref)}, stack); + } + + bool IsLikelyExistingTableStack(const TParserCallStack& stack) { + return !Contains({RULE(Create_table_stmt), + 
RULE(Simple_table_ref)}, stack) && + (Contains({RULE(Simple_table_ref), + RULE(Simple_table_ref_core), + RULE(Object_ref)}, stack) || + Contains({RULE(Single_source), + RULE(Table_ref), + RULE(Table_key), + RULE(Id_table_or_type)}, stack)); + } + + bool IsLikelyClusterStack(const TParserCallStack& stack) { + return Contains({RULE(Cluster_expr)}, stack); + } + TMaybe StatementKindOf(const TParserCallStack& stack) { for (TRuleId rule : std::ranges::views::reverse(stack)) { if (rule == RULE(Process_core) || rule == RULE(Reduce_core) || rule == RULE(Select_core)) { @@ -115,10 +121,7 @@ namespace NSQLComplete { std::unordered_set GetC3PreferredRules() { std::unordered_set preferredRules; - preferredRules.insert(std::begin(KeywordRules), std::end(KeywordRules)); - preferredRules.insert(std::begin(PragmaNameRules), std::end(PragmaNameRules)); - preferredRules.insert(std::begin(TypeNameRules), std::end(TypeNameRules)); - preferredRules.insert(std::begin(FunctionNameRules), std::end(FunctionNameRules)); + preferredRules.insert(std::begin(PreferredRules), std::end(PreferredRules)); return preferredRules; } diff --git a/yql/essentials/sql/v1/complete/syntax/parser_call_stack.h b/yql/essentials/sql/v1/complete/syntax/parser_call_stack.h index d185b72d6281..d44b824a05eb 100644 --- a/yql/essentials/sql/v1/complete/syntax/parser_call_stack.h +++ b/yql/essentials/sql/v1/complete/syntax/parser_call_stack.h @@ -15,6 +15,12 @@ namespace NSQLComplete { bool IsLikelyHintStack(const TParserCallStack& stack); + bool IsLikelyObjectRefStack(const TParserCallStack& stack); + + bool IsLikelyExistingTableStack(const TParserCallStack& stack); + + bool IsLikelyClusterStack(const TParserCallStack& stack); + TMaybe StatementKindOf(const TParserCallStack& stack); std::unordered_set GetC3PreferredRules(); diff --git a/yql/essentials/sql/v1/complete/syntax/token.cpp b/yql/essentials/sql/v1/complete/syntax/token.cpp deleted file mode 100644 index b8aee3211c68..000000000000 --- a/yql/essentials/sql/v1/complete/syntax/token.cpp +++ /dev/null @@ -1,60 +0,0 @@ -#include "token.h" - -#include -#include - -namespace NSQLComplete { - - bool GetStatement(NSQLTranslation::ILexer::TPtr& lexer, TCompletionInput input, TCompletionInput& output) { - TVector statements; - NYql::TIssues issues; - if (!NSQLTranslationV1::SplitQueryToStatements( - TString(input.Text) + ";", lexer, - statements, issues, /* file = */ "", - /* areBlankSkipped = */ false)) { - return false; - } - - size_t cursor = 0; - for (const auto& statement : statements) { - if (input.CursorPosition < cursor + statement.size()) { - output = { - .Text = input.Text.SubStr(cursor, statement.size()), - .CursorPosition = input.CursorPosition - cursor, - }; - return true; - } - cursor += statement.size(); - } - - output = input; - return true; - } - - TCaretTokenPosition CaretTokenPosition(const TParsedTokenList& tokens, size_t cursorPosition) { - size_t cursor = 0; - for (size_t i = 0; i < tokens.size(); ++i) { - const auto& content = tokens[i].Content; - cursor += content.size(); - if (cursorPosition < cursor) { - return {i, i}; - } else if (cursorPosition == cursor && IsWordBoundary(content.back())) { - return {i, i + 1}; - } - } - return {std::max(tokens.size(), static_cast(1)) - 1, tokens.size()}; - } - - bool EndsWith(const TParsedTokenList& tokens, const TVector& pattern) { - if (tokens.size() < pattern.size()) { - return false; - } - for (yssize_t i = tokens.ysize() - 1, j = pattern.ysize() - 1; 0 <= j; --i, --j) { - if (!pattern[j].empty() && tokens[i].Name != 
pattern[j]) { - return false; - } - } - return true; - } - -} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/syntax/token.h b/yql/essentials/sql/v1/complete/syntax/token.h deleted file mode 100644 index d1e215285a94..000000000000 --- a/yql/essentials/sql/v1/complete/syntax/token.h +++ /dev/null @@ -1,24 +0,0 @@ -#pragma once - -#include -#include - -#include - -namespace NSQLComplete { - - using NSQLTranslation::TParsedTokenList; - - // `PrevTokenIndex` = `NextTokenIndex`, iff caret is enclosed - struct TCaretTokenPosition { - size_t PrevTokenIndex; - size_t NextTokenIndex; - }; - - bool GetStatement(NSQLTranslation::ILexer::TPtr& lexer, TCompletionInput input, TCompletionInput& output); - - TCaretTokenPosition CaretTokenPosition(const TParsedTokenList& tokens, size_t cursorPosition); - - bool EndsWith(const TParsedTokenList& tokens, const TVector& pattern); - -} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/syntax/ut/ya.make b/yql/essentials/sql/v1/complete/syntax/ut/ya.make index e070185af9f6..7e682c5bac08 100644 --- a/yql/essentials/sql/v1/complete/syntax/ut/ya.make +++ b/yql/essentials/sql/v1/complete/syntax/ut/ya.make @@ -2,6 +2,11 @@ UNITTEST_FOR(yql/essentials/sql/v1/complete/syntax) SRCS( grammar_ut.cpp + cursor_token_context_ut.cpp +) + +PEERDIR( + yql/essentials/sql/v1/lexer/antlr4_pure ) END() diff --git a/yql/essentials/sql/v1/complete/syntax/ya.make b/yql/essentials/sql/v1/complete/syntax/ya.make index 9e2e908454bc..7f63e5b23749 100644 --- a/yql/essentials/sql/v1/complete/syntax/ya.make +++ b/yql/essentials/sql/v1/complete/syntax/ya.make @@ -2,11 +2,11 @@ LIBRARY() SRCS( ansi.cpp + cursor_token_context.cpp format.cpp grammar.cpp local.cpp parser_call_stack.cpp - token.cpp ) ADDINCL( @@ -21,6 +21,8 @@ PEERDIR( yql/essentials/sql/settings yql/essentials/sql/v1/lexer yql/essentials/sql/v1/reflect + yql/essentials/sql/v1/complete/core + yql/essentials/sql/v1/complete/text ) END() diff --git a/yql/essentials/sql/v1/complete/ut/ya.make b/yql/essentials/sql/v1/complete/ut/ya.make index fbb84f56f257..c978e6e60482 100644 --- a/yql/essentials/sql/v1/complete/ut/ya.make +++ b/yql/essentials/sql/v1/complete/ut/ya.make @@ -7,7 +7,14 @@ SRCS( PEERDIR( yql/essentials/sql/v1/lexer/antlr4_pure yql/essentials/sql/v1/lexer/antlr4_pure_ansi + yql/essentials/sql/v1/complete/name/cluster/static + yql/essentials/sql/v1/complete/name/object/dispatch + yql/essentials/sql/v1/complete/name/object/simple + yql/essentials/sql/v1/complete/name/object/simple/static + yql/essentials/sql/v1/complete/name/service/cluster + yql/essentials/sql/v1/complete/name/service/schema yql/essentials/sql/v1/complete/name/service/static + yql/essentials/sql/v1/complete/name/service/union ) END() diff --git a/yql/essentials/tools/yql_complete/yql_complete b/yql/essentials/tools/yql_complete/yql_complete new file mode 100644 index 000000000000..7fc1116ee5d7 --- /dev/null +++ b/yql/essentials/tools/yql_complete/yql_complete @@ -0,0 +1 @@ +/home/vityaman/.ya/build/symres/bbe5c007c4bcc83d4396e13689e6b39b/yql_complete \ No newline at end of file From 088287dd7674404aa3b278c671bc68ee1dd29e0d Mon Sep 17 00:00:00 2001 From: dimdim11 Date: Tue, 6 May 2025 15:59:47 +0300 Subject: [PATCH 09/24] Delombok after codegen Delombok after codegen commit_hash:98ded9e606f0069d27034c6b7ff30ae61dea486d --- build/export_generators/ide-gradle/codegen.jinja | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/build/export_generators/ide-gradle/codegen.jinja 
b/build/export_generators/ide-gradle/codegen.jinja index 4df2ad85a59c..951fc4b5ad18 100644 --- a/build/export_generators/ide-gradle/codegen.jinja +++ b/build/export_generators/ide-gradle/codegen.jinja @@ -13,6 +13,10 @@ tasks.compileJava.configure { tasks.compileTestJava.configure { dependsOn({{ taskvar }}) } +{%- if current_target.use_annotation_processor|select('startsWith', 'contrib/java/org/projectlombok/lombok')|length %} +tasks.getByName("delombok").mustRunAfter({{ taskvar }}) +{%- endif -%} + {%- if with_kotlin -%} {#- Check main target codegen -#} {%- if varprefix == "codegen" %} From 80fbb0a2adc03fb5a859c4e339becee255eccf71 Mon Sep 17 00:00:00 2001 From: robot-ya-builder Date: Tue, 6 May 2025 16:52:16 +0300 Subject: [PATCH 10/24] External build system generator release 113 Update tools: yexport, os-yexport commit_hash:b92c1390d3fc313ae80cceb82c246f257cbecce5 --- build/external_resources/yexport/public.resources.json | 6 +++--- build/external_resources/yexport/resources.json | 6 +++--- build/mapping.conf.json | 6 ++++++ 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/build/external_resources/yexport/public.resources.json b/build/external_resources/yexport/public.resources.json index 02b4f3014ee8..895e689e761e 100644 --- a/build/external_resources/yexport/public.resources.json +++ b/build/external_resources/yexport/public.resources.json @@ -1,13 +1,13 @@ { "by_platform": { "darwin": { - "uri": "sbr:8632444017" + "uri": "sbr:8661633603" }, "darwin-arm64": { - "uri": "sbr:8632441113" + "uri": "sbr:8661630683" }, "linux": { - "uri": "sbr:8632437510" + "uri": "sbr:8661627954" } } } diff --git a/build/external_resources/yexport/resources.json b/build/external_resources/yexport/resources.json index b29b46e18cf8..149b9f90efb8 100644 --- a/build/external_resources/yexport/resources.json +++ b/build/external_resources/yexport/resources.json @@ -1,13 +1,13 @@ { "by_platform": { "darwin": { - "uri": "sbr:8632348873" + "uri": "sbr:8661569123" }, "darwin-arm64": { - "uri": "sbr:8632345760" + "uri": "sbr:8661565983" }, "linux": { - "uri": "sbr:8632343302" + "uri": "sbr:8661563169" } } } diff --git a/build/mapping.conf.json b/build/mapping.conf.json index 19ad3a545d39..f355753909b2 100644 --- a/build/mapping.conf.json +++ b/build/mapping.conf.json @@ -613,6 +613,7 @@ "8477042967": "{registry_endpoint}/8477042967", "8628672485": "{registry_endpoint}/8628672485", "8632444017": "{registry_endpoint}/8632444017", + "8661633603": "{registry_endpoint}/8661633603", "5811823398": "{registry_endpoint}/5811823398", "5840611310": "{registry_endpoint}/5840611310", "5860185593": "{registry_endpoint}/5860185593", @@ -645,6 +646,7 @@ "8477040717": "{registry_endpoint}/8477040717", "8628670306": "{registry_endpoint}/8628670306", "8632441113": "{registry_endpoint}/8632441113", + "8661630683": "{registry_endpoint}/8661630683", "5811822876": "{registry_endpoint}/5811822876", "5840610640": "{registry_endpoint}/5840610640", "5860184285": "{registry_endpoint}/5860184285", @@ -677,6 +679,7 @@ "8477038461": "{registry_endpoint}/8477038461", "8628668563": "{registry_endpoint}/8628668563", "8632437510": "{registry_endpoint}/8632437510", + "8661627954": "{registry_endpoint}/8661627954", "5766172292": "{registry_endpoint}/5766172292", "5805431504": "{registry_endpoint}/5805431504", "5829027626": "{registry_endpoint}/5829027626", @@ -2005,6 +2008,7 @@ "8477042967": "devtools/yexport/bin/yexport for darwin", "8628672485": "devtools/yexport/bin/yexport for darwin", "8632444017": "devtools/yexport/bin/yexport for 
darwin", + "8661633603": "devtools/yexport/bin/yexport for darwin", "5811823398": "devtools/yexport/bin/yexport for darwin-arm64", "5840611310": "devtools/yexport/bin/yexport for darwin-arm64", "5860185593": "devtools/yexport/bin/yexport for darwin-arm64", @@ -2037,6 +2041,7 @@ "8477040717": "devtools/yexport/bin/yexport for darwin-arm64", "8628670306": "devtools/yexport/bin/yexport for darwin-arm64", "8632441113": "devtools/yexport/bin/yexport for darwin-arm64", + "8661630683": "devtools/yexport/bin/yexport for darwin-arm64", "5811822876": "devtools/yexport/bin/yexport for linux", "5840610640": "devtools/yexport/bin/yexport for linux", "5860184285": "devtools/yexport/bin/yexport for linux", @@ -2069,6 +2074,7 @@ "8477038461": "devtools/yexport/bin/yexport for linux", "8628668563": "devtools/yexport/bin/yexport for linux", "8632437510": "devtools/yexport/bin/yexport for linux", + "8661627954": "devtools/yexport/bin/yexport for linux", "5766172292": "devtools/ymake/bin/ymake for darwin", "5805431504": "devtools/ymake/bin/ymake for darwin", "5829027626": "devtools/ymake/bin/ymake for darwin", From 1d3ad0cef75fcbd4cc7048d2106b36c0db59eb45 Mon Sep 17 00:00:00 2001 From: robot-contrib Date: Tue, 6 May 2025 16:52:59 +0300 Subject: [PATCH 11/24] Update contrib/restricted/nlohmann_json to 3.12.0 commit_hash:029cc01f8163629b1a436aee7506eb7708bdad98 --- .../.yandex_meta/devtools.copyrights.report | 159 ++++---- .../.yandex_meta/devtools.licenses.report | 100 +++-- .../.yandex_meta/licenses.list.txt | 55 +-- .../nlohmann_json/.yandex_meta/override.nix | 4 +- contrib/restricted/nlohmann_json/CITATION.cff | 4 +- contrib/restricted/nlohmann_json/ChangeLog.md | 355 ++++++++++++++++- contrib/restricted/nlohmann_json/FILES.md | 239 ++++++++++++ contrib/restricted/nlohmann_json/LICENSE.MIT | 2 +- contrib/restricted/nlohmann_json/README.md | 359 ++++++++---------- .../include/nlohmann/adl_serializer.hpp | 4 +- .../nlohmann/byte_container_with_subtype.hpp | 4 +- .../include/nlohmann/detail/abi_macros.hpp | 29 +- .../nlohmann/detail/conversions/from_json.hpp | 88 ++++- .../nlohmann/detail/conversions/to_chars.hpp | 18 +- .../nlohmann/detail/conversions/to_json.hpp | 43 ++- .../include/nlohmann/detail/exceptions.hpp | 44 ++- .../include/nlohmann/detail/hash.hpp | 4 +- .../nlohmann/detail/input/binary_reader.hpp | 131 +++++-- .../nlohmann/detail/input/input_adapters.hpp | 63 ++- .../nlohmann/detail/input/json_sax.hpp | 305 +++++++++++++-- .../include/nlohmann/detail/input/lexer.hpp | 22 +- .../include/nlohmann/detail/input/parser.hpp | 16 +- .../nlohmann/detail/input/position_t.hpp | 4 +- .../detail/iterators/internal_iterator.hpp | 4 +- .../nlohmann/detail/iterators/iter_impl.hpp | 29 +- .../detail/iterators/iteration_proxy.hpp | 19 +- .../detail/iterators/iterator_traits.hpp | 6 +- .../iterators/json_reverse_iterator.hpp | 4 +- .../detail/iterators/primitive_iterator.hpp | 4 +- .../detail/json_custom_base_class.hpp | 4 +- .../include/nlohmann/detail/json_pointer.hpp | 10 +- .../include/nlohmann/detail/json_ref.hpp | 4 +- .../include/nlohmann/detail/macro_scope.hpp | 151 +++++++- .../include/nlohmann/detail/macro_unscope.hpp | 5 +- .../nlohmann/detail/meta/call_std/begin.hpp | 4 +- .../nlohmann/detail/meta/call_std/end.hpp | 4 +- .../nlohmann/detail/meta/cpp_future.hpp | 6 +- .../include/nlohmann/detail/meta/detected.hpp | 4 +- .../nlohmann/detail/meta/identity_tag.hpp | 4 +- .../include/nlohmann/detail/meta/is_sax.hpp | 4 +- .../include/nlohmann/detail/meta/std_fs.hpp | 6 +- 
.../nlohmann/detail/meta/type_traits.hpp | 42 +- .../include/nlohmann/detail/meta/void_t.hpp | 4 +- .../nlohmann/detail/output/binary_writer.hpp | 42 +- .../detail/output/output_adapters.hpp | 4 +- .../nlohmann/detail/output/serializer.hpp | 12 +- .../include/nlohmann/detail/string_concat.hpp | 4 +- .../include/nlohmann/detail/string_escape.hpp | 4 +- .../include/nlohmann/detail/string_utils.hpp | 37 ++ .../include/nlohmann/detail/value_t.hpp | 4 +- .../nlohmann_json/include/nlohmann/json.hpp | 212 +++++++---- .../include/nlohmann/json_fwd.hpp | 4 +- .../include/nlohmann/ordered_map.hpp | 8 +- .../nlohmann/thirdparty/hedley/hedley.hpp | 6 +- .../thirdparty/hedley/hedley_undef.hpp | 4 +- contrib/restricted/nlohmann_json/ya.make | 4 +- 56 files changed, 2023 insertions(+), 692 deletions(-) create mode 100644 contrib/restricted/nlohmann_json/FILES.md create mode 100644 contrib/restricted/nlohmann_json/include/nlohmann/detail/string_utils.hpp diff --git a/contrib/restricted/nlohmann_json/.yandex_meta/devtools.copyrights.report b/contrib/restricted/nlohmann_json/.yandex_meta/devtools.copyrights.report index 4614c0eaacf6..8d0aef1b9b39 100644 --- a/contrib/restricted/nlohmann_json/.yandex_meta/devtools.copyrights.report +++ b/contrib/restricted/nlohmann_json/.yandex_meta/devtools.copyrights.report @@ -33,134 +33,91 @@ # FILE_INCLUDE - include all file data into licenses text file # ======================= -KEEP COPYRIGHT_SERVICE_LABEL 1d2c160f5770ec8c125d130115e61d07 +KEEP COPYRIGHT_SERVICE_LABEL 3de5bae94597d6fa92ce56fbe6fc21b9 BELONGS ya.make License text: - // SPDX-FileCopyrightText: 2008-2009 Björn Hoehrmann - // SPDX-FileCopyrightText: 2013-2023 Niels Lohmann + // SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann + // SPDX-FileCopyrightText: 2016 - 2021 Evan Nemerson // SPDX-License-Identifier: MIT Scancode info: Original SPDX id: COPYRIGHT_SERVICE_LABEL Score : 100.00 Match type : COPYRIGHT Files with this license: - include/nlohmann/detail/output/serializer.hpp [6:8] + include/nlohmann/thirdparty/hedley/hedley.hpp [8:10] -KEEP COPYRIGHT_SERVICE_LABEL 20253debf95df817809db34d2862371e +KEEP COPYRIGHT_SERVICE_LABEL 3fb410b721d46624abdaeb2473ffa5d6 BELONGS ya.make License text: - Copyright © 2013-2022 [Niels Lohmann](https://nlohmann.me) + // SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann + // SPDX-FileCopyrightText: 2018 The Abseil Authors + // SPDX-License-Identifier: MIT Scancode info: Original SPDX id: COPYRIGHT_SERVICE_LABEL Score : 100.00 Match type : COPYRIGHT Files with this license: - README.md [1386:1386] + include/nlohmann/detail/meta/cpp_future.hpp [6:8] -KEEP COPYRIGHT_SERVICE_LABEL 3fb410b721d46624abdaeb2473ffa5d6 +KEEP COPYRIGHT_SERVICE_LABEL 3ff67c9bdbb15537628a2c59c3365bf6 BELONGS ya.make License text: - // SPDX-FileCopyrightText: 2013-2023 Niels Lohmann - // SPDX-FileCopyrightText: 2018 The Abseil Authors + // SPDX-FileCopyrightText: 2008 - 2009 Björn Hoehrmann + // SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT Scancode info: Original SPDX id: COPYRIGHT_SERVICE_LABEL Score : 100.00 Match type : COPYRIGHT Files with this license: - include/nlohmann/detail/meta/cpp_future.hpp [6:8] + include/nlohmann/detail/output/serializer.hpp [6:8] -KEEP COPYRIGHT_SERVICE_LABEL 77db38c27b2cc781224c151d0f786162 +KEEP COPYRIGHT_SERVICE_LABEL 5b4a8a3da1fc1707e9cfad1a7ab52d63 BELONGS ya.make License text: - @copyright Copyright (c) 2008-2009 Bjoern Hoehrmann - @sa http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ + Copyright (c) 2013-2025 Niels Lohmann Scancode 
info: Original SPDX id: COPYRIGHT_SERVICE_LABEL Score : 100.00 Match type : COPYRIGHT Files with this license: - include/nlohmann/detail/output/serializer.hpp [897:898] + LICENSE.MIT [3:3] -KEEP COPYRIGHT_SERVICE_LABEL 7f9096053ca3484b653eb1c83c7e777d +KEEP COPYRIGHT_SERVICE_LABEL 6738967b04c51108d809b85ac132d547 BELONGS ya.make License text: - // SPDX-FileCopyrightText: 2013-2023 Niels Lohmann - // SPDX-License-Identifier: MIT + Copyright © 2013-2025 [Niels Lohmann](https://nlohmann.me) Scancode info: Original SPDX id: COPYRIGHT_SERVICE_LABEL Score : 100.00 Match type : COPYRIGHT Files with this license: - include/nlohmann/adl_serializer.hpp [6:7] - include/nlohmann/byte_container_with_subtype.hpp [6:7] - include/nlohmann/detail/abi_macros.hpp [6:7] - include/nlohmann/detail/conversions/from_json.hpp [6:7] - include/nlohmann/detail/conversions/to_chars.hpp [6:8] - include/nlohmann/detail/conversions/to_json.hpp [6:7] - include/nlohmann/detail/exceptions.hpp [6:7] - include/nlohmann/detail/hash.hpp [6:7] - include/nlohmann/detail/input/binary_reader.hpp [6:7] - include/nlohmann/detail/input/input_adapters.hpp [6:7] - include/nlohmann/detail/input/json_sax.hpp [6:7] - include/nlohmann/detail/input/lexer.hpp [6:7] - include/nlohmann/detail/input/parser.hpp [6:7] - include/nlohmann/detail/input/position_t.hpp [6:7] - include/nlohmann/detail/iterators/internal_iterator.hpp [6:7] - include/nlohmann/detail/iterators/iter_impl.hpp [6:7] - include/nlohmann/detail/iterators/iteration_proxy.hpp [6:7] - include/nlohmann/detail/iterators/iterator_traits.hpp [6:7] - include/nlohmann/detail/iterators/json_reverse_iterator.hpp [6:7] - include/nlohmann/detail/iterators/primitive_iterator.hpp [6:7] - include/nlohmann/detail/json_custom_base_class.hpp [6:7] - include/nlohmann/detail/json_pointer.hpp [6:7] - include/nlohmann/detail/json_ref.hpp [6:7] - include/nlohmann/detail/macro_scope.hpp [6:7] - include/nlohmann/detail/macro_unscope.hpp [6:7] - include/nlohmann/detail/meta/call_std/begin.hpp [6:7] - include/nlohmann/detail/meta/call_std/end.hpp [6:7] - include/nlohmann/detail/meta/cpp_future.hpp [6:8] - include/nlohmann/detail/meta/detected.hpp [6:7] - include/nlohmann/detail/meta/identity_tag.hpp [6:7] - include/nlohmann/detail/meta/is_sax.hpp [6:7] - include/nlohmann/detail/meta/std_fs.hpp [6:7] - include/nlohmann/detail/meta/type_traits.hpp [6:7] - include/nlohmann/detail/meta/void_t.hpp [6:7] - include/nlohmann/detail/output/binary_writer.hpp [6:7] - include/nlohmann/detail/output/output_adapters.hpp [6:7] - include/nlohmann/detail/output/serializer.hpp [6:8] - include/nlohmann/detail/string_concat.hpp [6:7] - include/nlohmann/detail/string_escape.hpp [6:7] - include/nlohmann/detail/value_t.hpp [6:7] - include/nlohmann/json.hpp [6:7] - include/nlohmann/json_fwd.hpp [6:7] - include/nlohmann/ordered_map.hpp [6:7] - include/nlohmann/thirdparty/hedley/hedley.hpp [8:10] - include/nlohmann/thirdparty/hedley/hedley_undef.hpp [6:7] + README.md [1323:1323] -KEEP COPYRIGHT_SERVICE_LABEL 940a6050b65ddf2560eacf6325aeedfa +KEEP COPYRIGHT_SERVICE_LABEL 77db38c27b2cc781224c151d0f786162 BELONGS ya.make License text: - // SPDX-FileCopyrightText: 2013-2023 Niels Lohmann - // SPDX-FileCopyrightText: 2016-2021 Evan Nemerson - // SPDX-License-Identifier: MIT + @copyright Copyright (c) 2008-2009 Bjoern Hoehrmann + @sa http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ Scancode info: Original SPDX id: COPYRIGHT_SERVICE_LABEL Score : 100.00 Match type : COPYRIGHT Files with this license: - 
include/nlohmann/thirdparty/hedley/hedley.hpp [8:10] + include/nlohmann/detail/output/serializer.hpp [897:898] -KEEP COPYRIGHT_SERVICE_LABEL a021d3bd6d6cbb97cbccc0ab1586c491 +KEEP COPYRIGHT_SERVICE_LABEL 8076846a0e6cd4735d49dcb2ef1f094d BELONGS ya.make License text: - Copyright (c) 2013-2022 Niels Lohmann + result["copyright"] = "(C) 2013-2025 Niels Lohmann"; + result["name"] = "JSON for Modern C++"; + result["url"] = "https://github.com/nlohmann/json"; Scancode info: Original SPDX id: COPYRIGHT_SERVICE_LABEL Score : 100.00 Match type : COPYRIGHT Files with this license: - LICENSE.MIT [3:3] + include/nlohmann/json.hpp [255:257] KEEP COPYRIGHT_SERVICE_LABEL b2d024cee657b6d8a684658564761637 BELONGS ya.make @@ -170,7 +127,7 @@ BELONGS ya.make Score : 100.00 Match type : COPYRIGHT Files with this license: - README.md [1398:1398] + README.md [1333:1336] KEEP COPYRIGHT_SERVICE_LABEL c8c3bf583dc390b261a3600ab20c9768 BELONGS ya.make @@ -180,7 +137,7 @@ BELONGS ya.make Score : 100.00 Match type : COPYRIGHT Files with this license: - README.md [1396:1396] + README.md [1333:1336] KEEP COPYRIGHT_SERVICE_LABEL ce7525b41b9b057bb4906c82b576055d BELONGS ya.make @@ -197,7 +154,7 @@ KEEP COPYRIGHT_SERVICE_LABEL f00eba1ede61dd156c5c44c3700d4ca0 BELONGS ya.make License text: // SPDX-FileCopyrightText: 2009 Florian Loitsch - // SPDX-FileCopyrightText: 2013-2023 Niels Lohmann + // SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT Scancode info: Original SPDX id: COPYRIGHT_SERVICE_LABEL @@ -206,15 +163,59 @@ BELONGS ya.make Files with this license: include/nlohmann/detail/conversions/to_chars.hpp [6:8] -KEEP COPYRIGHT_SERVICE_LABEL f797318120fd0e858229c71aa1f8455b +KEEP COPYRIGHT_SERVICE_LABEL f259313a7a86a144164c66030872d5f6 BELONGS ya.make License text: - result["copyright"] = "(C) 2013-2023 Niels Lohmann"; - result["name"] = "JSON for Modern C++"; - result["url"] = "https://github.com/nlohmann/json"; + // SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann + // SPDX-License-Identifier: MIT Scancode info: Original SPDX id: COPYRIGHT_SERVICE_LABEL Score : 100.00 Match type : COPYRIGHT Files with this license: - include/nlohmann/json.hpp [252:254] + include/nlohmann/adl_serializer.hpp [6:7] + include/nlohmann/byte_container_with_subtype.hpp [6:7] + include/nlohmann/detail/abi_macros.hpp [6:7] + include/nlohmann/detail/conversions/from_json.hpp [6:7] + include/nlohmann/detail/conversions/to_chars.hpp [6:8] + include/nlohmann/detail/conversions/to_json.hpp [6:7] + include/nlohmann/detail/exceptions.hpp [6:7] + include/nlohmann/detail/hash.hpp [6:7] + include/nlohmann/detail/input/binary_reader.hpp [6:7] + include/nlohmann/detail/input/input_adapters.hpp [6:7] + include/nlohmann/detail/input/json_sax.hpp [6:7] + include/nlohmann/detail/input/lexer.hpp [6:7] + include/nlohmann/detail/input/parser.hpp [6:7] + include/nlohmann/detail/input/position_t.hpp [6:7] + include/nlohmann/detail/iterators/internal_iterator.hpp [6:7] + include/nlohmann/detail/iterators/iter_impl.hpp [6:7] + include/nlohmann/detail/iterators/iteration_proxy.hpp [6:7] + include/nlohmann/detail/iterators/iterator_traits.hpp [6:7] + include/nlohmann/detail/iterators/json_reverse_iterator.hpp [6:7] + include/nlohmann/detail/iterators/primitive_iterator.hpp [6:7] + include/nlohmann/detail/json_custom_base_class.hpp [6:7] + include/nlohmann/detail/json_pointer.hpp [6:7] + include/nlohmann/detail/json_ref.hpp [6:7] + include/nlohmann/detail/macro_scope.hpp [6:7] + include/nlohmann/detail/macro_unscope.hpp [6:7] + 
include/nlohmann/detail/meta/call_std/begin.hpp [6:7] + include/nlohmann/detail/meta/call_std/end.hpp [6:7] + include/nlohmann/detail/meta/cpp_future.hpp [6:8] + include/nlohmann/detail/meta/detected.hpp [6:7] + include/nlohmann/detail/meta/identity_tag.hpp [6:7] + include/nlohmann/detail/meta/is_sax.hpp [6:7] + include/nlohmann/detail/meta/std_fs.hpp [6:7] + include/nlohmann/detail/meta/type_traits.hpp [6:7] + include/nlohmann/detail/meta/void_t.hpp [6:7] + include/nlohmann/detail/output/binary_writer.hpp [6:7] + include/nlohmann/detail/output/output_adapters.hpp [6:7] + include/nlohmann/detail/output/serializer.hpp [6:8] + include/nlohmann/detail/string_concat.hpp [6:7] + include/nlohmann/detail/string_escape.hpp [6:7] + include/nlohmann/detail/string_utils.hpp [6:7] + include/nlohmann/detail/value_t.hpp [6:7] + include/nlohmann/json.hpp [6:7] + include/nlohmann/json_fwd.hpp [6:7] + include/nlohmann/ordered_map.hpp [6:7] + include/nlohmann/thirdparty/hedley/hedley.hpp [8:10] + include/nlohmann/thirdparty/hedley/hedley_undef.hpp [6:7] diff --git a/contrib/restricted/nlohmann_json/.yandex_meta/devtools.licenses.report b/contrib/restricted/nlohmann_json/.yandex_meta/devtools.licenses.report index 6c5af02ff9f2..acc16b8187d4 100644 --- a/contrib/restricted/nlohmann_json/.yandex_meta/devtools.licenses.report +++ b/contrib/restricted/nlohmann_json/.yandex_meta/devtools.licenses.report @@ -33,29 +33,29 @@ # FILE_INCLUDE - include all file data into licenses text file # ======================= -KEEP MIT 098c1e1a2b998a44b2dfa93ea519d45d +KEEP CC0-1.0 03e44fc6d1b905b870bbd63f53b78be0 BELONGS ya.make License text: - The library itself consists of a single header file licensed under the MIT license. However, it is built, tested, documented, and whatnot using a lot of third-party tools and services. Thanks a lot! + - The class contains a copy of [Hedley](https://nemequ.github.io/hedley/) from Evan Nemerson which is licensed as [CC0-1.0](https://creativecommons.org/publicdomain/zero/1.0/). Scancode info: - Original SPDX id: MIT - Score : 100.00 - Match type : NOTICE - Links : http://opensource.org/licenses/mit-license.php, https://spdx.org/licenses/MIT + Original SPDX id: CC0-1.0 + Score : 95.00 + Match type : REFERENCE + Links : http://creativecommons.org/publicdomain/zero/1.0/, http://creativecommons.org/publicdomain/zero/1.0/legalcode, https://spdx.org/licenses/CC0-1.0 Files with this license: - README.md [1772:1772] + README.md [1335:1335] -SKIP LicenseRef-scancode-free-unknown 09d2a3b4461c6f8aadc34ba913c9b62f +KEEP MIT 098c1e1a2b998a44b2dfa93ea519d45d BELONGS ya.make License text: - + The library itself consists of a single header file licensed under the MIT license. However, it is built, tested, documented, and whatnot using a lot of third-party tools and services. Thanks a lot! 
Scancode info: - Original SPDX id: LicenseRef-scancode-free-unknown + Original SPDX id: MIT Score : 100.00 - Match type : REFERENCE - Links : https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/free-unknown.LICENSE + Match type : NOTICE + Links : http://opensource.org/licenses/mit-license.php, https://spdx.org/licenses/MIT Files with this license: - README.md [1382:1382] + README.md [1752:1752] KEEP MIT 0de64830926d7b28059b0628b12a0676 BELONGS ya.make @@ -67,6 +67,7 @@ BELONGS ya.make Match type : TAG Links : http://opensource.org/licenses/mit-license.php, https://spdx.org/licenses/MIT Files with this license: + include/nlohmann/detail/input/lexer.hpp [7:7] include/nlohmann/detail/macro_scope.hpp [7:7] include/nlohmann/detail/output/binary_writer.hpp [7:7] @@ -84,7 +85,7 @@ BELONGS ya.make KEEP MIT 1f7d5654f3b2a7966a91d59c5ed0ac05 BELONGS ya.make -FILE_INCLUDE LICENSE.MIT found in files: ChangeLog.md at line 1051 +FILE_INCLUDE LICENSE.MIT found in files: ChangeLog.md at line 1380 License text: - Update LICENSE.MIT [\#2010](https://github.com/nlohmann/json/pull/2010) ([magamig](https://github.com/magamig)) Scancode info: @@ -93,7 +94,7 @@ FILE_INCLUDE LICENSE.MIT found in files: ChangeLog.md at line 1051 Match type : TAG Links : http://opensource.org/licenses/mit-license.php, https://spdx.org/licenses/MIT Files with this license: - ChangeLog.md [1051:1051] + ChangeLog.md [1380:1380] KEEP MIT 2995454deea186f128a76b7a5322870b BELONGS ya.make @@ -133,14 +134,27 @@ BELONGS ya.make KEEP Apache-2.0 5c4ed081d4d7bdcc3ffd2786714646e5 BELONGS ya.make License text: - The class contains parts of [Google Abseil](https://github.com/abseil/abseil-cpp) which is licensed under the [Apache 2.0 License](https://opensource.org/licenses/Apache-2.0). + - The class contains parts of [Google Abseil](https://github.com/abseil/abseil-cpp) which is licensed under the [Apache 2.0 License](https://opensource.org/licenses/Apache-2.0). 
Scancode info: Original SPDX id: Apache-2.0 Score : 100.00 Match type : REFERENCE Links : http://www.apache.org/licenses/, http://www.apache.org/licenses/LICENSE-2.0, https://spdx.org/licenses/Apache-2.0 Files with this license: - README.md [1402:1402] + README.md [1336:1336] + +SKIP LicenseRef-scancode-free-unknown 7c84fbd34743ac8b5ecf905fe1873b8f +BELONGS ya.make + # Not a license, just some fancy banner + License text: + OSI approved license + Scancode info: + Original SPDX id: LicenseRef-scancode-free-unknown + Score : 100.00 + Match type : REFERENCE + Links : https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/free-unknown.LICENSE + Files with this license: + README.md [1319:1319] KEEP MIT 8384d75c38c570f3edb87cf9f64f2ec2 BELONGS ya.make @@ -163,7 +177,6 @@ BELONGS ya.make include/nlohmann/detail/input/binary_reader.hpp [7:7] include/nlohmann/detail/input/input_adapters.hpp [7:7] include/nlohmann/detail/input/json_sax.hpp [7:7] - include/nlohmann/detail/input/lexer.hpp [7:7] include/nlohmann/detail/input/parser.hpp [7:7] include/nlohmann/detail/input/position_t.hpp [7:7] include/nlohmann/detail/iterators/internal_iterator.hpp [7:7] @@ -189,6 +202,7 @@ BELONGS ya.make include/nlohmann/detail/output/serializer.hpp [8:8] include/nlohmann/detail/string_concat.hpp [7:7] include/nlohmann/detail/string_escape.hpp [7:7] + include/nlohmann/detail/string_utils.hpp [7:7] include/nlohmann/detail/value_t.hpp [7:7] include/nlohmann/json.hpp [7:7] include/nlohmann/json_fwd.hpp [7:7] @@ -196,6 +210,32 @@ BELONGS ya.make include/nlohmann/thirdparty/hedley/hedley.hpp [10:10] include/nlohmann/thirdparty/hedley/hedley_undef.hpp [7:7] +KEEP CC0-1.0 8b9f565465b821b7c30af310191ebfd7 +BELONGS ya.make + License text: + - The class contains a copy of [Hedley](https://nemequ.github.io/hedley/) from Evan Nemerson which is licensed as [CC0-1.0](https://creativecommons.org/publicdomain/zero/1.0/). + Scancode info: + Original SPDX id: CC0-1.0 + Score : 100.00 + Match type : REFERENCE + Links : http://creativecommons.org/publicdomain/zero/1.0/, http://creativecommons.org/publicdomain/zero/1.0/legalcode, https://spdx.org/licenses/CC0-1.0 + Files with this license: + README.md [1335:1335] + +KEEP MIT a13268d00c6f0b9ba97d05f45f0ab821 +BELONGS ya.make +FILE_INCLUDE LICENSE.MIT found in files: FILES.md at line 173, FILES.md at line 182 + License text: + \### `LICENSE.MIT` + Scancode info: + Original SPDX id: MIT + Score : 100.00 + Match type : TAG + Links : http://opensource.org/licenses/mit-license.php, https://spdx.org/licenses/MIT + Files with this license: + FILES.md [173:173] + FILES.md [182:182] + KEEP Apache-2.0 bae1ad0735f2d2672df1746ab73234b4 BELONGS ya.make License text: @@ -208,18 +248,6 @@ BELONGS ya.make Files with this license: include/nlohmann/detail/meta/cpp_future.hpp [41:41] -KEEP CC0-1.0 c0bd5a10bf8b93a637d1ab75adbef600 -BELONGS ya.make - License text: - The class contains a copy of [Hedley](https://nemequ.github.io/hedley/) from Evan Nemerson which is licensed as [CC0-1.0](https://creativecommons.org/publicdomain/zero/1.0/). - Scancode info: - Original SPDX id: CC0-1.0 - Score : 80.00 - Match type : REFERENCE - Links : http://creativecommons.org/publicdomain/zero/1.0/, http://creativecommons.org/publicdomain/zero/1.0/legalcode, https://spdx.org/licenses/CC0-1.0 - Files with this license: - README.md [1400:1400] - KEEP MIT cf197bd5d4094c2d8f76ed82f4786aef BELONGS ya.make Note: matched license text is too long. Read it in the source files. 
@@ -229,7 +257,7 @@ BELONGS ya.make Match type : TEXT Links : http://opensource.org/licenses/mit-license.php, https://spdx.org/licenses/MIT Files with this license: - README.md [1388:1392] + README.md [1325:1329] KEEP MIT d51298dcb4641b6cc8391c423ab4c650 BELONGS ya.make @@ -240,20 +268,20 @@ BELONGS ya.make Match type : NOTICE Links : http://opensource.org/licenses/mit-license.php, https://spdx.org/licenses/MIT Files with this license: - README.md [1396:1396] - README.md [1398:1398] + README.md [1333:1333] + README.md [1334:1334] KEEP Apache-2.0 d683c8d4c18c5675d52bab6b519531d9 BELONGS ya.make License text: - The class contains parts of [Google Abseil](https://github.com/abseil/abseil-cpp) which is licensed under the [Apache 2.0 License](https://opensource.org/licenses/Apache-2.0). + - The class contains parts of [Google Abseil](https://github.com/abseil/abseil-cpp) which is licensed under the [Apache 2.0 License](https://opensource.org/licenses/Apache-2.0). Scancode info: Original SPDX id: Apache-2.0 Score : 100.00 Match type : NOTICE Links : http://www.apache.org/licenses/, http://www.apache.org/licenses/LICENSE-2.0, https://spdx.org/licenses/Apache-2.0 Files with this license: - README.md [1402:1402] + README.md [1336:1336] KEEP MIT e73b9d3e815186df6ea3e59f2379e088 BELONGS ya.make @@ -277,4 +305,4 @@ BELONGS ya.make Match type : NOTICE Links : http://opensource.org/licenses/mit-license.php, https://spdx.org/licenses/MIT Files with this license: - README.md [1384:1384] + README.md [1321:1321] diff --git a/contrib/restricted/nlohmann_json/.yandex_meta/licenses.list.txt b/contrib/restricted/nlohmann_json/.yandex_meta/licenses.list.txt index d99f6b3fb6ef..0b9a268488ee 100644 --- a/contrib/restricted/nlohmann_json/.yandex_meta/licenses.list.txt +++ b/contrib/restricted/nlohmann_json/.yandex_meta/licenses.list.txt @@ -1,17 +1,17 @@ ====================Apache-2.0==================== -// which is part of Google Abseil (https://github.com/abseil/abseil-cpp), licensed under the Apache License 2.0. +- The class contains parts of [Google Abseil](https://github.com/abseil/abseil-cpp) which is licensed under the [Apache 2.0 License](https://opensource.org/licenses/Apache-2.0). ====================Apache-2.0==================== -The class contains parts of [Google Abseil](https://github.com/abseil/abseil-cpp) which is licensed under the [Apache 2.0 License](https://opensource.org/licenses/Apache-2.0). +// which is part of Google Abseil (https://github.com/abseil/abseil-cpp), licensed under the Apache License 2.0. ====================CC0-1.0==================== -The class contains a copy of [Hedley](https://nemequ.github.io/hedley/) from Evan Nemerson which is licensed as [CC0-1.0](https://creativecommons.org/publicdomain/zero/1.0/). +- The class contains a copy of [Hedley](https://nemequ.github.io/hedley/) from Evan Nemerson which is licensed as [CC0-1.0](https://creativecommons.org/publicdomain/zero/1.0/). 
====================COPYRIGHT==================== - result["copyright"] = "(C) 2013-2023 Niels Lohmann"; + result["copyright"] = "(C) 2013-2025 Niels Lohmann"; result["name"] = "JSON for Modern C++"; result["url"] = "https://github.com/nlohmann/json"; @@ -22,48 +22,47 @@ The class contains a copy of [Hedley](https://nemequ.github.io/hedley/) from Eva ====================COPYRIGHT==================== -// SPDX-FileCopyrightText: 2008-2009 Björn Hoehrmann -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT +- The class contains the UTF-8 Decoder from Bjoern Hoehrmann which is licensed under the [MIT License](https://opensource.org/licenses/MIT) (see above). Copyright © 2008-2009 [Björn Hoehrmann](https://bjoern.hoehrmann.de/) +- The class contains a slightly modified version of the Grisu2 algorithm from Florian Loitsch which is licensed under the [MIT License](https://opensource.org/licenses/MIT) (see above). Copyright © 2009 [Florian Loitsch](https://florian.loitsch.com/) +- The class contains a copy of [Hedley](https://nemequ.github.io/hedley/) from Evan Nemerson which is licensed as [CC0-1.0](https://creativecommons.org/publicdomain/zero/1.0/). +- The class contains parts of [Google Abseil](https://github.com/abseil/abseil-cpp) which is licensed under the [Apache 2.0 License](https://opensource.org/licenses/Apache-2.0). ====================COPYRIGHT==================== -// SPDX-FileCopyrightText: 2009 Florian Loitsch -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2008 - 2009 Björn Hoehrmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT ====================COPYRIGHT==================== -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-FileCopyrightText: 2016-2021 Evan Nemerson +// SPDX-FileCopyrightText: 2009 Florian Loitsch +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT ====================COPYRIGHT==================== -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-FileCopyrightText: 2018 The Abseil Authors +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2016 - 2021 Evan Nemerson // SPDX-License-Identifier: MIT ====================COPYRIGHT==================== -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2018 The Abseil Authors // SPDX-License-Identifier: MIT ====================COPYRIGHT==================== -Copyright © 2013-2022 [Niels Lohmann](https://nlohmann.me) - - -====================COPYRIGHT==================== -Copyright (c) 2013-2022 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-License-Identifier: MIT ====================COPYRIGHT==================== -The class contains a slightly modified version of the Grisu2 algorithm from Florian Loitsch which is licensed under the [MIT License](https://opensource.org/licenses/MIT) (see above). Copyright © 2009 [Florian Loitsch](https://florian.loitsch.com/) +Copyright © 2013-2025 [Niels Lohmann](https://nlohmann.me) ====================COPYRIGHT==================== -The class contains the UTF-8 Decoder from Bjoern Hoehrmann which is licensed under the [MIT License](https://opensource.org/licenses/MIT) (see above). 
Copyright © 2008-2009 [Björn Hoehrmann](https://bjoern.hoehrmann.de/) +Copyright (c) 2013-2025 Niels Lohmann ====================COPYRIGHT==================== @@ -73,7 +72,7 @@ The code is distributed under the MIT license, Copyright (c) 2009 Florian Loitsc ====================File: LICENSE.MIT==================== MIT License -Copyright (c) 2013-2022 Niels Lohmann +Copyright (c) 2013-2025 Niels Lohmann Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -94,6 +93,14 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +====================MIT==================== +### `LICENSE.MIT` + + +====================MIT==================== +- The class contains the UTF-8 Decoder from Bjoern Hoehrmann which is licensed under the [MIT License](https://opensource.org/licenses/MIT) (see above). Copyright © 2008-2009 [Björn Hoehrmann](https://bjoern.hoehrmann.de/) + + ====================MIT==================== - Update LICENSE.MIT [\#2010](https://github.com/nlohmann/json/pull/2010) ([magamig](https://github.com/magamig)) @@ -133,10 +140,6 @@ The above copyright notice and this permission notice shall be included in all c THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -====================MIT==================== -The class contains the UTF-8 Decoder from Bjoern Hoehrmann which is licensed under the [MIT License](https://opensource.org/licenses/MIT) (see above). 
Copyright © 2008-2009 [Björn Hoehrmann](https://bjoern.hoehrmann.de/) - - ====================MIT==================== The class is licensed under the [MIT License](https://opensource.org/licenses/MIT): diff --git a/contrib/restricted/nlohmann_json/.yandex_meta/override.nix b/contrib/restricted/nlohmann_json/.yandex_meta/override.nix index 4b1366811e96..15ac163662fd 100644 --- a/contrib/restricted/nlohmann_json/.yandex_meta/override.nix +++ b/contrib/restricted/nlohmann_json/.yandex_meta/override.nix @@ -1,11 +1,11 @@ pkgs: attrs: with pkgs; with attrs; rec { - version = "3.11.3"; + version = "3.12.0"; src = fetchFromGitHub { owner = "nlohmann"; repo = "json"; rev = "v${version}"; - hash = "sha256-7F0Jon+1oWL7uqet5i1IgHX0fUw/+z0QwEcA3zs5xHg="; + hash = "sha256-cECvDOLxgX7Q9R3IE86Hj9JJUxraDQvhoyPDF03B2CY="; }; patches = []; diff --git a/contrib/restricted/nlohmann_json/CITATION.cff b/contrib/restricted/nlohmann_json/CITATION.cff index fd3b767135cf..8a70c17bc295 100644 --- a/contrib/restricted/nlohmann_json/CITATION.cff +++ b/contrib/restricted/nlohmann_json/CITATION.cff @@ -7,8 +7,8 @@ authors: email: mail@nlohmann.me website: https://nlohmann.me title: "JSON for Modern C++" -version: 3.11.3 -date-released: 2023-11-28 +version: 3.12.0 +date-released: 2025-04-07 license: MIT repository-code: "https://github.com/nlohmann" url: https://json.nlohmann.me diff --git a/contrib/restricted/nlohmann_json/ChangeLog.md b/contrib/restricted/nlohmann_json/ChangeLog.md index 656d68bcfc8b..2274fb455fb6 100644 --- a/contrib/restricted/nlohmann_json/ChangeLog.md +++ b/contrib/restricted/nlohmann_json/ChangeLog.md @@ -1,9 +1,340 @@ # Changelog All notable changes to this project will be documented in this file. This project adheres to [Semantic Versioning](http://semver.org/). 
-## [3.11.2](https://github.com/nlohmann/json/releases/tag/3.11.2) (2022-08-12) +## [unreleased](https://github.com/nlohmann/json/releases/tag/unreleased) (2024-12-22) + +[Full Changelog](https://github.com/nlohmann/json/compare/v3.11.3...unreleased) + +- Impossible de read json file create with nlohmann::ordered\_json::dump [\#4556](https://github.com/nlohmann/json/issues/4556) +- Error C2039 : 'json\_sax\_dom\_callback\_parser': is not a member of 'nlohmann::json\_abi\_v3\_11\_3::detail' [\#4529](https://github.com/nlohmann/json/issues/4529) +- `json_fwd.hpp` don't define default template arguments for ordered\_map [\#4518](https://github.com/nlohmann/json/issues/4518) +- new repo version seems stop create any the ingress-nginx controller with opentelemetry-cpp.git [\#4515](https://github.com/nlohmann/json/issues/4515) +- Error converting to/from scoped enumerations [\#4499](https://github.com/nlohmann/json/issues/4499) +- Default initialized iterators are not comparable [\#4493](https://github.com/nlohmann/json/issues/4493) +- Bug json.exception.type\_error.302 [\#4492](https://github.com/nlohmann/json/issues/4492) +- tests fail to build with clang-19 and libc++ due to unsupported `std::char_traits` [\#4490](https://github.com/nlohmann/json/issues/4490) +- Brace-Initialization Fails with json::parse and Key Access on Linux [\#4488](https://github.com/nlohmann/json/issues/4488) +- Crash when parsing nullptr [\#4485](https://github.com/nlohmann/json/issues/4485) +- Namespace macros are not respected in many instances [\#4484](https://github.com/nlohmann/json/issues/4484) +- ohos model to json string garbage characters [\#4481](https://github.com/nlohmann/json/issues/4481) +- Missing newlines in deserialized string [\#4479](https://github.com/nlohmann/json/issues/4479) +- Latest tag not available on NuGet [\#4478](https://github.com/nlohmann/json/issues/4478) +- Invalid union access for get\_ref/get\_ptr with unsigned integer [\#4475](https://github.com/nlohmann/json/issues/4475) +- /accesswallet [\#4469](https://github.com/nlohmann/json/issues/4469) +- struct reflect json with error C2440 [\#4467](https://github.com/nlohmann/json/issues/4467) +- Compiler error when using macro NLOHMANN\_DEFINE\_TYPE\_NON\_INTRUSIVE [\#4463](https://github.com/nlohmann/json/issues/4463) +- Issue when dumping a vector of derived classes [\#4462](https://github.com/nlohmann/json/issues/4462) +- whit std::wstring compile error [\#4460](https://github.com/nlohmann/json/issues/4460) +- Inconsisten operator\[\] [\#4458](https://github.com/nlohmann/json/issues/4458) +- json parse enclosing json object with \[\] [\#4457](https://github.com/nlohmann/json/issues/4457) +- \[bug\] nlohmann::json constructor behaves improperly [\#4450](https://github.com/nlohmann/json/issues/4450) +- parse OOM [\#4449](https://github.com/nlohmann/json/issues/4449) +- Library Cannot Parse JSON File It Wrote [\#4448](https://github.com/nlohmann/json/issues/4448) +- Unexpected Integer Conversion of JSON Values on ARM64 [\#4447](https://github.com/nlohmann/json/issues/4447) +- Structure declared in natvis file template doesn't seem to match current structure of `basic_json<>` [\#4438](https://github.com/nlohmann/json/issues/4438) +- A lot of EOT in json file [\#4436](https://github.com/nlohmann/json/issues/4436) +- CVE-2024-34062 [\#4429](https://github.com/nlohmann/json/issues/4429) +- CVE-2024-39689 [\#4428](https://github.com/nlohmann/json/issues/4428) +- CVE-2024-5569 [\#4427](https://github.com/nlohmann/json/issues/4427) +- 
CVE-2024-37891 [\#4426](https://github.com/nlohmann/json/issues/4426) +- Tornado vulnerabilities [\#4425](https://github.com/nlohmann/json/issues/4425) +- CVE-2024-35195 [\#4424](https://github.com/nlohmann/json/issues/4424) +- CVE-2024-22195, CVE-2024-34064 [\#4423](https://github.com/nlohmann/json/issues/4423) +- CVE-2024-3651 [\#4422](https://github.com/nlohmann/json/issues/4422) +- CVE-2024-22190 [\#4421](https://github.com/nlohmann/json/issues/4421) +- CVE-2024-39705 [\#4420](https://github.com/nlohmann/json/issues/4420) +- Failing to read complex Unicode string embedded in JSON [\#4417](https://github.com/nlohmann/json/issues/4417) +- Unable to parse JSON string from snake case to camel case [\#4399](https://github.com/nlohmann/json/issues/4399) +- Crashes when I try to use ‘json::at\(\)’ on a properly structured, non null, and correctly constructed ‘.json’ file [\#4387](https://github.com/nlohmann/json/issues/4387) +- JSON\_BuildTests fail when JSON\_DisableEnumSerialization is set to ON [\#4384](https://github.com/nlohmann/json/issues/4384) +- JSON can't parse a simple data [\#4383](https://github.com/nlohmann/json/issues/4383) +- json.exception.type\_error.302 [\#4373](https://github.com/nlohmann/json/issues/4373) +- iteration\_proxy has limited usefulness in C++20 range views [\#4371](https://github.com/nlohmann/json/issues/4371) +- Clone is long due to large history [\#4370](https://github.com/nlohmann/json/issues/4370) +- Can't use nlohmann on Linux [\#4363](https://github.com/nlohmann/json/issues/4363) +- CodeQL suppressions lack justification [\#4361](https://github.com/nlohmann/json/issues/4361) +- \[json.exception.parse\_error.101\] parse error at line 1, column 4520: syntax error while parsing value - invalid string: forbidden character after backslash; last read: '".\? [\#4352](https://github.com/nlohmann/json/issues/4352) +- Cannot unflatten json object [\#4349](https://github.com/nlohmann/json/issues/4349) +- Json parsed from raw string does not interpret lists of objects like json parsed from file [\#4341](https://github.com/nlohmann/json/issues/4341) +- natvis not updated to 3.11.3 [\#4340](https://github.com/nlohmann/json/issues/4340) +- Wrong function name in documentation example [\#4334](https://github.com/nlohmann/json/issues/4334) +- git ref got interpreted as number [\#4332](https://github.com/nlohmann/json/issues/4332) +- Is float valid number been limited? 
[\#4322](https://github.com/nlohmann/json/issues/4322) +- Crash when construct a new json object [\#4321](https://github.com/nlohmann/json/issues/4321) +- gdb-pretty-print broken since m\_data added [\#4309](https://github.com/nlohmann/json/issues/4309) +- Docs have incorrect info for `update()` [\#4307](https://github.com/nlohmann/json/issues/4307) +- CBOR data cannot be decoded [\#4301](https://github.com/nlohmann/json/issues/4301) +- Inconsistent behaviour of json construction using `std::initializer_list` [\#4299](https://github.com/nlohmann/json/issues/4299) +- Assertion failed when accessing non-existing object with const json object [\#4297](https://github.com/nlohmann/json/issues/4297) +- Validatable release artifacts are not sufficient for packaging \(trying to run tests\) [\#4296](https://github.com/nlohmann/json/issues/4296) +- ordered json pointer corruption [\#4289](https://github.com/nlohmann/json/issues/4289) +- Incorrect floating point parsing [\#4285](https://github.com/nlohmann/json/issues/4285) +- Segfault on parse when using "\#pragma pack \(push, 1\)" [\#4284](https://github.com/nlohmann/json/issues/4284) +- Simple example with nlohmann::ordered\_json doesn't compile [\#4282](https://github.com/nlohmann/json/issues/4282) +- Program crashes with ordered\_json, but works fine with json [\#4279](https://github.com/nlohmann/json/issues/4279) +- JSON parses as array when assigned in initializer list. [\#4278](https://github.com/nlohmann/json/issues/4278) +- Can't run `make amalgamate` [\#4275](https://github.com/nlohmann/json/issues/4275) +- Parsing the unicode string got the wrong result [\#4272](https://github.com/nlohmann/json/issues/4272) +- Issue with including \ [\#4241](https://github.com/nlohmann/json/issues/4241) +- Exception when trying to insert my json object inside json file [\#4239](https://github.com/nlohmann/json/issues/4239) +- `to_json` is erroneously converting enums with underlying unsigned types to signed numbers [\#4236](https://github.com/nlohmann/json/issues/4236) +- Build failure on macOS Sonoma 14.1.1 [\#4228](https://github.com/nlohmann/json/issues/4228) +- Deprecation warning on save action in check-amalgamation CI step [\#4227](https://github.com/nlohmann/json/issues/4227) +- 3.11.3: test suite fails in 4 units [\#4224](https://github.com/nlohmann/json/issues/4224) +- Exception thrown when dumping utf-8 characters when using std::string [\#4213](https://github.com/nlohmann/json/issues/4213) +- patch\_inplace assumes StringType is std::string [\#4134](https://github.com/nlohmann/json/issues/4134) +- Getting a weak-vtables warning with clang on ubuntu 22.04 [\#4087](https://github.com/nlohmann/json/issues/4087) +- SAX interface unexpectedly gets locale-altered float representation. 
[\#4084](https://github.com/nlohmann/json/issues/4084) +- Feat: hash pin github workflow dependencies [\#4058](https://github.com/nlohmann/json/issues/4058) +- compiler error using clang-16.0.5 when using gcc-13.1 standard library [\#4051](https://github.com/nlohmann/json/issues/4051) +- Missing requirement on `plantuml` binary package [\#4026](https://github.com/nlohmann/json/issues/4026) +- Compile failure for macos 10.10 SDK + darwin14 [\#3991](https://github.com/nlohmann/json/issues/3991) +- Test suite does not compile with C++20 and Clang 17 [\#3979](https://github.com/nlohmann/json/issues/3979) +- `modernize-avoid-c-arrays` clang-tidy warning when using `NLOHMANN_JSON_SERIALIZE_ENUM` macro [\#3924](https://github.com/nlohmann/json/issues/3924) +- JSON\_DIAGNOSTICS trigger assertion [\#3915](https://github.com/nlohmann/json/issues/3915) +- Compiler warning 'array-bounds' on g++12.2.0 on Ubuntu 22.10 kinetic with RelWithDebugInfo [\#3808](https://github.com/nlohmann/json/issues/3808) +- The MSVC team recently test JSON project failed to run test on release configuration on windows\_x64. [\#3542](https://github.com/nlohmann/json/issues/3542) +- Bad JSON diff when removing object in array of object [\#3146](https://github.com/nlohmann/json/issues/3146) +- Limit AppVeyor use [\#3089](https://github.com/nlohmann/json/issues/3089) +- pkgconfig integration wrongly rendered if tests are run [\#2907](https://github.com/nlohmann/json/issues/2907) +- Compile error for json in template and variadic macros. [\#2794](https://github.com/nlohmann/json/issues/2794) +- How to Serialize derived class to JSON object? [\#2199](https://github.com/nlohmann/json/issues/2199) +- \[C++17\] Allow std::optional to convert to nlohmann::json [\#1749](https://github.com/nlohmann/json/issues/1749) -[Full Changelog](https://github.com/nlohmann/json/compare/v3.11.1...3.11.2) +- Fix typo in nlohmann\_define\_derived\_type.md [\#4565](https://github.com/nlohmann/json/pull/4565) ([gregmarr](https://github.com/gregmarr)) +- Add ONLY\_SERIALIZE for NLOHMANN\_DEFINE\_DERIVED\_TYPE\_\* macros [\#4562](https://github.com/nlohmann/json/pull/4562) ([EinarsNG](https://github.com/EinarsNG)) +- Suppress modernize-use-integer-sign-comparison [\#4558](https://github.com/nlohmann/json/pull/4558) ([nlohmann](https://github.com/nlohmann)) +- Bump actions/upload-artifact from 4.4.3 to 4.5.0 [\#4557](https://github.com/nlohmann/json/pull/4557) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Clean up CI [\#4553](https://github.com/nlohmann/json/pull/4553) ([nlohmann](https://github.com/nlohmann)) +- \[StepSecurity\] ci: Harden GitHub Actions [\#4551](https://github.com/nlohmann/json/pull/4551) ([step-security-bot](https://github.com/step-security-bot)) +- Fix token permissions warnings [\#4550](https://github.com/nlohmann/json/pull/4550) ([nlohmann](https://github.com/nlohmann)) +- Add step to build the documentation [\#4549](https://github.com/nlohmann/json/pull/4549) ([nlohmann](https://github.com/nlohmann)) +- Bump mkdocs-material from 9.5.48 to 9.5.49 in /docs/mkdocs [\#4548](https://github.com/nlohmann/json/pull/4548) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Move reuse dependency to requirements.txt file [\#4547](https://github.com/nlohmann/json/pull/4547) ([nlohmann](https://github.com/nlohmann)) +- Clean up [\#4546](https://github.com/nlohmann/json/pull/4546) ([nlohmann](https://github.com/nlohmann)) +- ⬆️ Bump ossf/scorecard-action from 2.3.3 to 2.4.0 
[\#4545](https://github.com/nlohmann/json/pull/4545) ([dependabot[bot]](https://github.com/apps/dependabot)) +- ⬆️ Bump lukka/get-cmake from 3.31.0 to 3.31.2 [\#4544](https://github.com/nlohmann/json/pull/4544) ([dependabot[bot]](https://github.com/apps/dependabot)) +- ⬆️ Bump github/codeql-action from 2.27.9 to 3.27.9 [\#4543](https://github.com/nlohmann/json/pull/4543) ([dependabot[bot]](https://github.com/apps/dependabot)) +- ⬆️ Bump watchdog from 2.1.7 to 6.0.0 in /tools/serve\_header [\#4542](https://github.com/nlohmann/json/pull/4542) ([dependabot[bot]](https://github.com/apps/dependabot)) +- ⬆️ Bump pyyaml from 6.0 to 6.0.2 in /tools/serve\_header [\#4541](https://github.com/nlohmann/json/pull/4541) ([dependabot[bot]](https://github.com/apps/dependabot)) +- ⬆️ Bump actions/github-script from 6.4.0 to 7.0.1 [\#4540](https://github.com/nlohmann/json/pull/4540) ([dependabot[bot]](https://github.com/apps/dependabot)) +- \[StepSecurity\] Apply security best practices [\#4539](https://github.com/nlohmann/json/pull/4539) ([step-security-bot](https://github.com/step-security-bot)) +- Set parents after insert call [\#4537](https://github.com/nlohmann/json/pull/4537) ([nlohmann](https://github.com/nlohmann)) +- Allow patch and diff to be used with arbitrary string types [\#4536](https://github.com/nlohmann/json/pull/4536) ([nlohmann](https://github.com/nlohmann)) +- Add more package managers [\#4533](https://github.com/nlohmann/json/pull/4533) ([nlohmann](https://github.com/nlohmann)) +- Replace EOF with char\_traits [\#4532](https://github.com/nlohmann/json/pull/4532) ([nlohmann](https://github.com/nlohmann)) +- Fix return value of get\_ptr for unsigned integers [\#4525](https://github.com/nlohmann/json/pull/4525) ([nlohmann](https://github.com/nlohmann)) +- Add more GCC warning flags [\#4524](https://github.com/nlohmann/json/pull/4524) ([nlohmann](https://github.com/nlohmann)) +- Update licenses [\#4521](https://github.com/nlohmann/json/pull/4521) ([nlohmann](https://github.com/nlohmann)) +- json start/end position implementation [\#4517](https://github.com/nlohmann/json/pull/4517) ([sushshring](https://github.com/sushshring)) +- Overwork documentation [\#4516](https://github.com/nlohmann/json/pull/4516) ([nlohmann](https://github.com/nlohmann)) +- Allow comparing default initialized iterators [\#4512](https://github.com/nlohmann/json/pull/4512) ([nlohmann](https://github.com/nlohmann)) +- fix: integer parsed as float when EINTR set in errno [\#4506](https://github.com/nlohmann/json/pull/4506) ([StuartGorman](https://github.com/StuartGorman)) +- Make SAX output locale-independent [\#4505](https://github.com/nlohmann/json/pull/4505) ([nlohmann](https://github.com/nlohmann)) +- Skip enum tests when JSON\_DisableEnumSerialization=ON [\#4504](https://github.com/nlohmann/json/pull/4504) ([nlohmann](https://github.com/nlohmann)) +- Fix weak-vtables warning [\#4500](https://github.com/nlohmann/json/pull/4500) ([nlohmann](https://github.com/nlohmann)) +- Suppress warnings in NLOHMANN\_JSON\_SERIALIZE\_ENUM [\#4497](https://github.com/nlohmann/json/pull/4497) ([nlohmann](https://github.com/nlohmann)) +- Add comment for \#4494 [\#4496](https://github.com/nlohmann/json/pull/4496) ([nlohmann](https://github.com/nlohmann)) +- Add test for libstdc++ [\#4495](https://github.com/nlohmann/json/pull/4495) ([nlohmann](https://github.com/nlohmann)) +- Another desperate try to fix the CI [\#4489](https://github.com/nlohmann/json/pull/4489) ([nlohmann](https://github.com/nlohmann)) +- Possible fix for \#4485 
[\#4487](https://github.com/nlohmann/json/pull/4487) ([jordan-hoang](https://github.com/jordan-hoang)) +- Update CONTRIBUTING.md [\#4486](https://github.com/nlohmann/json/pull/4486) ([zerocukor287](https://github.com/zerocukor287)) +- Allow overriding the CMake target name [\#4483](https://github.com/nlohmann/json/pull/4483) ([iboB](https://github.com/iboB)) +- Update is\_structured.md [\#4472](https://github.com/nlohmann/json/pull/4472) ([thetimr](https://github.com/thetimr)) +- Add CPack support [\#4459](https://github.com/nlohmann/json/pull/4459) ([zjyhjqs](https://github.com/zjyhjqs)) +- CMake: generate a pkg-config file that follow pkg-config conventions [\#4456](https://github.com/nlohmann/json/pull/4456) ([dcbaker](https://github.com/dcbaker)) +- Update natvis to reflect 3.11.3 and the current structure of basic\_json [\#4451](https://github.com/nlohmann/json/pull/4451) ([gmyers18](https://github.com/gmyers18)) +- Docs: fix typos of 'whether' in `operator_{gt,le,lt}.md` [\#4412](https://github.com/nlohmann/json/pull/4412) ([tsnl](https://github.com/tsnl)) +- Remove alwayslink=True Bazel flag [\#4396](https://github.com/nlohmann/json/pull/4396) ([mering](https://github.com/mering)) +- Optimize binary `get_number` implementation by reading multiple bytes at once [\#4391](https://github.com/nlohmann/json/pull/4391) ([TianyiChen](https://github.com/TianyiChen)) +- Make iterator\_proxy\_value a forward\_iterator \(\#4371\) [\#4372](https://github.com/nlohmann/json/pull/4372) ([captaincrutches](https://github.com/captaincrutches)) +- Add lgtm explanation [\#4362](https://github.com/nlohmann/json/pull/4362) ([nlohmann](https://github.com/nlohmann)) +- chore: fix some typos in comments [\#4345](https://github.com/nlohmann/json/pull/4345) ([laterlaugh](https://github.com/laterlaugh)) +- Fix gdb pretty printer [\#4343](https://github.com/nlohmann/json/pull/4343) ([MrJia1997](https://github.com/MrJia1997)) +- Fix for incorrect function name in documentation example [\#4342](https://github.com/nlohmann/json/pull/4342) ([alexprabhat99](https://github.com/alexprabhat99)) +- Fixed an error in the `Custom data source` example. [\#4335](https://github.com/nlohmann/json/pull/4335) ([philip-paul-mueller](https://github.com/philip-paul-mueller)) +- Updated exception handling to catch const reference in out\_of\_range [\#4331](https://github.com/nlohmann/json/pull/4331) ([LeilaShcheglova](https://github.com/LeilaShcheglova)) +- \#4307 Updated docx to 3.10.5 from 3.10.4 [\#4310](https://github.com/nlohmann/json/pull/4310) ([AniketDhemare](https://github.com/AniketDhemare)) +- Align astyle flags in Makefile with CI [\#4277](https://github.com/nlohmann/json/pull/4277) ([serge-s](https://github.com/serge-s)) +- Suppress Clang-Tidy warnings [\#4276](https://github.com/nlohmann/json/pull/4276) ([nlohmann](https://github.com/nlohmann)) +- Remove broken link from CONTRIBUTING.md [\#4274](https://github.com/nlohmann/json/pull/4274) ([serge-s](https://github.com/serge-s)) +- Fix version in json\_has\_static\_rtti.md [\#4269](https://github.com/nlohmann/json/pull/4269) ([ALF-ONE](https://github.com/ALF-ONE)) +- Add support of multi-dim C-style array member of struct. 
[\#4262](https://github.com/nlohmann/json/pull/4262) ([peng-wang-cn](https://github.com/peng-wang-cn)) +- Docs: Fix wrong code usage in the Value access section of `json_pointer.md` [\#4255](https://github.com/nlohmann/json/pull/4255) ([Fallen-Breath](https://github.com/Fallen-Breath)) +- Fix `to_json` for enums when the enum has an unsigned underlying type. [\#4237](https://github.com/nlohmann/json/pull/4237) ([TheJCAB](https://github.com/TheJCAB)) +- feat: Rebase `feature/optional` to `develop` [\#4036](https://github.com/nlohmann/json/pull/4036) ([fsandhei](https://github.com/fsandhei)) +- Add NLOHMANN\_DEFINE\_DERIVED\_TYPE\_\* macros [\#4033](https://github.com/nlohmann/json/pull/4033) ([rotolof](https://github.com/rotolof)) + +## [v3.11.3](https://github.com/nlohmann/json/releases/tag/v3.11.3) (2023-11-28) + +[Full Changelog](https://github.com/nlohmann/json/compare/v3.11.2...v3.11.3) + +- Parser and constructor resolve integer types differently [\#4207](https://github.com/nlohmann/json/issues/4207) +- README.md overuses `template` keyword before `get` function [\#4205](https://github.com/nlohmann/json/issues/4205) +- Exception SIGSEGV - Segmentation violation signal on file parsing \(v3.11.2, linux, doctest\) [\#4193](https://github.com/nlohmann/json/issues/4193) +- In highly nested functions, passing json into a function leads to a segmentation fault/bus error [\#4186](https://github.com/nlohmann/json/issues/4186) +- why a single-object json file appears in an array [\#4183](https://github.com/nlohmann/json/issues/4183) +- Initializing `json` by direct initialization and copy initialization invokes different constructors [\#4174](https://github.com/nlohmann/json/issues/4174) +- Deprecation warning about std::char\_traits\ [\#4163](https://github.com/nlohmann/json/issues/4163) +- LLVM 16.0.6 issues warning for literal operators when Wdeprecated-literal-operator [\#4129](https://github.com/nlohmann/json/issues/4129) +- GCC compiler warning about violating the C++ One Definition Rule \[-Wodr\] [\#4116](https://github.com/nlohmann/json/issues/4116) +- error: building nlohmann-json:arm64-osx failed with: BUILD\_FAILED [\#4091](https://github.com/nlohmann/json/issues/4091) +- dump\(\): Non-conforming with JSON-spec escape of strings? [\#4088](https://github.com/nlohmann/json/issues/4088) +- Compiling in visual studio 2022 gives a warning [\#4081](https://github.com/nlohmann/json/issues/4081) +- Upgrade CMake minimum version [\#4076](https://github.com/nlohmann/json/issues/4076) +- \ character in the content of a string cause error in parser.? [\#4067](https://github.com/nlohmann/json/issues/4067) +- JSON Parsing Freeze Issue on Nintendo Switch [\#4066](https://github.com/nlohmann/json/issues/4066) +- Clang++ compilation fails on extremely small example [\#4061](https://github.com/nlohmann/json/issues/4061) +- how about open a new repository for header only version? 
[\#4060](https://github.com/nlohmann/json/issues/4060) +- json::count returns only 0 or 1 [\#4052](https://github.com/nlohmann/json/issues/4052) +- std::function error [\#4050](https://github.com/nlohmann/json/issues/4050) +- Json package not compiling properly [\#4042](https://github.com/nlohmann/json/issues/4042) +- Explicit conversion example in docs should use `template get` [\#4038](https://github.com/nlohmann/json/issues/4038) +- Improve wording of parse\_error exception [\#4037](https://github.com/nlohmann/json/issues/4037) +- Parse error on valid JSON file [\#4028](https://github.com/nlohmann/json/issues/4028) +- Empty JSON object returns size of 1 [\#4027](https://github.com/nlohmann/json/issues/4027) +- Help needed to fix CI [\#4025](https://github.com/nlohmann/json/issues/4025) +- Security vulnerabilities detected: CVE-2022-24439, WS-2022-0438, WS-2022-0437 [\#4020](https://github.com/nlohmann/json/issues/4020) +- multithreading use from\_msgpack leading very slow [\#4016](https://github.com/nlohmann/json/issues/4016) +- Error with sol for Lua: items\(\) is not a recognized container [\#4012](https://github.com/nlohmann/json/issues/4012) +- Parser does not read non ascii characters : ŞÜİĞ [\#4007](https://github.com/nlohmann/json/issues/4007) +- malloc\(\): unaligned fastbin chunk detected [\#3999](https://github.com/nlohmann/json/issues/3999) +- try/catch block doesn't work while accessing const json& array. [\#3998](https://github.com/nlohmann/json/issues/3998) +- a bug about list [\#3995](https://github.com/nlohmann/json/issues/3995) +- heap corruption when i use nlohmann::json::accept function to check a valid json [\#3994](https://github.com/nlohmann/json/issues/3994) +- Exception on gcc but not apple clang [\#3986](https://github.com/nlohmann/json/issues/3986) +- Can't support convert the type? std::string json\_str = R"\({"value": "3.1415"}\)"; float value = j\["value"\].get\\(\); [\#3984](https://github.com/nlohmann/json/issues/3984) +- `#pragma once` not supported with C++20 modules in clang [\#3974](https://github.com/nlohmann/json/issues/3974) +- const array\_t::operator\[\] results in buffer overflow / segv on nullptr on out of bounds access [\#3973](https://github.com/nlohmann/json/issues/3973) +- Set minimal permissions to Github Workflows [\#3971](https://github.com/nlohmann/json/issues/3971) +- Parsing array error [\#3968](https://github.com/nlohmann/json/issues/3968) +- why I can return tuple as json? 
[\#3961](https://github.com/nlohmann/json/issues/3961) +- type must be number, but is null [\#3956](https://github.com/nlohmann/json/issues/3956) +- Class Composition of json members produces incorrect json when constructing with initialization list [\#3955](https://github.com/nlohmann/json/issues/3955) +- exit without error message [\#3948](https://github.com/nlohmann/json/issues/3948) +- NLOHMANN\_DEFINE\_TYPE\_INTRUSIVE doesn't work with "json\_fwd.hpp" [\#3946](https://github.com/nlohmann/json/issues/3946) +- Dangerous use of pull\_request\_target [\#3945](https://github.com/nlohmann/json/issues/3945) +- Test \#7: test-bjdata\_cpp11 ............................\*\*\*Failed [\#3941](https://github.com/nlohmann/json/issues/3941) +- Memory leak detection with basic usage of NLOHMANN\_JSON\_SERIALIZE\_ENUM [\#3939](https://github.com/nlohmann/json/issues/3939) +- Parse doesnt work [\#3936](https://github.com/nlohmann/json/issues/3936) +- Clean up badges [\#3935](https://github.com/nlohmann/json/issues/3935) +- \[json.exception.type\_error.305\] cannot use operator\[\] with a string argument with array [\#3931](https://github.com/nlohmann/json/issues/3931) +- GCC 13 build failures [\#3927](https://github.com/nlohmann/json/issues/3927) +- Exception throw even though code is inside try/catch [\#3926](https://github.com/nlohmann/json/issues/3926) +- Please fix failing tests [\#3923](https://github.com/nlohmann/json/issues/3923) +- Security vulnerability in dependency: future 0.18.2 [\#3922](https://github.com/nlohmann/json/issues/3922) +- json pretty printer causes python exceptions on non-json types [\#3919](https://github.com/nlohmann/json/issues/3919) +- how does a normal basic\_json\<\> object cuase assertion `false` [\#3918](https://github.com/nlohmann/json/issues/3918) +- The library can not parse JSON generate by Chome DevTools Protocol [\#3903](https://github.com/nlohmann/json/issues/3903) +- Typo in `cmake/test.cmake` [\#3902](https://github.com/nlohmann/json/issues/3902) +- Parser adds wrapping array when compiled with GCC [\#3897](https://github.com/nlohmann/json/issues/3897) +- when i use for\(auto iter& : jsonObject\) it occure some error [\#3893](https://github.com/nlohmann/json/issues/3893) +- Check Drone CI [\#3890](https://github.com/nlohmann/json/issues/3890) +- Json::accept\(std::ifstream\) [\#3884](https://github.com/nlohmann/json/issues/3884) +- \[json.exception.parse\_error.101\] parse error at line 1, column 1: syntax error while parsing value - unexpected end of input; expected '\[', '{', or a literal [\#3882](https://github.com/nlohmann/json/issues/3882) +- Memory leak when exception is thrown in adl\_serializer::to\_json [\#3881](https://github.com/nlohmann/json/issues/3881) +- building with cmake [\#3880](https://github.com/nlohmann/json/issues/3880) +- \[json.exception.type\_error.316\] invalid UTF-8 byte at index 0: 0xB6 [\#3879](https://github.com/nlohmann/json/issues/3879) +- Visual Studio 2015 C2664 error std::pair\< [\#3867](https://github.com/nlohmann/json/issues/3867) +- I want the data field to be empty serialized and deserialized to each other [\#3866](https://github.com/nlohmann/json/issues/3866) +- Generated natvis is invalid XML [\#3858](https://github.com/nlohmann/json/issues/3858) +- Json Arrays have inconsistent nesting levels across different OSs [\#3854](https://github.com/nlohmann/json/issues/3854) +- Occur error when parse character '\' [\#3844](https://github.com/nlohmann/json/issues/3844) +- Proccess crash as soon as I parse json 
[\#3843](https://github.com/nlohmann/json/issues/3843) +- json::parse and constructor with the same json generates different type [\#3842](https://github.com/nlohmann/json/issues/3842) +- json::accept return false on valid JSON [\#3838](https://github.com/nlohmann/json/issues/3838) +- decode\(state, codep, byte\) generates warnings. [\#3837](https://github.com/nlohmann/json/issues/3837) +- Arithmetic operators are not working as expected [\#3832](https://github.com/nlohmann/json/issues/3832) +- array\_index does not catch std::invalid\_argument exception from std::stoull [\#3831](https://github.com/nlohmann/json/issues/3831) +- 3.11.2: test suite is failing in two units [\#3828](https://github.com/nlohmann/json/issues/3828) +- Compile Error on g++ using get\(\) function [\#3827](https://github.com/nlohmann/json/issues/3827) +- nlohmann::json::parse can't handle the "€" symbol. [\#3825](https://github.com/nlohmann/json/issues/3825) +- When reading a non-existent key, I unexpectedly get a value. Is it the wrong way I use it? [\#3811](https://github.com/nlohmann/json/issues/3811) +- Code analysis warning string\_concat.hpp C26800: Use of a moved from object [\#3805](https://github.com/nlohmann/json/issues/3805) +- The code used to build with 3.10.2 but fails now [\#3804](https://github.com/nlohmann/json/issues/3804) +- Inconsistent Behaviour of NaN & Null Values [\#3799](https://github.com/nlohmann/json/issues/3799) +- json\_fwd.hpp doesn't work [\#3796](https://github.com/nlohmann/json/issues/3796) +- Compilation fails in MSVC 22 [\#3787](https://github.com/nlohmann/json/issues/3787) +- parsing json with missing key throws exception [\#3784](https://github.com/nlohmann/json/issues/3784) +- Allow to disable inline namespaces completley [\#3746](https://github.com/nlohmann/json/issues/3746) +- double free or Assertion failed! [\#3729](https://github.com/nlohmann/json/issues/3729) +- Amalgated json\_fwd.hpp not included in include.zip [\#3727](https://github.com/nlohmann/json/issues/3727) +- INT64\_MIN/MAX not defined for newer g++ [\#3722](https://github.com/nlohmann/json/issues/3722) +- Compilation error with JSON\_DIAGNOSTICS enabled [\#3717](https://github.com/nlohmann/json/issues/3717) +- class-level enum not serialized as string via NLOHMANN\_JSON\_SERIALIZE\_ENUM [\#3715](https://github.com/nlohmann/json/issues/3715) +- Local copy given by operator\[\] or at\(\) [\#3704](https://github.com/nlohmann/json/issues/3704) +- nlohmann::to\_json method not acting as expected with nlohmann::adl\_serializer specialization. 
[\#3340](https://github.com/nlohmann/json/issues/3340) +- braced-init-list: array vs other constructor [\#2583](https://github.com/nlohmann/json/issues/2583) + +- JSON for Modern C++ 3.11.3 [\#4222](https://github.com/nlohmann/json/pull/4222) ([nlohmann](https://github.com/nlohmann)) +- Update documentation for the next release [\#4216](https://github.com/nlohmann/json/pull/4216) ([nlohmann](https://github.com/nlohmann)) +- Fix failing CI checks [\#4215](https://github.com/nlohmann/json/pull/4215) ([colbychaskell](https://github.com/colbychaskell)) +- Fix CI \(again\) [\#4196](https://github.com/nlohmann/json/pull/4196) ([nlohmann](https://github.com/nlohmann)) +- fix cmake header path in install with custom CMAKE\_INSTALL\_INCLUDEDIR [\#4194](https://github.com/nlohmann/json/pull/4194) ([bebuch](https://github.com/bebuch)) +- Add more specific error message when attempting to parse empty input [\#4180](https://github.com/nlohmann/json/pull/4180) ([colbychaskell](https://github.com/colbychaskell)) +- Fix char\_traits deprecation warning [\#4179](https://github.com/nlohmann/json/pull/4179) ([colbychaskell](https://github.com/colbychaskell)) +- Fix MinGW CI [\#4175](https://github.com/nlohmann/json/pull/4175) ([scribam](https://github.com/scribam)) +- Fix spellcheck issue [\#4173](https://github.com/nlohmann/json/pull/4173) ([mwestphal](https://github.com/mwestphal)) +- Fix source highlighting in user defined type macros docs [\#4169](https://github.com/nlohmann/json/pull/4169) ([ZeronSix](https://github.com/ZeronSix)) +- Fix deprecation warning [\#4161](https://github.com/nlohmann/json/pull/4161) ([nlohmann](https://github.com/nlohmann)) +- Fix CI [\#4160](https://github.com/nlohmann/json/pull/4160) ([nlohmann](https://github.com/nlohmann)) +- Update index.md [\#4159](https://github.com/nlohmann/json/pull/4159) ([miny1233](https://github.com/miny1233)) +- Update index.md [\#4149](https://github.com/nlohmann/json/pull/4149) ([HO-COOH](https://github.com/HO-COOH)) +- Correct a typo in serve\_header/README.md [\#4143](https://github.com/nlohmann/json/pull/4143) ([felixonmars](https://github.com/felixonmars)) +- Fixed init-list construction when size\_type is not int [\#4140](https://github.com/nlohmann/json/pull/4140) ([tomalakgeretkal](https://github.com/tomalakgeretkal)) +- Update CODEOWNERS [\#4126](https://github.com/nlohmann/json/pull/4126) ([tarolling](https://github.com/tarolling)) +- Accept NEW CMake policies up to CMake 3.14 [\#4112](https://github.com/nlohmann/json/pull/4112) ([craigscott-crascit](https://github.com/craigscott-crascit)) +- Fix typo in afl\_driver.cpp [\#4109](https://github.com/nlohmann/json/pull/4109) ([eltociear](https://github.com/eltociear)) +- Capture exceptions by const& in docs. [\#4099](https://github.com/nlohmann/json/pull/4099) ([iwanders](https://github.com/iwanders)) +- Fix CI, again [\#4083](https://github.com/nlohmann/json/pull/4083) ([nlohmann](https://github.com/nlohmann)) +- Fix Clang-Tidy warnings [\#4047](https://github.com/nlohmann/json/pull/4047) ([nlohmann](https://github.com/nlohmann)) +- Fix compile error with \_HAS\_STATIC\_RTTI=0 [\#4046](https://github.com/nlohmann/json/pull/4046) ([ALF-ONE](https://github.com/ALF-ONE)) +- Add to CONTRIBUTING.md that `make pretty` is required for test updates. 
[\#4045](https://github.com/nlohmann/json/pull/4045) ([gregmarr](https://github.com/gregmarr)) +- Added to tests the file unit-algorithm.cpp \(c++ 11\) functions from algorithm library [\#4044](https://github.com/nlohmann/json/pull/4044) ([Tomerkm](https://github.com/Tomerkm)) +- Use template get instead of get in examples [\#4039](https://github.com/nlohmann/json/pull/4039) ([tusooa](https://github.com/tusooa)) +- Support Apple's Swift Package Manager [\#4010](https://github.com/nlohmann/json/pull/4010) ([aleksproger](https://github.com/aleksproger)) +- Add Vcpkg port version badge [\#3988](https://github.com/nlohmann/json/pull/3988) ([njakob](https://github.com/njakob)) +- Fix CI + new Doctest [\#3985](https://github.com/nlohmann/json/pull/3985) ([nlohmann](https://github.com/nlohmann)) +- Set minimal permissions to Github Workflows [\#3972](https://github.com/nlohmann/json/pull/3972) ([joycebrum](https://github.com/joycebrum)) +- Refactor amalgamation workflow to avoid dangerous use of pull\_request\_target [\#3969](https://github.com/nlohmann/json/pull/3969) ([joycebrum](https://github.com/joycebrum)) +- Fix typo in test.cmake [\#3951](https://github.com/nlohmann/json/pull/3951) ([theevilone45](https://github.com/theevilone45)) +- tests/unit-iterators2: use std::ranges::equals for range comparisons [\#3950](https://github.com/nlohmann/json/pull/3950) ([ArsenArsen](https://github.com/ArsenArsen)) +- 3935, removed lgtm badge and added Cirrus CI badge [\#3937](https://github.com/nlohmann/json/pull/3937) ([haadfida](https://github.com/haadfida)) +- ⬆️ Bump future from 0.18.2 to 0.18.3 in /docs/mkdocs [\#3934](https://github.com/nlohmann/json/pull/3934) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Change 2022 to 2023 [\#3932](https://github.com/nlohmann/json/pull/3932) ([floriansegginger](https://github.com/floriansegginger)) +- PrettyPrinter: Check if match is valid before accessing group [\#3920](https://github.com/nlohmann/json/pull/3920) ([Finkman](https://github.com/Finkman)) +- Fix CI issues [\#3906](https://github.com/nlohmann/json/pull/3906) ([barcode](https://github.com/barcode)) +- Prevent memory leak when exception is thrown in adl\_serializer::to\_json [\#3901](https://github.com/nlohmann/json/pull/3901) ([barcode](https://github.com/barcode)) +- custom allocators: define missing 'rebind' type [\#3895](https://github.com/nlohmann/json/pull/3895) ([trofi](https://github.com/trofi)) +- Try old MinGW script [\#3892](https://github.com/nlohmann/json/pull/3892) ([nlohmann](https://github.com/nlohmann)) +- Upgrade Python packages [\#3891](https://github.com/nlohmann/json/pull/3891) ([nlohmann](https://github.com/nlohmann)) +- Fix warning about moved from object [\#3889](https://github.com/nlohmann/json/pull/3889) ([nlohmann](https://github.com/nlohmann)) +- Remove a magic number [\#3888](https://github.com/nlohmann/json/pull/3888) ([nlohmann](https://github.com/nlohmann)) +- Add migration guide [\#3887](https://github.com/nlohmann/json/pull/3887) ([nlohmann](https://github.com/nlohmann)) +- Clang 15 [\#3876](https://github.com/nlohmann/json/pull/3876) ([nlohmann](https://github.com/nlohmann)) +- Bump certifi from 2022.6.15 to 2022.12.7 in /docs/mkdocs [\#3872](https://github.com/nlohmann/json/pull/3872) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Fix natvis XML [\#3863](https://github.com/nlohmann/json/pull/3863) ([nlohmann](https://github.com/nlohmann)) +- Fix pipeline [\#3862](https://github.com/nlohmann/json/pull/3862) 
([nlohmann](https://github.com/nlohmann)) +- Add CIFuzz CI GitHub action [\#3845](https://github.com/nlohmann/json/pull/3845) ([DavidKorczynski](https://github.com/DavidKorczynski)) +- Add serialization-only user defined type macros [\#3816](https://github.com/nlohmann/json/pull/3816) ([ZeronSix](https://github.com/ZeronSix)) +- Bump joblib from 1.1.0 to 1.2.0 in /docs/mkdocs [\#3781](https://github.com/nlohmann/json/pull/3781) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Fix some typos for n-dimensional arrays [\#3767](https://github.com/nlohmann/json/pull/3767) ([striezel](https://github.com/striezel)) +- Fix 'declaration hides global declaration' warning [\#3751](https://github.com/nlohmann/json/pull/3751) ([falbrechtskirchinger](https://github.com/falbrechtskirchinger)) +- Fix typos in .md files [\#3748](https://github.com/nlohmann/json/pull/3748) ([tocic](https://github.com/tocic)) +- Update Codacy link [\#3740](https://github.com/nlohmann/json/pull/3740) ([nlohmann](https://github.com/nlohmann)) +- Add missing files to release artifacts [\#3728](https://github.com/nlohmann/json/pull/3728) ([falbrechtskirchinger](https://github.com/falbrechtskirchinger)) +- Add dark mode toggle to documentation [\#3726](https://github.com/nlohmann/json/pull/3726) ([falbrechtskirchinger](https://github.com/falbrechtskirchinger)) +- Add clang-tools to required tools for ci\_static\_analysis\_clang [\#3724](https://github.com/nlohmann/json/pull/3724) ([nlohmann](https://github.com/nlohmann)) +- Replace limit macros with std::numeric\_limits [\#3723](https://github.com/nlohmann/json/pull/3723) ([falbrechtskirchinger](https://github.com/falbrechtskirchinger)) +- Add missing \ include [\#3719](https://github.com/nlohmann/json/pull/3719) ([falbrechtskirchinger](https://github.com/falbrechtskirchinger)) +- Add Bazel build support [\#3709](https://github.com/nlohmann/json/pull/3709) ([Vertexwahn](https://github.com/Vertexwahn)) +- Use official Clang/GCC containers [\#3703](https://github.com/nlohmann/json/pull/3703) ([nlohmann](https://github.com/nlohmann)) +- Add 'Check amalgamation' workflow [\#3693](https://github.com/nlohmann/json/pull/3693) ([falbrechtskirchinger](https://github.com/falbrechtskirchinger)) +- Allow custom base class as node customization point [\#3110](https://github.com/nlohmann/json/pull/3110) ([barcode](https://github.com/barcode)) + +## [v3.11.2](https://github.com/nlohmann/json/releases/tag/v3.11.2) (2022-08-12) + +[Full Changelog](https://github.com/nlohmann/json/compare/v3.11.1...v3.11.2) - MSVC natvis visualizer does not work after introduction of inline ABI namespace [\#3696](https://github.com/nlohmann/json/issues/3696) - The use of parenthesis gives compilation errors in some situations [\#3682](https://github.com/nlohmann/json/issues/3682) @@ -145,7 +476,7 @@ All notable changes to this project will be documented in this file. 
This projec - Fix warning [\#3634](https://github.com/nlohmann/json/pull/3634) ([nlohmann](https://github.com/nlohmann)) - Add license header to new files [\#3633](https://github.com/nlohmann/json/pull/3633) ([nlohmann](https://github.com/nlohmann)) - Add a unit test including windows.h [\#3631](https://github.com/nlohmann/json/pull/3631) ([falbrechtskirchinger](https://github.com/falbrechtskirchinger)) -- Fixed latest build error in msvc platform [\#3630](https://github.com/nlohmann/json/pull/3630) ([KsaNL](https://github.com/KsaNL)) +- Fixed latest build error in msvc platform [\#3630](https://github.com/nlohmann/json/pull/3630) ([Lioncky](https://github.com/Lioncky)) - Add regression tests for \#3204 and \#3333 [\#3629](https://github.com/nlohmann/json/pull/3629) ([falbrechtskirchinger](https://github.com/falbrechtskirchinger)) - Fix patch::add creating nonexistent parents [\#3628](https://github.com/nlohmann/json/pull/3628) ([falbrechtskirchinger](https://github.com/falbrechtskirchinger)) - Adjust JSON Pointer examples [\#3622](https://github.com/nlohmann/json/pull/3622) ([nlohmann](https://github.com/nlohmann)) @@ -178,7 +509,7 @@ All notable changes to this project will be documented in this file. This projec - Use REUSE framework [\#3546](https://github.com/nlohmann/json/pull/3546) ([nlohmann](https://github.com/nlohmann)) - Use `std::iterator_traits` to extract `iterator_category` [\#3544](https://github.com/nlohmann/json/pull/3544) ([Mike-Leo-Smith](https://github.com/Mike-Leo-Smith)) - BJData dimension length can not be string\_t::npos, fix \#3541 [\#3543](https://github.com/nlohmann/json/pull/3543) ([fangq](https://github.com/fangq)) -- Allow disabling default enum conversions [\#3536](https://github.com/nlohmann/json/pull/3536) ([zxey](https://github.com/zxey)) +- Allow disabling default enum conversions [\#3536](https://github.com/nlohmann/json/pull/3536) ([richardhozak](https://github.com/richardhozak)) - Add to\_json\(\) for std::vector\::reference [\#3534](https://github.com/nlohmann/json/pull/3534) ([falbrechtskirchinger](https://github.com/falbrechtskirchinger)) - CI: Enable 32bit unit test \(3\) [\#3532](https://github.com/nlohmann/json/pull/3532) ([falbrechtskirchinger](https://github.com/falbrechtskirchinger)) - Use new CI image [\#3528](https://github.com/nlohmann/json/pull/3528) ([nlohmann](https://github.com/nlohmann)) @@ -639,7 +970,6 @@ All notable changes to this project will be documented in this file. This projec - array\_index possible out of range [\#2205](https://github.com/nlohmann/json/issues/2205) - Object deserialized as array [\#2204](https://github.com/nlohmann/json/issues/2204) - Sending to a function a reference to a sub-branch [\#2200](https://github.com/nlohmann/json/issues/2200) -- How to Serialize derived class to JSON object? [\#2199](https://github.com/nlohmann/json/issues/2199) - JSON incorrectly serialized [\#2198](https://github.com/nlohmann/json/issues/2198) - Exception Unhandled out\_of\_range error [\#2197](https://github.com/nlohmann/json/issues/2197) - msgpack serialisation : float is treated as 64bit float, not 32bit float. [\#2196](https://github.com/nlohmann/json/issues/2196) @@ -672,7 +1002,6 @@ All notable changes to this project will be documented in this file. 
This projec - Compilation failure using Clang on Windows [\#1898](https://github.com/nlohmann/json/issues/1898) - Fail to build when including json.hpp as a system include [\#1818](https://github.com/nlohmann/json/issues/1818) - Parsing string into json doesn't preserve the order correctly. [\#1817](https://github.com/nlohmann/json/issues/1817) -- \[C++17\] Allow std::optional to convert to nlohmann::json [\#1749](https://github.com/nlohmann/json/issues/1749) - How can I save json object in file in order? [\#1717](https://github.com/nlohmann/json/issues/1717) - Support for Comments [\#1513](https://github.com/nlohmann/json/issues/1513) - clang compiler: error : unknown type name 'not' [\#1119](https://github.com/nlohmann/json/issues/1119) @@ -1692,7 +2021,7 @@ All notable changes to this project will be documented in this file. This projec - Use a version check to provide backwards comatible CMake imported target names [\#1245](https://github.com/nlohmann/json/pull/1245) ([chuckatkins](https://github.com/chuckatkins)) - Fix issue \#1237 [\#1238](https://github.com/nlohmann/json/pull/1238) ([theodelrieu](https://github.com/theodelrieu)) - Add a get overload taking a parameter. [\#1231](https://github.com/nlohmann/json/pull/1231) ([theodelrieu](https://github.com/theodelrieu)) -- Move lambda out of unevaluated context [\#1230](https://github.com/nlohmann/json/pull/1230) ([mandreyel](https://github.com/mandreyel)) +- Move lambda out of unevaluated context [\#1230](https://github.com/nlohmann/json/pull/1230) ([vimpunk](https://github.com/vimpunk)) - Remove static asserts [\#1228](https://github.com/nlohmann/json/pull/1228) ([theodelrieu](https://github.com/theodelrieu)) - Better error 305 [\#1221](https://github.com/nlohmann/json/pull/1221) ([rivertam](https://github.com/rivertam)) - Fix \#1213 [\#1214](https://github.com/nlohmann/json/pull/1214) ([simnalamburt](https://github.com/simnalamburt)) @@ -1859,8 +2188,8 @@ All notable changes to this project will be documented in this file. This projec - Fix unit tests that were silently skipped or crashed \(depending on the compiler\) [\#1176](https://github.com/nlohmann/json/pull/1176) ([grembo](https://github.com/grembo)) - Refactor/no virtual sax [\#1153](https://github.com/nlohmann/json/pull/1153) ([theodelrieu](https://github.com/theodelrieu)) - Fixed compiler error in VS 2015 for debug mode [\#1151](https://github.com/nlohmann/json/pull/1151) ([sonulohani](https://github.com/sonulohani)) -- Fix links to cppreference named requirements \(formerly concepts\) [\#1144](https://github.com/nlohmann/json/pull/1144) ([jrakow](https://github.com/jrakow)) -- meson: fix include directory [\#1142](https://github.com/nlohmann/json/pull/1142) ([jrakow](https://github.com/jrakow)) +- Fix links to cppreference named requirements \(formerly concepts\) [\#1144](https://github.com/nlohmann/json/pull/1144) ([ghost](https://github.com/ghost)) +- meson: fix include directory [\#1142](https://github.com/nlohmann/json/pull/1142) ([ghost](https://github.com/ghost)) - Feature/unordered map conversion [\#1138](https://github.com/nlohmann/json/pull/1138) ([theodelrieu](https://github.com/theodelrieu)) - fixed compile error for \#1045 [\#1134](https://github.com/nlohmann/json/pull/1134) ([Daniel599](https://github.com/Daniel599)) - test \(non\)equality for alt\_string implementation [\#1130](https://github.com/nlohmann/json/pull/1130) ([agrianius](https://github.com/agrianius)) @@ -1869,7 +2198,7 @@ All notable changes to this project will be documented in this file. 
This projec - fix typo in README [\#1078](https://github.com/nlohmann/json/pull/1078) ([martin-mfg](https://github.com/martin-mfg)) - Fix typo [\#1058](https://github.com/nlohmann/json/pull/1058) ([dns13](https://github.com/dns13)) - Misc cmake packaging enhancements [\#1048](https://github.com/nlohmann/json/pull/1048) ([chuckatkins](https://github.com/chuckatkins)) -- Fixed incorrect LLVM version number in README [\#1047](https://github.com/nlohmann/json/pull/1047) ([jammehcow](https://github.com/jammehcow)) +- Fixed incorrect LLVM version number in README [\#1047](https://github.com/nlohmann/json/pull/1047) ([jupjohn](https://github.com/jupjohn)) - Fix trivial typo in comment. [\#1043](https://github.com/nlohmann/json/pull/1043) ([coryan](https://github.com/coryan)) - Package Manager: Spack [\#1041](https://github.com/nlohmann/json/pull/1041) ([ax3l](https://github.com/ax3l)) - CMake: 3.8+ is Sufficient [\#1040](https://github.com/nlohmann/json/pull/1040) ([ax3l](https://github.com/ax3l)) @@ -2398,7 +2727,7 @@ All notable changes to this project will be documented in this file. This projec - Fix "not constraint" grammar in docs [\#674](https://github.com/nlohmann/json/pull/674) ([wincent](https://github.com/wincent)) - Add documentation for integration with CMake and hunter [\#671](https://github.com/nlohmann/json/pull/671) ([dan-42](https://github.com/dan-42)) - REFACTOR: rewrite CMakeLists.txt for better inlcude and reuse [\#669](https://github.com/nlohmann/json/pull/669) ([dan-42](https://github.com/dan-42)) -- enable\_testing only if the JSON\_BuildTests is ON [\#666](https://github.com/nlohmann/json/pull/666) ([effolkronium](https://github.com/effolkronium)) +- enable\_testing only if the JSON\_BuildTests is ON [\#666](https://github.com/nlohmann/json/pull/666) ([ilqvya](https://github.com/ilqvya)) - Support moving from rvalues in std::initializer\_list [\#663](https://github.com/nlohmann/json/pull/663) ([himikof](https://github.com/himikof)) - add ensure\_ascii parameter to dump. \#330 [\#654](https://github.com/nlohmann/json/pull/654) ([ryanjmulder](https://github.com/ryanjmulder)) - Rename BuildTests to JSON\_BuildTests [\#652](https://github.com/nlohmann/json/pull/652) ([olegendo](https://github.com/olegendo)) @@ -2917,11 +3246,11 @@ All notable changes to this project will be documented in this file. This projec - Keyword 'inline' is useless when member functions are defined in headers [\#87](https://github.com/nlohmann/json/pull/87) ([ahamez](https://github.com/ahamez)) - Remove useless typename [\#86](https://github.com/nlohmann/json/pull/86) ([ahamez](https://github.com/ahamez)) - Avoid warning with Xcode's clang [\#85](https://github.com/nlohmann/json/pull/85) ([ahamez](https://github.com/ahamez)) -- Fix typos [\#73](https://github.com/nlohmann/json/pull/73) ([aqnouch](https://github.com/aqnouch)) +- Fix typos [\#73](https://github.com/nlohmann/json/pull/73) ([maqnouch](https://github.com/maqnouch)) - Replace `default_callback` function with `nullptr` and check for null… [\#72](https://github.com/nlohmann/json/pull/72) ([aburgh](https://github.com/aburgh)) - support enum [\#71](https://github.com/nlohmann/json/pull/71) ([likebeta](https://github.com/likebeta)) - Fix performance regression introduced with the parsing callback feature. 
[\#69](https://github.com/nlohmann/json/pull/69) ([aburgh](https://github.com/aburgh)) -- Improve the implementations of the comparission-operators [\#63](https://github.com/nlohmann/json/pull/63) ([Florianjw](https://github.com/Florianjw)) +- Improve the implementations of the comparission-operators [\#63](https://github.com/nlohmann/json/pull/63) ([Fiona-J-W](https://github.com/Fiona-J-W)) - Fix compilation of json\_unit with GCC 5 [\#59](https://github.com/nlohmann/json/pull/59) ([dkopecek](https://github.com/dkopecek)) - Parse streams incrementally. [\#40](https://github.com/nlohmann/json/pull/40) ([aburgh](https://github.com/aburgh)) - Feature/small float serialization [\#38](https://github.com/nlohmann/json/pull/38) ([jrandall](https://github.com/jrandall)) diff --git a/contrib/restricted/nlohmann_json/FILES.md b/contrib/restricted/nlohmann_json/FILES.md new file mode 100644 index 000000000000..a7a35f7f235c --- /dev/null +++ b/contrib/restricted/nlohmann_json/FILES.md @@ -0,0 +1,239 @@ +# Supporting files + +This file describes the source for supporting files; that is, files that are not part of the library, but define the infrastructure and other aspects of the project. + +- [Continuous Integration](#continuous-integration) +- [GitHub](#github) +- [REUSE](#reuse) +- [Package Managers](#package-managers) + +## Continuous Integration + +### `.cirrus.yml` + +Configuration file for the pipeline at [Cirrus CI](https://cirrus-ci.com/github/nlohmann/json). + +Further documentation: + +- [Writing tasks](https://cirrus-ci.org/guide/writing-tasks/) + +> [!IMPORTANT] +> The filename `.cirrus.yml` and position (root of the repository) are predetermined by Cirrus CI. + +### `.github/external_ci/appveyor.yml` + +Configuration for the pipelines at [AppVeyor](https://ci.appveyor.com/project/nlohmann/json). + +Further documentation: + +- [appveyor.yml reference](https://www.appveyor.com/docs/appveyor-yml/) + +> [!NOTE] +> The filename can be freely configured in the AppVeyor project. + +## GitHub + +### `CITATION.cff` + +A file to configure the citation for the repository which is displayed in the sidebar of the project. + +Further documentation: + +- [About CITATION files](https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-citation-files) + +> [!IMPORTANT] +> The filename `CITATION.cff` and position (root of the repository) are predetermined by GitHub. + +### `.github/CODE_OF_CONDUCT.md` + +The code of conduct for the project. This is the Markdown version of the [Contributor Covenant Code of Conduct](https://www.contributor-covenant.org/version/2/1/code_of_conduct/). The code of conduct is linked on the [Community Standards](https://github.com/nlohmann/json/community) page and is mentioned by the Sentiment Bot. + +Further documentation: + +- [Adding a code of conduct to your project](https://docs.github.com/en/communities/setting-up-your-project-for-healthy-contributions/adding-a-code-of-conduct-to-your-project) + +> [!IMPORTANT] +> The filename `.github/CODE_OF_CONDUCT.md` is predetermined by GitHub. + +> [!NOTE] +> The file is part of the documentation and is included in `docs/mkdocs/docs/community/code_of_conduct.md`. + +### `.github/CODEOWNERS` + +The code owners file for the project which is used to select reviewers for new pull requests. 
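As an illustration of the mechanism described here, a `CODEOWNERS` file maps gitignore-style path patterns to GitHub handles whose review is requested. The entries below are a hypothetical sketch only, not the repository's actual rules:

```
# Hypothetical sketch: path patterns on the left,
# GitHub handles of the requested reviewers on the right.
*       @nlohmann
/docs/  @nlohmann
```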
+ +Further documentation: + +- [About code owners](https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners) + +> [!IMPORTANT] +> The filename `.github/CODEOWNERS` is predetermined by GitHub. + +### `.github/config.yml` + +Configuration file for [probot](https://probot.github.io/apps/), in particular the [Sentiment Bot](https://probot.github.io/apps/sentiment-bot/) and the [Request Info](https://probot.github.io/apps/request-info/). + +> [!IMPORTANT] +> The filename `.github/config.yml` is predetermined by probot. + +### `.github/CONTRIBUTING.md` + +The contribution guidelines which are linked in the [Community Standards](https://github.com/nlohmann/json/community) and at . + +Further documentation: + +- [Setting guidelines for repository contributors](https://docs.github.com/en/communities/setting-up-your-project-for-healthy-contributions/setting-guidelines-for-repository-contributors) + +> [!IMPORTANT] +> The filename `.github/CONTRIBUTING.md` is predetermined by GitHub. + +> [!NOTE] +> The file is part of the documentation and is included in `docs/mkdocs/docs/community/contribution_guidelines.md`. + +### `.github/dependabot.yml` + +The configuration of [dependabot](https://github.com/dependabot) which ensures the dependencies (GitHub actions and Python packages used in the CI) remain up-to-date. + +Further documentation: + +- [Configuring Dependabot security updates](https://docs.github.com/en/code-security/dependabot/dependabot-security-updates/configuring-dependabot-security-updates) + +> [!IMPORTANT] +> The filename `.github/dependabot.yml` is predetermined by GitHub. + +### `.github/FUNDING.yml` + +A file to configure the sponsor button of the repository which is displayed in the sidebar of the project. + +Further documentation: + +- [Displaying a sponsor button in your repository](https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/displaying-a-sponsor-button-in-your-repository) + +> [!IMPORTANT] +> The filename `.github/FUNDING.yml` is predetermined by GitHub. + +### `.github/ISSUE_TEMPLATE/bug.yaml` + +Issue form template for bugs. + +Further documentation: + +- [Configuring issue templates for your repository](https://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/configuring-issue-templates-for-your-repository) + +> [!IMPORTANT] +> The folder `.github/ISSUE_TEMPLATE` is predetermined by GitHub. + +### `.github/ISSUE_TEMPLATE/config.yaml` + +Issue template chooser configuration. The file is used to configure the dialog when a new issue is created. + +Further documentation: + +- [Configuring issue templates for your repository](https://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/configuring-issue-templates-for-your-repository) + +> [!IMPORTANT] +> The filename `.github/ISSUE_TEMPLATE/config.yaml` is predetermined by GitHub. + +### `.github/labeler.yml` + +Configuration file for the "Pull Request Labeler" workflow defined in `workflows/labeler.yml`. This file defines rules how labels are assigned to pull requests based on which files are changed. + +Further documentation: + +- [Label manager for PRs and issues based on configurable conditions](https://github.com/srvaroa/labeler) + +> [!NOTE] +> The filename defaults to `.github/labeler.yml` and can be configured in the workflow. 
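To make the rule format more concrete, a hypothetical `labeler.yml` fragment for the linked srvaroa/labeler app could look roughly like the following; the exact keys and matching semantics are defined by that project's documentation, so treat this purely as an assumed sketch:

```yaml
# Illustrative sketch only (assumed srvaroa/labeler schema):
# attach the "documentation" label when files under docs/ change.
version: 1
labels:
  - label: "documentation"
    files:
      - "docs/.*"
```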
+ +### `.github/PULL_REQUEST_TEMPLATE.md` + +The pull request template which prefills new pull requests. + +Further documentation: + +- [Creating a pull request template for your repository](https://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/creating-a-pull-request-template-for-your-repository) + +> [!IMPORTANT] +> The filename `.github/PULL_REQUEST_TEMPLATE.md` is predetermined by GitHub. + +### `.github/SECURITY.md` + +The goal is to describe how to securely report security vulnerabilities for this repository. The security policy is linked at . + +Further documentation: + +- [Adding a security policy to your repository](https://docs.github.com/en/code-security/getting-started/adding-a-security-policy-to-your-repository) + +> [!IMPORTANT] +> The filename `.github/SECURITY.yml` is predetermined by GitHub. + +> [!NOTE] +> The file is part of the documentation and is included in `docs/mkdocs/docs/community/security_policy.md`. + +### `LICENSE.MIT` + +The license of the project. + +Further documentation: + +- [Adding a license to a repository](https://docs.github.com/en/communities/setting-up-your-project-for-healthy-contributions/adding-a-license-to-a-repository) + +> [!IMPORTANT] +> The filename `LICENSE.MIT` is partly predetermined by GitHub. The root filename must be `LICENSE`. + +## REUSE + +### `.reuse/dep5` + +The file defines the licenses of certain third-party component in the repository. The root `Makefile` contains a target `reuse` that checks for compliance. + +Further documentation: + +- [DEP5](https://reuse.software/spec-3.2/#dep5-deprecated) +- [reuse command-line tool](https://pypi.org/project/reuse/) +- [documentation of linting](https://reuse.readthedocs.io/en/stable/man/reuse-lint.html) +- [REUSE](http://reuse.software) + +> [!IMPORTANT] +> The filename `.reuse/dep5` is predetermined by REUSE. Alternatively, a `REUSE.toml` file can be used. + +### `.reuse/templates` + +Copyright header templates for source files. The root `Makefile` contains a target `reuse` that updates copyright headers with the templates. + +Further information: + +- [reuse command-line tool](https://pypi.org/project/reuse/) +- [documentation on templates](https://reuse.readthedocs.io/en/stable/man/reuse-annotate.html#cmdoption-t) +- [REUSE](http://reuse.software) + +> [!IMPORTANT] +> The folder name `.reuse/templates` is predetermined by REUSE. + +### `LICENSES` + +A folder that contains every license of all licenses files (library and third-party code). + +Further documentation: + +- [REUSE specification](https://reuse.software/spec-3.3/) + +> [!IMPORTANT] +> The folder name `LICENSES` is predetermined by REUSE. 
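For a quick local compliance check, the reuse command-line tool referenced above can be run directly; for example (assuming a Python environment is available):

```shell
pip install reuse   # https://pypi.org/project/reuse/
reuse lint          # report files with missing or inconsistent license information
```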
+ + +## Package Managers + +### `BUILD.bazel` + +The file can be updated by calling + +```shell +make BUILD.bazel +``` + +### `meson.build` + +### `Package.swift` + +### `WORKSPACE.bazel` diff --git a/contrib/restricted/nlohmann_json/LICENSE.MIT b/contrib/restricted/nlohmann_json/LICENSE.MIT index 1c1f7a690d81..a1dacc8dbbd9 100644 --- a/contrib/restricted/nlohmann_json/LICENSE.MIT +++ b/contrib/restricted/nlohmann_json/LICENSE.MIT @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2013-2022 Niels Lohmann +Copyright (c) 2013-2025 Niels Lohmann Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/contrib/restricted/nlohmann_json/README.md b/contrib/restricted/nlohmann_json/README.md index 910902706273..f97afeb412f3 100644 --- a/contrib/restricted/nlohmann_json/README.md +++ b/contrib/restricted/nlohmann_json/README.md @@ -1,4 +1,4 @@ -[![JSON for Modern C++](docs/json.gif)](https://github.com/nlohmann/json/releases) +[![JSON for Modern C++](docs/mkdocs/docs/images/json.gif)](https://github.com/nlohmann/json/releases) [![Build Status](https://ci.appveyor.com/api/projects/status/1acb366xfyg3qybk/branch/develop?svg=true)](https://ci.appveyor.com/project/nlohmann/json) [![Ubuntu](https://github.com/nlohmann/json/workflows/Ubuntu/badge.svg)](https://github.com/nlohmann/json/actions?query=workflow%3AUbuntu) @@ -6,19 +6,19 @@ [![Windows](https://github.com/nlohmann/json/workflows/Windows/badge.svg)](https://github.com/nlohmann/json/actions?query=workflow%3AWindows) [![Coverage Status](https://coveralls.io/repos/github/nlohmann/json/badge.svg?branch=develop)](https://coveralls.io/github/nlohmann/json?branch=develop) [![Coverity Scan Build Status](https://scan.coverity.com/projects/5550/badge.svg)](https://scan.coverity.com/projects/nlohmann-json) -[![Codacy Badge](https://app.codacy.com/project/badge/Grade/e0d1a9d5d6fd46fcb655c4cb930bb3e8)](https://www.codacy.com/gh/nlohmann/json/dashboard?utm_source=github.com&utm_medium=referral&utm_content=nlohmann/json&utm_campaign=Badge_Grade) +[![Codacy Badge](https://app.codacy.com/project/badge/Grade/e0d1a9d5d6fd46fcb655c4cb930bb3e8)](https://app.codacy.com/gh/nlohmann/json/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade) [![Cirrus CI](https://api.cirrus-ci.com/github/nlohmann/json.svg)](https://cirrus-ci.com/github/nlohmann/json) [![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/json.svg)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:json) [![Try online](https://img.shields.io/badge/try-online-blue.svg)](https://wandbox.org/permlink/1mp10JbaANo6FUc7) [![Documentation](https://img.shields.io/badge/docs-mkdocs-blue.svg)](https://json.nlohmann.me) [![GitHub license](https://img.shields.io/badge/license-MIT-blue.svg)](https://raw.githubusercontent.com/nlohmann/json/master/LICENSE.MIT) [![GitHub Releases](https://img.shields.io/github/release/nlohmann/json.svg)](https://github.com/nlohmann/json/releases) -[![Vcpkg Version](https://img.shields.io/vcpkg/v/nlohmann-json)](https://vcpkg.link/ports/nlohmann-json) [![Packaging status](https://repology.org/badge/tiny-repos/nlohmann-json.svg)](https://repology.org/project/nlohmann-json/versions) [![GitHub Downloads](https://img.shields.io/github/downloads/nlohmann/json/total)](https://github.com/nlohmann/json/releases) [![GitHub 
Issues](https://img.shields.io/github/issues/nlohmann/json.svg)](https://github.com/nlohmann/json/issues) [![Average time to resolve an issue](https://isitmaintained.com/badge/resolution/nlohmann/json.svg)](https://isitmaintained.com/project/nlohmann/json "Average time to resolve an issue") [![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/289/badge)](https://bestpractices.coreinfrastructure.org/projects/289) +[![OpenSSF Scorecard](https://api.scorecard.dev/projects/github.com/nlohmann/json/badge)](https://scorecard.dev/viewer/?uri=github.com/nlohmann/json) [![GitHub Sponsors](https://img.shields.io/badge/GitHub-Sponsors-ff69b4)](https://github.com/sponsors/nlohmann) [![REUSE status](https://api.reuse.software/badge/github.com/nlohmann/json)](https://api.reuse.software/info/github.com/nlohmann/json) [![Discord](https://img.shields.io/discord/1003743314341793913)](https://discord.gg/6mrGXKvX7y) @@ -26,6 +26,7 @@ - [Design goals](#design-goals) - [Sponsors](#sponsors) - [Support](#support) ([documentation](https://json.nlohmann.me), [FAQ](https://json.nlohmann.me/home/faq/), [discussions](https://github.com/nlohmann/json/discussions), [API](https://json.nlohmann.me/api/basic_json/), [bug issues](https://github.com/nlohmann/json/issues)) +- [Quick reference](#quick-reference) - [Examples](#examples) - [Read JSON from a file](#read-json-from-a-file) - [Creating `json` objects from JSON literals](#creating-json-objects-from-json-literals) @@ -39,6 +40,7 @@ - [Conversions to/from arbitrary types](#arbitrary-types-conversions) - [Specializing enum conversion](#specializing-enum-conversion) - [Binary formats (BSON, CBOR, MessagePack, UBJSON, and BJData)](#binary-formats-bson-cbor-messagepack-ubjson-and-bjdata) +- [Customers](#customers) - [Supported compilers](#supported-compilers) - [Integration](#integration) - [CMake](#cmake) @@ -48,7 +50,6 @@ - [Contact](#contact) - [Thanks](#thanks) - [Used third-party tools](#used-third-party-tools) -- [Projects using JSON for Modern C++](#projects-using-json-for-modern-c) - [Notes](#notes) - [Execute unit tests](#execute-unit-tests) @@ -58,9 +59,9 @@ There are myriads of [JSON](https://json.org) libraries out there, and each may - **Intuitive syntax**. In languages such as Python, JSON feels like a first class data type. We used all the operator magic of modern C++ to achieve the same feeling in your code. Check out the [examples below](#examples) and you'll know what I mean. -- **Trivial integration**. Our whole code consists of a single header file [`json.hpp`](https://github.com/nlohmann/json/blob/develop/single_include/nlohmann/json.hpp). That's it. No library, no subproject, no dependencies, no complex build system. The class is written in vanilla C++11. All in all, everything should require no adjustment of your compiler flags or project settings. +- **Trivial integration**. Our whole code consists of a single header file [`json.hpp`](https://github.com/nlohmann/json/blob/develop/single_include/nlohmann/json.hpp). That's it. No library, no subproject, no dependencies, no complex build system. The class is written in vanilla C++11. All in all, everything should require no adjustment of your compiler flags or project settings. The library is also included in all popular [package managers](https://json.nlohmann.me/integration/package_managers/). -- **Serious testing**. 
Our code is heavily [unit-tested](https://github.com/nlohmann/json/tree/develop/tests/src) and covers [100%](https://coveralls.io/r/nlohmann/json) of the code, including all exceptional behavior. Furthermore, we checked with [Valgrind](https://valgrind.org) and the [Clang Sanitizers](https://clang.llvm.org/docs/index.html) that there are no memory leaks. [Google OSS-Fuzz](https://github.com/google/oss-fuzz/tree/master/projects/json) additionally runs fuzz tests against all parsers 24/7, effectively executing billions of tests so far. To maintain high quality, the project is following the [Core Infrastructure Initiative (CII) best practices](https://bestpractices.coreinfrastructure.org/projects/289). +- **Serious testing**. Our code is heavily [unit-tested](https://github.com/nlohmann/json/tree/develop/tests/src) and covers [100%](https://coveralls.io/r/nlohmann/json) of the code, including all exceptional behavior. Furthermore, we checked with [Valgrind](https://valgrind.org) and the [Clang Sanitizers](https://clang.llvm.org/docs/index.html) that there are no memory leaks. [Google OSS-Fuzz](https://github.com/google/oss-fuzz/tree/master/projects/json) additionally runs fuzz tests against all parsers 24/7, effectively executing billions of tests so far. To maintain high quality, the project is following the [Core Infrastructure Initiative (CII) best practices](https://bestpractices.coreinfrastructure.org/projects/289). See the [quality assurance](https://json.nlohmann.me/community/quality_assurance) overview documentation. Other aspects were not so important to us: @@ -70,14 +71,14 @@ Other aspects were not so important to us: See the [contribution guidelines](https://github.com/nlohmann/json/blob/master/.github/CONTRIBUTING.md#please-dont) for more information. - ## Sponsors You can sponsor this library at [GitHub Sponsors](https://github.com/sponsors/nlohmann). -### :office: Corporate Sponsor +### :raising_hand: Priority Sponsor -[![](https://upload.wikimedia.org/wikipedia/commons/thumb/9/9e/Codacy-logo-black.svg/320px-Codacy-logo-black.svg.png)](https://github.com/codacy/About) +- [Martti Laine](https://github.com/codeclown) +- [Paul Harrington](https://github.com/phrrngtn) ### :label: Named Sponsors @@ -88,20 +89,49 @@ You can sponsor this library at [GitHub Sponsors](https://github.com/sponsors/nl - [Steve Wagner](https://github.com/ciroque) - [Lion Yang](https://github.com/LionNatsu) +### Further support + +The development of the library is further supported by JetBrains by providing free access to their IDE tools. + +[![JetBrains logo.](https://resources.jetbrains.com/storage/products/company/brand/logos/jetbrains.svg)](https://jb.gg/OpenSourceSupport) + Thanks everyone! ## Support :question: If you have a **question**, please check if it is already answered in the [**FAQ**](https://json.nlohmann.me/home/faq/) or the [**Q&A**](https://github.com/nlohmann/json/discussions/categories/q-a) section. If not, please [**ask a new question**](https://github.com/nlohmann/json/discussions/new) there. -:books: If you want to **learn more** about how to use the library, check out the rest of the [**README**](#examples), have a look at [**code examples**](https://github.com/nlohmann/json/tree/develop/docs/examples), or browse through the [**help pages**](https://json.nlohmann.me). 
+:books: If you want to **learn more** about how to use the library, check out the rest of the [**README**](#examples), have a look at [**code examples**](https://github.com/nlohmann/json/tree/develop/docs/mkdocs/docs/examples), or browse through the [**help pages**](https://json.nlohmann.me).

-:construction: If you want to understand the **API** better, check out the [**API Reference**](https://json.nlohmann.me/api/basic_json/).
+:construction: If you want to understand the **API** better, check out the [**API Reference**](https://json.nlohmann.me/api/basic_json/) or have a look at the [quick reference](#quick-reference) below.

:bug: If you found a **bug**, please check the [**FAQ**](https://json.nlohmann.me/home/faq/) if it is a known issue or the result of a design decision. Please also have a look at the [**issue list**](https://github.com/nlohmann/json/issues) before you [**create a new issue**](https://github.com/nlohmann/json/issues/new/choose). Please provide as much information as possible to help us understand and reproduce your issue.

There is also a [**docset**](https://github.com/Kapeli/Dash-User-Contributions/tree/master/docsets/JSON_for_Modern_C%2B%2B) for the documentation browsers [Dash](https://kapeli.com/dash), [Velocity](https://velocity.silverlakesoftware.com), and [Zeal](https://zealdocs.org) that contains the full [documentation](https://json.nlohmann.me) as an offline resource.

+## Quick reference
+
+- **Constructors**: [basic_json](https://json.nlohmann.me/api/basic_json/basic_json), [array](https://json.nlohmann.me/api/basic_json/array), [binary](https://json.nlohmann.me/api/basic_json/binary), [object](https://json.nlohmann.me/api/basic_json/object)
+- **Object inspection**: [type](https://json.nlohmann.me/api/basic_json/type), [operator value_t](https://json.nlohmann.me/api/basic_json/operator_value_t), [type_name](https://json.nlohmann.me/api/basic_json/type_name), [is_primitive](https://json.nlohmann.me/api/basic_json/is_primitive), [is_structured](https://json.nlohmann.me/api/basic_json/is_structured), [is_null](https://json.nlohmann.me/api/basic_json/is_null), [is_boolean](https://json.nlohmann.me/api/basic_json/is_boolean), [is_number](https://json.nlohmann.me/api/basic_json/is_number), [is_number_integer](https://json.nlohmann.me/api/basic_json/is_number_integer), [is_number_unsigned](https://json.nlohmann.me/api/basic_json/is_number_unsigned), [is_number_float](https://json.nlohmann.me/api/basic_json/is_number_float), [is_object](https://json.nlohmann.me/api/basic_json/is_object), [is_array](https://json.nlohmann.me/api/basic_json/is_array), [is_string](https://json.nlohmann.me/api/basic_json/is_string), [is_binary](https://json.nlohmann.me/api/basic_json/is_binary), [is_discarded](https://json.nlohmann.me/api/basic_json/is_discarded)
+- **Value access**: [get](https://json.nlohmann.me/api/basic_json/get), [get_to](https://json.nlohmann.me/api/basic_json/get_to), [get_ptr](https://json.nlohmann.me/api/basic_json/get_ptr), [get_ref](https://json.nlohmann.me/api/basic_json/get_ref), [operator ValueType](https://json.nlohmann.me/api/basic_json/operator_ValueType), [get_binary](https://json.nlohmann.me/api/basic_json/get_binary)
+- **Element access**: [at](https://json.nlohmann.me/api/basic_json/at), [operator[]](https://json.nlohmann.me/api/basic_json/operator[]), [value](https://json.nlohmann.me/api/basic_json/value), [front](https://json.nlohmann.me/api/basic_json/front), [back](https://json.nlohmann.me/api/basic_json/back)
+- **Lookup**: 
[find](https://json.nlohmann.me/api/basic_json/find), [count](https://json.nlohmann.me/api/basic_json/count), [contains](https://json.nlohmann.me/api/basic_json/contains) +- **Iterators**: [begin](https://json.nlohmann.me/api/basic_json/begin), [cbegin](https://json.nlohmann.me/api/basic_json/cbegin), [end](https://json.nlohmann.me/api/basic_json/end), [cend](https://json.nlohmann.me/api/basic_json/cend), [rbegin](https://json.nlohmann.me/api/basic_json/rbegin), [rend](https://json.nlohmann.me/api/basic_json/rend), [crbegin](https://json.nlohmann.me/api/basic_json/crbegin), [crend](https://json.nlohmann.me/api/basic_json/crend), [items](https://json.nlohmann.me/api/basic_json/items) +- **Capacity**: [empty](https://json.nlohmann.me/api/basic_json/empty), [size](https://json.nlohmann.me/api/basic_json/size), [max_size](https://json.nlohmann.me/api/basic_json/max_size) +- **Modifiers**: [clear](https://json.nlohmann.me/api/basic_json/clear), [push_back](https://json.nlohmann.me/api/basic_json/push_back), [operator+=](https://json.nlohmann.me/api/basic_json/operator+=), [emplace_back](https://json.nlohmann.me/api/basic_json/emplace_back), [emplace](https://json.nlohmann.me/api/basic_json/emplace), [erase](https://json.nlohmann.me/api/basic_json/erase), [insert](https://json.nlohmann.me/api/basic_json/insert), [update](https://json.nlohmann.me/api/basic_json/update), [swap](https://json.nlohmann.me/api/basic_json/swap) +- **Lexicographical comparison operators**: [operator==](https://json.nlohmann.me/api/basic_json/operator_eq), [operator!=](https://json.nlohmann.me/api/basic_json/operator_ne), [operator<](https://json.nlohmann.me/api/basic_json/operator_lt), [operator>](https://json.nlohmann.me/api/basic_json/operator_gt), [operator<=](https://json.nlohmann.me/api/basic_json/operator_le), [operator>=](https://json.nlohmann.me/api/basic_json/operator_ge), [operator<=>](https://json.nlohmann.me/api/basic_json/operator_spaceship) +- **Serialization / Dumping**: [dump](https://json.nlohmann.me/api/basic_json/dump) +- **Deserialization / Parsing**: [parse](https://json.nlohmann.me/api/basic_json/parse), [accept](https://json.nlohmann.me/api/basic_json/accept), [sax_parse](https://json.nlohmann.me/api/basic_json/sax_parse) +- **JSON Pointer functions**: [flatten](https://json.nlohmann.me/api/basic_json/flatten), [unflatten](https://json.nlohmann.me/api/basic_json/unflatten) +- **JSON Patch functions**: [patch](https://json.nlohmann.me/api/basic_json/patch), [patch_inplace](https://json.nlohmann.me/api/basic_json/patch_inplace), [diff](https://json.nlohmann.me/api/basic_json/diff), [merge_patch](https://json.nlohmann.me/api/basic_json/merge_patch) +- **Static functions**: [meta](https://json.nlohmann.me/api/basic_json/meta), [get_allocator](https://json.nlohmann.me/api/basic_json/get_allocator) +- **Binary formats**: [from_bjdata](https://json.nlohmann.me/api/basic_json/from_bjdata), [from_bson](https://json.nlohmann.me/api/basic_json/from_bson), [from_cbor](https://json.nlohmann.me/api/basic_json/from_cbor), [from_msgpack](https://json.nlohmann.me/api/basic_json/from_msgpack), [from_ubjson](https://json.nlohmann.me/api/basic_json/from_ubjson), [to_bjdata](https://json.nlohmann.me/api/basic_json/to_bjdata), [to_bson](https://json.nlohmann.me/api/basic_json/to_bson), [to_cbor](https://json.nlohmann.me/api/basic_json/to_cbor), [to_msgpack](https://json.nlohmann.me/api/basic_json/to_msgpack), [to_ubjson](https://json.nlohmann.me/api/basic_json/to_ubjson) +- **Non-member functions**: 
[operator<<](https://json.nlohmann.me/api/operator_ltlt/), [operator>>](https://json.nlohmann.me/api/operator_gtgt/), [to_string](https://json.nlohmann.me/api/basic_json/to_string) +- **Literals**: [operator""_json](https://json.nlohmann.me/api/operator_literal_json) +- **Helper classes**: [std::hash<basic_json>](https://json.nlohmann.me/api/basic_json/std_hash), [std::swap<basic_json>](https://json.nlohmann.me/api/basic_json/std_swap) + +[**Full API documentation**](https://json.nlohmann.me/api/basic_json/) + ## Examples Here are some examples to give you an idea how to use the class. @@ -109,9 +139,8 @@ Here are some examples to give you an idea how to use the class. Beside the examples below, you may want to: → Check the [documentation](https://json.nlohmann.me/)\ -→ Browse the [standalone example files](https://github.com/nlohmann/json/tree/develop/docs/examples) - -Every API function (documented in the [API Documentation](https://json.nlohmann.me/api/basic_json/)) has a corresponding standalone example file. For example, the [`emplace()`](https://json.nlohmann.me/api/basic_json/emplace/) function has a matching [emplace.cpp](https://github.com/nlohmann/json/blob/develop/docs/examples/emplace.cpp) example file. +→ Browse the [standalone example files](https://github.com/nlohmann/json/tree/develop/docs/mkdocs/docs/examples)\ +→ Read the full [API Documentation](https://json.nlohmann.me/api/basic_json/) with self-contained examples for every function ### Read JSON from a file @@ -385,7 +414,7 @@ struct MyIterator { using iterator_category = std::input_iterator_tag; MyIterator& operator++() { - MyContainer.advance(); + target->advance(); return *this; } @@ -394,7 +423,7 @@ struct MyIterator { } reference operator*() const { - return target.get_current(); + return target->get_current(); } MyContainer* target = nullptr; @@ -547,7 +576,6 @@ int fob_present = o.count("fob"); // 0 o.erase("foo"); ``` - ### Conversion from STL containers Any sequence container (`std::array`, `std::vector`, `std::deque`, `std::forward_list`, `std::list`) whose values can be used to construct JSON values (e.g., integers, floating point numbers, Booleans, string types, or again STL containers described in this section) can be used to create a JSON array. The same holds for similar associative containers (`std::set`, `std::multiset`, `std::unordered_set`, `std::unordered_multiset`), but in these cases the order of the elements of the array depends on how the elements are ordered in the respective STL container. @@ -803,26 +831,21 @@ Likewise, when calling `template get()` or `get_to(your_type&)`, the Some important things: -* Those methods **MUST** be in your type's namespace (which can be the global namespace), or the library will not be able to locate them (in this example, they are in namespace `ns`, where `person` is defined). -* Those methods **MUST** be available (e.g., proper headers must be included) everywhere you use these conversions. Look at [issue 1108](https://github.com/nlohmann/json/issues/1108) for errors that may occur otherwise. -* When using `template get()`, `your_type` **MUST** be [DefaultConstructible](https://en.cppreference.com/w/cpp/named_req/DefaultConstructible). (There is a way to bypass this requirement described later.) -* In function `from_json`, use function [`at()`](https://json.nlohmann.me/api/basic_json/at/) to access the object values rather than `operator[]`. 
In case a key does not exist, `at` throws an exception that you can handle, whereas `operator[]` exhibits undefined behavior. -* You do not need to add serializers or deserializers for STL types like `std::vector`: the library already implements these. +- Those methods **MUST** be in your type's namespace (which can be the global namespace), or the library will not be able to locate them (in this example, they are in namespace `ns`, where `person` is defined). +- Those methods **MUST** be available (e.g., proper headers must be included) everywhere you use these conversions. Look at [issue 1108](https://github.com/nlohmann/json/issues/1108) for errors that may occur otherwise. +- When using `template get()`, `your_type` **MUST** be [DefaultConstructible](https://en.cppreference.com/w/cpp/named_req/DefaultConstructible). (There is a way to bypass this requirement described later.) +- In function `from_json`, use function [`at()`](https://json.nlohmann.me/api/basic_json/at/) to access the object values rather than `operator[]`. In case a key does not exist, `at` throws an exception that you can handle, whereas `operator[]` exhibits undefined behavior. +- You do not need to add serializers or deserializers for STL types like `std::vector`: the library already implements these. #### Simplify your life with macros -If you just want to serialize/deserialize some structs, the `to_json`/`from_json` functions can be a lot of boilerplate. +If you just want to serialize/deserialize some structs, the `to_json`/`from_json` functions can be a lot of boilerplate. There are [**several macros**](https://json.nlohmann.me/features/arbitrary_types/#simplify-your-life-with-macros) to make your life easier as long as you (1) want to use a JSON object as serialization and (2) want to use the member variable names as object keys in that object. -There are two macros to make your life easier as long as you (1) want to use a JSON object as serialization and (2) want to use the member variable names as object keys in that object: +Which macro to choose depends on whether private member variables need to be accessed, a deserialization is needed, missing values should yield an error or should be replaced by default values, and if derived classes are used. See [this overview to choose the right one for your use case](https://json.nlohmann.me/api/macros/#serializationdeserialization-macros). -- `NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(name, member1, member2, ...)` is to be defined inside the namespace of the class/struct to create code for. -- `NLOHMANN_DEFINE_TYPE_INTRUSIVE(name, member1, member2, ...)` is to be defined inside the class/struct to create code for. This macro can also access private members. +##### Example usage of macros -In both macros, the first parameter is the name of the class/struct, and all remaining parameters name the members. - -##### Examples - -The `to_json`/`from_json` functions for the `person` struct above can be created with: +The `to_json`/`from_json` functions for the `person` struct above can be created with [`NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE`](https://json.nlohmann.me/api/macros/nlohmann_define_type_non_intrusive/). In all macros, the first parameter is the name of the class/struct, and all remaining parameters name the members. 
```cpp namespace ns { @@ -830,7 +853,7 @@ namespace ns { } ``` -Here is an example with private members, where `NLOHMANN_DEFINE_TYPE_INTRUSIVE` is needed: +Here is another example with private members, where [`NLOHMANN_DEFINE_TYPE_INTRUSIVE`](https://json.nlohmann.me/api/macros/nlohmann_define_type_intrusive/) is needed: ```cpp namespace ns { @@ -839,7 +862,7 @@ namespace ns { std::string street; int housenumber; int postcode; - + public: NLOHMANN_DEFINE_TYPE_INTRUSIVE(address, street, housenumber, postcode) }; @@ -973,14 +996,14 @@ struct bad_serializer { template static void to_json(BasicJsonType& j, const T& value) { - // this calls BasicJsonType::json_serializer::to_json(j, value); + // this calls BasicJsonType::json_serializer::to_json(j, value) // if BasicJsonType::json_serializer == bad_serializer ... oops! j = value; } template static void to_json(const BasicJsonType& j, T& value) { - // this calls BasicJsonType::json_serializer::from_json(j, value); + // this calls BasicJsonType::json_serializer::from_json(j, value) // if BasicJsonType::json_serializer == bad_serializer ... oops! value = j.template get(); // oops! } @@ -1026,14 +1049,16 @@ assert(j3.template get() == TS_RUNNING); // undefined json value to enum (where the first map entry above is the default) json jPi = 3.14; -assert(jPi.template get() == TS_INVALID ); +assert(jPi.template get() == TS_INVALID); ``` Just as in [Arbitrary Type Conversions](#arbitrary-types-conversions) above, + - `NLOHMANN_JSON_SERIALIZE_ENUM()` MUST be declared in your enum type's namespace (which can be the global namespace), or the library will not be able to locate it, and it will default to integer serialization. - It MUST be available (e.g., proper headers must be included) everywhere you use the conversions. Other Important points: + - When using `template get()`, undefined JSON values will default to the first pair specified in your map. Select this default pair carefully. - If an enum or JSON value is specified more than once in your map, the first matching occurrence from the top of the map will be returned when converting to or from JSON. @@ -1108,14 +1133,19 @@ binary.set_subtype(0x10); auto cbor = json::to_msgpack(j); // 0xD5 (fixext2), 0x10, 0xCA, 0xFE ``` +## Customers + +The library is used in multiple projects, applications, operating systems, etc. The list below is not exhaustive, but the result of an internet search. If you know further customers of the library, please let me know, see [contact](#contact). + +[![logos of customers using the library](docs/mkdocs/docs/images/customers.png)](https://json.nlohmann.me/home/customers/) ## Supported compilers -Though it's 2023 already, the support for C++11 is still a bit sparse. Currently, the following compilers are known to work: +Though it's 2025 already, the support for C++11 is still a bit sparse. Currently, the following compilers are known to work: -- GCC 4.8 - 12.0 (and possibly later) -- Clang 3.4 - 15.0 (and possibly later) -- Apple Clang 9.1 - 13.1 (and possibly later) +- GCC 4.8 - 14.2 (and possibly later) +- Clang 3.4 - 21.0 (and possibly later) +- Apple Clang 9.1 - 16.0 (and possibly later) - Intel C++ Compiler 17.0.2 (and possibly later) - Nvidia CUDA Compiler 11.0.221 (and possibly later) - Microsoft Visual C++ 2015 / Build Tools 14.0.25123.0 (and possibly later) @@ -1127,10 +1157,10 @@ I would be happy to learn about other compilers/versions. 
Please note: -- GCC 4.8 has a bug [57824](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57824)): multiline raw strings cannot be the arguments to macros. Don't use multiline raw strings directly in macros with this compiler. +- GCC 4.8 has a bug [57824](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57824): multiline raw strings cannot be the arguments to macros. Don't use multiline raw strings directly in macros with this compiler. - Android defaults to using very old compilers and C++ libraries. To fix this, add the following to your `Application.mk`. This will switch to the LLVM C++ library, the Clang compiler, and enable C++11 and other features disabled by default. - ``` + ```makefile APP_STL := c++_shared NDK_TOOLCHAIN_VERSION := clang3.6 APP_CPPFLAGS += -frtti -fexceptions @@ -1142,68 +1172,7 @@ Please note: - Unsupported versions of GCC and Clang are rejected by `#error` directives. This can be switched off by defining `JSON_SKIP_UNSUPPORTED_COMPILER_CHECK`. Note that you can expect no support in this case. -The following compilers are currently used in continuous integration at [AppVeyor](https://ci.appveyor.com/project/nlohmann/json), [Cirrus CI](https://cirrus-ci.com/github/nlohmann/json), and [GitHub Actions](https://github.com/nlohmann/json/actions): - -| Compiler | Operating System | CI Provider | -|--------------------------------------------------------------------------------------------------------|--------------------|----------------| -| Apple Clang 11.0.3 (clang-1103.0.32.62); Xcode 11.7 | macOS 11.7.1 | GitHub Actions | -| Apple Clang 12.0.0 (clang-1200.0.32.29); Xcode 12.4 | macOS 11.7.1 | GitHub Actions | -| Apple Clang 12.0.5 (clang-1205.0.22.11); Xcode 12.5.1 | macOS 11.7.1 | GitHub Actions | -| Apple Clang 13.0.0 (clang-1300.0.29.3); Xcode 13.0 | macOS 11.7.1 | GitHub Actions | -| Apple Clang 13.0.0 (clang-1300.0.29.3); Xcode 13.1 | macOS 12.6.1 | GitHub Actions | -| Apple Clang 13.0.0 (clang-1300.0.29.30); Xcode 13.2.1 | macOS 12.6.1 | GitHub Actions | -| Apple Clang 13.1.6 (clang-1316.0.21.2.3); Xcode 13.3.1 | macOS 12.6.1 | GitHub Actions | -| Apple Clang 13.1.6 (clang-1316.0.21.2.5); Xcode 13.4.1 | macOS 12.6.1 | GitHub Actions | -| Apple Clang 14.0.0 (clang-1400.0.29.102); Xcode 14.0 | macOS 12.6.1 | GitHub Actions | -| Apple Clang 14.0.0 (clang-1400.0.29.102); Xcode 14.0.1 | macOS 12.6.1 | GitHub Actions | -| Apple Clang 14.0.0 (clang-1400.0.29.202); Xcode 14.1 | macOS 12.6.1 | GitHub Actions | -| Clang 3.5.2 | Ubuntu 20.04.3 LTS | GitHub Actions | -| Clang 3.6.2 | Ubuntu 20.04.3 LTS | GitHub Actions | -| Clang 3.7.1 | Ubuntu 20.04.3 LTS | GitHub Actions | -| Clang 3.8.1 | Ubuntu 20.04.3 LTS | GitHub Actions | -| Clang 3.9.1 | Ubuntu 20.04.3 LTS | GitHub Actions | -| Clang 4.0.1 | Ubuntu 20.04.3 LTS | GitHub Actions | -| Clang 5.0.2 | Ubuntu 20.04.3 LTS | GitHub Actions | -| Clang 6.0.1 | Ubuntu 20.04.3 LTS | GitHub Actions | -| Clang 7.0.1 | Ubuntu 20.04.3 LTS | GitHub Actions | -| Clang 8.0.0 | Ubuntu 20.04.3 LTS | GitHub Actions | -| Clang 9.0.0 | Ubuntu 20.04.3 LTS | GitHub Actions | -| Clang 10.0.0 | Ubuntu 20.04.3 LTS | GitHub Actions | -| Clang 10.0.0 with GNU-like command-line | Windows-10.0.17763 | GitHub Actions | -| Clang 11.0.0 with GNU-like command-line | Windows-10.0.17763 | GitHub Actions | -| Clang 11.0.0 with MSVC-like command-line | Windows-10.0.17763 | GitHub Actions | -| Clang 11.0.0 | Ubuntu 20.04.3 LTS | GitHub Actions | -| Clang 12.0.0 | Ubuntu 20.04.3 LTS | GitHub Actions | -| Clang 12.0.0 with GNU-like command-line | Windows-10.0.17763 
| GitHub Actions | -| Clang 13.0.0 | Ubuntu 20.04.3 LTS | GitHub Actions | -| Clang 13.0.0 with GNU-like command-line | Windows-10.0.17763 | GitHub Actions | -| Clang 14.0.0 | Ubuntu 20.04.3 LTS | GitHub Actions | -| Clang 14.0.0 with GNU-like command-line | Windows-10.0.17763 | GitHub Actions | -| Clang 15.0.0 with GNU-like command-line | Windows-10.0.17763 | GitHub Actions | -| Clang 15.0.4 | Ubuntu 20.04.3 LTS | GitHub Actions | -| Clang 16.0.0 (16.0.0-++20221031071727+500876226c60-1~exp1~20221031071831.439) | Ubuntu 20.04.3 LTS | GitHub Actions | -| GCC 4.8.5 (Ubuntu 4.8.5-4ubuntu2) | Ubuntu 20.04.3 LTS | GitHub Actions | -| GCC 4.9.4 | Ubuntu 20.04.3 LTS | GitHub Actions | -| GCC 5.5.0 | Ubuntu 20.04.3 LTS | GitHub Actions | -| GCC 6.5.0 | Ubuntu 20.04.3 LTS | GitHub Actions | -| GCC 7.5.0 | Ubuntu 20.04.3 LTS | GitHub Actions | -| GCC 8.1.0 (i686-posix-dwarf-rev0, Built by MinGW-W64 project) | Windows-10.0.17763 | GitHub Actions | -| GCC 8.1.0 (x86_64-posix-seh-rev0, Built by MinGW-W64 project) | Windows-10.0.17763 | GitHub Actions | -| GCC 8.5.0 | Ubuntu 20.04.3 LTS | GitHub Actions | -| GCC 9.5.0 | Ubuntu 20.04.3 LTS | GitHub Actions | -| GCC 10.4.0 | Ubuntu 20.04.3 LTS | GitHub Actions | -| GCC 11.1.0 | Ubuntu (aarch64) | Cirrus CI | -| GCC 11.3.0 | Ubuntu 20.04.3 LTS | GitHub Actions | -| GCC 12.2.0 | Ubuntu 20.04.3 LTS | GitHub Actions | -| GCC 13.0.0 20220605 (experimental) | Ubuntu 20.04.3 LTS | GitHub Actions | -| Intel C++ Compiler 2021.5.0.20211109 | Ubuntu 20.04.3 LTS | GitHub Actions | -| NVCC 11.0.221 | Ubuntu 20.04.3 LTS | GitHub Actions | -| Visual Studio 14 2015 MSVC 19.0.24241.7 (Build Engine version 14.0.25420.1) | Windows-6.3.9600 | AppVeyor | -| Visual Studio 15 2017 MSVC 19.16.27035.0 (Build Engine version 15.9.21+g9802d43bc3 for .NET Framework) | Windows-10.0.14393 | AppVeyor | -| Visual Studio 16 2019 MSVC 19.28.29912.0 (Build Engine version 16.9.0+57a23d249 for .NET Framework) | Windows-10.0.17763 | GitHub Actions | -| Visual Studio 16 2019 MSVC 19.28.29912.0 (Build Engine version 16.9.0+57a23d249 for .NET Framework) | Windows-10.0.17763 | AppVeyor | -| Visual Studio 17 2022 MSVC 19.30.30709.0 (Build Engine version 17.0.31804.368 for .NET Framework) | Windows-10.0.20348 | GitHub Actions | - +See the page [quality assurance](https://json.nlohmann.me/community/quality_assurance) on the compilers used to check the library in the CI. ## Integration @@ -1230,7 +1199,7 @@ To use this library from a CMake project, you can locate it directly with `find_ ```cmake # CMakeLists.txt -find_package(nlohmann_json 3.2.0 REQUIRED) +find_package(nlohmann_json 3.12.0 REQUIRED) ... add_library(foo ...) ... @@ -1270,10 +1239,11 @@ Since CMake v3.11, be used to automatically download a release as a dependency at configure time. Example: + ```cmake include(FetchContent) -FetchContent_Declare(json URL https://github.com/nlohmann/json/releases/download/v3.11.3/json.tar.xz) +FetchContent_Declare(json URL https://github.com/nlohmann/json/releases/download/v3.12.0/json.tar.xz) FetchContent_MakeAvailable(json) target_link_libraries(foo PRIVATE nlohmann_json::nlohmann_json) @@ -1300,11 +1270,12 @@ add_library(foo ...) # import method target_link_libraries(foo PRIVATE nlohmann_json::nlohmann_json) ``` + ```cmake # thirdparty/CMakeLists.txt ... 
if(FOO_USE_EXTERNAL_JSON) - find_package(nlohmann_json 3.2.0 REQUIRED) + find_package(nlohmann_json 3.12.0 REQUIRED) else() set(JSON_BuildTests OFF CACHE INTERNAL "") add_subdirectory(nlohmann_json) @@ -1316,51 +1287,24 @@ endif() ### Package Managers -:beer: If you are using OS X and [Homebrew](https://brew.sh), just type `brew install nlohmann-json` and you're set. If you want the bleeding edge rather than the latest release, use `brew install nlohmann-json --HEAD`. See [nlohmann-json](https://formulae.brew.sh/formula/nlohmann-json) for more information. - -If you are using the [Meson Build System](https://mesonbuild.com), add this source tree as a [meson subproject](https://mesonbuild.com/Subprojects.html#using-a-subproject). You may also use the `include.zip` published in this project's [Releases](https://github.com/nlohmann/json/releases) to reduce the size of the vendored source tree. Alternatively, you can get a wrap file by downloading it from [Meson WrapDB](https://wrapdb.mesonbuild.com/nlohmann_json), or simply use `meson wrap install nlohmann_json`. Please see the meson project for any issues regarding the packaging. - -The provided `meson.build` can also be used as an alternative to CMake for installing `nlohmann_json` system-wide in which case a pkg-config file is installed. To use it, simply have your build system require the `nlohmann_json` pkg-config dependency. In Meson, it is preferred to use the [`dependency()`](https://mesonbuild.com/Reference-manual.html#dependency) object with a subproject fallback, rather than using the subproject directly. - -If you are using [Bazel](https://bazel.build/) you can simply reference this repository using `http_archive` or `git_repository` and depend on `@nlohmann_json//:json`. - -If you are using [Conan](https://www.conan.io/) to manage your dependencies, merely add [`nlohmann_json/x.y.z`](https://conan.io/center/nlohmann_json) to your `conanfile`'s requires, where `x.y.z` is the release version you want to use. Please file issues [here](https://github.com/conan-io/conan-center-index/issues) if you experience problems with the packages. - -If you are using [Spack](https://www.spack.io/) to manage your dependencies, you can use the [`nlohmann-json` package](https://spack.readthedocs.io/en/latest/package_list.html#nlohmann-json). Please see the [spack project](https://github.com/spack/spack) for any issues regarding the packaging. - -If you are using [hunter](https://github.com/cpp-pm/hunter) on your project for external dependencies, then you can use the [nlohmann_json package](https://hunter.readthedocs.io/en/latest/packages/pkg/nlohmann_json.html). Please see the hunter project for any issues regarding the packaging. - -If you are using [Buckaroo](https://buckaroo.pm), you can install this library's module with `buckaroo add github.com/buckaroo-pm/nlohmann-json`. Please file issues [here](https://github.com/buckaroo-pm/nlohmann-json). There is a demo repo [here](https://github.com/njlr/buckaroo-nholmann-json-example). - -If you are using [vcpkg](https://github.com/Microsoft/vcpkg/) on your project for external dependencies, then you can install the [nlohmann-json package](https://github.com/Microsoft/vcpkg/tree/master/ports/nlohmann-json) with `vcpkg install nlohmann-json` and follow the then displayed descriptions. Please see the vcpkg project for any issues regarding the packaging. - -If you are using [cget](https://cget.readthedocs.io/en/latest/), you can install the latest development version with `cget install nlohmann/json`. 
A specific version can be installed with `cget install nlohmann/json@v3.1.0`. Also, the multiple header version can be installed by adding the `-DJSON_MultipleHeaders=ON` flag (i.e., `cget install nlohmann/json -DJSON_MultipleHeaders=ON`). - -If you are using [CocoaPods](https://cocoapods.org), you can use the library by adding pod `"nlohmann_json", '~>3.1.2'` to your podfile (see [an example](https://bitbucket.org/benman/nlohmann_json-cocoapod/src/master/)). Please file issues [here](https://bitbucket.org/benman/nlohmann_json-cocoapod/issues?status=new&status=open). - -If you are using [Swift Package Manager](https://swift.org/package-manager/), you can use the library by adding a package dependency to this repository. And target dependency as `.product(name: "nlohmann-json", package: "json")`. - -If you are using [NuGet](https://www.nuget.org), you can use the package [nlohmann.json](https://www.nuget.org/packages/nlohmann.json/). Please check [this extensive description](https://github.com/nlohmann/json/issues/1132#issuecomment-452250255) on how to use the package. Please file issues [here](https://github.com/hnkb/nlohmann-json-nuget/issues). - -If you are using [conda](https://conda.io/), you can use the package [nlohmann_json](https://github.com/conda-forge/nlohmann_json-feedstock) from [conda-forge](https://conda-forge.org) executing `conda install -c conda-forge nlohmann_json`. Please file issues [here](https://github.com/conda-forge/nlohmann_json-feedstock/issues). - -If you are using [MSYS2](https://www.msys2.org/), you can use the [mingw-w64-nlohmann-json](https://packages.msys2.org/base/mingw-w64-nlohmann-json) package, just type `pacman -S mingw-w64-i686-nlohmann-json` or `pacman -S mingw-w64-x86_64-nlohmann-json` for installation. Please file issues [here](https://github.com/msys2/MINGW-packages/issues/new?title=%5Bnlohmann-json%5D) if you experience problems with the packages. - -If you are using [MacPorts](https://ports.macports.org), execute `sudo port install nlohmann-json` to install the [nlohmann-json](https://ports.macports.org/port/nlohmann-json/) package. - -If you are using [`build2`](https://build2.org), you can use the [`nlohmann-json`](https://cppget.org/nlohmann-json) package from the public repository https://cppget.org or directly from the [package's sources repository](https://github.com/build2-packaging/nlohmann-json). In your project's `manifest` file, just add `depends: nlohmann-json` (probably with some [version constraints](https://build2.org/build2-toolchain/doc/build2-toolchain-intro.xhtml#guide-add-remove-deps)). If you are not familiar with using dependencies in `build2`, [please read this introduction](https://build2.org/build2-toolchain/doc/build2-toolchain-intro.xhtml). -Please file issues [here](https://github.com/build2-packaging/nlohmann-json) if you experience problems with the packages. - -If you are using [`wsjcpp`](https://wsjcpp.org), you can use the command `wsjcpp install "https://github.com/nlohmann/json:develop"` to get the latest version. Note you can change the branch ":develop" to an existing tag or another branch. - -If you are using [`CPM.cmake`](https://github.com/TheLartians/CPM.cmake), you can check this [`example`](https://github.com/TheLartians/CPM.cmake/tree/master/examples/json). 
After [adding CPM script](https://github.com/TheLartians/CPM.cmake#adding-cpm) to your project, implement the following snippet to your CMake: - -```cmake -CPMAddPackage( - NAME nlohmann_json - GITHUB_REPOSITORY nlohmann/json - VERSION 3.9.1) -``` +Use your favorite [**package manager**](https://json.nlohmann.me/integration/package_managers/) to use the library. + +-  [**Homebrew**](https://json.nlohmann.me/integration/package_managers/#homebrew) `nlohmann-json` +-  [**Meson**](https://json.nlohmann.me/integration/package_managers/#meson) `nlohmann_json` +-  [**Bazel**](https://json.nlohmann.me/integration/package_managers/#bazel) `nlohmann_json` +-  [**Conan**](https://json.nlohmann.me/integration/package_managers/#conan) `nlohmann_json` +-  [**Spack**](https://json.nlohmann.me/integration/package_managers/#spack) `nlohmann-json` +- [**Hunter**](https://json.nlohmann.me/integration/package_managers/#hunter) `nlohmann_json` +-  [**vcpkg**](https://json.nlohmann.me/integration/package_managers/#vcpkg) `nlohmann-json` +- [**cget**](https://json.nlohmann.me/integration/package_managers/#cget) `nlohmann/json` +-  [**Swift Package Manager**](https://json.nlohmann.me/integration/package_managers/#swift-package-manager) `nlohmann/json` +-  [**Nuget**](https://json.nlohmann.me/integration/package_managers/#nuget) `nlohmann.json` +-  [**Conda**](https://json.nlohmann.me/integration/package_managers/#conda) `nlohmann_json` +-  [**MacPorts**](https://json.nlohmann.me/integration/package_managers/#macports) `nlohmann-json` +-  [**cpm.cmake**](https://json.nlohmann.me/integration/package_managers/#cpmcmake) `gh:nlohmann/json` +-  [**xmake**](https://json.nlohmann.me/integration/package_managers/#xmake) `nlohmann_json` + +The library is part of many package managers. See the [**documentation**](https://json.nlohmann.me/integration/package_managers/) for detailed descriptions and examples. ### Pkg-config @@ -1370,20 +1314,13 @@ If you are using bare Makefiles, you can use `pkg-config` to generate the includ pkg-config nlohmann_json --cflags ``` -Users of the Meson build system will also be able to use a system-wide library, which will be found by `pkg-config`: - -```meson -json = dependency('nlohmann_json', required: true) -``` - - ## License - +OSI approved license The class is licensed under the [MIT License](https://opensource.org/licenses/MIT): -Copyright © 2013-2022 [Niels Lohmann](https://nlohmann.me) +Copyright © 2013-2025 [Niels Lohmann](https://nlohmann.me) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: @@ -1393,13 +1330,19 @@ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR I * * * -The class contains the UTF-8 Decoder from Bjoern Hoehrmann which is licensed under the [MIT License](https://opensource.org/licenses/MIT) (see above). Copyright © 2008-2009 [Björn Hoehrmann](https://bjoern.hoehrmann.de/) +- The class contains the UTF-8 Decoder from Bjoern Hoehrmann which is licensed under the [MIT License](https://opensource.org/licenses/MIT) (see above). 
Copyright © 2008-2009 [Björn Hoehrmann](https://bjoern.hoehrmann.de/) +- The class contains a slightly modified version of the Grisu2 algorithm from Florian Loitsch which is licensed under the [MIT License](https://opensource.org/licenses/MIT) (see above). Copyright © 2009 [Florian Loitsch](https://florian.loitsch.com/) +- The class contains a copy of [Hedley](https://nemequ.github.io/hedley/) from Evan Nemerson which is licensed as [CC0-1.0](https://creativecommons.org/publicdomain/zero/1.0/). +- The class contains parts of [Google Abseil](https://github.com/abseil/abseil-cpp) which is licensed under the [Apache 2.0 License](https://opensource.org/licenses/Apache-2.0). -The class contains a slightly modified version of the Grisu2 algorithm from Florian Loitsch which is licensed under the [MIT License](https://opensource.org/licenses/MIT) (see above). Copyright © 2009 [Florian Loitsch](https://florian.loitsch.com/) +REUSE Software -The class contains a copy of [Hedley](https://nemequ.github.io/hedley/) from Evan Nemerson which is licensed as [CC0-1.0](https://creativecommons.org/publicdomain/zero/1.0/). +The library is compliant to version 3.3 of the [**REUSE specification**](https://reuse.software): -The class contains parts of [Google Abseil](https://github.com/abseil/abseil-cpp) which is licensed under the [Apache 2.0 License](https://opensource.org/licenses/Apache-2.0). +- Every source file contains an SPDX copyright header. +- The full text of all licenses used in the repository can be found in the `LICENSES` folder. +- File `.reuse/dep5` contains an overview of all files' copyrights and licenses. +- Run `pipx run reuse lint` to verify the project's REUSE compliance and `pipx run reuse spdx` to generate a SPDX SBOM. ## Contact @@ -1415,7 +1358,7 @@ Only if your request would contain confidential information, please [send me an I deeply appreciate the help of the following people. - +GitHub avatars of the contributors 1. [Teemperor](https://github.com/Teemperor) implemented CMake support and lcov integration, realized escape and Unicode handling in the string parser, and fixed the JSON serialization. 2. [elliotgoodrich](https://github.com/elliotgoodrich) fixed an issue with double deletion in the iterator classes. @@ -1534,7 +1477,7 @@ I deeply appreciate the help of the following people. 115. [Matthias Möller](https://github.com/TinyTinni) removed the dependency from `std::stringstream`. 116. [agrianius](https://github.com/agrianius) added code to use alternative string implementations. 117. [Daniel599](https://github.com/Daniel599) allowed to use more algorithms with the `items()` function. -118. [Julius Rakow](https://github.com/jrakow) fixed the Meson include directory and fixed the links to [cppreference.com](cppreference.com). +118. [Julius Rakow](https://github.com/jrakow) fixed the Meson include directory and fixed the links to [cppreference.com](https://cppreference.com). 119. [Sonu Lohani](https://github.com/sonulohani) fixed the compilation with MSVC 2015 in debug mode. 120. [grembo](https://github.com/grembo) fixed the test suite and re-enabled several test cases. 121. [Hyeon Kim](https://github.com/simnalamburt) introduced the macro `JSON_INTERNAL_CATCH` to control the exception handling inside the library. @@ -1763,10 +1706,47 @@ I deeply appreciate the help of the following people. 344. [Aleksei Sapitskii](https://github.com/aleksproger) added support for Apple's Swift Package Manager. 345. [Benjamin Buch](https://github.com/bebuch) fixed the installation path in CMake. 
346. [Colby Haskell](https://github.com/colbychaskell) clarified the parse error message in case a file cannot be opened. +347. [Juan Carlos Arevalo Baeza](https://github.com/TheJCAB) fixed the enum conversion. +348. [alferov](https://github.com/ALF-ONE) fixed a version in the documentation. +349. [ss](https://github.com/serge-s) fixed the amalgamation call. +350. [AniketDhemare](https://github.com/AniketDhemare) fixed a version in the documentation. +351. [Philip Müller](https://github.com/philip-paul-mueller) fixed an example. +352. [Leila Shcheglova](https://github.com/LeilaShcheglova) fixed a warning in a test. +353. [Alex Prabhat Bara](https://github.com/alexprabhat99) fixed a function name in the documentation. +354. [laterlaugh](https://github.com/laterlaugh) fixed some typos. +355. [Yuanhao Jia](https://github.com/MrJia1997) fixed the GDB pretty printer. +356. [Fallen_Breath](https://github.com/Fallen-Breath) fixed an example for JSON Pointer. +357. [Nikhil Idiculla](https://github.com/tsnl) fixed some typos. +358. [Griffin Myers](https://github.com/gmyers18) updated the Natvis file. +359. [thetimr](https://github.com/thetimr) fixed a typo in the documentation. +360. [Balazs Erseki](https://github.com/zerocukor287) fixed a URL in the contribution guidelines. +361. [Niccolò Iardella](https://github.com/rotolof) added `NLOHMANN_DEFINE_DERIVED_TYPE_*` macros. +362. [Borislav Stanimirov](https://github.com/iboB) allowed overriding the CMake target name. +363. [Captain Crutches](https://github.com/captaincrutches) made `iterator_proxy_value` a `std::forward_iterator`. +364. [Fredrik Sandhei](https://github.com/fsandhei) added type conversion support for `std::optional`. +365. [jh96](https://github.com/jordan-hoang) added exceptions when `nullptr` is passed to `parse`. +366. [Stuart Gorman](https://github.com/StuartGorman) fixed number parsing when `EINTR` set in `errno`. +367. [Dylan Baker](https://github.com/dcbaker) generated a pkg-config file that follows the pkg-config conventions. +368. [Tianyi Chen](https://github.com/TianyiChen) optimized the binary `get_number` implementation. +369. [peng-wang-cn](https://github.com/peng-wang-cn) added type conversion support for multidimensional arrays. +370. [Einars Netlis-Galejs](https://github.com/EinarsNG) added `ONLY_SERIALIZE` for `NLOHMANN_DEFINE_DERIVED_TYPE_*` macros. +371. [Marcel](https://github.com/mering) removed `alwayslink=True` Bazel flag. +372. [Harinath Nampally](https://github.com/hnampally) added diagnostic positions to exceptions. +373. [Nissim Armand Ben Danan](https://github.com/NissimBendanan) fixed `NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT` with an empty JSON instance. +374. [Michael Valladolid](https://github.com/codenut) added support for BSON uint64 serialization/deserialization. +375. [Nikhil](https://github.com/nikhilreddydev) updated the documentation. +376. [Nebojša Cvetković](https://github.com/nebkat) added support for BJDATA optimized binary array type. +377. [Sushrut Shringarputale](https://github.com/sushshring) added support for diagnostic positions. +378. [kimci86](https://github.com/kimci86) templated to `NLOHMANN_DEFINE_TYPE` macros to also support `ordered_json`. +379. [Richard Topchii](https://github.com/richardtop) added support for VisionOS in the Swift Package Manager. +380. [Robert Chisholm](https://github.com/Robadob) fixed a typo. +381. [zjyhjqs](https://github.com/zjyhjqs) added CPack support. +382. [bitFiedler](https://github.com/bitFiedler) made GDB pretty printer work with Python 3.8. 
+383. [Gianfranco Costamagna](https://github.com/LocutusOfBorg) fixed a compiler warning. +384. [risa2000](https://github.com/risa2000) made `std::filesystem::path` conversion to/from UTF-8 encoded string explicit. Thanks a lot for helping out! Please [let me know](mailto:mail@nlohmann.me) if I forgot someone. - ## Used third-party tools The library itself consists of a single header file licensed under the MIT license. However, it is built, tested, documented, and whatnot using a lot of third-party tools and services. Thanks a lot! @@ -1777,16 +1757,15 @@ The library itself consists of a single header file licensed under the MIT licen - [**Artistic Style**](http://astyle.sourceforge.net) for automatic source code indentation - [**Clang**](https://clang.llvm.org) for compilation with code sanitizers - [**CMake**](https://cmake.org) for build automation -- [**Codacy**](https://www.codacy.com) for further [code analysis](https://www.codacy.com/app/nlohmann/json) +- [**Codacy**](https://www.codacy.com) for further [code analysis](https://app.codacy.com/gh/nlohmann/json/dashboard) - [**Coveralls**](https://coveralls.io) to measure [code coverage](https://coveralls.io/github/nlohmann/json) - [**Coverity Scan**](https://scan.coverity.com) for [static analysis](https://scan.coverity.com/projects/nlohmann-json) - [**cppcheck**](http://cppcheck.sourceforge.net) for static analysis - [**doctest**](https://github.com/onqtam/doctest) for the unit tests -- [**git-update-ghpages**](https://github.com/rstacruz/git-update-ghpages) to upload the documentation to gh-pages - [**GitHub Changelog Generator**](https://github.com/skywinder/github-changelog-generator) to generate the [ChangeLog](https://github.com/nlohmann/json/blob/develop/ChangeLog.md) - [**Google Benchmark**](https://github.com/google/benchmark) to implement the benchmarks - [**Hedley**](https://nemequ.github.io/hedley/) to avoid re-inventing several compiler-agnostic feature macros -- [**lcov**](http://ltp.sourceforge.net/coverage/lcov.php) to process coverage information and create an HTML view +- [**lcov**](https://github.com/linux-test-project/lcov) to process coverage information and create an HTML view - [**libFuzzer**](https://llvm.org/docs/LibFuzzer.html) to implement fuzz testing for OSS-Fuzz - [**Material for MkDocs**](https://squidfunk.github.io/mkdocs-material/) for the style of the documentation site - [**MkDocs**](https://www.mkdocs.org) for the documentation site @@ -1794,12 +1773,6 @@ The library itself consists of a single header file licensed under the MIT licen - [**Probot**](https://probot.github.io) for automating maintainer tasks such as closing stale issues, requesting missing information, or detecting toxic comments. - [**Valgrind**](https://valgrind.org) to check for correct memory management - -## Projects using JSON for Modern C++ - -The library is currently used in Apple macOS Sierra-Monterey and iOS 10-15. I am not sure what they are using the library for, but I am happy that it runs on so many devices. - - ## Notes ### Character encoding @@ -1821,11 +1794,11 @@ This library does not support comments by default. It does so for three reasons: 1. Comments are not part of the [JSON specification](https://tools.ietf.org/html/rfc8259). You may argue that `//` or `/* */` are allowed in JavaScript, but JSON is not JavaScript. 2. 
This was not an oversight: Douglas Crockford [wrote on this](https://plus.google.com/118095276221607585885/posts/RK8qyGVaGSr) in May 2012: - - > I removed comments from JSON because I saw people were using them to hold parsing directives, a practice which would have destroyed interoperability. I know that the lack of comments makes some people sad, but it shouldn't. - - > Suppose you are using JSON to keep configuration files, which you would like to annotate. Go ahead and insert all the comments you like. Then pipe it through JSMin before handing it to your JSON parser. - + + > I removed comments from JSON because I saw people were using them to hold parsing directives, a practice which would have destroyed interoperability. I know that the lack of comments makes some people sad, but it shouldn't. + > + > Suppose you are using JSON to keep configuration files, which you would like to annotate. Go ahead and insert all the comments you like. Then pipe it through JSMin before handing it to your JSON parser. + 3. It is dangerous for interoperability if some libraries would add comment support while others don't. Please check [The Harmful Consequences of the Robustness Principle](https://tools.ietf.org/html/draft-iab-protocol-maintenance-01) on this. However, you can pass set parameter `ignore_comments` to true in the `parse` function to ignore `//` or `/* */` comments. Comments will then be treated as whitespace. @@ -1836,6 +1809,8 @@ By default, the library does not preserve the **insertion order of object elemen If you do want to preserve the insertion order, you can try the type [`nlohmann::ordered_json`](https://github.com/nlohmann/json/issues/2179). Alternatively, you can use a more sophisticated ordered map like [`tsl::ordered_map`](https://github.com/Tessil/ordered-map) ([integration](https://github.com/nlohmann/json/issues/546#issuecomment-304447518)) or [`nlohmann::fifo_map`](https://github.com/nlohmann/fifo_map) ([integration](https://github.com/nlohmann/json/issues/485#issuecomment-333652309)). +See the [**documentation on object order**](https://json.nlohmann.me/features/object_order/) for more information. + ### Memory Release We checked with Valgrind and the Address Sanitizer (ASAN) that there are no memory leaks. @@ -1847,21 +1822,21 @@ Here is a related issue [#1924](https://github.com/nlohmann/json/issues/1924). ### Further notes -- The code contains numerous debug **assertions** which can be switched off by defining the preprocessor macro `NDEBUG`, see the [documentation of `assert`](https://en.cppreference.com/w/cpp/error/assert). In particular, note [`operator[]`](https://json.nlohmann.me/api/basic_json/operator%5B%5D/) implements **unchecked access** for const objects: If the given key is not present, the behavior is undefined (think of a dereferenced null pointer) and yields an [assertion failure](https://github.com/nlohmann/json/issues/289) if assertions are switched on. If you are not sure whether an element in an object exists, use checked access with the [`at()` function](https://json.nlohmann.me/api/basic_json/at/). Furthermore, you can define `JSON_ASSERT(x)` to replace calls to `assert(x)`. +- The code contains numerous debug **assertions** which can be switched off by defining the preprocessor macro `NDEBUG`, see the [documentation of `assert`](https://en.cppreference.com/w/cpp/error/assert). 
In particular, note [`operator[]`](https://json.nlohmann.me/api/basic_json/operator%5B%5D/) implements **unchecked access** for const objects: If the given key is not present, the behavior is undefined (think of a dereferenced null pointer) and yields an [assertion failure](https://github.com/nlohmann/json/issues/289) if assertions are switched on. If you are not sure whether an element in an object exists, use checked access with the [`at()` function](https://json.nlohmann.me/api/basic_json/at/). Furthermore, you can define `JSON_ASSERT(x)` to replace calls to `assert(x)`. See the [**documentation on runtime assertions**](https://json.nlohmann.me/features/assertions/) for more information. - As the exact number type is not defined in the [JSON specification](https://tools.ietf.org/html/rfc8259.html), this library tries to choose the best fitting C++ number type automatically. As a result, the type `double` may be used to store numbers which may yield [**floating-point exceptions**](https://github.com/nlohmann/json/issues/181) in certain rare situations if floating-point exceptions have been unmasked in the calling code. These exceptions are not caused by the library and need to be fixed in the calling code, such as by re-masking the exceptions prior to calling library functions. - The code can be compiled without C++ **runtime type identification** features; that is, you can use the `-fno-rtti` compiler flag. -- **Exceptions** are used widely within the library. They can, however, be switched off with either using the compiler flag `-fno-exceptions` or by defining the symbol `JSON_NOEXCEPTION`. In this case, exceptions are replaced by `abort()` calls. You can further control this behavior by defining `JSON_THROW_USER` (overriding `throw`), `JSON_TRY_USER` (overriding `try`), and `JSON_CATCH_USER` (overriding `catch`). Note that `JSON_THROW_USER` should leave the current scope (e.g., by throwing or aborting), as continuing after it may yield undefined behavior. Note the explanatory [`what()`](https://en.cppreference.com/w/cpp/error/exception/what) string of exceptions is not available for MSVC if exceptions are disabled, see [#2824](https://github.com/nlohmann/json/discussions/2824). +- **Exceptions** are used widely within the library. They can, however, be switched off with either using the compiler flag `-fno-exceptions` or by defining the symbol `JSON_NOEXCEPTION`. In this case, exceptions are replaced by `abort()` calls. You can further control this behavior by defining `JSON_THROW_USER` (overriding `throw`), `JSON_TRY_USER` (overriding `try`), and `JSON_CATCH_USER` (overriding `catch`). Note that `JSON_THROW_USER` should leave the current scope (e.g., by throwing or aborting), as continuing after it may yield undefined behavior. Note the explanatory [`what()`](https://en.cppreference.com/w/cpp/error/exception/what) string of exceptions is not available for MSVC if exceptions are disabled, see [#2824](https://github.com/nlohmann/json/discussions/2824). See the [**documentation of exceptions**](https://json.nlohmann.me/home/exceptions/) for more information. ## Execute unit tests To compile and run the tests, you need to execute -```sh -$ mkdir build -$ cd build -$ cmake .. -DJSON_BuildTests=On -$ cmake --build . -$ ctest --output-on-failure +```shell +mkdir build +cd build +cmake .. -DJSON_BuildTests=On +cmake --build . 
+ctest --output-on-failure ``` Note that during the `ctest` stage, several JSON test files are downloaded from an [external repository](https://github.com/nlohmann/json_test_data). If policies forbid downloading artifacts during testing, you can download the files yourself and pass the directory with the test files via `-DJSON_TestDataDirectory=path` to CMake. Then, no Internet connectivity is required. See [issue #2189](https://github.com/nlohmann/json/issues/2189) for more information. @@ -1884,8 +1859,8 @@ json/tests/src/make_test_data_available.hpp:23: FATAL ERROR: REQUIRE( utils::che In case you have downloaded the library rather than checked out the code via Git, test `cmake_fetch_content_configure` will fail. Please execute `ctest -LE git_required` to skip these tests. See [issue #2189](https://github.com/nlohmann/json/issues/2189) for more information. -Some tests change the installed files and hence make the whole process not reproducible. Please execute `ctest -LE not_reproducible` to skip these tests. See [issue #2324](https://github.com/nlohmann/json/issues/2324) for more information. +Some tests change the installed files and hence make the whole process not reproducible. Please execute `ctest -LE not_reproducible` to skip these tests. See [issue #2324](https://github.com/nlohmann/json/issues/2324) for more information. Furthermore, assertions must be switched off to ensure reproducible builds (see [discussion 4494](https://github.com/nlohmann/json/discussions/4494)). Note you need to call `cmake -LE "not_reproducible|git_required"` to exclude both labels. See [issue #2596](https://github.com/nlohmann/json/issues/2596) for more information. -As Intel compilers use unsafe floating point optimization by default, the unit tests may fail. Use flag [`/fp:precise`](https://software.intel.com/content/www/us/en/develop/documentation/cpp-compiler-developer-guide-and-reference/top/compiler-reference/compiler-options/compiler-option-details/floating-point-options/fp-model-fp.html) then. +As Intel compilers use unsafe floating point optimization by default, the unit tests may fail. Use flag [`/fp:precise`](https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/fp-model-fp.html) then. 
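The README items covered above (comment handling via `ignore_comments`, insertion order via `nlohmann::ordered_json`, and checked access with `at()`) can be tried with a short standalone program. The sketch below is illustrative only and not part of the patch itself; it assumes the 3.12.0 headers updated later in this diff and the documented four-argument `parse()` overload.

```cpp
// Minimal sketch of the README items above: parsing with ignore_comments,
// preserving insertion order via nlohmann::ordered_json, and checked access.
#include <nlohmann/json.hpp>
#include <iostream>

int main()
{
    using nlohmann::json;
    using nlohmann::ordered_json;

    // Comments are rejected by default; passing ignore_comments = true
    // makes the parser treat them as whitespace.
    const char* text = R"({
        // configuration section
        "b": 1,
        "a": 2 /* trailing value */
    })";
    json j = json::parse(text, /*cb=*/nullptr, /*allow_exceptions=*/true,
                         /*ignore_comments=*/true);

    // ordered_json keeps the insertion order ("b" before "a"),
    // while plain json stores object keys in alphabetical order.
    ordered_json oj = ordered_json::parse(text, nullptr, true, true);
    std::cout << j.dump() << '\n' << oj.dump() << '\n';

    // at() performs checked access and throws on a missing key;
    // operator[] on a const object is unchecked, as noted above.
    std::cout << j.at("a") << '\n';
    return 0;
}
```

With these headers, `j.dump()` should print the keys alphabetically while `oj.dump()` keeps `"b"` before `"a"`, and `j.at()` on a missing key would throw `nlohmann::json::out_of_range` instead of hitting the unchecked-access assertion described above.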
diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/adl_serializer.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/adl_serializer.hpp index 56a606c0f6a1..5df1af3de914 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/adl_serializer.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/adl_serializer.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/byte_container_with_subtype.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/byte_container_with_subtype.hpp index 91382cd682d1..d6398e6266f0 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/byte_container_with_subtype.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/byte_container_with_subtype.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/abi_macros.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/abi_macros.hpp index f48b9eb1d5f0..76cf336fcd14 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/abi_macros.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/abi_macros.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once @@ -12,20 +12,24 @@ #ifndef JSON_SKIP_LIBRARY_VERSION_CHECK #if defined(NLOHMANN_JSON_VERSION_MAJOR) && defined(NLOHMANN_JSON_VERSION_MINOR) && defined(NLOHMANN_JSON_VERSION_PATCH) - #if NLOHMANN_JSON_VERSION_MAJOR != 3 || NLOHMANN_JSON_VERSION_MINOR != 11 || NLOHMANN_JSON_VERSION_PATCH != 3 + #if NLOHMANN_JSON_VERSION_MAJOR != 3 || NLOHMANN_JSON_VERSION_MINOR != 12 || NLOHMANN_JSON_VERSION_PATCH != 0 #warning "Already included a different version of the library!" 
#endif #endif #endif #define NLOHMANN_JSON_VERSION_MAJOR 3 // NOLINT(modernize-macro-to-enum) -#define NLOHMANN_JSON_VERSION_MINOR 11 // NOLINT(modernize-macro-to-enum) -#define NLOHMANN_JSON_VERSION_PATCH 3 // NOLINT(modernize-macro-to-enum) +#define NLOHMANN_JSON_VERSION_MINOR 12 // NOLINT(modernize-macro-to-enum) +#define NLOHMANN_JSON_VERSION_PATCH 0 // NOLINT(modernize-macro-to-enum) #ifndef JSON_DIAGNOSTICS #define JSON_DIAGNOSTICS 0 #endif +#ifndef JSON_DIAGNOSTIC_POSITIONS + #define JSON_DIAGNOSTIC_POSITIONS 0 +#endif + #ifndef JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON #define JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON 0 #endif @@ -36,6 +40,12 @@ #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS #endif +#if JSON_DIAGNOSTIC_POSITIONS + #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS _dp +#else + #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS +#endif + #if JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON _ldvcmp #else @@ -47,14 +57,15 @@ #endif // Construct the namespace ABI tags component -#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) json_abi ## a ## b -#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b) \ - NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) +#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b, c) json_abi ## a ## b ## c +#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b, c) \ + NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b, c) #define NLOHMANN_JSON_ABI_TAGS \ NLOHMANN_JSON_ABI_TAGS_CONCAT( \ NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS, \ - NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON) + NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON, \ + NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS) // Construct the namespace version component #define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) \ diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/conversions/from_json.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/conversions/from_json.hpp index aa2f0cbf4cde..d647d742399a 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/conversions/from_json.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/conversions/from_json.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once @@ -13,6 +13,9 @@ #include // forward_list #include // inserter, front_inserter, end #include // map +#ifdef JSON_HAS_CPP_17 + #include // optional +#endif #include // string #include // tuple, make_tuple #include // is_arithmetic, is_same, is_enum, underlying_type, is_convertible @@ -43,6 +46,24 @@ inline void from_json(const BasicJsonType& j, typename std::nullptr_t& n) n = nullptr; } +#ifdef JSON_HAS_CPP_17 +#ifndef JSON_USE_IMPLICIT_CONVERSIONS +template +void from_json(const BasicJsonType& j, std::optional& opt) +{ + if (j.is_null()) + { + opt = std::nullopt; + } + else + { + opt.emplace(j.template get()); + } +} + +#endif // JSON_USE_IMPLICIT_CONVERSIONS +#endif // JSON_HAS_CPP_17 + // overloads for basic_json template parameters template < typename BasicJsonType, typename ArithmeticType, enable_if_t < std::is_arithmetic::value&& @@ -190,6 +211,54 @@ auto from_json(const BasicJsonType& j, T (&arr)[N]) // NOLINT(cppcoreguidelines } } +template +auto from_json(const BasicJsonType& j, 
T (&arr)[N1][N2]) // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) +-> decltype(j.template get(), void()) +{ + for (std::size_t i1 = 0; i1 < N1; ++i1) + { + for (std::size_t i2 = 0; i2 < N2; ++i2) + { + arr[i1][i2] = j.at(i1).at(i2).template get(); + } + } +} + +template +auto from_json(const BasicJsonType& j, T (&arr)[N1][N2][N3]) // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) +-> decltype(j.template get(), void()) +{ + for (std::size_t i1 = 0; i1 < N1; ++i1) + { + for (std::size_t i2 = 0; i2 < N2; ++i2) + { + for (std::size_t i3 = 0; i3 < N3; ++i3) + { + arr[i1][i2][i3] = j.at(i1).at(i2).at(i3).template get(); + } + } + } +} + +template +auto from_json(const BasicJsonType& j, T (&arr)[N1][N2][N3][N4]) // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) +-> decltype(j.template get(), void()) +{ + for (std::size_t i1 = 0; i1 < N1; ++i1) + { + for (std::size_t i2 = 0; i2 < N2; ++i2) + { + for (std::size_t i3 = 0; i3 < N3; ++i3) + { + for (std::size_t i4 = 0; i4 < N4; ++i4) + { + arr[i1][i2][i3][i4] = j.at(i1).at(i2).at(i3).at(i4).template get(); + } + } + } + } +} + template inline void from_json_array_impl(const BasicJsonType& j, typename BasicJsonType::array_t& arr, priority_tag<3> /*unused*/) { @@ -275,7 +344,7 @@ void()) template < typename BasicJsonType, typename T, std::size_t... Idx > std::array from_json_inplace_array_impl(BasicJsonType&& j, - identity_tag> /*unused*/, index_sequence /*unused*/) + identity_tag> /*unused*/, index_sequence /*unused*/) { return { { std::forward(j).at(Idx).template get()... } }; } @@ -379,6 +448,12 @@ std::tuple from_json_tuple_impl_base(BasicJsonType&& j, index_sequence< return std::make_tuple(std::forward(j).at(Idx).template get()...); } +template +std::tuple<> from_json_tuple_impl_base(BasicJsonType& /*unused*/, index_sequence<> /*unused*/) +{ + return {}; +} + template < typename BasicJsonType, class A1, class A2 > std::pair from_json_tuple_impl(BasicJsonType&& j, identity_tag> /*unused*/, priority_tag<0> /*unused*/) { @@ -464,7 +539,12 @@ inline void from_json(const BasicJsonType& j, std_fs::path& p) { JSON_THROW(type_error::create(302, concat("type must be string, but is ", j.type_name()), &j)); } - p = *j.template get_ptr(); + const auto& s = *j.template get_ptr(); +#ifdef JSON_HAS_CPP_20 + p = std_fs::path(std::u8string_view(reinterpret_cast(s.data()), s.size())); +#else + p = std_fs::u8path(s); // accepts UTF-8 encoded std::string in C++17, deprecated in C++20 +#endif } #endif diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/conversions/to_chars.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/conversions/to_chars.hpp index e10741c9232c..743104174dc0 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/conversions/to_chars.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/conversions/to_chars.hpp @@ -1,10 +1,10 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // // SPDX-FileCopyrightText: 2009 Florian Loitsch -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once @@ -239,10 +239,10 @@ boundaries compute_boundaries(FloatType value) // v- m- v m+ v+ const bool lower_boundary_is_closer = F == 0 && E > 1; - const 
diyfp m_plus = diyfp(2 * v.f + 1, v.e - 1); + const diyfp m_plus = diyfp((2 * v.f) + 1, v.e - 1); const diyfp m_minus = lower_boundary_is_closer - ? diyfp(4 * v.f - 1, v.e - 2) // (B) - : diyfp(2 * v.f - 1, v.e - 1); // (A) + ? diyfp((4 * v.f) - 1, v.e - 2) // (B) + : diyfp((2 * v.f) - 1, v.e - 1); // (A) // Determine the normalized w+ = m+. const diyfp w_plus = diyfp::normalize(m_plus); @@ -472,7 +472,7 @@ inline cached_power get_cached_power_for_binary_exponent(int e) JSON_ASSERT(e >= -1500); JSON_ASSERT(e <= 1500); const int f = kAlpha - e - 1; - const int k = (f * 78913) / (1 << 18) + static_cast(f > 0); + const int k = ((f * 78913) / (1 << 18)) + static_cast(f > 0); const int index = (-kCachedPowersMinDecExp + k + (kCachedPowersDecStep - 1)) / kCachedPowersDecStep; JSON_ASSERT(index >= 0); @@ -950,15 +950,15 @@ inline char* append_exponent(char* buf, int e) } else if (k < 100) { - *buf++ = static_cast('0' + k / 10); + *buf++ = static_cast('0' + (k / 10)); k %= 10; *buf++ = static_cast('0' + k); } else { - *buf++ = static_cast('0' + k / 100); + *buf++ = static_cast('0' + (k / 100)); k %= 100; - *buf++ = static_cast('0' + k / 10); + *buf++ = static_cast('0' + (k / 10)); k %= 10; *buf++ = static_cast('0' + k); } diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/conversions/to_json.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/conversions/to_json.hpp index e39b7797dd2c..ead45665f1be 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/conversions/to_json.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/conversions/to_json.hpp @@ -1,13 +1,18 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once +#include // JSON_HAS_CPP_17 +#ifdef JSON_HAS_CPP_17 + #include // optional +#endif + #include // copy #include // begin, end #include // string @@ -18,7 +23,6 @@ #include // vector #include -#include #include #include #include @@ -260,6 +264,22 @@ struct external_constructor // to_json // ///////////// +#ifdef JSON_HAS_CPP_17 +template::value, int> = 0> +void to_json(BasicJsonType& j, const std::optional& opt) +{ + if (opt.has_value()) + { + j = *opt; + } + else + { + j = nullptr; + } +} +#endif + template::value, int> = 0> inline void to_json(BasicJsonType& j, T b) noexcept @@ -320,7 +340,8 @@ template::type; - external_constructor::construct(j, static_cast(e)); + static constexpr value_t integral_value_t = std::is_unsigned::value ? value_t::number_unsigned : value_t::number_integer; + external_constructor::construct(j, static_cast(e)); } #endif // JSON_DISABLE_ENUM_SERIALIZATION @@ -405,6 +426,13 @@ inline void to_json_tuple_impl(BasicJsonType& j, const Tuple& t, index_sequence< j = { std::get(t)... 
}; } +template +inline void to_json_tuple_impl(BasicJsonType& j, const Tuple& /*unused*/, index_sequence<> /*unused*/) +{ + using array_t = typename BasicJsonType::array_t; + j = array_t(); +} + template::value, int > = 0> inline void to_json(BasicJsonType& j, const T& t) { @@ -415,7 +443,12 @@ inline void to_json(BasicJsonType& j, const T& t) template inline void to_json(BasicJsonType& j, const std_fs::path& p) { - j = p.string(); +#ifdef JSON_HAS_CPP_20 + const std::u8string s = p.u8string(); + j = std::string(s.begin(), s.end()); +#else + j = p.u8string(); // returns std::string in C++17 +#endif } #endif diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/exceptions.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/exceptions.hpp index 5974d7be2b47..5ebfb657876e 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/exceptions.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/exceptions.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once @@ -25,6 +25,18 @@ #include #include +// With -Wweak-vtables, Clang will complain about the exception classes as they +// have no out-of-line virtual method definitions and their vtable will be +// emitted in every translation unit. This issue cannot be fixed with a +// header-only library as there is no implementation file to move these +// functions to. As a result, we suppress this warning here to avoid client +// code to stumble over this. See https://github.com/nlohmann/json/issues/4087 +// for a discussion. 
+#if defined(__clang__) + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wweak-vtables" +#endif + NLOHMANN_JSON_NAMESPACE_BEGIN namespace detail { @@ -119,16 +131,34 @@ class exception : public std::exception { return concat(a, '/', detail::escape(b)); }); - return concat('(', str, ") "); + + return concat('(', str, ") ", get_byte_positions(leaf_element)); #else - static_cast(leaf_element); - return ""; + return get_byte_positions(leaf_element); #endif } private: /// an exception object as storage for error messages std::runtime_error m; +#if JSON_DIAGNOSTIC_POSITIONS + template + static std::string get_byte_positions(const BasicJsonType* leaf_element) + { + if ((leaf_element->start_pos() != std::string::npos) && (leaf_element->end_pos() != std::string::npos)) + { + return concat("(bytes ", std::to_string(leaf_element->start_pos()), "-", std::to_string(leaf_element->end_pos()), ") "); + } + return ""; + } +#else + template + static std::string get_byte_positions(const BasicJsonType* leaf_element) + { + static_cast(leaf_element); + return ""; + } +#endif }; /// @brief exception indicating a parse error @@ -255,3 +285,7 @@ class other_error : public exception } // namespace detail NLOHMANN_JSON_NAMESPACE_END + +#if defined(__clang__) + #pragma clang diagnostic pop +#endif diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/hash.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/hash.hpp index 4464e8e67bb0..973943ec2668 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/hash.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/hash.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/input/binary_reader.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/input/binary_reader.hpp index a6e100e7611e..2120cf9a453e 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/input/binary_reader.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/input/binary_reader.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once @@ -20,6 +20,9 @@ #include // char_traits, string #include // make_pair, move #include // vector +#ifdef __cpp_lib_byteswap + #include //byteswap +#endif #include #include @@ -62,7 +65,7 @@ static inline bool little_endianness(int num = 1) noexcept /*! 
@brief deserialization of CBOR, MessagePack, and UBJSON values */ -template> +template> class binary_reader { using number_integer_t = typename BasicJsonType::number_integer_t; @@ -169,7 +172,7 @@ class binary_reader std::int32_t document_size{}; get_number(input_format_t::bson, document_size); - if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast(-1)))) + if (JSON_HEDLEY_UNLIKELY(!sax->start_object(detail::unknown_size()))) { return false; } @@ -325,6 +328,12 @@ class binary_reader return get_number(input_format_t::bson, value) && sax->number_integer(value); } + case 0x11: // uint64 + { + std::uint64_t value{}; + return get_number(input_format_t::bson, value) && sax->number_unsigned(value); + } + default: // anything else not supported (yet) { std::array cr{{}}; @@ -391,7 +400,7 @@ class binary_reader std::int32_t document_size{}; get_number(input_format_t::bson, document_size); - if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast(-1)))) + if (JSON_HEDLEY_UNLIKELY(!sax->start_array(detail::unknown_size()))) { return false; } @@ -651,7 +660,7 @@ class binary_reader } case 0x9F: // array (indefinite length) - return get_cbor_array(static_cast(-1), tag_handler); + return get_cbor_array(detail::unknown_size(), tag_handler); // map (0x00..0x17 pairs of data items follow) case 0xA0: @@ -705,7 +714,7 @@ class binary_reader } case 0xBF: // map (indefinite length) - return get_cbor_object(static_cast(-1), tag_handler); + return get_cbor_object(detail::unknown_size(), tag_handler); case 0xC6: // tagged item case 0xC7: @@ -1093,7 +1102,7 @@ class binary_reader } /*! - @param[in] len the length of the array or static_cast(-1) for an + @param[in] len the length of the array or detail::unknown_size() for an array of indefinite size @param[in] tag_handler how CBOR tags should be treated @return whether array creation completed @@ -1106,7 +1115,7 @@ class binary_reader return false; } - if (len != static_cast(-1)) + if (len != detail::unknown_size()) { for (std::size_t i = 0; i < len; ++i) { @@ -1131,7 +1140,7 @@ class binary_reader } /*! 
- @param[in] len the length of the object or static_cast(-1) for an + @param[in] len the length of the object or detail::unknown_size() for an object of indefinite size @param[in] tag_handler how CBOR tags should be treated @return whether object creation completed @@ -1147,7 +1156,7 @@ class binary_reader if (len != 0) { string_t key; - if (len != static_cast(-1)) + if (len != detail::unknown_size()) { for (std::size_t i = 0; i < len; ++i) { @@ -2310,6 +2319,16 @@ class binary_reader case 'Z': // null return sax->null(); + case 'B': // byte + { + if (input_format != input_format_t::bjdata) + { + break; + } + std::uint8_t number{}; + return get_number(input_format, number) && sax->number_unsigned(number); + } + case 'U': { std::uint8_t number{}; @@ -2510,7 +2529,7 @@ class binary_reader return false; } - if (size_and_type.second == 'C') + if (size_and_type.second == 'C' || size_and_type.second == 'B') { size_and_type.second = 'U'; } @@ -2532,6 +2551,13 @@ class binary_reader return (sax->end_array() && sax->end_object()); } + // If BJData type marker is 'B' decode as binary + if (input_format == input_format_t::bjdata && size_and_type.first != npos && size_and_type.second == 'B') + { + binary_t result; + return get_binary(input_format, size_and_type.first, result) && sax->binary(result); + } + if (size_and_type.first != npos) { if (JSON_HEDLEY_UNLIKELY(!sax->start_array(size_and_type.first))) @@ -2565,7 +2591,7 @@ class binary_reader } else { - if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast(-1)))) + if (JSON_HEDLEY_UNLIKELY(!sax->start_array(detail::unknown_size()))) { return false; } @@ -2643,7 +2669,7 @@ class binary_reader } else { - if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast(-1)))) + if (JSON_HEDLEY_UNLIKELY(!sax->start_object(detail::unknown_size()))) { return false; } @@ -2754,6 +2780,29 @@ class binary_reader return current = ia.get_character(); } + /*! + @brief get_to read into a primitive type + + This function provides the interface to the used input adapter. It does + not throw in case the input reached EOF, but returns false instead + + @return bool, whether the read was successful + */ + template + bool get_to(T& dest, const input_format_t format, const char* context) + { + auto new_chars_read = ia.get_elements(&dest); + chars_read += new_chars_read; + if (JSON_HEDLEY_UNLIKELY(new_chars_read < sizeof(T))) + { + // in case of failure, advance position by 1 to report failing location + ++chars_read; + sax->parse_error(chars_read, "", parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context), nullptr)); + return false; + } + return true; + } + /*! 
@return character read from the input after ignoring all 'N' entries */ @@ -2768,6 +2817,28 @@ class binary_reader return current; } + template + static void byte_swap(NumberType& number) + { + constexpr std::size_t sz = sizeof(number); +#ifdef __cpp_lib_byteswap + if constexpr (sz == 1) + { + return; + } + if constexpr(std::is_integral_v) + { + number = std::byteswap(number); + return; + } +#endif + auto* ptr = reinterpret_cast(&number); + for (std::size_t i = 0; i < sz / 2; ++i) + { + std::swap(ptr[i], ptr[sz - i - 1]); + } + } + /* @brief read a number from the input @@ -2786,29 +2857,16 @@ class binary_reader template bool get_number(const input_format_t format, NumberType& result) { - // step 1: read input into array with system's byte order - std::array vec{}; - for (std::size_t i = 0; i < sizeof(NumberType); ++i) - { - get(); - if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "number"))) - { - return false; - } + // read in the original format - // reverse byte order prior to conversion if necessary - if (is_little_endian != (InputIsLittleEndian || format == input_format_t::bjdata)) - { - vec[sizeof(NumberType) - i - 1] = static_cast(current); - } - else - { - vec[i] = static_cast(current); // LCOV_EXCL_LINE - } + if (JSON_HEDLEY_UNLIKELY(!get_to(result, format, "number"))) + { + return false; + } + if (is_little_endian != (InputIsLittleEndian || format == input_format_t::bjdata)) + { + byte_swap(result); } - - // step 2: convert array into number of type T and return - std::memcpy(&result, vec.data(), sizeof(NumberType)); return true; } @@ -2947,7 +3005,7 @@ class binary_reader } private: - static JSON_INLINE_VARIABLE constexpr std::size_t npos = static_cast(-1); + static JSON_INLINE_VARIABLE constexpr std::size_t npos = detail::unknown_size(); /// input adapter InputAdapterType ia; @@ -2973,6 +3031,7 @@ class binary_reader #define JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_ \ make_array( \ + bjd_type{'B', "byte"}, \ bjd_type{'C', "char"}, \ bjd_type{'D', "double"}, \ bjd_type{'I', "int16"}, \ diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/input/input_adapters.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/input/input_adapters.hpp index 33fca3e4b932..1affd619eab0 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/input/input_adapters.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/input/input_adapters.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once @@ -23,6 +23,7 @@ #include // istream #endif // JSON_NO_IO +#include #include #include #include @@ -67,6 +68,13 @@ class file_input_adapter return std::fgetc(m_file); } + // returns the number of characters successfully read + template + std::size_t get_elements(T* dest, std::size_t count = 1) + { + return fread(dest, 1, sizeof(T) * count, m_file); + } + private: /// the file pointer to read from std::FILE* m_file; @@ -126,6 +134,17 @@ class input_stream_adapter return res; } + template + std::size_t get_elements(T* dest, std::size_t count = 1) + { + auto res = static_cast(sb->sgetn(reinterpret_cast(dest), static_cast(count * sizeof(T)))); + if (JSON_HEDLEY_UNLIKELY(res < count * sizeof(T))) + { + is->clear(is->rdstate() | std::ios::eofbit); + } 
+ return res; + } + private: /// the associated input stream std::istream* is = nullptr; @@ -157,6 +176,26 @@ class iterator_input_adapter return char_traits::eof(); } + // for general iterators, we cannot really do something better than falling back to processing the range one-by-one + template + std::size_t get_elements(T* dest, std::size_t count = 1) + { + auto* ptr = reinterpret_cast(dest); + for (std::size_t read_index = 0; read_index < count * sizeof(T); ++read_index) + { + if (JSON_HEDLEY_LIKELY(current != end)) + { + ptr[read_index] = static_cast(*current); + std::advance(current, 1); + } + else + { + return read_index; + } + } + return count * sizeof(T); + } + private: IteratorType current; IteratorType end; @@ -320,6 +359,13 @@ class wide_string_input_adapter return utf8_bytes[utf8_bytes_index++]; } + // parsing binary with wchar doesn't make sense, but since the parsing mode can be runtime, we need something here + template + std::size_t get_elements(T* /*dest*/, std::size_t /*count*/ = 1) + { + JSON_THROW(parse_error::create(112, 1, "wide string type cannot be interpreted as binary data", nullptr)); + } + private: BaseInputAdapter base_adapter; @@ -416,10 +462,17 @@ typename container_input_adapter_factory_impl::container_input_adapter_factory::create(container); } +// specialization for std::string +using string_input_adapter_type = decltype(input_adapter(std::declval())); + #ifndef JSON_NO_IO // Special cases with fast paths inline file_input_adapter input_adapter(std::FILE* file) { + if (file == nullptr) + { + JSON_THROW(parse_error::create(101, 0, "attempting to parse an empty input; check that your input string or stream contains the expected JSON", nullptr)); + } return file_input_adapter(file); } @@ -446,9 +499,13 @@ template < typename CharT, int >::type = 0 > contiguous_bytes_input_adapter input_adapter(CharT b) { + if (b == nullptr) + { + JSON_THROW(parse_error::create(101, 0, "attempting to parse an empty input; check that your input string or stream contains the expected JSON", nullptr)); + } auto length = std::strlen(reinterpret_cast(b)); const auto* ptr = reinterpret_cast(b); - return input_adapter(ptr, ptr + length); + return input_adapter(ptr, ptr + length); // cppcheck-suppress[nullPointerArithmeticRedundantCheck] } template diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/input/json_sax.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/input/json_sax.hpp index c772521cd843..0ff877893452 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/input/json_sax.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/input/json_sax.hpp @@ -1,22 +1,23 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once #include #include // string +#include // enable_if_t #include // move #include // vector #include +#include #include #include - NLOHMANN_JSON_NAMESPACE_BEGIN /*! @@ -144,6 +145,11 @@ struct json_sax namespace detail { +constexpr std::size_t unknown_size() +{ + return (std::numeric_limits::max)(); +} + /*! @brief SAX implementation to create a JSON value from SAX events @@ -157,7 +163,7 @@ constructor contains the parsed value. 
@tparam BasicJsonType the JSON type */ -template +template class json_sax_dom_parser { public: @@ -166,14 +172,15 @@ class json_sax_dom_parser using number_float_t = typename BasicJsonType::number_float_t; using string_t = typename BasicJsonType::string_t; using binary_t = typename BasicJsonType::binary_t; + using lexer_t = lexer; /*! @param[in,out] r reference to a JSON value that is manipulated while parsing @param[in] allow_exceptions_ whether parse errors yield exceptions */ - explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true) - : root(r), allow_exceptions(allow_exceptions_) + explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true, lexer_t* lexer_ = nullptr) + : root(r), allow_exceptions(allow_exceptions_), m_lexer_ref(lexer_) {} // make class move-only @@ -229,7 +236,18 @@ class json_sax_dom_parser { ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); - if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) +#if JSON_DIAGNOSTIC_POSITIONS + // Manually set the start position of the object here. + // Ensure this is after the call to handle_value to ensure correct start position. + if (m_lexer_ref) + { + // Lexer has read the first character of the object, so + // subtract 1 from the position to get the correct start position. + ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; + } +#endif + + if (JSON_HEDLEY_UNLIKELY(len != detail::unknown_size() && len > ref_stack.back()->max_size())) { JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); } @@ -252,6 +270,14 @@ class json_sax_dom_parser JSON_ASSERT(!ref_stack.empty()); JSON_ASSERT(ref_stack.back()->is_object()); +#if JSON_DIAGNOSTIC_POSITIONS + if (m_lexer_ref) + { + // Lexer's position is past the closing brace, so set that as the end position. + ref_stack.back()->end_position = m_lexer_ref->get_position(); + } +#endif + ref_stack.back()->set_parents(); ref_stack.pop_back(); return true; @@ -261,7 +287,16 @@ class json_sax_dom_parser { ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); - if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) +#if JSON_DIAGNOSTIC_POSITIONS + // Manually set the start position of the array here. + // Ensure this is after the call to handle_value to ensure correct start position. + if (m_lexer_ref) + { + ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; + } +#endif + + if (JSON_HEDLEY_UNLIKELY(len != detail::unknown_size() && len > ref_stack.back()->max_size())) { JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); } @@ -274,6 +309,14 @@ class json_sax_dom_parser JSON_ASSERT(!ref_stack.empty()); JSON_ASSERT(ref_stack.back()->is_array()); +#if JSON_DIAGNOSTIC_POSITIONS + if (m_lexer_ref) + { + // Lexer's position is past the closing bracket, so set that as the end position. + ref_stack.back()->end_position = m_lexer_ref->get_position(); + } +#endif + ref_stack.back()->set_parents(); ref_stack.pop_back(); return true; @@ -298,6 +341,75 @@ class json_sax_dom_parser } private: + +#if JSON_DIAGNOSTIC_POSITIONS + void handle_diagnostic_positions_for_json_value(BasicJsonType& v) + { + if (m_lexer_ref) + { + // Lexer has read past the current field value, so set the end position to the current position. 
+ // The start position will be set below based on the length of the string representation + // of the value. + v.end_position = m_lexer_ref->get_position(); + + switch (v.type()) + { + case value_t::boolean: + { + // 4 and 5 are the string length of "true" and "false" + v.start_position = v.end_position - (v.m_data.m_value.boolean ? 4 : 5); + break; + } + + case value_t::null: + { + // 4 is the string length of "null" + v.start_position = v.end_position - 4; + break; + } + + case value_t::string: + { + // include the length of the quotes, which is 2 + v.start_position = v.end_position - v.m_data.m_value.string->size() - 2; + break; + } + + // As we handle the start and end positions for values created during parsing, + // we do not expect the following value type to be called. Regardless, set the positions + // in case this is created manually or through a different constructor. Exclude from lcov + // since the exact condition of this switch is esoteric. + // LCOV_EXCL_START + case value_t::discarded: + { + v.end_position = std::string::npos; + v.start_position = v.end_position; + break; + } + // LCOV_EXCL_STOP + case value_t::binary: + case value_t::number_integer: + case value_t::number_unsigned: + case value_t::number_float: + { + v.start_position = v.end_position - m_lexer_ref->get_string().size(); + break; + } + case value_t::object: + case value_t::array: + { + // object and array are handled in start_object() and start_array() handlers + // skip setting the values here. + break; + } + default: // LCOV_EXCL_LINE + // Handle all possible types discretely, default handler should never be reached. + JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert,-warnings-as-errors) LCOV_EXCL_LINE + } + } + } +#endif + /*! @invariant If the ref stack is empty, then the passed value will be the new root. 
@@ -311,6 +423,11 @@ class json_sax_dom_parser if (ref_stack.empty()) { root = BasicJsonType(std::forward(v)); + +#if JSON_DIAGNOSTIC_POSITIONS + handle_diagnostic_positions_for_json_value(root); +#endif + return &root; } @@ -319,12 +436,22 @@ class json_sax_dom_parser if (ref_stack.back()->is_array()) { ref_stack.back()->m_data.m_value.array->emplace_back(std::forward(v)); + +#if JSON_DIAGNOSTIC_POSITIONS + handle_diagnostic_positions_for_json_value(ref_stack.back()->m_data.m_value.array->back()); +#endif + return &(ref_stack.back()->m_data.m_value.array->back()); } JSON_ASSERT(ref_stack.back()->is_object()); JSON_ASSERT(object_element); *object_element = BasicJsonType(std::forward(v)); + +#if JSON_DIAGNOSTIC_POSITIONS + handle_diagnostic_positions_for_json_value(*object_element); +#endif + return object_element; } @@ -338,9 +465,11 @@ class json_sax_dom_parser bool errored = false; /// whether to throw exceptions in case of errors const bool allow_exceptions = true; + /// the lexer reference to obtain the current position + lexer_t* m_lexer_ref = nullptr; }; -template +template class json_sax_dom_callback_parser { public: @@ -351,11 +480,13 @@ class json_sax_dom_callback_parser using binary_t = typename BasicJsonType::binary_t; using parser_callback_t = typename BasicJsonType::parser_callback_t; using parse_event_t = typename BasicJsonType::parse_event_t; + using lexer_t = lexer; json_sax_dom_callback_parser(BasicJsonType& r, - const parser_callback_t cb, - const bool allow_exceptions_ = true) - : root(r), callback(cb), allow_exceptions(allow_exceptions_) + parser_callback_t cb, + const bool allow_exceptions_ = true, + lexer_t* lexer_ = nullptr) + : root(r), callback(std::move(cb)), allow_exceptions(allow_exceptions_), m_lexer_ref(lexer_) { keep_stack.push_back(true); } @@ -418,12 +549,26 @@ class json_sax_dom_callback_parser auto val = handle_value(BasicJsonType::value_t::object, true); ref_stack.push_back(val.second); - // check object limit - if (ref_stack.back() && JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) + if (ref_stack.back()) { - JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); - } +#if JSON_DIAGNOSTIC_POSITIONS + // Manually set the start position of the object here. + // Ensure this is after the call to handle_value to ensure correct start position. + if (m_lexer_ref) + { + // Lexer has read the first character of the object, so + // subtract 1 from the position to get the correct start position. + ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; + } +#endif + + // check object limit + if (JSON_HEDLEY_UNLIKELY(len != detail::unknown_size() && len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); + } + } return true; } @@ -452,9 +597,23 @@ class json_sax_dom_callback_parser { // discard object *ref_stack.back() = discarded; + +#if JSON_DIAGNOSTIC_POSITIONS + // Set start/end positions for discarded object. + handle_diagnostic_positions_for_json_value(*ref_stack.back()); +#endif } else { + +#if JSON_DIAGNOSTIC_POSITIONS + if (m_lexer_ref) + { + // Lexer's position is past the closing brace, so set that as the end position. 
+ ref_stack.back()->end_position = m_lexer_ref->get_position(); + } +#endif + ref_stack.back()->set_parents(); } } @@ -488,10 +647,25 @@ class json_sax_dom_callback_parser auto val = handle_value(BasicJsonType::value_t::array, true); ref_stack.push_back(val.second); - // check array limit - if (ref_stack.back() && JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) + if (ref_stack.back()) { - JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); + +#if JSON_DIAGNOSTIC_POSITIONS + // Manually set the start position of the array here. + // Ensure this is after the call to handle_value to ensure correct start position. + if (m_lexer_ref) + { + // Lexer has read the first character of the array, so + // subtract 1 from the position to get the correct start position. + ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; + } +#endif + + // check array limit + if (JSON_HEDLEY_UNLIKELY(len != detail::unknown_size() && len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); + } } return true; @@ -506,12 +680,26 @@ class json_sax_dom_callback_parser keep = callback(static_cast(ref_stack.size()) - 1, parse_event_t::array_end, *ref_stack.back()); if (keep) { + +#if JSON_DIAGNOSTIC_POSITIONS + if (m_lexer_ref) + { + // Lexer's position is past the closing bracket, so set that as the end position. + ref_stack.back()->end_position = m_lexer_ref->get_position(); + } +#endif + ref_stack.back()->set_parents(); } else { // discard array *ref_stack.back() = discarded; + +#if JSON_DIAGNOSTIC_POSITIONS + // Set start/end positions for discarded array. + handle_diagnostic_positions_for_json_value(*ref_stack.back()); +#endif } } @@ -548,6 +736,71 @@ class json_sax_dom_callback_parser } private: + +#if JSON_DIAGNOSTIC_POSITIONS + void handle_diagnostic_positions_for_json_value(BasicJsonType& v) + { + if (m_lexer_ref) + { + // Lexer has read past the current field value, so set the end position to the current position. + // The start position will be set below based on the length of the string representation + // of the value. + v.end_position = m_lexer_ref->get_position(); + + switch (v.type()) + { + case value_t::boolean: + { + // 4 and 5 are the string length of "true" and "false" + v.start_position = v.end_position - (v.m_data.m_value.boolean ? 4 : 5); + break; + } + + case value_t::null: + { + // 4 is the string length of "null" + v.start_position = v.end_position - 4; + break; + } + + case value_t::string: + { + // include the length of the quotes, which is 2 + v.start_position = v.end_position - v.m_data.m_value.string->size() - 2; + break; + } + + case value_t::discarded: + { + v.end_position = std::string::npos; + v.start_position = v.end_position; + break; + } + + case value_t::binary: + case value_t::number_integer: + case value_t::number_unsigned: + case value_t::number_float: + { + v.start_position = v.end_position - m_lexer_ref->get_string().size(); + break; + } + + case value_t::object: + case value_t::array: + { + // object and array are handled in start_object() and start_array() handlers + // skip setting the values here. + break; + } + default: // LCOV_EXCL_LINE + // Handle all possible types discretely, default handler should never be reached. + JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert,-warnings-as-errors) LCOV_EXCL_LINE + } + } + } +#endif + /*! 
@param[in] v value to add to the JSON value we build during parsing @param[in] skip_callback whether we should skip calling the callback @@ -578,6 +831,10 @@ class json_sax_dom_callback_parser // create value auto value = BasicJsonType(std::forward(v)); +#if JSON_DIAGNOSTIC_POSITIONS + handle_diagnostic_positions_for_json_value(value); +#endif + // check callback const bool keep = skip_callback || callback(static_cast(ref_stack.size()), parse_event_t::value, value); @@ -632,9 +889,9 @@ class json_sax_dom_callback_parser /// stack to model hierarchy of values std::vector ref_stack {}; /// stack to manage which values to keep - std::vector keep_stack {}; + std::vector keep_stack {}; // NOLINT(readability-redundant-member-init) /// stack to manage which object keys to keep - std::vector key_keep_stack {}; + std::vector key_keep_stack {}; // NOLINT(readability-redundant-member-init) /// helper to hold the reference for the next object element BasicJsonType* object_element = nullptr; /// whether a syntax error occurred @@ -645,6 +902,8 @@ class json_sax_dom_callback_parser const bool allow_exceptions = true; /// a discarded value for the callback BasicJsonType discarded = BasicJsonType::value_t::discarded; + /// the lexer reference to obtain the current position + lexer_t* m_lexer_ref = nullptr; }; template @@ -692,7 +951,7 @@ class json_sax_acceptor return true; } - bool start_object(std::size_t /*unused*/ = static_cast(-1)) + bool start_object(std::size_t /*unused*/ = detail::unknown_size()) { return true; } @@ -707,7 +966,7 @@ class json_sax_acceptor return true; } - bool start_array(std::size_t /*unused*/ = static_cast(-1)) + bool start_array(std::size_t /*unused*/ = detail::unknown_size()) { return true; } diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/input/lexer.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/input/lexer.hpp index 4b3bf77d6247..2b4e80a4859c 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/input/lexer.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/input/lexer.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once @@ -967,7 +967,7 @@ class lexer : public lexer_base locale's decimal point is used instead of `.` to work with the locale-dependent converters. */ - token_type scan_number() // lgtm [cpp/use-of-goto] + token_type scan_number() // lgtm [cpp/use-of-goto] `goto` is used in this function to implement the number-parsing state machine described above. By design, any finite input will eventually reach the "done" state or return token_type::parse_error. In each intermediate state, 1 byte of the input is appended to the token_buffer vector, and only the already initialized variables token_buffer, number_type, and error_message are manipulated. 
{ // reset token_buffer to store the number's bytes reset(); @@ -1049,6 +1049,7 @@ class lexer : public lexer_base case '.': { add(decimal_point_char); + decimal_point_position = token_buffer.size() - 1; goto scan_number_decimal1; } @@ -1085,6 +1086,7 @@ class lexer : public lexer_base case '.': { add(decimal_point_char); + decimal_point_position = token_buffer.size() - 1; goto scan_number_decimal1; } @@ -1245,7 +1247,7 @@ class lexer : public lexer_base // we are done scanning a number) unget(); - char* endptr = nullptr; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + char* endptr = nullptr; // NOLINT(misc-const-correctness,cppcoreguidelines-pro-type-vararg,hicpp-vararg) errno = 0; // try to parse integers first and fall back to floats @@ -1256,7 +1258,7 @@ class lexer : public lexer_base // we checked the number format before JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size()); - if (errno == 0) + if (errno != ERANGE) { value_unsigned = static_cast(x); if (value_unsigned == x) @@ -1272,7 +1274,7 @@ class lexer : public lexer_base // we checked the number format before JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size()); - if (errno == 0) + if (errno != ERANGE) { value_integer = static_cast(x); if (value_integer == x) @@ -1322,6 +1324,7 @@ class lexer : public lexer_base { token_buffer.clear(); token_string.clear(); + decimal_point_position = std::string::npos; token_string.push_back(char_traits::to_char_type(current)); } @@ -1430,6 +1433,11 @@ class lexer : public lexer_base /// return current string value (implicitly resets the token; useful only once) string_t& get_string() { + // translate decimal points from locale back to '.' (#4084) + if (decimal_point_char != '.' && decimal_point_position != std::string::npos) + { + token_buffer[decimal_point_position] = '.'; + } return token_buffer; } @@ -1627,6 +1635,8 @@ class lexer : public lexer_base /// the decimal point const char_int_type decimal_point_char = '.'; + /// the position of the decimal point in the input + std::size_t decimal_point_position = std::string::npos; }; } // namespace detail diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/input/parser.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/input/parser.hpp index bdf85ba29267..b5a5a525c0bc 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/input/parser.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/input/parser.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once @@ -69,10 +69,10 @@ class parser public: /// a parser reading from an input adapter explicit parser(InputAdapterType&& adapter, - const parser_callback_t cb = nullptr, + parser_callback_t cb = nullptr, const bool allow_exceptions_ = true, const bool skip_comments = false) - : callback(cb) + : callback(std::move(cb)) , m_lexer(std::move(adapter), skip_comments) , allow_exceptions(allow_exceptions_) { @@ -94,7 +94,7 @@ class parser { if (callback) { - json_sax_dom_callback_parser sdp(result, callback, allow_exceptions); + json_sax_dom_callback_parser sdp(result, callback, allow_exceptions, &m_lexer); sax_parse_internal(&sdp); // in strict mode, input must be completely read @@ -122,7 
+122,7 @@ class parser } else { - json_sax_dom_parser sdp(result, allow_exceptions); + json_sax_dom_parser sdp(result, allow_exceptions, &m_lexer); sax_parse_internal(&sdp); // in strict mode, input must be completely read @@ -194,7 +194,7 @@ class parser { case token_type::begin_object: { - if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast(-1)))) + if (JSON_HEDLEY_UNLIKELY(!sax->start_object(detail::unknown_size()))) { return false; } @@ -239,7 +239,7 @@ class parser case token_type::begin_array: { - if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast(-1)))) + if (JSON_HEDLEY_UNLIKELY(!sax->start_array(detail::unknown_size()))) { return false; } diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/input/position_t.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/input/position_t.hpp index 8ac7c78cfd33..c26c5f4348b2 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/input/position_t.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/input/position_t.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/iterators/internal_iterator.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/iterators/internal_iterator.hpp index 2991ee6930c8..9f3c8e6b8f46 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/iterators/internal_iterator.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/iterators/internal_iterator.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/iterators/iter_impl.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/iterators/iter_impl.hpp index 44470913477d..45864e8c350b 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/iterators/iter_impl.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/iterators/iter_impl.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once @@ -463,7 +463,7 @@ class iter_impl // NOLINT(cppcoreguidelines-special-member-functions,hicpp-speci /*! @brief comparison: equal - @pre The iterator is initialized; i.e. `m_object != nullptr`. + @pre (1) Both iterators are initialized to point to the same object, or (2) both iterators are value-initialized. 
*/ template < typename IterImpl, detail::enable_if_t < (std::is_same::value || std::is_same::value), std::nullptr_t > = nullptr > bool operator==(const IterImpl& other) const @@ -474,7 +474,11 @@ class iter_impl // NOLINT(cppcoreguidelines-special-member-functions,hicpp-speci JSON_THROW(invalid_iterator::create(212, "cannot compare iterators of different containers", m_object)); } - JSON_ASSERT(m_object != nullptr); + // value-initialized forward iterators can be compared, and must compare equal to other value-initialized iterators of the same type #4493 + if (m_object == nullptr) + { + return true; + } switch (m_object->m_data.m_type) { @@ -499,7 +503,7 @@ class iter_impl // NOLINT(cppcoreguidelines-special-member-functions,hicpp-speci /*! @brief comparison: not equal - @pre The iterator is initialized; i.e. `m_object != nullptr`. + @pre (1) Both iterators are initialized to point to the same object, or (2) both iterators are value-initialized. */ template < typename IterImpl, detail::enable_if_t < (std::is_same::value || std::is_same::value), std::nullptr_t > = nullptr > bool operator!=(const IterImpl& other) const @@ -509,7 +513,7 @@ class iter_impl // NOLINT(cppcoreguidelines-special-member-functions,hicpp-speci /*! @brief comparison: smaller - @pre The iterator is initialized; i.e. `m_object != nullptr`. + @pre (1) Both iterators are initialized to point to the same object, or (2) both iterators are value-initialized. */ bool operator<(const iter_impl& other) const { @@ -519,7 +523,12 @@ class iter_impl // NOLINT(cppcoreguidelines-special-member-functions,hicpp-speci JSON_THROW(invalid_iterator::create(212, "cannot compare iterators of different containers", m_object)); } - JSON_ASSERT(m_object != nullptr); + // value-initialized forward iterators can be compared, and must compare equal to other value-initialized iterators of the same type #4493 + if (m_object == nullptr) + { + // the iterators are both value-initialized and are to be considered equal, but this function checks for smaller, so we return false + return false; + } switch (m_object->m_data.m_type) { @@ -544,7 +553,7 @@ class iter_impl // NOLINT(cppcoreguidelines-special-member-functions,hicpp-speci /*! @brief comparison: less than or equal - @pre The iterator is initialized; i.e. `m_object != nullptr`. + @pre (1) Both iterators are initialized to point to the same object, or (2) both iterators are value-initialized. */ bool operator<=(const iter_impl& other) const { @@ -553,7 +562,7 @@ class iter_impl // NOLINT(cppcoreguidelines-special-member-functions,hicpp-speci /*! @brief comparison: greater than - @pre The iterator is initialized; i.e. `m_object != nullptr`. + @pre (1) Both iterators are initialized to point to the same object, or (2) both iterators are value-initialized. */ bool operator>(const iter_impl& other) const { @@ -562,7 +571,7 @@ class iter_impl // NOLINT(cppcoreguidelines-special-member-functions,hicpp-speci /*! @brief comparison: greater than or equal - @pre The iterator is initialized; i.e. `m_object != nullptr`. + @pre (1) The iterator is initialized; i.e. `m_object != nullptr`, or (2) both iterators are value-initialized. 
*/ bool operator>=(const iter_impl& other) const { diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/iterators/iteration_proxy.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/iterators/iteration_proxy.hpp index 76293de22701..78e0d8b036a3 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/iterators/iteration_proxy.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/iterators/iteration_proxy.hpp @@ -1,16 +1,15 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once #include // size_t -#include // input_iterator_tag -#include // string, to_string +#include // forward_iterator_tag #include // tuple_size, get, tuple_element #include // move @@ -20,19 +19,13 @@ #include #include +#include #include NLOHMANN_JSON_NAMESPACE_BEGIN namespace detail { -template -void int_to_string( string_type& target, std::size_t value ) -{ - // For ADL - using std::to_string; - target = to_string(value); -} template class iteration_proxy_value { public: @@ -40,7 +33,7 @@ template class iteration_proxy_value using value_type = iteration_proxy_value; using pointer = value_type *; using reference = value_type &; - using iterator_category = std::input_iterator_tag; + using iterator_category = std::forward_iterator_tag; using string_type = typename std::remove_cv< typename std::remove_reference().key() ) >::type >::type; private: @@ -220,7 +213,7 @@ namespace std #endif template class tuple_size<::nlohmann::detail::iteration_proxy_value> // NOLINT(cert-dcl58-cpp) - : public std::integral_constant {}; + : public std::integral_constant {}; template class tuple_element> // NOLINT(cert-dcl58-cpp) diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/iterators/iterator_traits.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/iterators/iterator_traits.hpp index 84cc27a85ea5..5ca92a5e8c53 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/iterators/iterator_traits.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/iterators/iterator_traits.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once @@ -43,7 +43,7 @@ struct iterator_traits template struct iterator_traits < T, enable_if_t < !std::is_pointer::value >> - : iterator_types + : iterator_types { }; diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/iterators/json_reverse_iterator.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/iterators/json_reverse_iterator.hpp index 006d5499ad59..f979d8533dae 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/iterators/json_reverse_iterator.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/iterators/json_reverse_iterator.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| 
https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/iterators/primitive_iterator.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/iterators/primitive_iterator.hpp index 0b6e8499e605..3a238349be05 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/iterators/primitive_iterator.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/iterators/primitive_iterator.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/json_custom_base_class.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/json_custom_base_class.hpp index d1e29162a37b..91e0c80bc079 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/json_custom_base_class.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/json_custom_base_class.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/json_pointer.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/json_pointer.hpp index 4fdcd9ad2824..1f764979e3ad 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/json_pointer.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/json_pointer.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once @@ -228,7 +228,7 @@ class json_pointer } const char* p = s.c_str(); - char* p_end = nullptr; + char* p_end = nullptr; // NOLINT(misc-const-correctness) errno = 0; // strtoull doesn't reset errno const unsigned long long res = std::strtoull(p, &p_end, 10); // NOLINT(runtime/int) if (p == p_end // invalid input or empty string @@ -750,7 +750,7 @@ class json_pointer // iterate array and use index as reference string for (std::size_t i = 0; i < value.m_data.m_value.array->size(); ++i) { - flatten(detail::concat(reference_string, '/', std::to_string(i)), + flatten(detail::concat(reference_string, '/', std::to_string(i)), value.m_data.m_value.array->operator[](i), result); } } @@ -769,7 +769,7 @@ class json_pointer // iterate object and use keys as reference string for (const auto& element : *value.m_data.m_value.object) { - flatten(detail::concat(reference_string, '/', detail::escape(element.first)), element.second, result); + flatten(detail::concat(reference_string, '/', detail::escape(element.first)), element.second, result); } } break; diff 
--git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/json_ref.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/json_ref.hpp index b8bb6a76b067..a9a68d9b346e 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/json_ref.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/json_ref.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/macro_scope.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/macro_scope.hpp index 97127a646279..d1c6b1be3ab4 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/macro_scope.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/macro_scope.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once @@ -32,15 +32,20 @@ // C++ language standard detection // if the user manually specified the used c++ version this is skipped -#if !defined(JSON_HAS_CPP_20) && !defined(JSON_HAS_CPP_17) && !defined(JSON_HAS_CPP_14) && !defined(JSON_HAS_CPP_11) - #if (defined(__cplusplus) && __cplusplus >= 202002L) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) +#if !defined(JSON_HAS_CPP_23) && !defined(JSON_HAS_CPP_20) && !defined(JSON_HAS_CPP_17) && !defined(JSON_HAS_CPP_14) && !defined(JSON_HAS_CPP_11) + #if (defined(__cplusplus) && __cplusplus > 202002L) || (defined(_MSVC_LANG) && _MSVC_LANG > 202002L) + #define JSON_HAS_CPP_23 #define JSON_HAS_CPP_20 #define JSON_HAS_CPP_17 #define JSON_HAS_CPP_14 - #elif (defined(__cplusplus) && __cplusplus >= 201703L) || (defined(_HAS_CXX17) && _HAS_CXX17 == 1) // fix for issue #464 + #elif (defined(__cplusplus) && __cplusplus > 201703L) || (defined(_MSVC_LANG) && _MSVC_LANG > 201703L) + #define JSON_HAS_CPP_20 + #define JSON_HAS_CPP_17 + #define JSON_HAS_CPP_14 + #elif (defined(__cplusplus) && __cplusplus > 201402L) || (defined(_HAS_CXX17) && _HAS_CXX17 == 1) // fix for issue #464 #define JSON_HAS_CPP_17 #define JSON_HAS_CPP_14 - #elif (defined(__cplusplus) && __cplusplus >= 201402L) || (defined(_HAS_CXX14) && _HAS_CXX14 == 1) + #elif (defined(__cplusplus) && __cplusplus > 201103L) || (defined(_HAS_CXX14) && _HAS_CXX14 == 1) #define JSON_HAS_CPP_14 #endif // the cpp 11 flag is always specified because it is the minimal required version @@ -216,7 +221,9 @@ template \ inline void to_json(BasicJsonType& j, const ENUM_TYPE& e) \ { \ + /* NOLINTNEXTLINE(modernize-type-traits) we use C++11 */ \ static_assert(std::is_enum::value, #ENUM_TYPE " must be an enum!"); \ + /* NOLINTNEXTLINE(modernize-avoid-c-arrays) we don't want to depend on */ \ static const std::pair m[] = __VA_ARGS__; \ auto it = std::find_if(std::begin(m), std::end(m), \ [e](const std::pair& ej_pair) -> bool \ @@ -228,7 +235,9 @@ template \ inline void from_json(const BasicJsonType& j, ENUM_TYPE& e) \ { \ + /* NOLINTNEXTLINE(modernize-type-traits) we use C++11 */ \ 
static_assert(std::is_enum::value, #ENUM_TYPE " must be an enum!"); \ + /* NOLINTNEXTLINE(modernize-avoid-c-arrays) we don't want to depend on */ \ static const std::pair m[] = __VA_ARGS__; \ auto it = std::find_if(std::begin(m), std::end(m), \ [&j](const std::pair& ej_pair) -> bool \ @@ -391,42 +400,146 @@ #define NLOHMANN_JSON_TO(v1) nlohmann_json_j[#v1] = nlohmann_json_t.v1; #define NLOHMANN_JSON_FROM(v1) nlohmann_json_j.at(#v1).get_to(nlohmann_json_t.v1); -#define NLOHMANN_JSON_FROM_WITH_DEFAULT(v1) nlohmann_json_t.v1 = nlohmann_json_j.value(#v1, nlohmann_json_default_obj.v1); +#define NLOHMANN_JSON_FROM_WITH_DEFAULT(v1) nlohmann_json_t.v1 = !nlohmann_json_j.is_null() ? nlohmann_json_j.value(#v1, nlohmann_json_default_obj.v1) : nlohmann_json_default_obj.v1; /*! @brief macro @def NLOHMANN_DEFINE_TYPE_INTRUSIVE @since version 3.9.0 +@sa https://json.nlohmann.me/api/macros/nlohmann_define_type_intrusive/ */ #define NLOHMANN_DEFINE_TYPE_INTRUSIVE(Type, ...) \ - friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ - friend void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } + template::value, int> = 0> \ + friend void to_json(BasicJsonType& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + template::value, int> = 0> \ + friend void from_json(const BasicJsonType& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } +/*! +@brief macro +@def NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT +@since version 3.11.0 +@sa https://json.nlohmann.me/api/macros/nlohmann_define_type_intrusive/ +*/ #define NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(Type, ...) \ - friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ - friend void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } + template::value, int> = 0> \ + friend void to_json(BasicJsonType& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + template::value, int> = 0> \ + friend void from_json(const BasicJsonType& nlohmann_json_j, Type& nlohmann_json_t) { const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } +/*! +@brief macro +@def NLOHMANN_DEFINE_TYPE_INTRUSIVE_ONLY_SERIALIZE +@since version 3.11.3 +@sa https://json.nlohmann.me/api/macros/nlohmann_define_type_intrusive/ +*/ #define NLOHMANN_DEFINE_TYPE_INTRUSIVE_ONLY_SERIALIZE(Type, ...) \ - friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } + template::value, int> = 0> \ + friend void to_json(BasicJsonType& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } /*! @brief macro @def NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE @since version 3.9.0 +@sa https://json.nlohmann.me/api/macros/nlohmann_define_type_non_intrusive/ */ #define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Type, ...) 
\ - inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ - inline void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } + template::value, int> = 0> \ + void to_json(BasicJsonType& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + template::value, int> = 0> \ + void from_json(const BasicJsonType& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } +/*! +@brief macro +@def NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT +@since version 3.11.0 +@sa https://json.nlohmann.me/api/macros/nlohmann_define_type_non_intrusive/ +*/ +#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(Type, ...) \ + template::value, int> = 0> \ + void to_json(BasicJsonType& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + template::value, int> = 0> \ + void from_json(const BasicJsonType& nlohmann_json_j, Type& nlohmann_json_t) { const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } + +/*! +@brief macro +@def NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_ONLY_SERIALIZE +@since version 3.11.3 +@sa https://json.nlohmann.me/api/macros/nlohmann_define_type_non_intrusive/ +*/ #define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_ONLY_SERIALIZE(Type, ...) \ - inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } + template::value, int> = 0> \ + void to_json(BasicJsonType& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } -#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(Type, ...) \ - inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ - inline void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } +/*! +@brief macro +@def NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE +@since version 3.12.0 +@sa https://json.nlohmann.me/api/macros/nlohmann_define_derived_type/ +*/ +#define NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE(Type, BaseType, ...) \ + template::value, int> = 0> \ + friend void to_json(BasicJsonType& nlohmann_json_j, const Type& nlohmann_json_t) { nlohmann::to_json(nlohmann_json_j, static_cast(nlohmann_json_t)); NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + template::value, int> = 0> \ + friend void from_json(const BasicJsonType& nlohmann_json_j, Type& nlohmann_json_t) { nlohmann::from_json(nlohmann_json_j, static_cast(nlohmann_json_t)); NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } + +/*! +@brief macro +@def NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE_WITH_DEFAULT +@since version 3.12.0 +@sa https://json.nlohmann.me/api/macros/nlohmann_define_derived_type/ +*/ +#define NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE_WITH_DEFAULT(Type, BaseType, ...) 
\ + template::value, int> = 0> \ + friend void to_json(BasicJsonType& nlohmann_json_j, const Type& nlohmann_json_t) { nlohmann::to_json(nlohmann_json_j, static_cast(nlohmann_json_t)); NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + template::value, int> = 0> \ + friend void from_json(const BasicJsonType& nlohmann_json_j, Type& nlohmann_json_t) { nlohmann::from_json(nlohmann_json_j, static_cast(nlohmann_json_t)); const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } + +/*! +@brief macro +@def NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE_ONLY_SERIALIZE +@since version 3.12.0 +@sa https://json.nlohmann.me/api/macros/nlohmann_define_derived_type/ +*/ +#define NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE_ONLY_SERIALIZE(Type, BaseType, ...) \ + template::value, int> = 0> \ + friend void to_json(BasicJsonType& nlohmann_json_j, const Type& nlohmann_json_t) { nlohmann::to_json(nlohmann_json_j, static_cast(nlohmann_json_t)); NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } + +/*! +@brief macro +@def NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE +@since version 3.12.0 +@sa https://json.nlohmann.me/api/macros/nlohmann_define_derived_type/ +*/ +#define NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE(Type, BaseType, ...) \ + template::value, int> = 0> \ + void to_json(BasicJsonType& nlohmann_json_j, const Type& nlohmann_json_t) { nlohmann::to_json(nlohmann_json_j, static_cast(nlohmann_json_t)); NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + template::value, int> = 0> \ + void from_json(const BasicJsonType& nlohmann_json_j, Type& nlohmann_json_t) { nlohmann::from_json(nlohmann_json_j, static_cast(nlohmann_json_t)); NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } + +/*! +@brief macro +@def NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE_WITH_DEFAULT +@since version 3.12.0 +@sa https://json.nlohmann.me/api/macros/nlohmann_define_derived_type/ +*/ +#define NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE_WITH_DEFAULT(Type, BaseType, ...) \ + template::value, int> = 0> \ + void to_json(BasicJsonType& nlohmann_json_j, const Type& nlohmann_json_t) { nlohmann::to_json(nlohmann_json_j, static_cast(nlohmann_json_t)); NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + template::value, int> = 0> \ + void from_json(const BasicJsonType& nlohmann_json_j, Type& nlohmann_json_t) { nlohmann::from_json(nlohmann_json_j, static_cast(nlohmann_json_t)); const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } + +/*! +@brief macro +@def NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE_ONLY_SERIALIZE +@since version 3.12.0 +@sa https://json.nlohmann.me/api/macros/nlohmann_define_derived_type/ +*/ +#define NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE_ONLY_SERIALIZE(Type, BaseType, ...) \ + template::value, int> = 0> \ + void to_json(BasicJsonType& nlohmann_json_j, const Type& nlohmann_json_t) { nlohmann::to_json(nlohmann_json_j, static_cast(nlohmann_json_t)); NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } // inspired from https://stackoverflow.com/a/26745591 -// allows to call any std function as if (e.g. 
with begin): +// allows calling any std function as if (e.g., with begin): // using std::begin; begin(x); // // it allows using the detected idiom to retrieve the return type diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/macro_unscope.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/macro_unscope.hpp index c6620d1e2c10..2edb1686d3ff 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/macro_unscope.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/macro_unscope.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once @@ -34,6 +34,7 @@ #undef JSON_HAS_CPP_14 #undef JSON_HAS_CPP_17 #undef JSON_HAS_CPP_20 + #undef JSON_HAS_CPP_23 #undef JSON_HAS_FILESYSTEM #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM #undef JSON_HAS_THREE_WAY_COMPARISON diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/call_std/begin.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/call_std/begin.hpp index 364cc89d8793..6e697b58e9c8 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/call_std/begin.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/call_std/begin.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/call_std/end.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/call_std/end.hpp index 463f07061b1d..4d279146dac5 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/call_std/end.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/call_std/end.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/cpp_future.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/cpp_future.hpp index 412b5aa74b4b..57811b985172 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/cpp_future.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/cpp_future.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-FileCopyrightText: 2018 The Abseil Authors // SPDX-License-Identifier: MIT @@ -162,7 +162,7 @@ struct static_const #endif template -inline constexpr std::array make_array(Args&& ... 
args) +constexpr std::array make_array(Args&& ... args) { return std::array {{static_cast(std::forward(args))...}}; } diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/detected.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/detected.hpp index 1db9bf9ca322..c394733e1b2f 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/detected.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/detected.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/identity_tag.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/identity_tag.hpp index 269deffb26f3..c39dabbacaa8 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/identity_tag.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/identity_tag.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/is_sax.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/is_sax.hpp index 4e02bc1480ee..2574f72b9380 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/is_sax.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/is_sax.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/std_fs.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/std_fs.hpp index e381a3a4013e..821da3a3da9c 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/std_fs.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/std_fs.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once @@ -19,7 +19,7 @@ namespace std_fs = std::experimental::filesystem; } // namespace detail NLOHMANN_JSON_NAMESPACE_END #elif JSON_HAS_FILESYSTEM -#include +#include // NOLINT(build/c++17) NLOHMANN_JSON_NAMESPACE_BEGIN namespace detail { diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/type_traits.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/type_traits.hpp index e1b000dcc225..d834a0d56e19 100644 --- 
a/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/type_traits.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/type_traits.hpp @@ -1,18 +1,18 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once #include // numeric_limits +#include // char_traits +#include // tuple #include // false_type, is_constructible, is_integral, is_same, true_type #include // declval -#include // tuple -#include // char_traits #include #include @@ -211,7 +211,7 @@ struct char_traits : std::char_traits static constexpr int_type eof() noexcept { - return static_cast(EOF); + return static_cast(std::char_traits::eof()); } }; @@ -235,7 +235,7 @@ struct char_traits : std::char_traits static constexpr int_type eof() noexcept { - return static_cast(EOF); + return static_cast(std::char_traits::eof()); } }; @@ -261,19 +261,19 @@ struct is_default_constructible : std::is_default_constructible {}; template struct is_default_constructible> - : conjunction, is_default_constructible> {}; + : conjunction, is_default_constructible> {}; template struct is_default_constructible> - : conjunction, is_default_constructible> {}; + : conjunction, is_default_constructible> {}; template struct is_default_constructible> - : conjunction...> {}; + : conjunction...> {}; template struct is_default_constructible> - : conjunction...> {}; + : conjunction...> {}; template struct is_constructible : std::is_constructible {}; @@ -471,8 +471,8 @@ is_detected::value&& // special case for types like std::filesystem::path whose iterator's value_type are themselves // c.f. 
https://github.com/nlohmann/json/pull/3073 !std::is_same>::value&& - is_complete_type < - detected_t>::value >> +is_complete_type < +detected_t>::value >> { using value_type = range_value_t; @@ -595,12 +595,12 @@ using is_usable_as_key_type = typename std::conditional < template> using is_usable_as_basic_json_key_type = typename std::conditional < - is_usable_as_key_type::value - && !is_json_iterator_of::value, - std::true_type, - std::false_type >::type; + is_usable_as_key_type::value + && !is_json_iterator_of::value, + std::true_type, + std::false_type >::type; template using detect_erase_with_key_type = decltype(std::declval().erase(std::declval())); @@ -734,7 +734,7 @@ struct value_in_range_of_impl1 }; template -inline constexpr bool value_in_range_of(T val) +constexpr bool value_in_range_of(T val) { return value_in_range_of_impl1::test(val); } @@ -750,7 +750,7 @@ namespace impl { template -inline constexpr bool is_c_string() +constexpr bool is_c_string() { using TUnExt = typename std::remove_extent::type; using TUnCVExt = typename std::remove_cv::type; @@ -778,7 +778,7 @@ namespace impl { template -inline constexpr bool is_transparent() +constexpr bool is_transparent() { return is_detected::value; } diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/void_t.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/void_t.hpp index 99615c7c5aba..258475c49ce6 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/void_t.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/meta/void_t.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/output/binary_writer.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/output/binary_writer.hpp index f475d57be800..f81fa54c7b1e 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/output/binary_writer.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/output/binary_writer.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once @@ -28,6 +28,13 @@ NLOHMANN_JSON_NAMESPACE_BEGIN namespace detail { +/// how to encode BJData +enum class bjdata_version_t +{ + draft2, + draft3, +}; + /////////////////// // binary writer // /////////////////// @@ -612,7 +619,7 @@ class binary_writer case value_t::binary: { // step 0: determine if the binary type has a set subtype to - // determine whether or not to use the ext or fixext types + // determine whether to use the ext or fixext types const bool use_ext = j.m_data.m_value.binary->has_subtype(); // step 1: write control byte and the byte string length @@ -735,11 +742,14 @@ class binary_writer @param[in] use_type whether to use '$' prefixes (optimized format) @param[in] add_prefix whether prefixes need to be used for this value @param[in] use_bjdata whether write in BJData format, default is false + 
@param[in] bjdata_version which BJData version to use, default is draft2 */ void write_ubjson(const BasicJsonType& j, const bool use_count, const bool use_type, const bool add_prefix = true, - const bool use_bjdata = false) + const bool use_bjdata = false, const bjdata_version_t bjdata_version = bjdata_version_t::draft2) { + const bool bjdata_draft3 = use_bjdata && bjdata_version == bjdata_version_t::draft3; + switch (j.type()) { case value_t::null: @@ -829,7 +839,7 @@ class binary_writer for (const auto& el : *j.m_data.m_value.array) { - write_ubjson(el, use_count, use_type, prefix_required, use_bjdata); + write_ubjson(el, use_count, use_type, prefix_required, use_bjdata, bjdata_version); } if (!use_count) @@ -847,11 +857,11 @@ class binary_writer oa->write_character(to_char_type('[')); } - if (use_type && !j.m_data.m_value.binary->empty()) + if (use_type && (bjdata_draft3 || !j.m_data.m_value.binary->empty())) { JSON_ASSERT(use_count); oa->write_character(to_char_type('$')); - oa->write_character('U'); + oa->write_character(bjdata_draft3 ? 'B' : 'U'); } if (use_count) @@ -870,7 +880,7 @@ class binary_writer { for (size_t i = 0; i < j.m_data.m_value.binary->size(); ++i) { - oa->write_character(to_char_type('U')); + oa->write_character(to_char_type(bjdata_draft3 ? 'B' : 'U')); oa->write_character(j.m_data.m_value.binary->data()[i]); } } @@ -887,7 +897,7 @@ class binary_writer { if (use_bjdata && j.m_data.m_value.object->size() == 3 && j.m_data.m_value.object->find("_ArrayType_") != j.m_data.m_value.object->end() && j.m_data.m_value.object->find("_ArraySize_") != j.m_data.m_value.object->end() && j.m_data.m_value.object->find("_ArrayData_") != j.m_data.m_value.object->end()) { - if (!write_bjdata_ndarray(*j.m_data.m_value.object, use_count, use_type)) // decode bjdata ndarray in the JData format (https://github.com/NeuroJSON/jdata) + if (!write_bjdata_ndarray(*j.m_data.m_value.object, use_count, use_type, bjdata_version)) // decode bjdata ndarray in the JData format (https://github.com/NeuroJSON/jdata) { break; } @@ -931,7 +941,7 @@ class binary_writer oa->write_characters( reinterpret_cast(el.first.c_str()), el.first.size()); - write_ubjson(el.second, use_count, use_type, prefix_required, use_bjdata); + write_ubjson(el.second, use_count, use_type, prefix_required, use_bjdata, bjdata_version); } if (!use_count) @@ -1087,7 +1097,8 @@ class binary_writer } else { - JSON_THROW(out_of_range::create(407, concat("integer number ", std::to_string(j.m_data.m_value.number_unsigned), " cannot be represented by BSON as it does not fit int64"), &j)); + write_bson_entry_header(name, 0x11 /* uint64 */); + write_number(static_cast(j.m_data.m_value.number_unsigned), true); } } @@ -1615,10 +1626,11 @@ class binary_writer /*! 
@return false if the object is successfully converted to a bjdata ndarray, true if the type or size is invalid */ - bool write_bjdata_ndarray(const typename BasicJsonType::object_t& value, const bool use_count, const bool use_type) + bool write_bjdata_ndarray(const typename BasicJsonType::object_t& value, const bool use_count, const bool use_type, const bjdata_version_t bjdata_version) { std::map bjdtype = {{"uint8", 'U'}, {"int8", 'i'}, {"uint16", 'u'}, {"int16", 'I'}, - {"uint32", 'm'}, {"int32", 'l'}, {"uint64", 'M'}, {"int64", 'L'}, {"single", 'd'}, {"double", 'D'}, {"char", 'C'} + {"uint32", 'm'}, {"int32", 'l'}, {"uint64", 'M'}, {"int64", 'L'}, {"single", 'd'}, {"double", 'D'}, + {"char", 'C'}, {"byte", 'B'} }; string_t key = "_ArrayType_"; @@ -1648,10 +1660,10 @@ class binary_writer oa->write_character('#'); key = "_ArraySize_"; - write_ubjson(value.at(key), use_count, use_type, true, true); + write_ubjson(value.at(key), use_count, use_type, true, true, bjdata_version); key = "_ArrayData_"; - if (dtype == 'U' || dtype == 'C') + if (dtype == 'U' || dtype == 'C' || dtype == 'B') { for (const auto& el : value.at(key)) { diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/output/output_adapters.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/output/output_adapters.hpp index 626f7c0c85e3..60806970203b 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/output/output_adapters.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/output/output_adapters.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/output/serializer.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/output/serializer.hpp index ed20b0d9e183..3137f3c3602c 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/output/serializer.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/output/serializer.hpp @@ -1,10 +1,10 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2008-2009 Björn Hoehrmann -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2008 - 2009 Björn Hoehrmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once @@ -643,7 +643,7 @@ class serializer @param[in] x unsigned integer number to count its digits @return number of decimal digits */ - inline unsigned int count_digits(number_unsigned_t x) noexcept + unsigned int count_digits(number_unsigned_t x) noexcept { unsigned int n_digits = 1; for (;;) @@ -926,7 +926,7 @@ class serializer ? (byte & 0x3fu) | (codep << 6u) : (0xFFu >> type) & (byte); - const std::size_t index = 256u + static_cast(state) * 16u + static_cast(type); + const std::size_t index = 256u + (static_cast(state) * 16u) + static_cast(type); JSON_ASSERT(index < utf8d.size()); state = utf8d[index]; return state; @@ -952,7 +952,7 @@ class serializer * absolute values of INT_MIN and INT_MAX are usually not the same. See * #1708 for details. 
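The bjdata_version_t argument threaded through write_ubjson() and write_bjdata_ndarray() above is exposed further down in this patch via an extra defaulted parameter on basic_json::to_bjdata(). A hedged usage sketch of that public surface (the JSON value and flag choices are illustrative only):

#include <cstdint>
#include <vector>
#include <nlohmann/json.hpp>

int main()
{
    const nlohmann::json j = {{"n", 42}, {"values", {1, 2, 3}}};

    // Default call is unchanged: BJData draft-2 output.
    const std::vector<std::uint8_t> draft2 = nlohmann::json::to_bjdata(j);

    // The new trailing parameter opts into draft-3 behaviour (e.g. binary
    // payloads are typed 'B' rather than 'U', as in write_ubjson above);
    // use_size/use_type are the pre-existing size/type optimizations.
    const std::vector<std::uint8_t> draft3 = nlohmann::json::to_bjdata(
        j, /*use_size=*/true, /*use_type=*/true,
        nlohmann::json::bjdata_version_t::draft3);

    (void)draft2;
    (void)draft3;
}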
*/ - inline number_unsigned_t remove_sign(number_integer_t x) noexcept + number_unsigned_t remove_sign(number_integer_t x) noexcept { JSON_ASSERT(x < 0 && x < (std::numeric_limits::max)()); // NOLINT(misc-redundant-expression) return static_cast(-(x + 1)) + 1; diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/string_concat.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/string_concat.hpp index f49e8d215ee2..78569a8ad30b 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/string_concat.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/string_concat.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/string_escape.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/string_escape.hpp index 7f1b5c562bc3..7f0231819391 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/string_escape.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/string_escape.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/string_utils.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/string_utils.hpp new file mode 100644 index 000000000000..d8ecc0cba4c4 --- /dev/null +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/string_utils.hpp @@ -0,0 +1,37 @@ +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.12.0 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-License-Identifier: MIT + +#pragma once + +#include // size_t +#include // string, to_string + +#include + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +template +void int_to_string(StringType& target, std::size_t value) +{ + // For ADL + using std::to_string; + target = to_string(value); +} + +template +StringType to_string(std::size_t value) +{ + StringType result; + int_to_string(result, value); + return result; +} + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/detail/value_t.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/detail/value_t.hpp index 07688fe8c682..4bd87abd27d8 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/detail/value_t.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/detail/value_t.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once diff --git 
a/contrib/restricted/nlohmann_json/include/nlohmann/json.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/json.hpp index 95d6bf1dd9d8..ed51cd544748 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/json.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/json.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT /****************************************************************************\ @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -114,9 +115,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec friend class ::nlohmann::detail::binary_writer; template friend class ::nlohmann::detail::binary_reader; - template + template friend class ::nlohmann::detail::json_sax_dom_parser; - template + template friend class ::nlohmann::detail::json_sax_dom_callback_parser; friend class ::nlohmann::detail::exception; @@ -137,7 +138,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec ) { return ::nlohmann::detail::parser(std::move(adapter), - std::move(cb), allow_exceptions, ignore_comments); + std::move(cb), allow_exceptions, ignore_comments); } private: @@ -170,6 +171,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec using error_handler_t = detail::error_handler_t; /// how to treat CBOR tags using cbor_tag_handler_t = detail::cbor_tag_handler_t; + /// how to encode BJData + using bjdata_version_t = detail::bjdata_version_t; /// helper type for initializer lists of basic_json values using initializer_list_t = std::initializer_list>; @@ -249,7 +252,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec { basic_json result; - result["copyright"] = "(C) 2013-2023 Niels Lohmann"; + result["copyright"] = "(C) 2013-2025 Niels Lohmann"; result["name"] = "JSON for Modern C++"; result["url"] = "https://github.com/nlohmann/json"; result["version"]["string"] = @@ -514,7 +517,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec object = nullptr; // silence warning, see #821 if (JSON_HEDLEY_UNLIKELY(t == value_t::null)) { - JSON_THROW(other_error::create(500, "961c151d2e87f2686a955a9be24d316f1362bf21 3.11.3", nullptr)); // LCOV_EXCL_LINE + JSON_THROW(other_error::create(500, "961c151d2e87f2686a955a9be24d316f1362bf21 3.12.0", nullptr)); // LCOV_EXCL_LINE } break; } @@ -750,10 +753,10 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec return it; } - reference set_parent(reference j, std::size_t old_capacity = static_cast(-1)) + reference set_parent(reference j, std::size_t old_capacity = detail::unknown_size()) { #if JSON_DIAGNOSTICS - if (old_capacity != static_cast(-1)) + if (old_capacity != detail::unknown_size()) { // see https://github.com/nlohmann/json/issues/2838 JSON_ASSERT(type() == value_t::array); @@ -847,6 +850,10 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec detail::enable_if_t < detail::is_basic_json::value&& !std::is_same::value, int > = 0 > basic_json(const BasicJsonType& val) +#if JSON_DIAGNOSTIC_POSITIONS + : start_position(val.start_pos()), + end_position(val.end_pos()) +#endif { using 
other_boolean_t = typename BasicJsonType::boolean_t; using other_number_float_t = typename BasicJsonType::number_float_t; @@ -893,6 +900,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE } JSON_ASSERT(m_data.m_type == val.type()); + set_parents(); assert_invariant(); } @@ -1029,7 +1037,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec template < class InputIT, typename std::enable_if < std::is_same::value || std::is_same::value, int >::type = 0 > - basic_json(InputIT first, InputIT last) + basic_json(InputIT first, InputIT last) // NOLINT(performance-unnecessary-value-param) { JSON_ASSERT(first.m_object != nullptr); JSON_ASSERT(last.m_object != nullptr); @@ -1144,6 +1152,10 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @sa https://json.nlohmann.me/api/basic_json/basic_json/ basic_json(const basic_json& other) : json_base_class_t(other) +#if JSON_DIAGNOSTIC_POSITIONS + , start_position(other.start_position) + , end_position(other.end_position) +#endif { m_data.m_type = other.m_data.m_type; // check of passed value is valid @@ -1213,15 +1225,24 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @sa https://json.nlohmann.me/api/basic_json/basic_json/ basic_json(basic_json&& other) noexcept : json_base_class_t(std::forward(other)), - m_data(std::move(other.m_data)) + m_data(std::move(other.m_data)) // cppcheck-suppress[accessForwarded] TODO check +#if JSON_DIAGNOSTIC_POSITIONS + , start_position(other.start_position) // cppcheck-suppress[accessForwarded] TODO check + , end_position(other.end_position) // cppcheck-suppress[accessForwarded] TODO check +#endif { // check that passed value is valid - other.assert_invariant(false); + other.assert_invariant(false); // cppcheck-suppress[accessForwarded] // invalidate payload other.m_data.m_type = value_t::null; other.m_data.m_value = {}; +#if JSON_DIAGNOSTIC_POSITIONS + other.start_position = std::string::npos; + other.end_position = std::string::npos; +#endif + set_parents(); assert_invariant(); } @@ -1242,6 +1263,12 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec using std::swap; swap(m_data.m_type, other.m_data.m_type); swap(m_data.m_value, other.m_data.m_value); + +#if JSON_DIAGNOSTIC_POSITIONS + swap(start_position, other.start_position); + swap(end_position, other.end_position); +#endif + json_base_class_t::operator=(std::move(other)); set_parents(); @@ -1463,13 +1490,13 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// get a pointer to the value (integer number) number_integer_t* get_impl_ptr(number_integer_t* /*unused*/) noexcept { - return is_number_integer() ? &m_data.m_value.number_integer : nullptr; + return m_data.m_type == value_t::number_integer ? &m_data.m_value.number_integer : nullptr; } /// get a pointer to the value (integer number) constexpr const number_integer_t* get_impl_ptr(const number_integer_t* /*unused*/) const noexcept { - return is_number_integer() ? &m_data.m_value.number_integer : nullptr; + return m_data.m_type == value_t::number_integer ? 
&m_data.m_value.number_integer : nullptr; } /// get a pointer to the value (unsigned number) @@ -1948,7 +1975,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec { // create better exception explanation JSON_THROW(out_of_range::create(401, detail::concat("array index ", std::to_string(idx), " is out of range"), this)); - } + } // cppcheck-suppress[missingReturn] } else { @@ -1971,7 +1998,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec { // create better exception explanation JSON_THROW(out_of_range::create(401, detail::concat("array index ", std::to_string(idx), " is out of range"), this)); - } + } // cppcheck-suppress[missingReturn] } else { @@ -2116,7 +2143,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @brief access specified object element /// @sa https://json.nlohmann.me/api/basic_json/operator%5B%5D/ - reference operator[](typename object_t::key_type key) + reference operator[](typename object_t::key_type key) // NOLINT(performance-unnecessary-value-param) { // implicitly convert null value to an empty object if (is_null()) @@ -2426,7 +2453,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec template < class IteratorType, detail::enable_if_t < std::is_same::value || std::is_same::value, int > = 0 > - IteratorType erase(IteratorType pos) + IteratorType erase(IteratorType pos) // NOLINT(performance-unnecessary-value-param) { // make sure iterator fits the current value if (JSON_HEDLEY_UNLIKELY(this != pos.m_object)) @@ -2496,7 +2523,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec template < class IteratorType, detail::enable_if_t < std::is_same::value || std::is_same::value, int > = 0 > - IteratorType erase(IteratorType first, IteratorType last) + IteratorType erase(IteratorType first, IteratorType last) // NOLINT(performance-unnecessary-value-param) { // make sure iterator fits the current value if (JSON_HEDLEY_UNLIKELY(this != first.m_object || this != last.m_object)) @@ -3263,7 +3290,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @note: This uses std::distance to support GCC 4.8, /// see https://github.com/nlohmann/json/pull/1257 template - iterator insert_iterator(const_iterator pos, Args&& ... args) + iterator insert_iterator(const_iterator pos, Args&& ... 
args) // NOLINT(performance-unnecessary-value-param) { iterator result(this); JSON_ASSERT(m_data.m_value.array != nullptr); @@ -3282,7 +3309,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @brief inserts element into array /// @sa https://json.nlohmann.me/api/basic_json/insert/ - iterator insert(const_iterator pos, const basic_json& val) + iterator insert(const_iterator pos, const basic_json& val) // NOLINT(performance-unnecessary-value-param) { // insert only works for arrays if (JSON_HEDLEY_LIKELY(is_array())) @@ -3302,14 +3329,14 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @brief inserts element into array /// @sa https://json.nlohmann.me/api/basic_json/insert/ - iterator insert(const_iterator pos, basic_json&& val) + iterator insert(const_iterator pos, basic_json&& val) // NOLINT(performance-unnecessary-value-param) { return insert(pos, val); } /// @brief inserts copies of element into array /// @sa https://json.nlohmann.me/api/basic_json/insert/ - iterator insert(const_iterator pos, size_type cnt, const basic_json& val) + iterator insert(const_iterator pos, size_type cnt, const basic_json& val) // NOLINT(performance-unnecessary-value-param) { // insert only works for arrays if (JSON_HEDLEY_LIKELY(is_array())) @@ -3329,7 +3356,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @brief inserts range of elements into array /// @sa https://json.nlohmann.me/api/basic_json/insert/ - iterator insert(const_iterator pos, const_iterator first, const_iterator last) + iterator insert(const_iterator pos, const_iterator first, const_iterator last) // NOLINT(performance-unnecessary-value-param) { // insert only works for arrays if (JSON_HEDLEY_UNLIKELY(!is_array())) @@ -3360,7 +3387,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @brief inserts elements from initializer list into array /// @sa https://json.nlohmann.me/api/basic_json/insert/ - iterator insert(const_iterator pos, initializer_list_t ilist) + iterator insert(const_iterator pos, initializer_list_t ilist) // NOLINT(performance-unnecessary-value-param) { // insert only works for arrays if (JSON_HEDLEY_UNLIKELY(!is_array())) @@ -3380,7 +3407,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @brief inserts range of elements into object /// @sa https://json.nlohmann.me/api/basic_json/insert/ - void insert(const_iterator first, const_iterator last) + void insert(const_iterator first, const_iterator last) // NOLINT(performance-unnecessary-value-param) { // insert only works for objects if (JSON_HEDLEY_UNLIKELY(!is_object())) @@ -3401,6 +3428,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec } m_data.m_value.object->insert(first.m_it.object_iterator, last.m_it.object_iterator); + set_parents(); } /// @brief updates a JSON object from another object, overwriting existing keys @@ -3412,7 +3440,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @brief updates a JSON object from another object, overwriting existing keys /// @sa https://json.nlohmann.me/api/basic_json/update/ - void update(const_iterator first, const_iterator last, bool merge_objects = false) + void update(const_iterator first, const_iterator last, bool merge_objects = false) // NOLINT(performance-unnecessary-value-param) { // implicitly convert null value to an empty object if (is_null()) @@ -4013,12 
+4041,12 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec template JSON_HEDLEY_WARN_UNUSED_RESULT static basic_json parse(InputType&& i, - const parser_callback_t cb = nullptr, + parser_callback_t cb = nullptr, const bool allow_exceptions = true, const bool ignore_comments = false) { basic_json result; - parser(detail::input_adapter(std::forward(i)), cb, allow_exceptions, ignore_comments).parse(true, result); + parser(detail::input_adapter(std::forward(i)), std::move(cb), allow_exceptions, ignore_comments).parse(true, result); // cppcheck-suppress[accessMoved,accessForwarded] return result; } @@ -4028,24 +4056,24 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec JSON_HEDLEY_WARN_UNUSED_RESULT static basic_json parse(IteratorType first, IteratorType last, - const parser_callback_t cb = nullptr, + parser_callback_t cb = nullptr, const bool allow_exceptions = true, const bool ignore_comments = false) { basic_json result; - parser(detail::input_adapter(std::move(first), std::move(last)), cb, allow_exceptions, ignore_comments).parse(true, result); + parser(detail::input_adapter(std::move(first), std::move(last)), std::move(cb), allow_exceptions, ignore_comments).parse(true, result); // cppcheck-suppress[accessMoved] return result; } JSON_HEDLEY_WARN_UNUSED_RESULT JSON_HEDLEY_DEPRECATED_FOR(3.8.0, parse(ptr, ptr + len)) static basic_json parse(detail::span_input_adapter&& i, - const parser_callback_t cb = nullptr, + parser_callback_t cb = nullptr, const bool allow_exceptions = true, const bool ignore_comments = false) { basic_json result; - parser(i.get(), cb, allow_exceptions, ignore_comments).parse(true, result); + parser(i.get(), std::move(cb), allow_exceptions, ignore_comments).parse(true, result); // cppcheck-suppress[accessMoved] return result; } @@ -4224,6 +4252,23 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec basic_json* m_parent = nullptr; #endif +#if JSON_DIAGNOSTIC_POSITIONS + /// the start position of the value + std::size_t start_position = std::string::npos; + /// the end position of the value + std::size_t end_position = std::string::npos; + public: + constexpr std::size_t start_pos() const noexcept + { + return start_position; + } + + constexpr std::size_t end_pos() const noexcept + { + return end_position; + } +#endif + ////////////////////////////////////////// // binary serialization/deserialization // ////////////////////////////////////////// @@ -4309,27 +4354,30 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @sa https://json.nlohmann.me/api/basic_json/to_bjdata/ static std::vector to_bjdata(const basic_json& j, const bool use_size = false, - const bool use_type = false) + const bool use_type = false, + const bjdata_version_t version = bjdata_version_t::draft2) { std::vector result; - to_bjdata(j, result, use_size, use_type); + to_bjdata(j, result, use_size, use_type, version); return result; } /// @brief create a BJData serialization of a given JSON value /// @sa https://json.nlohmann.me/api/basic_json/to_bjdata/ static void to_bjdata(const basic_json& j, detail::output_adapter o, - const bool use_size = false, const bool use_type = false) + const bool use_size = false, const bool use_type = false, + const bjdata_version_t version = bjdata_version_t::draft2) { - binary_writer(o).write_ubjson(j, use_size, use_type, true, true); + binary_writer(o).write_ubjson(j, use_size, use_type, true, true, version); } /// @brief create a 
BJData serialization of a given JSON value /// @sa https://json.nlohmann.me/api/basic_json/to_bjdata/ static void to_bjdata(const basic_json& j, detail::output_adapter o, - const bool use_size = false, const bool use_type = false) + const bool use_size = false, const bool use_type = false, + const bjdata_version_t version = bjdata_version_t::draft2) { - binary_writer(o).write_ubjson(j, use_size, use_type, true, true); + binary_writer(o).write_ubjson(j, use_size, use_type, true, true, version); } /// @brief create a BSON serialization of a given JSON value @@ -4365,9 +4413,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); - const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4381,9 +4429,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); - const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4406,10 +4454,10 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) - const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); + const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4422,9 +4470,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); - const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? 
result : basic_json(value_t::discarded); } @@ -4437,9 +4485,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); - const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4460,10 +4508,10 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) - const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); + const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4476,9 +4524,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); - const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4491,9 +4539,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); - const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4514,10 +4562,10 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) - const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); + const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? 
result : basic_json(value_t::discarded); } @@ -4530,9 +4578,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); - const bool res = binary_reader(std::move(ia), input_format_t::bjdata).sax_parse(input_format_t::bjdata, &sdp, strict); + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(std::move(ia), input_format_t::bjdata).sax_parse(input_format_t::bjdata, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4545,9 +4593,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); - const bool res = binary_reader(std::move(ia), input_format_t::bjdata).sax_parse(input_format_t::bjdata, &sdp, strict); + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(std::move(ia), input_format_t::bjdata).sax_parse(input_format_t::bjdata, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4560,9 +4608,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); - const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4575,9 +4623,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); - const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4598,10 +4646,10 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) - const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); + const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? 
result : basic_json(value_t::discarded); } /// @} @@ -4702,7 +4750,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec // the valid JSON Patch operations enum class patch_operations {add, remove, replace, move, copy, test, invalid}; - const auto get_op = [](const std::string & op) + const auto get_op = [](const string_t& op) { if (op == "add") { @@ -4733,7 +4781,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec }; // wrapper for "add" operation; add value at ptr - const auto operation_add = [&result](json_pointer & ptr, basic_json val) + const auto operation_add = [&result](json_pointer & ptr, const basic_json & val) { // adding to the root of the target document means replacing it if (ptr.empty()) @@ -4839,15 +4887,15 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec for (const auto& val : json_patch) { // wrapper to get a value for an operation - const auto get_value = [&val](const std::string & op, - const std::string & member, + const auto get_value = [&val](const string_t& op, + const string_t& member, bool string_type) -> basic_json & { // find value auto it = val.m_data.m_value.object->find(member); // context-sensitive error message - const auto error_msg = (op == "op") ? "operation" : detail::concat("operation '", op, '\''); + const auto error_msg = (op == "op") ? "operation" : detail::concat("operation '", op, '\''); // NOLINT(bugprone-unused-local-non-trivial-variable) // check if desired value is present if (JSON_HEDLEY_UNLIKELY(it == val.m_data.m_value.object->end())) @@ -4874,8 +4922,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec } // collect mandatory members - const auto op = get_value("op", "op", true).template get(); - const auto path = get_value(op, "path", true).template get(); + const auto op = get_value("op", "op", true).template get(); + const auto path = get_value(op, "path", true).template get(); json_pointer ptr(path); switch (get_op(op)) @@ -4901,7 +4949,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec case patch_operations::move: { - const auto from_path = get_value("move", "from", true).template get(); + const auto from_path = get_value("move", "from", true).template get(); json_pointer from_ptr(from_path); // the "from" location must exist - use at() @@ -4918,7 +4966,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec case patch_operations::copy: { - const auto from_path = get_value("copy", "from", true).template get(); + const auto from_path = get_value("copy", "from", true).template get(); const json_pointer from_ptr(from_path); // the "from" location must exist - use at() @@ -4978,7 +5026,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @sa https://json.nlohmann.me/api/basic_json/diff/ JSON_HEDLEY_WARN_UNUSED_RESULT static basic_json diff(const basic_json& source, const basic_json& target, - const std::string& path = "") + const string_t& path = "") { // the patch basic_json result(value_t::array); @@ -5008,7 +5056,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec while (i < source.size() && i < target.size()) { // recursive call to compare array values at index i - auto temp_diff = diff(source[i], target[i], detail::concat(path, '/', std::to_string(i))); + auto temp_diff = diff(source[i], target[i], detail::concat(path, '/', detail::to_string(i))); 
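(Illustration, not part of the patch.) The hunks above only retarget the JSON Patch helpers inside basic_json::diff() and basic_json::patch() to string_t and detail::to_string so that non-default string types keep working; the public contract is unchanged. A minimal round-trip sketch of that contract, assuming the single-header nlohmann/json.hpp include path from the 3.12.0 import in this series:

```cpp
// Hedged sketch: diff() emits an RFC 6902 patch; applying it with patch()
// must reproduce the target. Include path assumed from the import above.
#include <nlohmann/json.hpp>

#include <cassert>
#include <iostream>

int main()
{
    using nlohmann::json;

    const json source = {{"name", "json"}, {"tags", {"a", "b"}}};
    const json target = {{"name", "json"}, {"tags", {"a", "b", "c"}}, {"size", 3}};

    const json ops = json::diff(source, target);  // RFC 6902 operations
    assert(source.patch(ops) == target);          // the round trip must hold

    std::cout << ops.dump(2) << '\n';
    return 0;
}
```

The same round trip is expected to hold when basic_json is instantiated with a non-std::string string_t, which appears to be the point of the string_t/detail::to_string substitutions above.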
result.insert(result.end(), temp_diff.begin(), temp_diff.end()); ++i; } @@ -5025,7 +5073,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec result.insert(result.begin() + end_index, object( { {"op", "remove"}, - {"path", detail::concat(path, '/', std::to_string(i))} + {"path", detail::concat(path, '/', detail::to_string(i))} })); ++i; } @@ -5036,7 +5084,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec result.push_back( { {"op", "add"}, - {"path", detail::concat(path, "/-")}, + {"path", detail::concat(path, "/-")}, {"value", target[i]} }); ++i; @@ -5051,7 +5099,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec for (auto it = source.cbegin(); it != source.cend(); ++it) { // escape the key name to be used in a JSON patch - const auto path_key = detail::concat(path, '/', detail::escape(it.key())); + const auto path_key = detail::concat(path, '/', detail::escape(it.key())); if (target.find(it.key()) != target.end()) { @@ -5075,7 +5123,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec if (source.find(it.key()) == source.end()) { // found a key that is not in this -> add it - const auto path_key = detail::concat(path, '/', detail::escape(it.key())); + const auto path_key = detail::concat(path, '/', detail::escape(it.key())); result.push_back( { {"op", "add"}, {"path", path_key}, diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/json_fwd.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/json_fwd.hpp index 32bde590f8cc..a595ae4abf56 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/json_fwd.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/json_fwd.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #ifndef INCLUDE_NLOHMANN_JSON_FWD_HPP_ diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/ordered_map.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/ordered_map.hpp index 39e4a50a1496..d830c6d7e8d0 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/ordered_map.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/ordered_map.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once @@ -26,7 +26,7 @@ NLOHMANN_JSON_NAMESPACE_BEGIN /// for use within nlohmann::basic_json template , class Allocator = std::allocator>> - struct ordered_map : std::vector, Allocator> + struct ordered_map : std::vector, Allocator> { using key_type = Key; using mapped_type = T; @@ -341,7 +341,7 @@ template , template using require_input_iter = typename std::enable_if::iterator_category, - std::input_iterator_tag>::value>::type; + std::input_iterator_tag>::value>::type; template> void insert(InputIt first, InputIt last) diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/thirdparty/hedley/hedley.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/thirdparty/hedley/hedley.hpp 
index a1dc64f60063..fb0b073b0c65 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/thirdparty/hedley/hedley.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/thirdparty/hedley/hedley.hpp @@ -2,11 +2,11 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-FileCopyrightText: 2016-2021 Evan Nemerson +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2016 - 2021 Evan Nemerson // SPDX-License-Identifier: MIT /* Hedley - https://nemequ.github.io/hedley diff --git a/contrib/restricted/nlohmann_json/include/nlohmann/thirdparty/hedley/hedley_undef.hpp b/contrib/restricted/nlohmann_json/include/nlohmann/thirdparty/hedley/hedley_undef.hpp index c0aee2bb309b..ad86d11f22cc 100644 --- a/contrib/restricted/nlohmann_json/include/nlohmann/thirdparty/hedley/hedley_undef.hpp +++ b/contrib/restricted/nlohmann_json/include/nlohmann/thirdparty/hedley/hedley_undef.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 +// | | |__ | | | | | | version 3.12.0 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann // SPDX-License-Identifier: MIT #pragma once diff --git a/contrib/restricted/nlohmann_json/ya.make b/contrib/restricted/nlohmann_json/ya.make index b12f985a75d6..d39c7b0f000a 100644 --- a/contrib/restricted/nlohmann_json/ya.make +++ b/contrib/restricted/nlohmann_json/ya.make @@ -10,9 +10,9 @@ LICENSE( LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -VERSION(3.11.3) +VERSION(3.12.0) -ORIGINAL_SOURCE(https://github.com/nlohmann/json/archive/v3.11.3.tar.gz) +ORIGINAL_SOURCE(https://github.com/nlohmann/json/archive/v3.12.0.tar.gz) ADDINCL( GLOBAL contrib/restricted/nlohmann_json/include From 56ff6f65502d7744348c5bc22d6a0cee3082d98a Mon Sep 17 00:00:00 2001 From: alexelexa Date: Tue, 6 May 2025 17:02:19 +0300 Subject: [PATCH 12/24] YT-23359: Replica move balancing commit_hash:348025fad19bc9eaae2346073778c6b0c940eef1 --- yt/yt/core/misc/collection_helpers-inl.h | 7 +++++++ yt/yt/core/misc/collection_helpers.h | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/yt/yt/core/misc/collection_helpers-inl.h b/yt/yt/core/misc/collection_helpers-inl.h index 85105406dc01..4eb891d9c39e 100644 --- a/yt/yt/core/misc/collection_helpers-inl.h +++ b/yt/yt/core/misc/collection_helpers-inl.h @@ -86,6 +86,13 @@ std::vector GetKeys(const T& collection, size_t sizeLimit) }); } +template +THashSet GetKeySet(const T& collection, size_t sizeLimit) +{ + auto vec = GetKeys(collection, sizeLimit); + return THashSet(vec.begin(), vec.end()); +} + template std::vector GetValues(const T& collection, size_t sizeLimit) { diff --git a/yt/yt/core/misc/collection_helpers.h b/yt/yt/core/misc/collection_helpers.h index 6614e3a3974f..9da2189780e6 100644 --- a/yt/yt/core/misc/collection_helpers.h +++ b/yt/yt/core/misc/collection_helpers.h @@ -18,6 +18,11 @@ std::vector GetKeys( const T& collection, size_t sizeLimit = std::numeric_limits::max()); +template +THashSet GetKeySet( + const T& collection, + size_t sizeLimit = std::numeric_limits::max()); + template std::vector GetValues( const T& collection, From 6ce738d44097d75b303066dd5ad549fe432794b1 Mon Sep 17 00:00:00 2001 From: imunkin Date: Tue, 6 May 
2025 17:16:22 +0300 Subject: [PATCH 13/24] YQL-19884: Add Ascii{Starts,Ends}WithIgnoreCase functions to String UDF commit_hash:4b86982498876ec14632c0a018a940c3393bb5d6 --- yql/essentials/docs/en/changelog/2025.02.md | 4 +- yql/essentials/docs/en/udf/list/string.md | 12 ++-- yql/essentials/docs/ru/changelog/2025.02.md | 3 +- yql/essentials/docs/ru/udf/list/string.md | 10 +-- .../public/udf/arrow/udf_arrow_helpers.h | 14 ++-- .../udfs/common/string/string_udf.cpp | 36 ++++++++-- .../common/string/test/canondata/result.json | 15 ++++ .../test.test_AsciiCmpIgnoreCase_/results.txt | 69 +++++++++++++++++++ .../extracted | 41 +++++++++++ .../results.txt | 69 +++++++++++++++++++ .../test.test_BlockFind_/results.txt | 52 -------------- .../canondata/test.test_Find_/results.txt | 52 -------------- .../string/test/cases/AsciiCmpIgnoreCase.cfg | 2 + .../string/test/cases/AsciiCmpIgnoreCase.sql | 5 ++ .../test/cases/AsciiCmpIgnoreCase_2025_02.cfg | 3 + .../test/cases/AsciiCmpIgnoreCase_2025_02.sql | 8 +++ .../test/cases/BlockAsciiCmpIgnoreCase.cfg | 2 + .../test/cases/BlockAsciiCmpIgnoreCase.sql | 8 +++ .../common/string/test/cases/BlockFind.sql | 4 -- .../udfs/common/string/test/cases/Find.sql | 4 -- 20 files changed, 281 insertions(+), 132 deletions(-) create mode 100644 yql/essentials/udfs/common/string/test/canondata/test.test_AsciiCmpIgnoreCase_/results.txt create mode 100644 yql/essentials/udfs/common/string/test/canondata/test.test_AsciiCmpIgnoreCase_2025_02_/extracted create mode 100644 yql/essentials/udfs/common/string/test/canondata/test.test_BlockAsciiCmpIgnoreCase_/results.txt create mode 100644 yql/essentials/udfs/common/string/test/cases/AsciiCmpIgnoreCase.cfg create mode 100644 yql/essentials/udfs/common/string/test/cases/AsciiCmpIgnoreCase.sql create mode 100644 yql/essentials/udfs/common/string/test/cases/AsciiCmpIgnoreCase_2025_02.cfg create mode 100644 yql/essentials/udfs/common/string/test/cases/AsciiCmpIgnoreCase_2025_02.sql create mode 100644 yql/essentials/udfs/common/string/test/cases/BlockAsciiCmpIgnoreCase.cfg create mode 100644 yql/essentials/udfs/common/string/test/cases/BlockAsciiCmpIgnoreCase.sql diff --git a/yql/essentials/docs/en/changelog/2025.02.md b/yql/essentials/docs/en/changelog/2025.02.md index 647cd969f47f..2208e46dc573 100644 --- a/yql/essentials/docs/en/changelog/2025.02.md +++ b/yql/essentials/docs/en/changelog/2025.02.md @@ -28,4 +28,6 @@ SELECT foo_new, ... WHERE foo = 1 GROUP BY expr AS foo_new ## Changes in the String module {#string-module} -Removed String::Reverse function, use Unicode::Reverse. \ No newline at end of file +Removed String::Reverse function, use Unicode::Reverse. +Removed String::HasPrefixIgnoreCase and String::StartsWithIgnoreCase functions, use String::AsciiStartsWithIgnoreCase. +Removed String::HasSuffixIgnoreCase and String::EndsWithIgnoreCase functions, use String::AsciiEndsWithIgnoreCase. diff --git a/yql/essentials/docs/en/udf/list/string.md b/yql/essentials/docs/en/udf/list/string.md index b737792c6f9d..5cc65ccccaee 100644 --- a/yql/essentials/docs/en/udf/list/string.md +++ b/yql/essentials/docs/en/udf/list/string.md @@ -38,21 +38,25 @@ Functions for ASCII strings: * `String::ReverseFind(String{Flags:AutoMap}, String, [Uint64?]) -> Int64`: Returns the last position found or -1. The optional argument is the offset from the beginning of the string. 
+* `String::AsciiStartsWithIgnoreCase(String?, String) -> Bool` Added in the version [2025.02](../../changelog/2025.02.md#string-module) + * `String::HasPrefix(String?, String) -> Bool` -* `String::HasPrefixIgnoreCase(String?, String) -> Bool` +* `String::HasPrefixIgnoreCase(String?, String) -> Bool` Removed in the version [2025.02](../../changelog/2025.02.md#string-module) * `String::StartsWith(String?, String) -> Bool` -* `String::StartsWithIgnoreCase(String?, String) -> Bool` +* `String::StartsWithIgnoreCase(String?, String) -> Bool` Removed in the version [2025.02](../../changelog/2025.02.md#string-module) + +* `String::AsciiEndsWithIgnoreCase(String?, String) -> Bool` Added in the version [2025.02](../../changelog/2025.02.md#string-module) * `String::HasSuffix(String?, String) -> Bool` -* `String::HasSuffixIgnoreCase(String?, String) -> Bool` +* `String::HasSuffixIgnoreCase(String?, String) -> Bool` Removed in the version [2025.02](../../changelog/2025.02.md#string-module) * `String::EndsWith(String?, String) -> Bool` -* `String::EndsWithIgnoreCase(String?, String) -> Bool` +* `String::EndsWithIgnoreCase(String?, String) -> Bool` Removed in the version [2025.02](../../changelog/2025.02.md#string-module) * `String::Substring(String{Flags:AutoMap}, [Uint64?, Uint64?]) -> String` diff --git a/yql/essentials/docs/ru/changelog/2025.02.md b/yql/essentials/docs/ru/changelog/2025.02.md index 94debb338cda..2c71920240aa 100644 --- a/yql/essentials/docs/ru/changelog/2025.02.md +++ b/yql/essentials/docs/ru/changelog/2025.02.md @@ -29,4 +29,5 @@ SELECT foo_new, ... WHERE foo = 1 GROUP BY expr AS foo_new ## Изменения в модуле String {#string-module} Удалена функция String::Reverse, используйте Unicode::Reverse. - +Удалены функции String::HasPrefixIgnoreCase и String::StartsWithIgnoreCase, используйте String::AsciiStartsWithIgnoreCase. +Удалены функции String::HasSuffixIgnoreCase и String::EndsWithIgnoreCase, используйте String::AsciiEndsWithIgnoreCase. diff --git a/yql/essentials/docs/ru/udf/list/string.md b/yql/essentials/docs/ru/udf/list/string.md index 6896b41a6277..26380ea03d03 100644 --- a/yql/essentials/docs/ru/udf/list/string.md +++ b/yql/essentials/docs/ru/udf/list/string.md @@ -58,10 +58,12 @@ SELECT String::Strip("YQL "); -- "YQL" Устаревшие функции, к использованию не рекомендуются. 
-* `String::HasPrefixIgnoreCase(string:String?, prefix:String) -> Bool` -* `String::StartsWithIgnoreCase(string:String?, prefix:String) -> Bool` -* `String::HasSuffixIgnoreCase(string:String?, suffix:String) -> Bool` -* `String::EndsWithIgnoreCase(string:String?, suffix:String) -> Bool` +* `String::AsciiStartsWithIgnoreCase(string:String?, prefix:String) -> Bool` - добавлена в версии [2025.02](../../changelog/2025.02.md#string-module) +* `String::AsciiEndsWithIgnoreCase(string:String?, suffix:String) -> Bool` - добавлена в версии [2025.02](../../changelog/2025.02.md#string-module) +* `String::HasPrefixIgnoreCase(string:String?, prefix:String) -> Bool` - удалена в версии [2025.02](../../changelog/2025.02.md#string-module) +* `String::StartsWithIgnoreCase(string:String?, prefix:String) -> Bool` - удалена в версии [2025.02](../../changelog/2025.02.md#string-module) +* `String::HasSuffixIgnoreCase(string:String?, suffix:String) -> Bool` - удалена в версии [2025.02](../../changelog/2025.02.md#string-module) +* `String::EndsWithIgnoreCase(string:String?, suffix:String) -> Bool` - удалена в версии [2025.02](../../changelog/2025.02.md#string-module) Проверяют наличие префикса или суффикса в строке без учёта региста символов. diff --git a/yql/essentials/public/udf/arrow/udf_arrow_helpers.h b/yql/essentials/public/udf/arrow/udf_arrow_helpers.h index a792ab90ae5e..af7ce958d0f2 100644 --- a/yql/essentials/public/udf/arrow/udf_arrow_helpers.h +++ b/yql/essentials/public/udf/arrow/udf_arrow_helpers.h @@ -62,7 +62,7 @@ class TUdfKernelState : public arrow::compute::KernelState { return *ScalarBuilder_; } - + const IValueBuilder& GetValueBuilder() { Y_ENSURE(ValueBuilder_); return *ValueBuilder_; @@ -348,7 +348,7 @@ TScalarBuilderImpl* CastToScalarBuilderImpl(IScalarBuilder& builder) { template TReader* CastToBlockReaderImpl(IBlockReader& reader) { static_assert(std::is_base_of_v); - + auto* readerImpl = dynamic_cast(&reader); Y_ENSURE(readerImpl, TStringBuilder() << "Got " << typeid(reader).name() << " as BlockReader"); return readerImpl; @@ -444,7 +444,7 @@ struct TBinaryKernelExec { *res = MakeArray(outputArrays); } else if (arg1.is_array() && arg2.is_scalar()) { - auto& array1 = *arg1.array(); + auto& array1 = *arg1.array(); auto item2 = reader2Impl->GetScalarItem(*arg2.scalar()); auto& builder = state.GetArrayBuilder(); auto* builderImpl = CastToArrayBuilderImpl(builder); @@ -644,7 +644,7 @@ struct TUnaryUnsafeFixedSizeFilterKernel { } auto validMask = nullBuilder.Finish(); validMask = MakeDenseBitmap(validMask->data(), length, GetYqlMemoryPool()); - + auto inMask = inArray->buffers[0]; if (inMask) { outArray->buffers[0] = AllocateBitmapWithReserve(length, GetYqlMemoryPool()); @@ -703,6 +703,10 @@ class TUnaryOverOptionalImpl : public TBoxedValue { BEGIN_ARROW_UDF_IMPL(udfName##_BlocksImpl, signatureFunc, 0, true) \ UDF_IMPL(udfName, builder.SimpleSignature().SupportsBlocks().IsStrict();, ;, ;, "", "", udfName##_BlocksImpl) +#define BEGIN_SIMPLE_STRICT_ARROW_UDF_OPTIONS(udfName, signatureFunc, options) \ + BEGIN_ARROW_UDF_IMPL(udfName##_BlocksImpl, signatureFunc, 0, true) \ + UDF_IMPL(udfName, builder.SimpleSignature().SupportsBlocks().IsStrict(); options;, ;, ;, "", "", udfName##_BlocksImpl) + #define BEGIN_SIMPLE_ARROW_UDF_WITH_OPTIONAL_ARGS(udfName, signatureFunc, optArgc) \ BEGIN_ARROW_UDF_IMPL(udfName##_BlocksImpl, signatureFunc, optArgc, false) \ UDF_IMPL(udfName, builder.SimpleSignature().SupportsBlocks().OptionalArgs(optArgc);, ;, ;, "", "", udfName##_BlocksImpl) @@ -744,4 +748,4 @@ class 
TUnaryOverOptionalImpl : public TBoxedValue { END_ARROW_UDF(udfName##_BlocksImpl, exec) #define END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(udfName, exec, nullHandling) \ - END_ARROW_UDF_WITH_NULL_HANDLING(udfName##_BlocksImpl, exec, nullHandling) \ No newline at end of file + END_ARROW_UDF_WITH_NULL_HANDLING(udfName##_BlocksImpl, exec, nullHandling) diff --git a/yql/essentials/udfs/common/string/string_udf.cpp b/yql/essentials/udfs/common/string/string_udf.cpp index 161c51c060bd..feb95dced934 100644 --- a/yql/essentials/udfs/common/string/string_udf.cpp +++ b/yql/essentials/udfs/common/string/string_udf.cpp @@ -153,8 +153,25 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional(TOptional), } \ } -#define STRING_TWO_ARGS_UDF(udfName, function) \ - BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, bool(TOptional, char*)) { \ +#define STRING_TWO_ARGS_UDF_DEPRECATED_2025_02(udfName, function) \ + SIMPLE_STRICT_UDF_OPTIONS(T##udfName, bool(TOptional, char*), \ + builder.SetMaxLangVer(NYql::MakeLangVersion(2025, 1))) \ + { \ + Y_UNUSED(valueBuilder); \ + if (args[0]) { \ + const TString haystack(args[0].AsStringRef()); \ + const TString needle(args[1].AsStringRef()); \ + return TUnboxedValuePod(function(haystack, needle)); \ + } else { \ + return TUnboxedValuePod(false); \ + } \ + } + +#define STRING_ASCII_CMP_IGNORE_CASE_UDF(udfName, function) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF_OPTIONS(T##udfName, \ + bool(TOptional, char*), \ + builder.SetMinLangVer(NYql::MakeLangVersion(2025, 2))) \ + { \ Y_UNUSED(valueBuilder); \ if (args[0]) { \ const TString haystack(args[0].AsStringRef()); \ @@ -380,12 +397,19 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional(TOptional), XX(HasPrefix, StartsWith) \ XX(HasSuffix, EndsWith) -#define STRING_TWO_ARGS_UDF_MAP(XX) \ +// NOTE: The functions below are marked as deprecated, so block implementation +// is not required for them. Hence, STRING_TWO_ARGS_UDF_DEPRECATED_2025_02 +// provides only the scalar one at the moment. +#define STRING_TWO_ARGS_UDF_MAP_DEPRECATED_2025_02(XX) \ XX(StartsWithIgnoreCase, AsciiHasPrefixIgnoreCase) \ XX(EndsWithIgnoreCase, AsciiHasSuffixIgnoreCase) \ XX(HasPrefixIgnoreCase, AsciiHasPrefixIgnoreCase) \ XX(HasSuffixIgnoreCase, AsciiHasSuffixIgnoreCase) +#define STRING_ASCII_CMP_IGNORE_CASE_UDF_MAP(XX) \ + XX(AsciiStartsWithIgnoreCase, AsciiHasPrefixIgnoreCase) \ + XX(AsciiEndsWithIgnoreCase, AsciiHasSuffixIgnoreCase) + // NOTE: The functions below are marked as deprecated, so block implementation // is not required for them. Hence, STROKA_UDF provides only the scalar one at // the moment. 
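(Illustration, not part of the patch.) The new STRING_ASCII_CMP_IGNORE_CASE_UDF macro binds String::AsciiStartsWithIgnoreCase and String::AsciiEndsWithIgnoreCase to the util helpers named in the map above (AsciiHasPrefixIgnoreCase, AsciiHasSuffixIgnoreCase), invoked as function(haystack, needle) on TString arguments. A minimal sketch of that underlying ASCII-only, case-insensitive behavior; the <util/string/ascii.h> and <util/generic/string.h> include paths are assumptions:

```cpp
// Hedged sketch of the checks the new UDFs delegate to.
// Header locations are assumed; this is not part of the patch.
#include <util/generic/string.h>
#include <util/string/ascii.h>
#include <util/stream/output.h>

int main()
{
    const TString value = "aswedfg";

    // String::AsciiStartsWithIgnoreCase(value, "AS") delegates to this:
    const bool starts = AsciiHasPrefixIgnoreCase(value, "AS");
    // String::AsciiEndsWithIgnoreCase(value, "AS") delegates to this:
    const bool ends = AsciiHasSuffixIgnoreCase(value, "AS");

    Cout << (starts ? "true" : "false") << Endl; // true
    Cout << (ends ? "true" : "false") << Endl;   // false

    // Non-ASCII input is compared byte-wise, so the Russian sample row in the
    // canonized results yields false for both checks.
    return 0;
}
```

This matches the canonized expectations added in test.test_AsciiCmpIgnoreCase_/results.txt, where the "aswedfg" row gets icstarts = true and icends = false.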
@@ -898,7 +922,8 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional(TOptional), STROKA_CASE_UDF_MAP(STROKA_CASE_UDF) STROKA_ASCII_CASE_UDF_MAP(STROKA_ASCII_CASE_UDF) STROKA_FIND_UDF_MAP(STROKA_FIND_UDF) - STRING_TWO_ARGS_UDF_MAP(STRING_TWO_ARGS_UDF) + STRING_TWO_ARGS_UDF_MAP_DEPRECATED_2025_02(STRING_TWO_ARGS_UDF_DEPRECATED_2025_02) + STRING_ASCII_CMP_IGNORE_CASE_UDF_MAP(STRING_ASCII_CMP_IGNORE_CASE_UDF) IS_ASCII_UDF_MAP(IS_ASCII_UDF) static constexpr ui64 padLim = 1000000; @@ -914,7 +939,8 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional(TOptional), STROKA_CASE_UDF_MAP(STRING_REGISTER_UDF) STROKA_ASCII_CASE_UDF_MAP(STRING_REGISTER_UDF) STROKA_FIND_UDF_MAP(STRING_REGISTER_UDF) - STRING_TWO_ARGS_UDF_MAP(STRING_REGISTER_UDF) + STRING_TWO_ARGS_UDF_MAP_DEPRECATED_2025_02(STRING_REGISTER_UDF) + STRING_ASCII_CMP_IGNORE_CASE_UDF_MAP(STRING_REGISTER_UDF) IS_ASCII_UDF_MAP(STRING_REGISTER_UDF) STRING_STREAM_PAD_FORMATTER_UDF_MAP(STRING_REGISTER_UDF) STRING_STREAM_NUM_FORMATTER_UDF_MAP(STRING_REGISTER_UDF) diff --git a/yql/essentials/udfs/common/string/test/canondata/result.json b/yql/essentials/udfs/common/string/test/canondata/result.json index 596f9a72180f..e47e0bc7c64d 100644 --- a/yql/essentials/udfs/common/string/test/canondata/result.json +++ b/yql/essentials/udfs/common/string/test/canondata/result.json @@ -4,6 +4,16 @@ "uri": "file://test.test_AsciiChecks_/results.txt" } ], + "test.test[AsciiCmpIgnoreCase]": [ + { + "uri": "file://test.test_AsciiCmpIgnoreCase_/results.txt" + } + ], + "test.test[AsciiCmpIgnoreCase_2025_02]": [ + { + "uri": "file://test.test_AsciiCmpIgnoreCase_2025_02_/extracted" + } + ], "test.test[Base32Decode]": [ { "uri": "file://test.test_Base32Decode_/results.txt" @@ -19,6 +29,11 @@ "uri": "file://test.test_BlockAsciiChecks_/results.txt" } ], + "test.test[BlockAsciiCmpIgnoreCase]": [ + { + "uri": "file://test.test_BlockAsciiCmpIgnoreCase_/results.txt" + } + ], "test.test[BlockFind]": [ { "uri": "file://test.test_BlockFind_/results.txt" diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_AsciiCmpIgnoreCase_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_AsciiCmpIgnoreCase_/results.txt new file mode 100644 index 000000000000..363e1483c4cb --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_AsciiCmpIgnoreCase_/results.txt @@ -0,0 +1,69 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "icstarts"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "icends"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "fdsa"; + %false; + %false + ]; + [ + "aswedfg"; + %true; + %false + ]; + [ + "asdadsaasd"; + %true; + %false + ]; + [ + "gdsfsassas"; + %false; + %true + ]; + [ + ""; + %false; + %false + ]; + [ + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + %false; + %false + ] + ] + } + ] + } +] \ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_AsciiCmpIgnoreCase_2025_02_/extracted b/yql/essentials/udfs/common/string/test/canondata/test.test_AsciiCmpIgnoreCase_2025_02_/extracted new file mode 100644 index 000000000000..7a8f23ac6be3 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_AsciiCmpIgnoreCase_2025_02_/extracted @@ -0,0 +1,41 @@ +/program.sql:
: Error: Type annotation + + /program.sql:
:4:1: Error: At function: RemovePrefixMembers, At function: Unordered, At function: PersistableRepr, At function: OrderedSqlProject + SELECT + ^ + /program.sql:
:4:1: Error: At function: SqlProjectItem + SELECT + ^ + /program.sql:
:8:13: Error: At function: Apply, At function: Udf + String::HasPrefixIgnoreCase(value, "AS") AS icprefix, + ^ + /program.sql:
:8:13: Error: UDF 'String.HasPrefixIgnoreCase' is not available after version 2025.01 + String::HasPrefixIgnoreCase(value, "AS") AS icprefix, + ^ + /program.sql:
:4:1: Error: At function: SqlProjectItem + SELECT + ^ + /program.sql:
:10:13: Error: At function: Apply, At function: Udf + String::StartsWithIgnoreCase(value, "AS") AS icstarts, + ^ + /program.sql:
:10:13: Error: UDF 'String.StartsWithIgnoreCase' is not available after version 2025.01 + String::StartsWithIgnoreCase(value, "AS") AS icstarts, + ^ + /program.sql:
:4:1: Error: At function: SqlProjectItem + SELECT + ^ + /program.sql:
:12:13: Error: At function: Apply, At function: Udf + String::HasSuffixIgnoreCase(value, "AS") AS icsuffix, + ^ + /program.sql:
:12:13: Error: UDF 'String.HasSuffixIgnoreCase' is not available after version 2025.01 + String::HasSuffixIgnoreCase(value, "AS") AS icsuffix, + ^ + /program.sql:
:4:1: Error: At function: SqlProjectItem + SELECT + ^ + /program.sql:
:14:13: Error: At function: Apply, At function: Udf + String::EndsWithIgnoreCase(value, "AS") AS icends, + ^ + /program.sql:
:14:13: Error: UDF 'String.EndsWithIgnoreCase' is not available after version 2025.01 + String::EndsWithIgnoreCase(value, "AS") AS icends, + ^ \ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockAsciiCmpIgnoreCase_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockAsciiCmpIgnoreCase_/results.txt new file mode 100644 index 000000000000..363e1483c4cb --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockAsciiCmpIgnoreCase_/results.txt @@ -0,0 +1,69 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "icstarts"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "icends"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "fdsa"; + %false; + %false + ]; + [ + "aswedfg"; + %true; + %false + ]; + [ + "asdadsaasd"; + %true; + %false + ]; + [ + "gdsfsassas"; + %false; + %true + ]; + [ + ""; + %false; + %false + ]; + [ + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + %false; + %false + ] + ] + } + ] + } +] \ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt index 5470e5a7a37b..f6374e682e5f 100644 --- a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt @@ -21,34 +21,6 @@ "Bool" ] ]; - [ - "icprefix"; - [ - "DataType"; - "Bool" - ] - ]; - [ - "icstarts"; - [ - "DataType"; - "Bool" - ] - ]; - [ - "icsuffix"; - [ - "DataType"; - "Bool" - ] - ]; - [ - "icends"; - [ - "DataType"; - "Bool" - ] - ]; [ "levenstein"; [ @@ -63,55 +35,31 @@ [ "fdsa"; %false; - %false; - %false; - %false; - %false; "3" ]; [ "aswedfg"; %true; - %true; - %true; - %false; - %false; "5" ]; [ "asdadsaasd"; %true; - %true; - %true; - %false; - %false; "8" ]; [ "gdsfsassas"; %true; - %false; - %false; - %true; - %true; "8" ]; [ ""; %false; - %false; - %false; - %false; - %false; "2" ]; [ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; %false; - %false; - %false; - %false; - %false; "23" ] ] diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_Find_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_Find_/results.txt index 7abed4de2a42..cec532125014 100644 --- a/yql/essentials/udfs/common/string/test/canondata/test.test_Find_/results.txt +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_Find_/results.txt @@ -49,34 +49,6 @@ "Bool" ] ]; - [ - "icprefix"; - [ - "DataType"; - "Bool" - ] - ]; - [ - "icstarts"; - [ - "DataType"; - "Bool" - ] - ]; - [ - "icsuffix"; - [ - "DataType"; - "Bool" - ] - ]; - [ - "icends"; - [ - "DataType"; - "Bool" - ] - ]; [ "find"; [ @@ -109,10 +81,6 @@ %false; %false; %false; - %false; - %false; - %false; - %false; "-1"; "-1"; "3" @@ -124,10 +92,6 @@ %true; %false; %false; - %true; - %true; - %false; - %false; "0"; "0"; "5" @@ -139,10 +103,6 @@ %true; %false; %false; - %true; - %true; - %false; - %false; "0"; "7"; "8" @@ -154,10 +114,6 @@ %false; %true; %true; - %false; - %false; - %true; - %true; "5"; "8"; "8" @@ -169,10 +125,6 @@ %false; %false; %false; - %false; - %false; - %false; - %false; "-1"; "-1"; "2" @@ -184,10 +136,6 @@ %false; %false; %false; - %false; - %false; - %false; - %false; "-1"; "-1"; "23" diff 
--git a/yql/essentials/udfs/common/string/test/cases/AsciiCmpIgnoreCase.cfg b/yql/essentials/udfs/common/string/test/cases/AsciiCmpIgnoreCase.cfg new file mode 100644 index 000000000000..037a7697b4f8 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/AsciiCmpIgnoreCase.cfg @@ -0,0 +1,2 @@ +langver 2025.02 +in plato.Input default.in diff --git a/yql/essentials/udfs/common/string/test/cases/AsciiCmpIgnoreCase.sql b/yql/essentials/udfs/common/string/test/cases/AsciiCmpIgnoreCase.sql new file mode 100644 index 000000000000..6c67d1364562 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/AsciiCmpIgnoreCase.sql @@ -0,0 +1,5 @@ +SELECT + value, + String::AsciiStartsWithIgnoreCase(value, "AS") AS icstarts, + String::AsciiEndsWithIgnoreCase(value, "AS") AS icends, +FROM Input; diff --git a/yql/essentials/udfs/common/string/test/cases/AsciiCmpIgnoreCase_2025_02.cfg b/yql/essentials/udfs/common/string/test/cases/AsciiCmpIgnoreCase_2025_02.cfg new file mode 100644 index 000000000000..43239d297df3 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/AsciiCmpIgnoreCase_2025_02.cfg @@ -0,0 +1,3 @@ +xfail +langver 2025.02 +in plato.Input default.in diff --git a/yql/essentials/udfs/common/string/test/cases/AsciiCmpIgnoreCase_2025_02.sql b/yql/essentials/udfs/common/string/test/cases/AsciiCmpIgnoreCase_2025_02.sql new file mode 100644 index 000000000000..a4883f6dc077 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/AsciiCmpIgnoreCase_2025_02.sql @@ -0,0 +1,8 @@ +/* syntax version 1 */ +SELECT + value, + String::HasPrefixIgnoreCase(value, "AS") AS icprefix, + String::StartsWithIgnoreCase(value, "AS") AS icstarts, + String::HasSuffixIgnoreCase(value, "AS") AS icsuffix, + String::EndsWithIgnoreCase(value, "AS") AS icends, +FROM Input; diff --git a/yql/essentials/udfs/common/string/test/cases/BlockAsciiCmpIgnoreCase.cfg b/yql/essentials/udfs/common/string/test/cases/BlockAsciiCmpIgnoreCase.cfg new file mode 100644 index 000000000000..037a7697b4f8 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/BlockAsciiCmpIgnoreCase.cfg @@ -0,0 +1,2 @@ +langver 2025.02 +in plato.Input default.in diff --git a/yql/essentials/udfs/common/string/test/cases/BlockAsciiCmpIgnoreCase.sql b/yql/essentials/udfs/common/string/test/cases/BlockAsciiCmpIgnoreCase.sql new file mode 100644 index 000000000000..1a8c6088e561 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/BlockAsciiCmpIgnoreCase.sql @@ -0,0 +1,8 @@ +/* XXX: Enable UseBlocks pragma and provide input to trigger block execution. 
*/ +PRAGMA UseBlocks; + +SELECT + value, + String::AsciiStartsWithIgnoreCase(value, "AS") AS icstarts, + String::AsciiEndsWithIgnoreCase(value, "AS") AS icends, +FROM Input; diff --git a/yql/essentials/udfs/common/string/test/cases/BlockFind.sql b/yql/essentials/udfs/common/string/test/cases/BlockFind.sql index 0135bef3627d..f1c855bcc113 100644 --- a/yql/essentials/udfs/common/string/test/cases/BlockFind.sql +++ b/yql/essentials/udfs/common/string/test/cases/BlockFind.sql @@ -3,9 +3,5 @@ pragma UseBlocks; SELECT value, String::Contains(value, "as") AS contains, - String::HasPrefixIgnoreCase(value, "AS") AS icprefix, - String::StartsWithIgnoreCase(value, "AS") AS icstarts, - String::HasSuffixIgnoreCase(value, "AS") AS icsuffix, - String::EndsWithIgnoreCase(value, "AS") AS icends, String::LevensteinDistance(value, "as") AS levenstein FROM Input; diff --git a/yql/essentials/udfs/common/string/test/cases/Find.sql b/yql/essentials/udfs/common/string/test/cases/Find.sql index 856b53f6d4d1..273553dcf9e1 100644 --- a/yql/essentials/udfs/common/string/test/cases/Find.sql +++ b/yql/essentials/udfs/common/string/test/cases/Find.sql @@ -6,10 +6,6 @@ SELECT String::StartsWith(value, "as") AS starts, String::HasSuffix(value, "as") AS suffix, String::EndsWith(value, "as") AS ends, - String::HasPrefixIgnoreCase(value, "AS") AS icprefix, - String::StartsWithIgnoreCase(value, "AS") AS icstarts, - String::HasSuffixIgnoreCase(value, "AS") AS icsuffix, - String::EndsWithIgnoreCase(value, "AS") AS icends, String::Find(value, "as") AS find, String::ReverseFind(value, "as") AS rfind, String::LevensteinDistance(value, "as") AS levenstein From 1f6c6c27a8e7dbb160b50ebaaaea4d3aeb59612c Mon Sep 17 00:00:00 2001 From: nogert Date: Tue, 6 May 2025 18:38:08 +0300 Subject: [PATCH 14/24] Improve source sets definitions. 
commit_hash:7e97b6e8cdd2b05e53de5902441a090b8e0ffcaf --- .../ide-gradle/source_sets.jinja | 59 +++++++++---------- 1 file changed, 28 insertions(+), 31 deletions(-) diff --git a/build/export_generators/ide-gradle/source_sets.jinja b/build/export_generators/ide-gradle/source_sets.jinja index a0086568b6f7..a5f81c89a44d 100644 --- a/build/export_generators/ide-gradle/source_sets.jinja +++ b/build/export_generators/ide-gradle/source_sets.jinja @@ -1,46 +1,39 @@ {#- empty string #} sourceSets { -{%- set target_jar_source_set = target.jar_source_set|reject('startsWith', 'src/main/java:')|unique -%} -{%- set target_jar_resource_set = target.jar_resource_set|reject('startsWith', 'src/main/resources:')|unique -%} -{%- if proto_template or target_jar_source_set|length or target_jar_resource_set|length %} main { - {#- - Default by Gradle: - - java.srcDir("src/main/java") - resources.srcDir("src/main/resources") + java.srcDir("src/main/java") + resources.srcDir("src/main/resources") - #} -{%- if target_jar_source_set|length -%} -{%- for source_set in target_jar_source_set -%} -{%- set srcdir_glob = split(source_set, ':') -%} -{%- set srcdir = srcdir_glob[0] %} +{%- set target_jar_source_set = target.jar_source_set|reject('startsWith', 'src/main/java:')|unique -%} +{%- if target_jar_source_set|length -%} +{%- for source_set in target_jar_source_set -%} +{%- set srcdir_glob = split(source_set, ':') -%} +{%- set srcdir = srcdir_glob[0] %} java.srcDir({{ PatchRoots(srcdir) }}) -{%- endfor -%} -{%- endif %} -{%- if target_jar_resource_set|length -%} -{%- for resource_set in target_jar_resource_set -%} -{%- set resdir_glob = split(resource_set, ':') -%} -{%- set resdir = resdir_glob[0] %} +{%- endfor -%} +{%- endif %} + +{%- set target_jar_resource_set = target.jar_resource_set|reject('startsWith', 'src/main/resources:')|unique -%} +{%- if target_jar_resource_set|length -%} +{%- for resource_set in target_jar_resource_set -%} +{%- set resdir_glob = split(resource_set, ':') -%} +{%- set resdir = resdir_glob[0] %} resources.srcDir({{ PatchRoots(resdir) }}) -{%- endfor -%} -{%- endif -%} -{%- if proto_template %} +{%- endfor -%} +{%- endif -%} + +{%- if proto_template %} java.srcDir("$buildDir/generated/source/proto/main/java") -{%- if target.proto_grpc %} +{%- if target.proto_grpc %} java.srcDir("$buildDir/generated/source/proto/main/grpc") -{%- endif %} {%- endif %} - } {%- endif %} - test { - {#- - Default by Gradle: + } - java.srcDir("src/test/java") - resources.srcDir("src/test/resources") + test { + java.srcDir("src/test/java") + resources.srcDir("src/test/resources") - #} {%- if proto_template %} java.srcDir("$buildDir/generated/source/proto/test/java") {%- if target.proto_grpc %} @@ -84,3 +77,7 @@ sourceSets { tasks.withType() { duplicatesStrategy = DuplicatesStrategy.INCLUDE } + +tasks.withType() { + duplicatesStrategy = DuplicatesStrategy.INCLUDE +} From 2ee421373f021f9e2052947917506a69c0b757ea Mon Sep 17 00:00:00 2001 From: nechda Date: Tue, 6 May 2025 18:42:39 +0300 Subject: [PATCH 15/24] [library] Fix -Wdeprecated-this-capture warning Fix for library commit_hash:4ffd5fad7dcb0fae1cd0597997304346d7ac8865 --- library/cpp/lwtrace/log_shuttle.h | 2 +- library/cpp/yson_pull/detail/macros.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/library/cpp/lwtrace/log_shuttle.h b/library/cpp/lwtrace/log_shuttle.h index 8549acd833b6..95fc4649e0ba 100644 --- a/library/cpp/lwtrace/log_shuttle.h +++ b/library/cpp/lwtrace/log_shuttle.h @@ -332,7 +332,7 @@ namespace NLWTrace { bool 
TEditLogShuttleActionExecutor::DoExecute(TOrbit& orbit, const TParams& params) { Y_UNUSED(params); bool ignore = Action.GetIgnore(); - orbit.ForEachShuttle(this->GetTraceIdx(), [=](IShuttle* shuttle) { + orbit.ForEachShuttle(this->GetTraceIdx(), [=, this](IShuttle* shuttle) { this->Cast(shuttle)->SetIgnore(ignore); return true; }); diff --git a/library/cpp/yson_pull/detail/macros.h b/library/cpp/yson_pull/detail/macros.h index 7243f9cfe139..35f2ef1930e7 100644 --- a/library/cpp/yson_pull/detail/macros.h +++ b/library/cpp/yson_pull/detail/macros.h @@ -16,7 +16,7 @@ (); #else // Clang does not support gnu-style attributes on lambda functions yet -#define COLD_BLOCK_BYVALUE [=]() { +#define COLD_BLOCK_BYVALUE [=, this]() { #define COLD_BLOCK_BYREF [&]() { #define COLD_BLOCK_END \ } \ From 865c8c2f6d59c8a58e40c9ad322b494e7789a9d0 Mon Sep 17 00:00:00 2001 From: robot-yql-sandbox Date: Tue, 6 May 2025 18:47:05 +0300 Subject: [PATCH 16/24] YQL_CODEGEN_AND_COMMIT system-97 commit_hash:3372363b71fa8e76e69df2facd026d94cdda1eeb --- yql/essentials/data/language/udfs_basic.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yql/essentials/data/language/udfs_basic.json b/yql/essentials/data/language/udfs_basic.json index c9039913de8a..60630dd0f7dd 100644 --- a/yql/essentials/data/language/udfs_basic.json +++ b/yql/essentials/data/language/udfs_basic.json @@ -1 +1 @@ -{"DateTime":[{"name":"Convert"},{"name":"EndOf"},{"name":"EndOfDay"},{"name":"EndOfMonth"},{"name":"EndOfQuarter"},{"name":"EndOfWeek"},{"name":"EndOfYear"},{"name":"Format"},{"name":"FromMicroseconds"},{"name":"FromMicroseconds64"},{"name":"FromMilliseconds"},{"name":"FromMilliseconds64"},{"name":"FromSeconds"},{"name":"FromSeconds64"},{"name":"GetDayOfMonth"},{"name":"GetDayOfWeek"},{"name":"GetDayOfWeekName"},{"name":"GetDayOfYear"},{"name":"GetHour"},{"name":"GetMicrosecondOfSecond"},{"name":"GetMillisecondOfSecond"},{"name":"GetMinute"},{"name":"GetMonth"},{"name":"GetMonthName"},{"name":"GetSecond"},{"name":"GetTimezoneId"},{"name":"GetTimezoneName"},{"name":"GetWeekOfYear"},{"name":"GetWeekOfYearIso8601"},{"name":"GetYear"},{"name":"Interval64FromDays"},{"name":"Interval64FromHours"},{"name":"Interval64FromMicroseconds"},{"name":"Interval64FromMilliseconds"},{"name":"Interval64FromMinutes"},{"name":"Interval64FromSeconds"},{"name":"IntervalFromDays"},{"name":"IntervalFromHours"},{"name":"IntervalFromMicroseconds"},{"name":"IntervalFromMilliseconds"},{"name":"IntervalFromMinutes"},{"name":"IntervalFromSeconds"},{"name":"MakeDate"},{"name":"MakeDate32"},{"name":"MakeDatetime"},{"name":"MakeDatetime64"},{"name":"MakeTimestamp"},{"name":"MakeTimestamp64"},{"name":"MakeTzDate"},{"name":"MakeTzDate32"},{"name":"MakeTzDatetime"},{"name":"MakeTzDatetime64"},{"name":"MakeTzTimestamp"},{"name":"MakeTzTimestamp64"},{"name":"Parse"},{"name":"Parse64"},{"name":"ParseHttp"},{"name":"ParseIso8601"},{"name":"ParseRfc822"},{"name":"ParseX509"},{"name":"ShiftMonths"},{"name":"ShiftQuarters"},{"name":"ShiftYears"},{"name":"Split"},{"name":"StartOf"},{"name":"StartOfDay"},{"name":"StartOfMonth"},{"name":"StartOfQuarter"},{"name":"StartOfWeek"},{"name":"StartOfYear"},{"name":"TimeOfDay"},{"name":"ToDays"},{"name":"ToHours"},{"name":"ToMicroseconds"},{"name":"ToMilliseconds"},{"name":"ToMinutes"},{"name":"ToSeconds"},{"name":"Update"}],"Digest":[{"name":"Argon2"},{"name":"Blake2B"},{"name":"CityHash"},{"name":"CityHash128"},{"name":"Crc32c"},{"name":"Crc64"},{"name":"FarmHashFingerprint"},{"name":"FarmHashFingerprint128"},{"name":"FarmHa
shFingerprint2"},{"name":"FarmHashFingerprint32"},{"name":"FarmHashFingerprint64"},{"name":"Fnv32"},{"name":"Fnv64"},{"name":"HighwayHash"},{"name":"IntHash64"},{"name":"Md5HalfMix"},{"name":"Md5Hex"},{"name":"Md5Raw"},{"name":"MurMurHash"},{"name":"MurMurHash2A"},{"name":"MurMurHash2A32"},{"name":"MurMurHash32"},{"name":"NumericHash"},{"name":"Sha1"},{"name":"Sha256"},{"name":"SipHash"},{"name":"SuperFastHash"},{"name":"XXH3"},{"name":"XXH3_128"}],"Hyperscan":[{"name":"BacktrackingGrep"},{"name":"BacktrackingMatch"},{"name":"Capture"},{"name":"Grep"},{"name":"Match"},{"name":"MultiGrep"},{"name":"MultiMatch"},{"name":"Replace"}],"Ip":[{"name":"ConvertToIPv6"},{"name":"FromString"},{"name":"GetSubnet"},{"name":"GetSubnetByMask"},{"name":"IsEmbeddedIPv4"},{"name":"IsIPv4"},{"name":"IsIPv6"},{"name":"SubnetFromString"},{"name":"SubnetMatch"},{"name":"SubnetToString"},{"name":"ToFixedIPv6String"},{"name":"ToString"}],"Json":[{"name":"GetField"}],"Math":[{"name":"Abs"},{"name":"Acos"},{"name":"Asin"},{"name":"Asinh"},{"name":"Atan"},{"name":"Atan2"},{"name":"Cbrt"},{"name":"Ceil"},{"name":"Cos"},{"name":"Cosh"},{"name":"E"},{"name":"Eps"},{"name":"Erf"},{"name":"ErfInv"},{"name":"ErfcInv"},{"name":"Exp"},{"name":"Exp2"},{"name":"Fabs"},{"name":"Floor"},{"name":"Fmod"},{"name":"FuzzyEquals"},{"name":"Hypot"},{"name":"IsFinite"},{"name":"IsInf"},{"name":"IsNaN"},{"name":"Ldexp"},{"name":"Lgamma"},{"name":"Log"},{"name":"Log10"},{"name":"Log2"},{"name":"Mod"},{"name":"NearbyInt"},{"name":"Pi"},{"name":"Pow"},{"name":"Rem"},{"name":"Remainder"},{"name":"Rint"},{"name":"Round"},{"name":"RoundDownward"},{"name":"RoundToNearest"},{"name":"RoundTowardZero"},{"name":"RoundUpward"},{"name":"Sigmoid"},{"name":"Sin"},{"name":"Sinh"},{"name":"Sqrt"},{"name":"Tan"},{"name":"Tanh"},{"name":"Tgamma"},{"name":"Trunc"}],"Pire":[{"name":"Capture"},{"name":"Grep"},{"name":"Match"},{"name":"MultiGrep"},{"name":"MultiMatch"},{"name":"Replace"}],"Re2":[{"name":"Capture"},{"name":"Count"},{"name":"Escape"},{"name":"FindAndConsume"},{"name":"Grep"},{"name":"Match"},{"name":"Options"},{"name":"PatternFromLike"},{"name":"Replace"}],"Re2posix":[{"name":"Capture"},{"name":"Count"},{"name":"Escape"},{"name":"FindAndConsume"},{"name":"Grep"},{"name":"Match"},{"name":"Options"},{"name":"PatternFromLike"},{"name":"Replace"}],"String":[{"name":"AsciiToLower"},{"name":"AsciiToTitle"},{"name":"AsciiToUpper"},{"name":"Base32Decode"},{"name":"Base32Encode"},{"name":"Base32StrictDecode"},{"name":"Base64Decode"},{"name":"Base64Encode"},{"name":"Base64EncodeUrl"},{"name":"Base64StrictDecode"},{"name":"Bin"},{"name":"BinText"},{"name":"CgiEscape"},{"name":"CgiUnescape"},{"name":"Collapse"},{"name":"CollapseText"},{"name":"Contains"},{"name":"DecodeHtml"},{"name":"EncodeHtml"},{"name":"EndsWith"},{"name":"EndsWithIgnoreCase"},{"name":"EscapeC"},{"name":"FromByteList"},{"name":"HasPrefix"},{"name":"HasPrefixIgnoreCase"},{"name":"HasSuffix"},{"name":"HasSuffixIgnoreCase"},{"name":"Hex"},{"name":"HexDecode"},{"name":"HexEncode"},{"name":"HexText"},{"name":"HumanReadableBytes"},{"name":"HumanReadableDuration"},{"name":"HumanReadableQuantity"},{"name":"IsAscii"},{"name":"IsAsciiAlnum"},{"name":"IsAsciiAlpha"},{"name":"IsAsciiDigit"},{"name":"IsAsciiHex"},{"name":"IsAsciiLower"},{"name":"IsAsciiSpace"},{"name":"IsAsciiUpper"},{"name":"JoinFromList"},{"name":"LeftPad"},{"name":"LevensteinDistance"},{"name":"Prec"},{"name":"RemoveAll"},{"name":"RemoveFirst"},{"name":"RemoveLast"},{"name":"ReplaceAll"},{"name":"ReplaceFirst"},{"name":"ReplaceLas
t"},{"name":"RightPad"},{"name":"SBin"},{"name":"SHex"},{"name":"SplitToList"},{"name":"StartsWith"},{"name":"StartsWithIgnoreCase"},{"name":"Strip"},{"name":"ToByteList"},{"name":"UnescapeC"}],"Unicode":[{"name":"Find"},{"name":"Fold"},{"name":"FromCodePointList"},{"name":"GetLength"},{"name":"IsAlnum"},{"name":"IsAlpha"},{"name":"IsAscii"},{"name":"IsDigit"},{"name":"IsHex"},{"name":"IsLower"},{"name":"IsSpace"},{"name":"IsUnicodeSet"},{"name":"IsUpper"},{"name":"IsUtf"},{"name":"JoinFromList"},{"name":"LevensteinDistance"},{"name":"Normalize"},{"name":"NormalizeNFC"},{"name":"NormalizeNFD"},{"name":"NormalizeNFKC"},{"name":"NormalizeNFKD"},{"name":"RFind"},{"name":"RemoveAll"},{"name":"RemoveFirst"},{"name":"RemoveLast"},{"name":"ReplaceAll"},{"name":"ReplaceFirst"},{"name":"ReplaceLast"},{"name":"Reverse"},{"name":"SplitToList"},{"name":"Strip"},{"name":"Substring"},{"name":"ToCodePointList"},{"name":"ToLower"},{"name":"ToTitle"},{"name":"ToUint64"},{"name":"ToUpper"},{"name":"Translit"},{"name":"TryToUint64"}],"Url":[{"name":"BuildQueryString"},{"name":"CanBePunycodeHostName"},{"name":"CutQueryStringAndFragment"},{"name":"CutScheme"},{"name":"CutWWW"},{"name":"CutWWW2"},{"name":"Decode"},{"name":"Encode"},{"name":"ForceHostNameToPunycode"},{"name":"ForcePunycodeToHostName"},{"name":"GetCGIParam"},{"name":"GetDomain"},{"name":"GetDomainLevel"},{"name":"GetFragment"},{"name":"GetHost"},{"name":"GetHostPort"},{"name":"GetOwner"},{"name":"GetPath"},{"name":"GetPort"},{"name":"GetScheme"},{"name":"GetSchemeHost"},{"name":"GetSchemeHostPort"},{"name":"GetSignificantDomain"},{"name":"GetTLD"},{"name":"GetTail"},{"name":"HostNameToPunycode"},{"name":"IsAllowedByRobotsTxt"},{"name":"IsKnownTLD"},{"name":"IsWellKnownTLD"},{"name":"Normalize"},{"name":"NormalizeWithDefaultHttpScheme"},{"name":"Parse"},{"name":"PunycodeToHostName"},{"name":"QueryStringToDict"},{"name":"QueryStringToList"}],"Yson":[{"name":"Attributes"},{"name":"Contains"},{"name":"ConvertTo"},{"name":"ConvertToBool"},{"name":"ConvertToBoolDict"},{"name":"ConvertToBoolList"},{"name":"ConvertToDict"},{"name":"ConvertToDouble"},{"name":"ConvertToDoubleDict"},{"name":"ConvertToDoubleList"},{"name":"ConvertToInt64"},{"name":"ConvertToInt64Dict"},{"name":"ConvertToInt64List"},{"name":"ConvertToList"},{"name":"ConvertToString"},{"name":"ConvertToStringDict"},{"name":"ConvertToStringList"},{"name":"ConvertToUint64"},{"name":"ConvertToUint64Dict"},{"name":"ConvertToUint64List"},{"name":"Equals"},{"name":"From"},{"name":"GetHash"},{"name":"GetLength"},{"name":"IsBool"},{"name":"IsDict"},{"name":"IsDouble"},{"name":"IsEntity"},{"name":"IsInt64"},{"name":"IsList"},{"name":"IsString"},{"name":"IsUint64"},{"name":"Lookup"},{"name":"LookupBool"},{"name":"LookupDict"},{"name":"LookupDouble"},{"name":"LookupInt64"},{"name":"LookupList"},{"name":"LookupString"},{"name":"LookupUint64"},{"name":"Options"},{"name":"Parse"},{"name":"ParseJson"},{"name":"ParseJsonDecodeUtf8"},{"name":"Serialize"},{"name":"SerializeJson"},{"name":"SerializePretty"},{"name":"SerializeText"},{"name":"WithAttributes"},{"name":"YPath"},{"name":"YPathBool"},{"name":"YPathDict"},{"name":"YPathDouble"},{"name":"YPathInt64"},{"name":"YPathList"},{"name":"YPathString"},{"name":"YPathUint64"}],"Compress":[{"name":"BZip2"},{"name":"Brotli"},{"name":"Gzip"},{"name":"Lz4"},{"name":"Lzf"},{"name":"Lzma"},{"name":"Lzo"},{"name":"Lzq"},{"name":"Snappy"},{"name":"Zlib"},{"name":"Zstd"}],"Decompress":[{"name":"BZip2"},{"name":"Brotli"},{"name":"Gzip"},{"name":"Lz4"},{"name":"Lzf"},{"name"
:"Lzma"},{"name":"Lzo"},{"name":"Lzq"},{"name":"Snappy"},{"name":"Xz"},{"name":"Zlib"},{"name":"Zstd"}],"Protobuf":[{"name":"Parse"},{"name":"Serialize"},{"name":"TryParse"}],"Streaming":[{"name":"Process"},{"name":"ProcessInline"}],"TryDecompress":[{"name":"BZip2"},{"name":"Brotli"},{"name":"Gzip"},{"name":"Lz4"},{"name":"Lzf"},{"name":"Lzma"},{"name":"Lzo"},{"name":"Lzq"},{"name":"Snappy"},{"name":"Xz"},{"name":"Zlib"},{"name":"Zstd"}]} +{"DateTime":[{"name":"Convert"},{"name":"EndOf"},{"name":"EndOfDay"},{"name":"EndOfMonth"},{"name":"EndOfQuarter"},{"name":"EndOfWeek"},{"name":"EndOfYear"},{"name":"Format"},{"name":"FromMicroseconds"},{"name":"FromMicroseconds64"},{"name":"FromMilliseconds"},{"name":"FromMilliseconds64"},{"name":"FromSeconds"},{"name":"FromSeconds64"},{"name":"GetDayOfMonth"},{"name":"GetDayOfWeek"},{"name":"GetDayOfWeekName"},{"name":"GetDayOfYear"},{"name":"GetHour"},{"name":"GetMicrosecondOfSecond"},{"name":"GetMillisecondOfSecond"},{"name":"GetMinute"},{"name":"GetMonth"},{"name":"GetMonthName"},{"name":"GetSecond"},{"name":"GetTimezoneId"},{"name":"GetTimezoneName"},{"name":"GetWeekOfYear"},{"name":"GetWeekOfYearIso8601"},{"name":"GetYear"},{"name":"Interval64FromDays"},{"name":"Interval64FromHours"},{"name":"Interval64FromMicroseconds"},{"name":"Interval64FromMilliseconds"},{"name":"Interval64FromMinutes"},{"name":"Interval64FromSeconds"},{"name":"IntervalFromDays"},{"name":"IntervalFromHours"},{"name":"IntervalFromMicroseconds"},{"name":"IntervalFromMilliseconds"},{"name":"IntervalFromMinutes"},{"name":"IntervalFromSeconds"},{"name":"MakeDate"},{"name":"MakeDate32"},{"name":"MakeDatetime"},{"name":"MakeDatetime64"},{"name":"MakeTimestamp"},{"name":"MakeTimestamp64"},{"name":"MakeTzDate"},{"name":"MakeTzDate32"},{"name":"MakeTzDatetime"},{"name":"MakeTzDatetime64"},{"name":"MakeTzTimestamp"},{"name":"MakeTzTimestamp64"},{"name":"Parse"},{"name":"Parse64"},{"name":"ParseHttp"},{"name":"ParseIso8601"},{"name":"ParseRfc822"},{"name":"ParseX509"},{"name":"ShiftMonths"},{"name":"ShiftQuarters"},{"name":"ShiftYears"},{"name":"Split"},{"name":"StartOf"},{"name":"StartOfDay"},{"name":"StartOfMonth"},{"name":"StartOfQuarter"},{"name":"StartOfWeek"},{"name":"StartOfYear"},{"name":"TimeOfDay"},{"name":"ToDays"},{"name":"ToHours"},{"name":"ToMicroseconds"},{"name":"ToMilliseconds"},{"name":"ToMinutes"},{"name":"ToSeconds"},{"name":"Update"}],"Digest":[{"name":"Argon2"},{"name":"Blake2B"},{"name":"CityHash"},{"name":"CityHash128"},{"name":"Crc32c"},{"name":"Crc64"},{"name":"FarmHashFingerprint"},{"name":"FarmHashFingerprint128"},{"name":"FarmHashFingerprint2"},{"name":"FarmHashFingerprint32"},{"name":"FarmHashFingerprint64"},{"name":"Fnv32"},{"name":"Fnv64"},{"name":"HighwayHash"},{"name":"IntHash64"},{"name":"Md5HalfMix"},{"name":"Md5Hex"},{"name":"Md5Raw"},{"name":"MurMurHash"},{"name":"MurMurHash2A"},{"name":"MurMurHash2A32"},{"name":"MurMurHash32"},{"name":"NumericHash"},{"name":"Sha1"},{"name":"Sha256"},{"name":"SipHash"},{"name":"SuperFastHash"},{"name":"XXH3"},{"name":"XXH3_128"}],"Hyperscan":[{"name":"BacktrackingGrep"},{"name":"BacktrackingMatch"},{"name":"Capture"},{"name":"Grep"},{"name":"Match"},{"name":"MultiGrep"},{"name":"MultiMatch"},{"name":"Replace"}],"Ip":[{"name":"ConvertToIPv6"},{"name":"FromString"},{"name":"GetSubnet"},{"name":"GetSubnetByMask"},{"name":"IsEmbeddedIPv4"},{"name":"IsIPv4"},{"name":"IsIPv6"},{"name":"SubnetFromString"},{"name":"SubnetMatch"},{"name":"SubnetToString"},{"name":"ToFixedIPv6String"},{"name":"ToString"}],"Json":[{"name":"GetFie
ld"}],"Math":[{"name":"Abs"},{"name":"Acos"},{"name":"Asin"},{"name":"Asinh"},{"name":"Atan"},{"name":"Atan2"},{"name":"Cbrt"},{"name":"Ceil"},{"name":"Cos"},{"name":"Cosh"},{"name":"E"},{"name":"Eps"},{"name":"Erf"},{"name":"ErfInv"},{"name":"ErfcInv"},{"name":"Exp"},{"name":"Exp2"},{"name":"Fabs"},{"name":"Floor"},{"name":"Fmod"},{"name":"FuzzyEquals"},{"name":"Hypot"},{"name":"IsFinite"},{"name":"IsInf"},{"name":"IsNaN"},{"name":"Ldexp"},{"name":"Lgamma"},{"name":"Log"},{"name":"Log10"},{"name":"Log2"},{"name":"Mod"},{"name":"NearbyInt"},{"name":"Pi"},{"name":"Pow"},{"name":"Rem"},{"name":"Remainder"},{"name":"Rint"},{"name":"Round"},{"name":"RoundDownward"},{"name":"RoundToNearest"},{"name":"RoundTowardZero"},{"name":"RoundUpward"},{"name":"Sigmoid"},{"name":"Sin"},{"name":"Sinh"},{"name":"Sqrt"},{"name":"Tan"},{"name":"Tanh"},{"name":"Tgamma"},{"name":"Trunc"}],"Pire":[{"name":"Capture"},{"name":"Grep"},{"name":"Match"},{"name":"MultiGrep"},{"name":"MultiMatch"},{"name":"Replace"}],"Re2":[{"name":"Capture"},{"name":"Count"},{"name":"Escape"},{"name":"FindAndConsume"},{"name":"Grep"},{"name":"Match"},{"name":"Options"},{"name":"PatternFromLike"},{"name":"Replace"}],"Re2posix":[{"name":"Capture"},{"name":"Count"},{"name":"Escape"},{"name":"FindAndConsume"},{"name":"Grep"},{"name":"Match"},{"name":"Options"},{"name":"PatternFromLike"},{"name":"Replace"}],"String":[{"name":"AsciiEndsWithIgnoreCase"},{"name":"AsciiStartsWithIgnoreCase"},{"name":"AsciiToLower"},{"name":"AsciiToTitle"},{"name":"AsciiToUpper"},{"name":"Base32Decode"},{"name":"Base32Encode"},{"name":"Base32StrictDecode"},{"name":"Base64Decode"},{"name":"Base64Encode"},{"name":"Base64EncodeUrl"},{"name":"Base64StrictDecode"},{"name":"Bin"},{"name":"BinText"},{"name":"CgiEscape"},{"name":"CgiUnescape"},{"name":"Collapse"},{"name":"CollapseText"},{"name":"Contains"},{"name":"DecodeHtml"},{"name":"EncodeHtml"},{"name":"EndsWith"},{"name":"EndsWithIgnoreCase"},{"name":"EscapeC"},{"name":"FromByteList"},{"name":"HasPrefix"},{"name":"HasPrefixIgnoreCase"},{"name":"HasSuffix"},{"name":"HasSuffixIgnoreCase"},{"name":"Hex"},{"name":"HexDecode"},{"name":"HexEncode"},{"name":"HexText"},{"name":"HumanReadableBytes"},{"name":"HumanReadableDuration"},{"name":"HumanReadableQuantity"},{"name":"IsAscii"},{"name":"IsAsciiAlnum"},{"name":"IsAsciiAlpha"},{"name":"IsAsciiDigit"},{"name":"IsAsciiHex"},{"name":"IsAsciiLower"},{"name":"IsAsciiSpace"},{"name":"IsAsciiUpper"},{"name":"JoinFromList"},{"name":"LeftPad"},{"name":"LevensteinDistance"},{"name":"Prec"},{"name":"RemoveAll"},{"name":"RemoveFirst"},{"name":"RemoveLast"},{"name":"ReplaceAll"},{"name":"ReplaceFirst"},{"name":"ReplaceLast"},{"name":"RightPad"},{"name":"SBin"},{"name":"SHex"},{"name":"SplitToList"},{"name":"StartsWith"},{"name":"StartsWithIgnoreCase"},{"name":"Strip"},{"name":"ToByteList"},{"name":"UnescapeC"}],"Unicode":[{"name":"Find"},{"name":"Fold"},{"name":"FromCodePointList"},{"name":"GetLength"},{"name":"IsAlnum"},{"name":"IsAlpha"},{"name":"IsAscii"},{"name":"IsDigit"},{"name":"IsHex"},{"name":"IsLower"},{"name":"IsSpace"},{"name":"IsUnicodeSet"},{"name":"IsUpper"},{"name":"IsUtf"},{"name":"JoinFromList"},{"name":"LevensteinDistance"},{"name":"Normalize"},{"name":"NormalizeNFC"},{"name":"NormalizeNFD"},{"name":"NormalizeNFKC"},{"name":"NormalizeNFKD"},{"name":"RFind"},{"name":"RemoveAll"},{"name":"RemoveFirst"},{"name":"RemoveLast"},{"name":"ReplaceAll"},{"name":"ReplaceFirst"},{"name":"ReplaceLast"},{"name":"Reverse"},{"name":"SplitToList"},{"name":"Strip"},{"name":"Substrin
g"},{"name":"ToCodePointList"},{"name":"ToLower"},{"name":"ToTitle"},{"name":"ToUint64"},{"name":"ToUpper"},{"name":"Translit"},{"name":"TryToUint64"}],"Url":[{"name":"BuildQueryString"},{"name":"CanBePunycodeHostName"},{"name":"CutQueryStringAndFragment"},{"name":"CutScheme"},{"name":"CutWWW"},{"name":"CutWWW2"},{"name":"Decode"},{"name":"Encode"},{"name":"ForceHostNameToPunycode"},{"name":"ForcePunycodeToHostName"},{"name":"GetCGIParam"},{"name":"GetDomain"},{"name":"GetDomainLevel"},{"name":"GetFragment"},{"name":"GetHost"},{"name":"GetHostPort"},{"name":"GetOwner"},{"name":"GetPath"},{"name":"GetPort"},{"name":"GetScheme"},{"name":"GetSchemeHost"},{"name":"GetSchemeHostPort"},{"name":"GetSignificantDomain"},{"name":"GetTLD"},{"name":"GetTail"},{"name":"HostNameToPunycode"},{"name":"IsAllowedByRobotsTxt"},{"name":"IsKnownTLD"},{"name":"IsWellKnownTLD"},{"name":"Normalize"},{"name":"NormalizeWithDefaultHttpScheme"},{"name":"Parse"},{"name":"PunycodeToHostName"},{"name":"QueryStringToDict"},{"name":"QueryStringToList"}],"Yson":[{"name":"Attributes"},{"name":"Contains"},{"name":"ConvertTo"},{"name":"ConvertToBool"},{"name":"ConvertToBoolDict"},{"name":"ConvertToBoolList"},{"name":"ConvertToDict"},{"name":"ConvertToDouble"},{"name":"ConvertToDoubleDict"},{"name":"ConvertToDoubleList"},{"name":"ConvertToInt64"},{"name":"ConvertToInt64Dict"},{"name":"ConvertToInt64List"},{"name":"ConvertToList"},{"name":"ConvertToString"},{"name":"ConvertToStringDict"},{"name":"ConvertToStringList"},{"name":"ConvertToUint64"},{"name":"ConvertToUint64Dict"},{"name":"ConvertToUint64List"},{"name":"Equals"},{"name":"From"},{"name":"GetHash"},{"name":"GetLength"},{"name":"IsBool"},{"name":"IsDict"},{"name":"IsDouble"},{"name":"IsEntity"},{"name":"IsInt64"},{"name":"IsList"},{"name":"IsString"},{"name":"IsUint64"},{"name":"Lookup"},{"name":"LookupBool"},{"name":"LookupDict"},{"name":"LookupDouble"},{"name":"LookupInt64"},{"name":"LookupList"},{"name":"LookupString"},{"name":"LookupUint64"},{"name":"Options"},{"name":"Parse"},{"name":"ParseJson"},{"name":"ParseJsonDecodeUtf8"},{"name":"Serialize"},{"name":"SerializeJson"},{"name":"SerializePretty"},{"name":"SerializeText"},{"name":"WithAttributes"},{"name":"YPath"},{"name":"YPathBool"},{"name":"YPathDict"},{"name":"YPathDouble"},{"name":"YPathInt64"},{"name":"YPathList"},{"name":"YPathString"},{"name":"YPathUint64"}],"Compress":[{"name":"BZip2"},{"name":"Brotli"},{"name":"Gzip"},{"name":"Lz4"},{"name":"Lzf"},{"name":"Lzma"},{"name":"Lzo"},{"name":"Lzq"},{"name":"Snappy"},{"name":"Zlib"},{"name":"Zstd"}],"Decompress":[{"name":"BZip2"},{"name":"Brotli"},{"name":"Gzip"},{"name":"Lz4"},{"name":"Lzf"},{"name":"Lzma"},{"name":"Lzo"},{"name":"Lzq"},{"name":"Snappy"},{"name":"Xz"},{"name":"Zlib"},{"name":"Zstd"}],"Protobuf":[{"name":"Parse"},{"name":"Serialize"},{"name":"TryParse"}],"Streaming":[{"name":"Process"},{"name":"ProcessInline"}],"TryDecompress":[{"name":"BZip2"},{"name":"Brotli"},{"name":"Gzip"},{"name":"Lz4"},{"name":"Lzf"},{"name":"Lzma"},{"name":"Lzo"},{"name":"Lzq"},{"name":"Snappy"},{"name":"Xz"},{"name":"Zlib"},{"name":"Zstd"}]} From 5228e09619d19beb9fd0681369d58a993862027c Mon Sep 17 00:00:00 2001 From: mikailbag Date: Tue, 6 May 2025 19:34:29 +0300 Subject: [PATCH 17/24] Update TCMalloc (Vol. 
2) [nodiff:caesar] commit_hash:48312ddf31d7bece9b834e38fc4e91254815a4d0 --- contrib/libs/tcmalloc/.yandex_meta/build.ym | 62 + .../.yandex_meta/devtools.copyrights.report | 382 +- .../.yandex_meta/devtools.licenses.report | 393 +- .../tcmalloc/.yandex_meta/licenses.list.txt | 241 +- contrib/libs/tcmalloc/README.md | 35 +- contrib/libs/tcmalloc/common.inc | 28 +- contrib/libs/tcmalloc/default/ya.make | 6 +- contrib/libs/tcmalloc/no_percpu_cache/ya.make | 5 +- contrib/libs/tcmalloc/numa_256k/ya.make | 2 +- .../libs/tcmalloc/numa_large_pages/ya.make | 2 +- contrib/libs/tcmalloc/patches/010-fork.patch | 400 ++ .../libs/tcmalloc/patches/020-user-data.patch | 269 + ...ler.patch => 030-soft-limit-handler.patch} | 30 +- .../040-remove-conflicting-noexcept.patch | 13 + .../tcmalloc/patches/050-avoid-cycle.patch | 13 + .../tcmalloc/patches/060-system-headers.sh | 5 + .../900-undeprecate-rate-interval.patch | 24 + contrib/libs/tcmalloc/patches/fork.patch | 310 - contrib/libs/tcmalloc/patches/userdata.patch | 220 - contrib/libs/tcmalloc/patches/yandex.patch | 91 - contrib/libs/tcmalloc/small_but_slow/ya.make | 1 + .../libs/tcmalloc/tcmalloc/.github/CODEOWNERS | 5 + .../tcmalloc/.github/workflows/ci.yml | 63 + contrib/libs/tcmalloc/tcmalloc/BUILD | 1401 ++-- .../tcmalloc/tcmalloc/allocation_sample.cc | 59 + .../tcmalloc/tcmalloc/allocation_sample.h | 93 + .../tcmalloc/allocation_sample_test.cc | 132 + .../tcmalloc/tcmalloc/allocation_sampling.cc | 270 + .../tcmalloc/tcmalloc/allocation_sampling.h | 259 + contrib/libs/tcmalloc/tcmalloc/arena.cc | 65 +- contrib/libs/tcmalloc/tcmalloc/arena.h | 88 +- contrib/libs/tcmalloc/tcmalloc/arena_test.cc | 100 +- contrib/libs/tcmalloc/tcmalloc/background.cc | 259 +- .../tcmalloc/tcmalloc/central_freelist.cc | 306 +- .../libs/tcmalloc/tcmalloc/central_freelist.h | 647 +- .../tcmalloc/central_freelist_benchmark.cc | 38 +- .../tcmalloc/central_freelist_fuzz.cc | 161 + .../tcmalloc/central_freelist_test.cc | 887 ++- contrib/libs/tcmalloc/tcmalloc/common.cc | 182 +- contrib/libs/tcmalloc/tcmalloc/common.h | 386 +- contrib/libs/tcmalloc/tcmalloc/copts.bzl | 49 + contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc | 1088 +--- contrib/libs/tcmalloc/tcmalloc/cpu_cache.h | 2764 +++++++- .../tcmalloc/cpu_cache_activate_test.cc | 87 + .../libs/tcmalloc/tcmalloc/cpu_cache_test.cc | 1728 ++++- .../tcmalloc/deallocation_profiler.cc | 760 +++ .../tcmalloc/tcmalloc/deallocation_profiler.h | 69 + contrib/libs/tcmalloc/tcmalloc/experiment.cc | 180 +- contrib/libs/tcmalloc/tcmalloc/experiment.h | 23 +- .../tcmalloc/tcmalloc/experiment_config.h | 25 +- .../libs/tcmalloc/tcmalloc/experiment_fuzz.cc | 29 +- .../tcmalloc/experimental_56_size_class.cc | 706 -- .../experimental_pow2_below64_size_class.cc | 679 -- .../tcmalloc/experimental_pow2_size_class.cc | 412 +- .../libs/tcmalloc/tcmalloc/global_stats.cc | 1029 +++ contrib/libs/tcmalloc/tcmalloc/global_stats.h | 87 + .../tcmalloc/tcmalloc/guarded_allocations.h | 62 + .../tcmalloc/guarded_page_allocator.cc | 663 +- .../tcmalloc/guarded_page_allocator.h | 246 +- .../guarded_page_allocator_benchmark.cc | 148 +- .../guarded_page_allocator_profile_test.cc | 371 ++ .../tcmalloc/guarded_page_allocator_test.cc | 227 +- .../tcmalloc/tcmalloc/heap_profiling_test.cc | 122 - .../tcmalloc/tcmalloc/hinted_tracker_lists.h | 131 + .../tcmalloc/tcmalloc/huge_address_map.cc | 139 +- .../libs/tcmalloc/tcmalloc/huge_address_map.h | 63 +- .../tcmalloc/huge_address_map_test.cc | 20 +- .../libs/tcmalloc/tcmalloc/huge_allocator.cc | 61 +- 
.../libs/tcmalloc/tcmalloc/huge_allocator.h | 39 +- .../tcmalloc/tcmalloc/huge_allocator_test.cc | 142 +- contrib/libs/tcmalloc/tcmalloc/huge_cache.cc | 309 +- contrib/libs/tcmalloc/tcmalloc/huge_cache.h | 159 +- .../libs/tcmalloc/tcmalloc/huge_cache_test.cc | 800 ++- .../tcmalloc/huge_page_aware_allocator.cc | 655 +- .../tcmalloc/huge_page_aware_allocator.h | 1106 +++- .../huge_page_aware_allocator_fuzz.cc | 550 ++ .../huge_page_aware_allocator_test.cc | 1607 ++++- .../libs/tcmalloc/tcmalloc/huge_page_filler.h | 2394 ++++--- .../tcmalloc/huge_page_filler_fuzz.cc | 450 ++ .../tcmalloc/huge_page_filler_test.cc | 5651 +++++++++-------- .../tcmalloc/tcmalloc/huge_page_subrelease.h | 731 +++ .../tcmalloc/huge_page_subrelease_test.cc | 351 + contrib/libs/tcmalloc/tcmalloc/huge_pages.h | 114 +- contrib/libs/tcmalloc/tcmalloc/huge_region.h | 588 +- .../tcmalloc/tcmalloc/huge_region_fuzz.cc | 262 + .../tcmalloc/tcmalloc/huge_region_test.cc | 774 ++- contrib/libs/tcmalloc/tcmalloc/internal/BUILD | 1122 ++++ .../tcmalloc/tcmalloc/internal/affinity.cc | 99 + .../tcmalloc/tcmalloc/internal/affinity.h | 71 + .../tcmalloc/internal/affinity_test.cc | 102 + .../tcmalloc/internal/allocation_guard.cc | 24 + .../tcmalloc/internal/allocation_guard.h | 57 + .../internal/allocation_guard_test.cc | 40 + .../tcmalloc/internal/atomic_danger.h | 2 + .../tcmalloc/internal/atomic_stats_counter.h | 3 +- .../libs/tcmalloc/tcmalloc/internal/bits.h | 82 - .../tcmalloc/tcmalloc/internal/bits_test.cc | 104 - .../tcmalloc/internal/cache_topology.cc | 92 +- .../tcmalloc/internal/cache_topology.h | 38 +- .../tcmalloc/internal/cache_topology_test.cc | 33 +- .../libs/tcmalloc/tcmalloc/internal/clock.h | 1 + .../libs/tcmalloc/tcmalloc/internal/config.h | 82 +- .../tcmalloc/tcmalloc/internal/config_test.cc | 101 + .../tcmalloc/tcmalloc/internal/cpu_utils.h | 85 + .../tcmalloc/tcmalloc/internal/declarations.h | 22 +- .../tcmalloc/tcmalloc/internal/environment.cc | 2 + .../tcmalloc/tcmalloc/internal/environment.h | 1 + .../internal/explicitly_constructed.h | 63 + .../tcmalloc/internal/exponential_biased.h | 78 + .../internal/exponential_biased_test.cc | 125 + .../tcmalloc/tcmalloc/internal/fake_profile.h | 66 + .../tcmalloc/internal/lifetime_predictions.h | 252 - .../internal/lifetime_predictions_test.cc | 156 - .../tcmalloc/internal/lifetime_tracker.h | 172 - .../internal/lifetime_tracker_test.cc | 129 - .../tcmalloc/tcmalloc/internal/linked_list.h | 39 +- .../internal/linked_list_benchmark.cc | 5 +- .../tcmalloc/internal/linked_list_test.cc | 16 +- .../tcmalloc/internal/linux_syscall_support.h | 19 +- .../tcmalloc/tcmalloc/internal/logging.cc | 302 +- .../libs/tcmalloc/tcmalloc/internal/logging.h | 390 +- .../tcmalloc/internal/logging_test.cc | 243 +- .../tcmalloc/internal/memory_stats.cc | 15 +- .../tcmalloc/tcmalloc/internal/memory_stats.h | 1 + .../tcmalloc/internal/memory_stats_test.cc | 3 +- .../tcmalloc/tcmalloc/internal/memory_tag.cc | 44 + .../tcmalloc/tcmalloc/internal/memory_tag.h | 90 + .../tcmalloc/tcmalloc/internal/mincore.cc | 9 +- .../libs/tcmalloc/tcmalloc/internal/mincore.h | 1 + .../tcmalloc/internal/mincore_benchmark.cc | 12 +- .../tcmalloc/internal/mincore_test.cc | 32 +- .../internal/mismatched_delete_state.h | 122 + .../tcmalloc/tcmalloc/internal/mock_span.h | 1 + .../libs/tcmalloc/tcmalloc/internal/numa.cc | 107 +- .../libs/tcmalloc/tcmalloc/internal/numa.h | 100 +- .../tcmalloc/tcmalloc/internal/numa_test.cc | 116 +- .../tcmalloc/tcmalloc/internal/optimization.h | 28 +- .../overflow.h} | 28 +- 
.../tcmalloc/tcmalloc/internal/page_size.cc | 45 + .../page_size.h} | 16 +- .../tcmalloc/tcmalloc/internal/pageflags.cc | 368 ++ .../tcmalloc/tcmalloc/internal/pageflags.h | 154 + .../tcmalloc/internal/pageflags_test.cc | 571 ++ .../tcmalloc/internal/parameter_accessors.h | 85 +- .../libs/tcmalloc/tcmalloc/internal/percpu.cc | 292 +- .../libs/tcmalloc/tcmalloc/internal/percpu.h | 379 +- .../tcmalloc/internal/percpu_early_test.cc | 39 + .../tcmalloc/internal/percpu_rseq_aarch64.S | 344 +- .../tcmalloc/internal/percpu_rseq_asm.S | 43 +- .../tcmalloc/internal/percpu_rseq_ppc.S | 606 -- .../internal/percpu_rseq_unsupported.cc | 45 +- .../tcmalloc/internal/percpu_rseq_x86_64.S | 311 +- .../tcmalloc/internal/percpu_tcmalloc.h | 2136 ++++--- .../tcmalloc/internal/percpu_tcmalloc_test.cc | 1385 ++-- .../tcmalloc/tcmalloc/internal/percpu_test.cc | 74 + .../tcmalloc/tcmalloc/internal/prefetch.h | 116 + .../tcmalloc/internal/prefetch_test.cc | 72 + .../tcmalloc/tcmalloc/internal/proc_maps.cc | 19 +- .../tcmalloc/tcmalloc/internal/proc_maps.h | 14 +- .../tcmalloc/tcmalloc/internal/profile.proto | 233 + .../tcmalloc/internal/profile_builder.cc | 927 +++ .../tcmalloc/internal/profile_builder.h | 102 + .../tcmalloc/internal/profile_builder_fuzz.cc | 54 + .../profile_builder_no_tcmalloc_test.cc | 46 + .../tcmalloc/internal/profile_builder_test.cc | 1031 +++ .../tcmalloc/internal/range_tracker.h | 95 +- .../internal/range_tracker_benchmark.cc | 14 +- .../tcmalloc/internal/range_tracker_test.cc | 33 +- .../tcmalloc/tcmalloc/internal/residency.cc | 230 + .../tcmalloc/tcmalloc/internal/residency.h | 120 + .../tcmalloc/internal/residency_test.cc | 373 ++ .../tcmalloc/internal/sampled_allocation.h | 64 + .../internal/sampled_allocation_recorder.h | 262 + .../sampled_allocation_recorder_test.cc | 278 + .../internal/sampled_allocation_test.cc | 70 + .../tcmalloc/internal/stacktrace_filter.h | 161 + .../internal/stacktrace_filter_test.cc | 262 + .../tcmalloc/tcmalloc/internal/sysinfo.cc | 147 + .../libs/tcmalloc/tcmalloc/internal/sysinfo.h | 80 + .../tcmalloc/internal/sysinfo_fuzz.cc | 48 + .../tcmalloc/internal/sysinfo_test.cc | 181 + ...ized-profile_builder_fuzz-5534221534363648 | Bin 0 -> 16 bytes ...ized-profile_builder_fuzz-5647243657216000 | Bin 0 -> 16 bytes ...ized-profile_builder_fuzz-5915530833559552 | Bin 0 -> 12 bytes ...profile_builder_fuzz-6685031907328000.fuzz | Bin 0 -> 84 bytes ...h-adc83b19e793491b1c6ea0fd8b46cd9f32e592fc | 1 + .../tcmalloc/internal/timeseries_tracker.h | 31 +- .../internal/timeseries_tracker_test.cc | 35 +- .../libs/tcmalloc/tcmalloc/internal/util.cc | 77 +- .../libs/tcmalloc/tcmalloc/internal/util.h | 86 +- .../libs/tcmalloc/tcmalloc/internal/ya.make | 21 + .../tcmalloc/internal_malloc_extension.h | 62 +- .../internal_malloc_tracing_extension.h | 36 + .../tcmalloc/tcmalloc/legacy_size_classes.cc | 1356 ++-- .../libs/tcmalloc/tcmalloc/libc_override.h | 185 +- .../tcmalloc/libc_override_gcc_and_weak.h | 114 - .../tcmalloc/tcmalloc/libc_override_glibc.h | 120 - .../tcmalloc/libc_override_redefine.h | 100 - .../tcmalloc/tcmalloc/malloc_extension.cc | 536 +- .../libs/tcmalloc/tcmalloc/malloc_extension.h | 422 +- .../tcmalloc/malloc_extension_fuzz.cc | 26 +- .../tcmalloc/malloc_extension_test.cc | 67 - .../tcmalloc/malloc_tracing_extension.cc | 41 + .../tcmalloc/malloc_tracing_extension.h | 55 + .../tcmalloc/tcmalloc/metadata_allocator.h | 41 + .../tcmalloc/metadata_object_allocator.h | 142 + .../tcmalloc/mock_central_freelist.cc | 39 +- .../tcmalloc/tcmalloc/mock_central_freelist.h 
| 28 +- .../mock_huge_page_static_forwarder.cc | 13 + .../mock_huge_page_static_forwarder.h | 243 + .../tcmalloc/mock_metadata_allocator.h | 47 + .../tcmalloc/tcmalloc/mock_static_forwarder.h | 242 + .../tcmalloc/tcmalloc/mock_transfer_cache.cc | 10 +- .../tcmalloc/tcmalloc/mock_transfer_cache.h | 329 +- .../tcmalloc/mock_virtual_allocator.h | 70 + .../libs/tcmalloc/tcmalloc/new_extension.h | 24 + .../tcmalloc/tcmalloc/new_extension_test.cc | 485 ++ .../libs/tcmalloc/tcmalloc/page_allocator.cc | 231 +- .../libs/tcmalloc/tcmalloc/page_allocator.h | 250 +- .../tcmalloc/page_allocator_interface.cc | 58 +- .../tcmalloc/page_allocator_interface.h | 51 +- .../tcmalloc/tcmalloc/page_allocator_test.cc | 165 +- .../tcmalloc/page_allocator_test_util.h | 35 +- contrib/libs/tcmalloc/tcmalloc/page_heap.cc | 528 -- contrib/libs/tcmalloc/tcmalloc/page_heap.h | 161 - .../tcmalloc/tcmalloc/page_heap_allocator.h | 93 - .../libs/tcmalloc/tcmalloc/page_heap_test.cc | 109 - contrib/libs/tcmalloc/tcmalloc/pagemap.cc | 19 +- contrib/libs/tcmalloc/tcmalloc/pagemap.h | 190 +- .../libs/tcmalloc/tcmalloc/pagemap_test.cc | 6 +- contrib/libs/tcmalloc/tcmalloc/pages.h | 80 +- contrib/libs/tcmalloc/tcmalloc/pages_test.cc | 34 + contrib/libs/tcmalloc/tcmalloc/parameters.cc | 528 +- contrib/libs/tcmalloc/tcmalloc/parameters.h | 192 +- .../tcmalloc/tcmalloc/peak_heap_tracker.cc | 71 +- .../tcmalloc/tcmalloc/peak_heap_tracker.h | 62 +- .../tcmalloc/tcmalloc/profile_marshaler.cc | 46 + .../tcmalloc/tcmalloc/profile_marshaler.h | 33 + .../tcmalloc/profile_marshaler_test.cc | 86 + .../libs/tcmalloc/tcmalloc/profile_test.cc | 137 +- .../tcmalloc/tcmalloc/reuse_size_classes.cc | 514 ++ .../tcmalloc/tcmalloc/runtime_size_classes.cc | 81 - .../tcmalloc/tcmalloc/runtime_size_classes.h | 49 - .../tcmalloc/runtime_size_classes_test.cc | 114 - contrib/libs/tcmalloc/tcmalloc/sampler.cc | 144 +- contrib/libs/tcmalloc/tcmalloc/sampler.h | 211 +- .../libs/tcmalloc/tcmalloc/segv_handler.cc | 261 + .../{want_no_hpaa.cc => segv_handler.h} | 17 +- .../tcmalloc/tcmalloc/segv_handler_test.cc | 108 + contrib/libs/tcmalloc/tcmalloc/selsan/BUILD | 63 + .../libs/tcmalloc/tcmalloc/selsan/README.md | 25 + .../tcmalloc/tcmalloc/selsan/report_test.cc | 82 + .../libs/tcmalloc/tcmalloc/selsan/selsan.cc | 300 + .../libs/tcmalloc/tcmalloc/selsan/selsan.h | 209 + .../tcmalloc/tcmalloc/selsan/shadow_test.cc | 139 + .../libs/tcmalloc/tcmalloc/size_class_info.h | 62 +- .../libs/tcmalloc/tcmalloc/size_classes.cc | 1336 ++-- .../tcmalloc/tcmalloc/size_classes_test.cc | 298 +- ..._classes_with_runtime_size_classes_test.cc | 127 - contrib/libs/tcmalloc/tcmalloc/sizemap.cc | 289 + contrib/libs/tcmalloc/tcmalloc/sizemap.h | 322 + .../libs/tcmalloc/tcmalloc/sizemap_fuzz.cc | 65 + .../libs/tcmalloc/tcmalloc/sizemap_test.cc | 142 + contrib/libs/tcmalloc/tcmalloc/span.cc | 331 +- contrib/libs/tcmalloc/tcmalloc/span.h | 672 +- .../libs/tcmalloc/tcmalloc/span_benchmark.cc | 141 +- contrib/libs/tcmalloc/tcmalloc/span_fuzz.cc | 129 + contrib/libs/tcmalloc/tcmalloc/span_stats.h | 3 +- contrib/libs/tcmalloc/tcmalloc/span_test.cc | 186 +- .../tcmalloc/tcmalloc/stack_trace_table.cc | 182 +- .../tcmalloc/tcmalloc/stack_trace_table.h | 65 +- .../tcmalloc/stack_trace_table_test.cc | 264 +- contrib/libs/tcmalloc/tcmalloc/static_vars.cc | 198 +- contrib/libs/tcmalloc/tcmalloc/static_vars.h | 258 +- contrib/libs/tcmalloc/tcmalloc/stats.cc | 432 +- contrib/libs/tcmalloc/tcmalloc/stats.h | 185 +- contrib/libs/tcmalloc/tcmalloc/stats_test.cc | 135 +- 
.../libs/tcmalloc/tcmalloc/system-alloc.cc | 614 +- contrib/libs/tcmalloc/tcmalloc/system-alloc.h | 831 ++- contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc | 2602 +++----- contrib/libs/tcmalloc/tcmalloc/tcmalloc.h | 135 +- .../libs/tcmalloc/tcmalloc/tcmalloc_policy.h | 251 +- ...ase-central_freelist_fuzz-6338860943802368 | Bin 0 -> 74490 bytes ...zed-central_freelist_fuzz-5328515345809408 | Bin 0 -> 52550 bytes ...page_aware_allocator_fuzz-4796454007799808 | Bin 0 -> 824 bytes ...page_aware_allocator_fuzz-4650158169391104 | Bin 0 -> 171 bytes ...page_aware_allocator_fuzz-5216394376773632 | 5 + ...page_aware_allocator_fuzz-5397442449178624 | 1 + ...page_aware_allocator_fuzz-5580915038093312 | Bin 0 -> 53344 bytes ...page_aware_allocator_fuzz-6140744194457600 | 1 + ...page_aware_allocator_fuzz-6302517124005888 | Bin 0 -> 1454 bytes .../testcase-4507694249082880 | Bin 0 -> 4614 bytes .../testcase-4582514590875648 | 0 .../testcase-5091122805276672 | Bin 0 -> 3663 bytes .../testcase-5185382849773568 | Bin 0 -> 2749 bytes .../testcase-5781242586923008 | Bin 0 -> 22 bytes .../testcase-6591694528970752 | Bin 0 -> 22 bytes ...ase-huge_page_filler_fuzz-5476984341004288 | Bin 0 -> 48109 bytes ...uge_page_filler_fuzz-5161409228701696.test | Bin 0 -> 587 bytes ...uge_page_filler_fuzz-5516474505363456.test | Bin 0 -> 42 bytes ...uge_page_filler_fuzz-6053674183688192.test | Bin 0 -> 450 bytes ...zed-huge_page_filler_fuzz-6159120802381824 | Bin 0 -> 32 bytes ...uge_page_filler_fuzz-6512022070886400.test | Bin 0 -> 32 bytes ...zed-huge_page_filler_fuzz-6622985612820480 | Bin 0 -> 1615 bytes ...h-869dbc1cdf6a1f79b386adf046c7df32257ef684 | Bin 0 -> 24 bytes ...h-e9f3aa3ad83e808a5588ec529c6cdf00d5d397fc | Bin 0 -> 73 bytes .../testcase-6686265543557120 | 4 + .../testcase-5235702354214912 | Bin 0 -> 131 bytes .../testcase-5618130730156032 | Bin 0 -> 71 bytes ...h-4338e5c59e1bda5104fb5f0aa5553aeb1d3d6465 | 1 + ...h-c20bfc5c10e885f8e5498c2907cfab82da7c0cff | 1 + ...se-minimized-sizemap_fuzz-5240920228626432 | Bin 0 -> 24 bytes ...tcase-minimized-span_fuzz-6271015625031680 | Bin 0 -> 24 bytes ...-minimized-span_fuzz-6321706670620672.fuzz | Bin 0 -> 24 bytes ...h-01d72a40d5815461b92d3f7c0f6377fd441b0034 | Bin 0 -> 24 bytes ...h-32697afd59029eb8356fee8ba568e7f6b58d728f | Bin 0 -> 24 bytes ...h-42b80edf9551d1095aebb6724c070ee43d490125 | Bin 0 -> 24 bytes ...h-500955af6568b0ed234bd40d6a01af496ba15eb2 | Bin 0 -> 24 bytes ...h-6ef2b6ae2246d1bda0190983b1007df2699e7738 | Bin 0 -> 24 bytes ...h-746940d0368bfe3e4a94b60659eeb6cb87106618 | Bin 0 -> 24 bytes .../span_fuzz/testcase-5877384059617280 | Bin 0 -> 77 bytes ...k-0b593173f17376c77a3a74a6644af58f77d7a366 | 1 + contrib/libs/tcmalloc/tcmalloc/testing/BUILD | 977 +++ .../tcmalloc/testing/aligned_new_test.cc | 184 + .../tcmalloc/testing/background_test.cc | 73 + .../tcmalloc/testing/benchmark_main.cc | 21 + .../testing/current_allocated_bytes_test.cc | 64 + .../testing/deallocation_profiler_test.cc | 772 +++ .../testing/default_parameters_test.cc | 135 + .../tcmalloc/testing/disable_numa_test.cc | 26 + .../testing/fast_path.insecure.golden | 16 + .../tcmalloc/testing/fast_path.opt.golden | 16 + .../testing/fast_path.release+insecure.golden | 16 + .../tcmalloc/testing/fast_path.release.golden | 16 + .../testing/fast_path.unstable.release.golden | 16 + .../tcmalloc/tcmalloc/testing/frag_test.cc | 69 + .../tcmalloc/testing/get_stats_test.cc | 415 ++ .../tcmalloc/testing/heap_profiling_test.cc | 267 + .../tcmalloc/tcmalloc/testing/hello_main.cc | 62 + 
.../tcmalloc/testing/large_alloc_size_test.cc | 56 + .../tcmalloc/testing/largesmall_frag_test.cc | 109 + .../tcmalloc/tcmalloc/testing/limit_test.cc | 476 ++ .../malloc_extension_system_malloc_test.cc | 7 +- .../tcmalloc/testing/malloc_extension_test.cc | 192 + .../testing/malloc_tracing_extension_test.cc | 111 + .../tcmalloc/testing/markidle_test.cc | 95 + .../tcmalloc/testing/memalign_test.cc | 297 + .../tcmalloc/testing/memory_errors_test.cc | 665 ++ .../tcmalloc/tcmalloc/testing/no_deps_test.cc | 145 + .../tcmalloc/testing/numa_locality_test.cc | 244 + .../tcmalloc/testing/outofmemory_test.cc | 90 + .../tcmalloc/testing/parallel_test.cc | 90 + .../testing/peak_heap_profiling_test.cc | 156 + .../testing/profile_drop_frames_test.cc | 382 ++ .../tcmalloc/tcmalloc/testing/profile_test.cc | 229 + .../testing/realized_fragmentation_test.cc | 217 + .../tcmalloc/{ => testing}/realloc_test.cc | 4 - .../tcmalloc/tcmalloc/testing/reclaim_test.cc | 191 + .../tcmalloc/testing/releasing_test.cc | 145 + .../testing/sample_size_class_test.cc | 103 + .../tcmalloc/tcmalloc/testing/sampler_test.cc | 275 + .../testing/sampling_memusage_test.cc | 187 + .../tcmalloc/testing/sampling_test.cc | 286 + .../tcmalloc/testing/startup_size_test.cc | 75 + .../{ => testing}/system-alloc_test.cc | 129 +- .../tcmalloc/testing/tcmalloc_benchmark.cc | 503 ++ .../{ => testing}/tcmalloc_large_test.cc | 32 +- .../tcmalloc/testing/tcmalloc_test.cc | 1670 +++++ .../tcmalloc/testing/test_allocator_harness.h | 225 + .../tcmalloc/tcmalloc/testing/testutil.cc | 178 + .../libs/tcmalloc/tcmalloc/testing/testutil.h | 295 + .../tcmalloc/testing/thread_ctor_test.cc | 22 + .../tcmalloc/testing/thread_ctor_test_lib.cc | 64 + .../tcmalloc/testing/thread_manager.h | 64 + .../tcmalloc/testing/threadcachesize_test.cc | 107 + .../tcmalloc/testing/variants_test.cc | 65 + ...ble_huge_region_more_often_test_helper.cc} | 24 +- ...t_disable_tcmalloc_big_span_test_helper.cc | 32 + .../tcmalloc/testing/want_hpaa_test_helper.cc | 60 + .../want_pow2below64_size_classes_helper.cc | 39 + .../libs/tcmalloc/tcmalloc/thread_cache.cc | 196 +- contrib/libs/tcmalloc/tcmalloc/thread_cache.h | 179 +- .../tcmalloc/tcmalloc/thread_cache_test.cc | 34 +- contrib/libs/tcmalloc/tcmalloc/tracking.h | 109 - .../libs/tcmalloc/tcmalloc/transfer_cache.cc | 134 +- .../libs/tcmalloc/tcmalloc/transfer_cache.h | 613 +- .../tcmalloc/transfer_cache_benchmark.cc | 128 +- .../tcmalloc/tcmalloc/transfer_cache_fuzz.cc | 70 +- .../tcmalloc/transfer_cache_internals.h | 881 +-- .../tcmalloc/tcmalloc/transfer_cache_stats.h | 8 +- .../tcmalloc/tcmalloc/transfer_cache_test.cc | 763 ++- contrib/libs/tcmalloc/tcmalloc/variants.bzl | 397 ++ ...spans.cc => want_disable_dynamic_slabs.cc} | 10 +- .../want_disable_huge_region_more_often.cc | 28 + .../want_disable_tcmalloc_big_span.cc | 26 + contrib/libs/tcmalloc/tcmalloc/want_hpaa.cc | 4 +- .../tcmalloc/want_legacy_size_classes.cc | 25 + .../tcmalloc/want_legacy_size_classes_test.cc | 46 + contrib/libs/tcmalloc/ya.make | 10 +- .../yatest_common/yatest/common/process.py | 4 +- yt/yt/library/ytprof/profile.cpp | 2 +- 401 files changed, 63094 insertions(+), 26324 deletions(-) create mode 100644 contrib/libs/tcmalloc/.yandex_meta/build.ym create mode 100644 contrib/libs/tcmalloc/patches/010-fork.patch create mode 100644 contrib/libs/tcmalloc/patches/020-user-data.patch rename contrib/libs/tcmalloc/patches/{handler.patch => 030-soft-limit-handler.patch} (58%) create mode 100644 contrib/libs/tcmalloc/patches/040-remove-conflicting-noexcept.patch 
create mode 100644 contrib/libs/tcmalloc/patches/050-avoid-cycle.patch create mode 100644 contrib/libs/tcmalloc/patches/060-system-headers.sh create mode 100644 contrib/libs/tcmalloc/patches/900-undeprecate-rate-interval.patch delete mode 100644 contrib/libs/tcmalloc/patches/fork.patch delete mode 100644 contrib/libs/tcmalloc/patches/userdata.patch delete mode 100644 contrib/libs/tcmalloc/patches/yandex.patch create mode 100644 contrib/libs/tcmalloc/tcmalloc/.github/CODEOWNERS create mode 100644 contrib/libs/tcmalloc/tcmalloc/.github/workflows/ci.yml create mode 100644 contrib/libs/tcmalloc/tcmalloc/allocation_sample.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/allocation_sample.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/allocation_sample_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/allocation_sampling.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/allocation_sampling.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/central_freelist_fuzz.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/copts.bzl create mode 100644 contrib/libs/tcmalloc/tcmalloc/cpu_cache_activate_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/deallocation_profiler.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/deallocation_profiler.h delete mode 100644 contrib/libs/tcmalloc/tcmalloc/experimental_56_size_class.cc delete mode 100755 contrib/libs/tcmalloc/tcmalloc/experimental_pow2_below64_size_class.cc mode change 100755 => 100644 contrib/libs/tcmalloc/tcmalloc/experimental_pow2_size_class.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/global_stats.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/global_stats.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/guarded_allocations.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_profile_test.cc delete mode 100644 contrib/libs/tcmalloc/tcmalloc/heap_profiling_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/hinted_tracker_lists.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator_fuzz.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/huge_page_filler_fuzz.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/huge_page_subrelease.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/huge_page_subrelease_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/huge_region_fuzz.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/BUILD create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/affinity.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/affinity.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/affinity_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/allocation_guard.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/allocation_guard.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/allocation_guard_test.cc delete mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/bits.h delete mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/bits_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/config_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/cpu_utils.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/explicitly_constructed.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/exponential_biased.h create mode 100755 contrib/libs/tcmalloc/tcmalloc/internal/exponential_biased_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/fake_profile.h delete mode 100644 
contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions.h delete mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions_test.cc delete mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker.h delete mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/memory_tag.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/memory_tag.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/mismatched_delete_state.h rename contrib/libs/tcmalloc/tcmalloc/{noruntime_size_classes.cc => internal/overflow.h} (58%) create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/page_size.cc rename contrib/libs/tcmalloc/tcmalloc/{want_hpaa_subrelease.cc => internal/page_size.h} (75%) create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/pageflags.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/pageflags.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/pageflags_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/percpu_early_test.cc delete mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_ppc.S create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/percpu_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/prefetch.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/prefetch_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/profile.proto create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/profile_builder.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/profile_builder.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/profile_builder_fuzz.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/profile_builder_no_tcmalloc_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/profile_builder_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/residency.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/residency.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/residency_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/sampled_allocation.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/sampled_allocation_recorder.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/sampled_allocation_recorder_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/sampled_allocation_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/stacktrace_filter.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/stacktrace_filter_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/sysinfo.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/sysinfo.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/sysinfo_fuzz.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/sysinfo_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/testdata/profile_builder_fuzz/clusterfuzz-testcase-minimized-profile_builder_fuzz-5534221534363648 create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/testdata/profile_builder_fuzz/clusterfuzz-testcase-minimized-profile_builder_fuzz-5647243657216000 create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/testdata/profile_builder_fuzz/clusterfuzz-testcase-minimized-profile_builder_fuzz-5915530833559552 create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/testdata/profile_builder_fuzz/clusterfuzz-testcase-minimized-profile_builder_fuzz-6685031907328000.fuzz 
create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/testdata/profile_builder_fuzz/crash-adc83b19e793491b1c6ea0fd8b46cd9f32e592fc create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal/ya.make create mode 100644 contrib/libs/tcmalloc/tcmalloc/internal_malloc_tracing_extension.h delete mode 100644 contrib/libs/tcmalloc/tcmalloc/libc_override_gcc_and_weak.h delete mode 100644 contrib/libs/tcmalloc/tcmalloc/libc_override_glibc.h delete mode 100644 contrib/libs/tcmalloc/tcmalloc/libc_override_redefine.h delete mode 100644 contrib/libs/tcmalloc/tcmalloc/malloc_extension_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/malloc_tracing_extension.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/malloc_tracing_extension.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/metadata_allocator.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/metadata_object_allocator.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/mock_huge_page_static_forwarder.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/mock_huge_page_static_forwarder.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/mock_metadata_allocator.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/mock_static_forwarder.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/mock_virtual_allocator.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/new_extension.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/new_extension_test.cc delete mode 100644 contrib/libs/tcmalloc/tcmalloc/page_heap.cc delete mode 100644 contrib/libs/tcmalloc/tcmalloc/page_heap.h delete mode 100644 contrib/libs/tcmalloc/tcmalloc/page_heap_allocator.h delete mode 100644 contrib/libs/tcmalloc/tcmalloc/page_heap_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/pages_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/profile_marshaler.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/profile_marshaler.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/profile_marshaler_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/reuse_size_classes.cc delete mode 100644 contrib/libs/tcmalloc/tcmalloc/runtime_size_classes.cc delete mode 100644 contrib/libs/tcmalloc/tcmalloc/runtime_size_classes.h delete mode 100644 contrib/libs/tcmalloc/tcmalloc/runtime_size_classes_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/segv_handler.cc rename contrib/libs/tcmalloc/tcmalloc/{want_no_hpaa.cc => segv_handler.h} (68%) create mode 100644 contrib/libs/tcmalloc/tcmalloc/segv_handler_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/selsan/BUILD create mode 100644 contrib/libs/tcmalloc/tcmalloc/selsan/README.md create mode 100644 contrib/libs/tcmalloc/tcmalloc/selsan/report_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/selsan/selsan.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/selsan/selsan.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/selsan/shadow_test.cc delete mode 100644 contrib/libs/tcmalloc/tcmalloc/size_classes_with_runtime_size_classes_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/sizemap.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/sizemap.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/sizemap_fuzz.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/sizemap_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/span_fuzz.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/central_freelist_fuzz/clusterfuzz-testcase-central_freelist_fuzz-6338860943802368 create mode 100644 
contrib/libs/tcmalloc/tcmalloc/testdata/central_freelist_fuzz/clusterfuzz-testcase-minimized-central_freelist_fuzz-5328515345809408 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/clusterfuzz-testcase-minimized-huge_page_aware_allocator_fuzz-4796454007799808 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/huge_page_aware_allocator_fuzz/clusterfuzz-testcase-minimized-huge_page_aware_allocator_fuzz-4650158169391104 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/huge_page_aware_allocator_fuzz/clusterfuzz-testcase-minimized-huge_page_aware_allocator_fuzz-5216394376773632 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/huge_page_aware_allocator_fuzz/clusterfuzz-testcase-minimized-huge_page_aware_allocator_fuzz-5397442449178624 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/huge_page_aware_allocator_fuzz/clusterfuzz-testcase-minimized-huge_page_aware_allocator_fuzz-5580915038093312 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/huge_page_aware_allocator_fuzz/clusterfuzz-testcase-minimized-huge_page_aware_allocator_fuzz-6140744194457600 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/huge_page_aware_allocator_fuzz/clusterfuzz-testcase-minimized-huge_page_aware_allocator_fuzz-6302517124005888 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/huge_page_aware_allocator_fuzz/testcase-4507694249082880 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/huge_page_aware_allocator_fuzz/testcase-4582514590875648 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/huge_page_aware_allocator_fuzz/testcase-5091122805276672 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/huge_page_aware_allocator_fuzz/testcase-5185382849773568 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/huge_page_aware_allocator_fuzz/testcase-5781242586923008 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/huge_page_aware_allocator_fuzz/testcase-6591694528970752 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/huge_page_filler_fuzz/clusterfuzz-testcase-huge_page_filler_fuzz-5476984341004288 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/huge_page_filler_fuzz/clusterfuzz-testcase-minimized-huge_page_filler_fuzz-5161409228701696.test create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/huge_page_filler_fuzz/clusterfuzz-testcase-minimized-huge_page_filler_fuzz-5516474505363456.test create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/huge_page_filler_fuzz/clusterfuzz-testcase-minimized-huge_page_filler_fuzz-6053674183688192.test create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/huge_page_filler_fuzz/clusterfuzz-testcase-minimized-huge_page_filler_fuzz-6159120802381824 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/huge_page_filler_fuzz/clusterfuzz-testcase-minimized-huge_page_filler_fuzz-6512022070886400.test create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/huge_page_filler_fuzz/clusterfuzz-testcase-minimized-huge_page_filler_fuzz-6622985612820480 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/huge_page_filler_fuzz/crash-869dbc1cdf6a1f79b386adf046c7df32257ef684 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/huge_page_filler_fuzz/crash-e9f3aa3ad83e808a5588ec529c6cdf00d5d397fc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/huge_page_filler_fuzz/testcase-6686265543557120 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/huge_region_fuzz/testcase-5235702354214912 create mode 100644 
contrib/libs/tcmalloc/tcmalloc/testdata/huge_region_fuzz/testcase-5618130730156032 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/malloc_extension_fuzz/crash-4338e5c59e1bda5104fb5f0aa5553aeb1d3d6465 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/malloc_extension_fuzz/crash-c20bfc5c10e885f8e5498c2907cfab82da7c0cff create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/sizemap_fuzz/clusterfuzz-testcase-minimized-sizemap_fuzz-5240920228626432 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/span_fuzz/clusterfuzz-testcase-minimized-span_fuzz-6271015625031680 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/span_fuzz/clusterfuzz-testcase-minimized-span_fuzz-6321706670620672.fuzz create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/span_fuzz/crash-01d72a40d5815461b92d3f7c0f6377fd441b0034 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/span_fuzz/crash-32697afd59029eb8356fee8ba568e7f6b58d728f create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/span_fuzz/crash-42b80edf9551d1095aebb6724c070ee43d490125 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/span_fuzz/crash-500955af6568b0ed234bd40d6a01af496ba15eb2 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/span_fuzz/crash-6ef2b6ae2246d1bda0190983b1007df2699e7738 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/span_fuzz/crash-746940d0368bfe3e4a94b60659eeb6cb87106618 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/span_fuzz/testcase-5877384059617280 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testdata/transfer_cache_fuzz/leak-0b593173f17376c77a3a74a6644af58f77d7a366 create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/BUILD create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/aligned_new_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/background_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/benchmark_main.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/current_allocated_bytes_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/deallocation_profiler_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/default_parameters_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/disable_numa_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/fast_path.insecure.golden create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/fast_path.opt.golden create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/fast_path.release+insecure.golden create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/fast_path.release.golden create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/fast_path.unstable.release.golden create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/frag_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/get_stats_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/heap_profiling_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/hello_main.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/large_alloc_size_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/largesmall_frag_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/limit_test.cc rename contrib/libs/tcmalloc/tcmalloc/{ => testing}/malloc_extension_system_malloc_test.cc (95%) create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/malloc_extension_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/malloc_tracing_extension_test.cc create mode 100644 
contrib/libs/tcmalloc/tcmalloc/testing/markidle_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/memalign_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/memory_errors_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/no_deps_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/numa_locality_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/outofmemory_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/parallel_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/peak_heap_profiling_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/profile_drop_frames_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/profile_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/realized_fragmentation_test.cc rename contrib/libs/tcmalloc/tcmalloc/{ => testing}/realloc_test.cc (96%) create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/reclaim_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/releasing_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/sample_size_class_test.cc create mode 100755 contrib/libs/tcmalloc/tcmalloc/testing/sampler_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/sampling_memusage_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/sampling_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/startup_size_test.cc rename contrib/libs/tcmalloc/tcmalloc/{ => testing}/system-alloc_test.cc (53%) create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/tcmalloc_benchmark.cc rename contrib/libs/tcmalloc/tcmalloc/{ => testing}/tcmalloc_large_test.cc (85%) create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/tcmalloc_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/test_allocator_harness.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/testutil.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/testutil.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/thread_ctor_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/thread_ctor_test_lib.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/thread_manager.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/threadcachesize_test.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/variants_test.cc rename contrib/libs/tcmalloc/tcmalloc/{runtime_size_classes_fuzz.cc => testing/want_disable_huge_region_more_often_test_helper.cc} (53%) create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/want_disable_tcmalloc_big_span_test_helper.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/want_hpaa_test_helper.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/testing/want_pow2below64_size_classes_helper.cc delete mode 100644 contrib/libs/tcmalloc/tcmalloc/tracking.h create mode 100644 contrib/libs/tcmalloc/tcmalloc/variants.bzl rename contrib/libs/tcmalloc/tcmalloc/{want_legacy_spans.cc => want_disable_dynamic_slabs.cc} (71%) create mode 100644 contrib/libs/tcmalloc/tcmalloc/want_disable_huge_region_more_often.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/want_disable_tcmalloc_big_span.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/want_legacy_size_classes.cc create mode 100644 contrib/libs/tcmalloc/tcmalloc/want_legacy_size_classes_test.cc diff --git a/contrib/libs/tcmalloc/.yandex_meta/build.ym b/contrib/libs/tcmalloc/.yandex_meta/build.ym new file mode 100644 index 000000000000..b6bf9d0f75ac --- /dev/null 
+++ b/contrib/libs/tcmalloc/.yandex_meta/build.ym @@ -0,0 +1,62 @@ +{% extends '//builtin/bag.ym' %} + +{% block current_version %}c8dfee3e4c489c5ae0d30c484c92db102a69ec51{% endblock %} + +{% block current_url %} +https://github.com/google/tcmalloc/archive/{{self.version().strip()}}.tar.gz +{% endblock %} + +{% block current_date %}2025-01-30{% endblock %} + +{% block keep_sources %} +common.inc +default/ya.make +dynamic/ya.make +malloc_extension/ya.make +no_percpu_cache/ya.make +no_percpu_cache/aligned_alloc.c +numa_256k/ya.make +numa_large_pages/ya.make +small_but_slow/ya.make +tcmalloc/internal/ya.make +{% endblock %} + +{% block ya_make %} +SUBSCRIBER( + g:cpp-contrib + ayles + mikailbag +) + +SRCS( + # Options + tcmalloc/want_hpaa.cc +) + +INCLUDE(common.inc) + +CFLAGS( + -DTCMALLOC_256K_PAGES +) +{% endblock %} + +{% block ya_make_suffix %} +IF (NOT DLL_FOR) + RECURSE( + default + dynamic + malloc_extension + no_percpu_cache + numa_256k + numa_large_pages + small_but_slow + tcmalloc/internal + ) +ENDIF() +{% endblock %} + +{% block move_to_output %} +{{super()}} +cp -R tcmalloc common.inc ${OUTPUT} +cp no_percpu_cache/aligned_alloc.c ${OUTPUT}/no_percpu_cache/ +{% endblock %} diff --git a/contrib/libs/tcmalloc/.yandex_meta/devtools.copyrights.report b/contrib/libs/tcmalloc/.yandex_meta/devtools.copyrights.report index 7acc7e09c852..33350ad4e7c7 100644 --- a/contrib/libs/tcmalloc/.yandex_meta/devtools.copyrights.report +++ b/contrib/libs/tcmalloc/.yandex_meta/devtools.copyrights.report @@ -29,6 +29,20 @@ # FILE_INCLUDE - include all file data into licenses text file # ======================= +KEEP COPYRIGHT_SERVICE_LABEL 05bdd09fb9fdb384a61f2eb54df462d6 +BELONGS ya.make + License text: + // Copyright 2016 Google Inc. All Rights Reserved. + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + tcmalloc/internal/profile.proto [1:1] + Belongs difference: + + ya.make + - tcmalloc/internal/ya.make + KEEP COPYRIGHT_SERVICE_LABEL 279545394b5ad4b6b26c0686ac5f9921 BELONGS ya.make License text: @@ -40,161 +54,195 @@ BELONGS ya.make Files with this license: tcmalloc/BUILD [1:1] tcmalloc/arena.cc [1:1] - tcmalloc/arena.h [1:1] + tcmalloc/arena.h [2:2] tcmalloc/background.cc [1:1] tcmalloc/central_freelist.cc [1:1] - tcmalloc/central_freelist.h [1:1] + tcmalloc/central_freelist.h [2:2] tcmalloc/central_freelist_test.cc [1:1] tcmalloc/common.cc [1:1] - tcmalloc/common.h [1:1] + tcmalloc/common.h [2:2] + tcmalloc/copts.bzl [1:1] tcmalloc/cpu_cache.cc [1:1] - tcmalloc/cpu_cache.h [1:1] + tcmalloc/cpu_cache.h [2:2] tcmalloc/cpu_cache_test.cc [1:1] tcmalloc/experiment.cc [1:1] - tcmalloc/experiment.h [1:1] - tcmalloc/experiment_config.h [1:1] + tcmalloc/experiment.h [2:2] + tcmalloc/experiment_config.h [2:2] tcmalloc/experiment_config_test.cc [1:1] tcmalloc/experiment_fuzz.cc [1:1] - tcmalloc/experimental_56_size_class.cc [1:1] - tcmalloc/experimental_pow2_below64_size_class.cc [1:1] tcmalloc/experimental_pow2_size_class.cc [1:1] + tcmalloc/global_stats.cc [1:1] + tcmalloc/global_stats.h [2:2] tcmalloc/guarded_page_allocator.cc [1:1] - tcmalloc/guarded_page_allocator.h [1:1] + tcmalloc/guarded_page_allocator.h [2:2] tcmalloc/guarded_page_allocator_benchmark.cc [1:1] + tcmalloc/guarded_page_allocator_profile_test.cc [1:1] tcmalloc/guarded_page_allocator_test.cc [1:1] - tcmalloc/heap_profiling_test.cc [1:1] tcmalloc/huge_address_map.cc [1:1] - tcmalloc/huge_address_map.h [1:1] + tcmalloc/huge_address_map.h [2:2] 
tcmalloc/huge_address_map_test.cc [1:1] tcmalloc/huge_allocator.cc [1:1] - tcmalloc/huge_allocator.h [1:1] + tcmalloc/huge_allocator.h [2:2] tcmalloc/huge_allocator_test.cc [1:1] tcmalloc/huge_cache.cc [1:1] - tcmalloc/huge_cache.h [1:1] + tcmalloc/huge_cache.h [2:2] tcmalloc/huge_cache_test.cc [1:1] tcmalloc/huge_page_aware_allocator.cc [1:1] - tcmalloc/huge_page_aware_allocator.h [1:1] + tcmalloc/huge_page_aware_allocator.h [2:2] tcmalloc/huge_page_aware_allocator_test.cc [1:1] - tcmalloc/huge_page_filler.h [1:1] + tcmalloc/huge_page_filler.h [2:2] tcmalloc/huge_page_filler_test.cc [1:1] - tcmalloc/huge_pages.h [1:1] - tcmalloc/huge_region.h [1:1] + tcmalloc/huge_page_subrelease.h [2:2] + tcmalloc/huge_page_subrelease_test.cc [1:1] + tcmalloc/huge_pages.h [2:2] + tcmalloc/huge_region.h [2:2] tcmalloc/huge_region_test.cc [1:1] - tcmalloc/internal/atomic_danger.h [1:1] - tcmalloc/internal/atomic_stats_counter.h [1:1] - tcmalloc/internal/bits.h [1:1] - tcmalloc/internal/bits_test.cc [1:1] - tcmalloc/internal/config.h [1:1] - tcmalloc/internal/declarations.h [1:1] + tcmalloc/internal/BUILD [1:1] + tcmalloc/internal/affinity.cc [1:1] + tcmalloc/internal/affinity.h [2:2] + tcmalloc/internal/atomic_danger.h [2:2] + tcmalloc/internal/atomic_stats_counter.h [2:2] + tcmalloc/internal/config.h [2:2] + tcmalloc/internal/declarations.h [2:2] tcmalloc/internal/environment.cc [1:1] - tcmalloc/internal/environment.h [1:1] + tcmalloc/internal/environment.h [2:2] tcmalloc/internal/environment_test.cc [1:1] - tcmalloc/internal/lifetime_predictions_test.cc [1:1] - tcmalloc/internal/lifetime_tracker_test.cc [1:1] - tcmalloc/internal/linked_list.h [1:1] + tcmalloc/internal/explicitly_constructed.h [2:2] + tcmalloc/internal/exponential_biased.h [2:2] + tcmalloc/internal/exponential_biased_test.cc [1:1] + tcmalloc/internal/linked_list.h [2:2] tcmalloc/internal/linked_list_benchmark.cc [1:1] tcmalloc/internal/linked_list_test.cc [1:1] - tcmalloc/internal/linux_syscall_support.h [1:1] + tcmalloc/internal/linux_syscall_support.h [2:2] tcmalloc/internal/logging.cc [1:1] - tcmalloc/internal/logging.h [1:1] + tcmalloc/internal/logging.h [2:2] tcmalloc/internal/logging_test.cc [1:1] tcmalloc/internal/memory_stats.cc [1:1] - tcmalloc/internal/memory_stats.h [1:1] + tcmalloc/internal/memory_stats.h [2:2] tcmalloc/internal/memory_stats_test.cc [1:1] tcmalloc/internal/mincore.cc [1:1] - tcmalloc/internal/mincore.h [1:1] + tcmalloc/internal/mincore.h [2:2] tcmalloc/internal/mincore_benchmark.cc [1:1] tcmalloc/internal/mincore_test.cc [1:1] - tcmalloc/internal/mock_span.h [1:1] - tcmalloc/internal/parameter_accessors.h [1:1] + tcmalloc/internal/mock_span.h [2:2] + tcmalloc/internal/parameter_accessors.h [2:2] tcmalloc/internal/percpu.cc [1:1] - tcmalloc/internal/percpu.h [1:1] + tcmalloc/internal/percpu.h [2:2] tcmalloc/internal/percpu_rseq_asm.S [1:1] - tcmalloc/internal/percpu_rseq_ppc.S [2:2] tcmalloc/internal/percpu_rseq_unsupported.cc [1:1] tcmalloc/internal/percpu_rseq_x86_64.S [2:2] - tcmalloc/internal/percpu_tcmalloc.h [1:1] + tcmalloc/internal/percpu_tcmalloc.h [2:2] tcmalloc/internal/percpu_tcmalloc_test.cc [1:1] tcmalloc/internal/proc_maps.cc [1:1] - tcmalloc/internal/proc_maps.h [1:1] - tcmalloc/internal/range_tracker.h [1:1] + tcmalloc/internal/proc_maps.h [2:2] + tcmalloc/internal/range_tracker.h [2:2] tcmalloc/internal/range_tracker_benchmark.cc [1:1] tcmalloc/internal/range_tracker_test.cc [1:1] - tcmalloc/internal/timeseries_tracker.h [1:1] + tcmalloc/internal/residency.cc [1:1] + 
tcmalloc/internal/residency.h [2:2] + tcmalloc/internal/residency_test.cc [1:1] + tcmalloc/internal/timeseries_tracker.h [2:2] tcmalloc/internal/timeseries_tracker_test.cc [1:1] tcmalloc/internal/util.cc [1:1] - tcmalloc/internal/util.h [1:1] - tcmalloc/internal_malloc_extension.h [1:1] + tcmalloc/internal/util.h [2:2] + tcmalloc/internal_malloc_extension.h [2:2] tcmalloc/legacy_size_classes.cc [1:1] - tcmalloc/libc_override.h [1:1] - tcmalloc/libc_override_gcc_and_weak.h [1:1] - tcmalloc/libc_override_glibc.h [1:1] - tcmalloc/libc_override_redefine.h [1:1] + tcmalloc/libc_override.h [2:2] tcmalloc/malloc_extension.cc [1:1] - tcmalloc/malloc_extension.h [1:1] + tcmalloc/malloc_extension.h [2:2] tcmalloc/malloc_extension_fuzz.cc [1:1] - tcmalloc/malloc_extension_system_malloc_test.cc [1:1] - tcmalloc/malloc_extension_test.cc [1:1] - tcmalloc/noruntime_size_classes.cc [1:1] + tcmalloc/metadata_object_allocator.h [2:2] tcmalloc/page_allocator.cc [1:1] - tcmalloc/page_allocator.h [1:1] + tcmalloc/page_allocator.h [2:2] tcmalloc/page_allocator_interface.cc [1:1] - tcmalloc/page_allocator_interface.h [1:1] + tcmalloc/page_allocator_interface.h [2:2] tcmalloc/page_allocator_test.cc [1:1] - tcmalloc/page_allocator_test_util.h [1:1] - tcmalloc/page_heap.cc [1:1] - tcmalloc/page_heap.h [1:1] - tcmalloc/page_heap_allocator.h [1:1] - tcmalloc/page_heap_test.cc [1:1] + tcmalloc/page_allocator_test_util.h [2:2] tcmalloc/pagemap.cc [1:1] - tcmalloc/pagemap.h [1:1] + tcmalloc/pagemap.h [3:3] tcmalloc/pagemap_test.cc [1:1] - tcmalloc/pages.h [1:1] + tcmalloc/pages.h [2:2] tcmalloc/parameters.cc [1:1] - tcmalloc/parameters.h [1:1] + tcmalloc/parameters.h [2:2] tcmalloc/peak_heap_tracker.cc [1:1] - tcmalloc/peak_heap_tracker.h [1:1] + tcmalloc/peak_heap_tracker.h [2:2] tcmalloc/profile_test.cc [1:1] - tcmalloc/realloc_test.cc [1:1] - tcmalloc/runtime_size_classes.cc [1:1] - tcmalloc/runtime_size_classes.h [1:1] - tcmalloc/runtime_size_classes_fuzz.cc [1:1] - tcmalloc/runtime_size_classes_test.cc [1:1] + tcmalloc/reuse_size_classes.cc [1:1] tcmalloc/sampler.cc [1:1] - tcmalloc/sampler.h [1:1] - tcmalloc/size_class_info.h [1:1] + tcmalloc/sampler.h [2:2] + tcmalloc/segv_handler.cc [1:1] + tcmalloc/segv_handler.h [2:2] + tcmalloc/segv_handler_test.cc [1:1] + tcmalloc/size_class_info.h [2:2] tcmalloc/size_classes.cc [1:1] tcmalloc/size_classes_test.cc [1:1] - tcmalloc/size_classes_with_runtime_size_classes_test.cc [1:1] + tcmalloc/sizemap_test.cc [1:1] tcmalloc/span.cc [1:1] - tcmalloc/span.h [1:1] + tcmalloc/span.h [2:2] tcmalloc/span_benchmark.cc [1:1] - tcmalloc/span_stats.h [1:1] + tcmalloc/span_stats.h [2:2] tcmalloc/span_test.cc [1:1] tcmalloc/stack_trace_table.cc [1:1] - tcmalloc/stack_trace_table.h [1:1] + tcmalloc/stack_trace_table.h [2:2] tcmalloc/stack_trace_table_test.cc [1:1] tcmalloc/static_vars.cc [1:1] - tcmalloc/static_vars.h [1:1] + tcmalloc/static_vars.h [2:2] tcmalloc/stats.cc [1:1] - tcmalloc/stats.h [1:1] + tcmalloc/stats.h [2:2] tcmalloc/stats_test.cc [1:1] tcmalloc/system-alloc.cc [1:1] - tcmalloc/system-alloc.h [1:1] - tcmalloc/system-alloc_test.cc [1:1] + tcmalloc/system-alloc.h [2:2] tcmalloc/tcmalloc.cc [1:1] - tcmalloc/tcmalloc.h [1:1] - tcmalloc/tcmalloc_large_test.cc [1:1] - tcmalloc/tcmalloc_policy.h [1:1] + tcmalloc/tcmalloc.h [2:2] + tcmalloc/tcmalloc_policy.h [2:2] + tcmalloc/testing/BUILD [1:1] + tcmalloc/testing/aligned_new_test.cc [1:1] + tcmalloc/testing/current_allocated_bytes_test.cc [1:1] + tcmalloc/testing/default_parameters_test.cc [1:1] + tcmalloc/testing/frag_test.cc 
[1:1] + tcmalloc/testing/get_stats_test.cc [1:1] + tcmalloc/testing/heap_profiling_test.cc [1:1] + tcmalloc/testing/hello_main.cc [1:1] + tcmalloc/testing/large_alloc_size_test.cc [1:1] + tcmalloc/testing/largesmall_frag_test.cc [1:1] + tcmalloc/testing/limit_test.cc [1:1] + tcmalloc/testing/malloc_extension_system_malloc_test.cc [1:1] + tcmalloc/testing/malloc_extension_test.cc [1:1] + tcmalloc/testing/markidle_test.cc [1:1] + tcmalloc/testing/memalign_test.cc [1:1] + tcmalloc/testing/memory_errors_test.cc [1:1] + tcmalloc/testing/no_deps_test.cc [1:1] + tcmalloc/testing/outofmemory_test.cc [1:1] + tcmalloc/testing/peak_heap_profiling_test.cc [1:1] + tcmalloc/testing/realloc_test.cc [1:1] + tcmalloc/testing/releasing_test.cc [1:1] + tcmalloc/testing/sample_size_class_test.cc [1:1] + tcmalloc/testing/sampler_test.cc [1:1] + tcmalloc/testing/sampling_memusage_test.cc [1:1] + tcmalloc/testing/sampling_test.cc [1:1] + tcmalloc/testing/startup_size_test.cc [1:1] + tcmalloc/testing/system-alloc_test.cc [1:1] + tcmalloc/testing/tcmalloc_benchmark.cc [1:1] + tcmalloc/testing/tcmalloc_large_test.cc [1:1] + tcmalloc/testing/tcmalloc_test.cc [1:1] + tcmalloc/testing/testutil.cc [1:1] + tcmalloc/testing/testutil.h [2:2] + tcmalloc/testing/thread_ctor_test.cc [1:1] + tcmalloc/testing/thread_ctor_test_lib.cc [1:1] + tcmalloc/testing/threadcachesize_test.cc [1:1] + tcmalloc/testing/want_disable_huge_region_more_often_test_helper.cc [1:1] + tcmalloc/testing/want_disable_tcmalloc_big_span_test_helper.cc [1:1] + tcmalloc/testing/want_hpaa_test_helper.cc [1:1] + tcmalloc/testing/want_pow2below64_size_classes_helper.cc [1:1] tcmalloc/thread_cache.cc [1:1] - tcmalloc/thread_cache.h [1:1] + tcmalloc/thread_cache.h [2:2] tcmalloc/thread_cache_test.cc [1:1] - tcmalloc/tracking.h [1:1] tcmalloc/transfer_cache.cc [1:1] - tcmalloc/transfer_cache.h [1:1] + tcmalloc/transfer_cache.h [2:2] + tcmalloc/variants.bzl [1:1] tcmalloc/want_hpaa.cc [1:1] - tcmalloc/want_hpaa_subrelease.cc [1:1] - tcmalloc/want_no_hpaa.cc [1:1] + Belongs difference: + - tcmalloc/internal/ya.make KEEP COPYRIGHT_SERVICE_LABEL 2f85f99f6e6cdec04f6948d273430658 BELONGS ya.make @@ -207,15 +255,47 @@ BELONGS ya.make Files with this license: tcmalloc/arena_test.cc [1:1] tcmalloc/central_freelist_benchmark.cc [1:1] + tcmalloc/cpu_cache_activate_test.cc [1:1] tcmalloc/internal/cache_topology.cc [1:1] - tcmalloc/internal/cache_topology.h [1:1] + tcmalloc/internal/cache_topology.h [2:2] tcmalloc/internal/cache_topology_test.cc [1:1] - tcmalloc/internal/clock.h [1:1] + tcmalloc/internal/clock.h [2:2] + tcmalloc/internal/fake_profile.h [2:2] tcmalloc/internal/logging_test_helper.cc [1:1] tcmalloc/internal/numa.cc [1:1] - tcmalloc/internal/numa.h [1:1] + tcmalloc/internal/numa.h [2:2] tcmalloc/internal/numa_test.cc [1:1] + tcmalloc/internal/profile_builder.cc [1:1] + tcmalloc/internal/profile_builder.h [2:2] + tcmalloc/internal/profile_builder_test.cc [1:1] + tcmalloc/internal/sampled_allocation.h [2:2] + tcmalloc/internal/sampled_allocation_test.cc [1:1] + tcmalloc/mock_static_forwarder.h [2:2] + tcmalloc/profile_marshaler.cc [1:1] + tcmalloc/profile_marshaler.h [2:2] + tcmalloc/profile_marshaler_test.cc [1:1] + tcmalloc/testing/numa_locality_test.cc [1:1] + tcmalloc/testing/profile_test.cc [1:1] + tcmalloc/testing/reclaim_test.cc [1:1] + tcmalloc/testing/test_allocator_harness.h [2:2] tcmalloc/want_numa_aware.cc [1:1] + Belongs difference: + - tcmalloc/internal/ya.make + +KEEP COPYRIGHT_SERVICE_LABEL 3fb410b721d46624abdaeb2473ffa5d6 +BELONGS ya.make + 
License text: + // Copyright 2018 The Abseil Authors. + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + tcmalloc/internal/sampled_allocation_recorder.h [2:2] + tcmalloc/internal/sampled_allocation_recorder_test.cc [1:1] + Belongs difference: + + ya.make + - tcmalloc/internal/ya.make KEEP COPYRIGHT_SERVICE_LABEL 62f2df7d02ddf07de59d1a4e25e663aa BELONGS ya.make @@ -226,20 +306,107 @@ BELONGS ya.make Score : 100.00 Match type : COPYRIGHT Files with this license: - tcmalloc/internal/lifetime_predictions.h [1:1] - tcmalloc/internal/lifetime_tracker.h [1:1] - tcmalloc/internal/optimization.h [1:1] + tcmalloc/central_freelist_fuzz.cc [1:1] + tcmalloc/internal/config_test.cc [1:1] + tcmalloc/internal/optimization.h [2:2] tcmalloc/internal/percpu_rseq_aarch64.S [2:2] tcmalloc/mock_central_freelist.cc [1:1] - tcmalloc/mock_central_freelist.h [1:1] + tcmalloc/mock_central_freelist.h [2:2] tcmalloc/mock_transfer_cache.cc [1:1] - tcmalloc/mock_transfer_cache.h [1:1] + tcmalloc/mock_transfer_cache.h [2:2] + tcmalloc/new_extension.h [2:2] + tcmalloc/new_extension_test.cc [1:1] + tcmalloc/testing/benchmark_main.cc [1:1] + tcmalloc/testing/thread_manager.h [2:2] tcmalloc/transfer_cache_benchmark.cc [1:1] tcmalloc/transfer_cache_fuzz.cc [1:1] - tcmalloc/transfer_cache_internals.h [1:1] - tcmalloc/transfer_cache_stats.h [1:1] + tcmalloc/transfer_cache_internals.h [2:2] + tcmalloc/transfer_cache_stats.h [2:2] tcmalloc/transfer_cache_test.cc [1:1] - tcmalloc/want_legacy_spans.cc [1:1] + Belongs difference: + - tcmalloc/internal/ya.make + +KEEP COPYRIGHT_SERVICE_LABEL 6ce13c4573adc84fa643b80888b20c2f +BELONGS ya.make + License text: + // Copyright 2023 The TCMalloc Authors + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + tcmalloc/guarded_allocations.h [2:2] + tcmalloc/internal/allocation_guard.cc [1:1] + tcmalloc/internal/allocation_guard.h [2:2] + tcmalloc/internal/allocation_guard_test.cc [1:1] + tcmalloc/internal/cpu_utils.h [2:2] + tcmalloc/internal/pageflags.cc [1:1] + tcmalloc/internal/pageflags.h [2:2] + tcmalloc/internal/pageflags_test.cc [1:1] + tcmalloc/internal/sysinfo.cc [1:1] + tcmalloc/internal/sysinfo.h [2:2] + tcmalloc/internal/sysinfo_test.cc [1:1] + tcmalloc/metadata_allocator.h [2:2] + tcmalloc/mock_huge_page_static_forwarder.cc [1:1] + tcmalloc/mock_huge_page_static_forwarder.h [2:2] + tcmalloc/mock_metadata_allocator.h [2:2] + tcmalloc/mock_virtual_allocator.h [2:2] + tcmalloc/pages_test.cc [1:1] + tcmalloc/testing/background_test.cc [1:1] + tcmalloc/testing/parallel_test.cc [1:1] + tcmalloc/testing/profile_drop_frames_test.cc [1:1] + tcmalloc/testing/variants_test.cc [1:1] + Belongs difference: + - tcmalloc/internal/ya.make + +KEEP COPYRIGHT_SERVICE_LABEL 8f32d6982020583277a589c31d7b9283 +BELONGS ya.make + License text: + // Copyright 2022 The TCMalloc Authors + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + tcmalloc/.github/workflows/ci.yml [1:1] + tcmalloc/allocation_sample.cc [1:1] + tcmalloc/allocation_sample.h [2:2] + tcmalloc/allocation_sample_test.cc [1:1] + tcmalloc/allocation_sampling.cc [1:1] + tcmalloc/allocation_sampling.h [2:2] + tcmalloc/deallocation_profiler.cc [1:1] + tcmalloc/deallocation_profiler.h [2:2] + tcmalloc/hinted_tracker_lists.h [2:2] + tcmalloc/huge_page_aware_allocator_fuzz.cc [1:1] + tcmalloc/huge_page_filler_fuzz.cc 
[1:1] + tcmalloc/huge_region_fuzz.cc [1:1] + tcmalloc/internal/affinity_test.cc [1:1] + tcmalloc/internal/overflow.h [2:2] + tcmalloc/internal/page_size.cc [1:1] + tcmalloc/internal/page_size.h [2:2] + tcmalloc/internal/prefetch.h [2:2] + tcmalloc/internal/prefetch_test.cc [1:1] + tcmalloc/internal/profile_builder_fuzz.cc [1:1] + tcmalloc/internal/stacktrace_filter.h [2:2] + tcmalloc/internal/stacktrace_filter_test.cc [1:1] + tcmalloc/internal/sysinfo_fuzz.cc [1:1] + tcmalloc/internal_malloc_tracing_extension.h [2:2] + tcmalloc/malloc_tracing_extension.cc [1:1] + tcmalloc/malloc_tracing_extension.h [2:2] + tcmalloc/sizemap.cc [1:1] + tcmalloc/sizemap.h [2:2] + tcmalloc/sizemap_fuzz.cc [1:1] + tcmalloc/span_fuzz.cc [1:1] + tcmalloc/testing/deallocation_profiler_test.cc [1:1] + tcmalloc/testing/malloc_tracing_extension_test.cc [1:1] + tcmalloc/testing/realized_fragmentation_test.cc [1:1] + tcmalloc/want_disable_dynamic_slabs.cc [1:1] + tcmalloc/want_disable_huge_region_more_often.cc [1:1] + tcmalloc/want_disable_tcmalloc_big_span.cc [1:1] + tcmalloc/want_legacy_size_classes.cc [1:1] + tcmalloc/want_legacy_size_classes_test.cc [1:1] + Belongs difference: + - tcmalloc/internal/ya.make SKIP COPYRIGHT_SERVICE_LABEL b7c6499c855f04bbe7161fc4de3a41d6 BELONGS ya.make @@ -252,4 +419,29 @@ BELONGS ya.make Score : 100.00 Match type : COPYRIGHT Files with this license: - tcmalloc/huge_region_test.cc [433:435] + tcmalloc/huge_region_test.cc [582:584] + tcmalloc/huge_region_test.cc [614:616] + +KEEP COPYRIGHT_SERVICE_LABEL d4c3b5c5d59dc3fc89f75c05560b08bd +BELONGS ya.make + License text: + // Copyright 2024 The TCMalloc Authors + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + tcmalloc/internal/memory_tag.cc [1:1] + tcmalloc/internal/memory_tag.h [2:2] + tcmalloc/internal/mismatched_delete_state.h [2:2] + tcmalloc/internal/percpu_early_test.cc [1:1] + tcmalloc/internal/percpu_test.cc [1:1] + tcmalloc/internal/profile_builder_no_tcmalloc_test.cc [1:1] + tcmalloc/selsan/BUILD [1:1] + tcmalloc/selsan/report_test.cc [1:1] + tcmalloc/selsan/selsan.cc [1:1] + tcmalloc/selsan/selsan.h [2:2] + tcmalloc/selsan/shadow_test.cc [1:1] + tcmalloc/testing/disable_numa_test.cc [1:1] + Belongs difference: + - tcmalloc/internal/ya.make diff --git a/contrib/libs/tcmalloc/.yandex_meta/devtools.licenses.report b/contrib/libs/tcmalloc/.yandex_meta/devtools.licenses.report index 29c5c149ce10..2e8f17be06de 100644 --- a/contrib/libs/tcmalloc/.yandex_meta/devtools.licenses.report +++ b/contrib/libs/tcmalloc/.yandex_meta/devtools.licenses.report @@ -38,184 +38,301 @@ BELONGS ya.make Match type : NOTICE Links : http://www.apache.org/licenses/, http://www.apache.org/licenses/LICENSE-2.0, https://spdx.org/licenses/Apache-2.0 Files with this license: + tcmalloc/allocation_sample.cc [3:13] + tcmalloc/allocation_sample.h [4:14] + tcmalloc/allocation_sample_test.cc [3:13] + tcmalloc/allocation_sampling.cc [3:13] + tcmalloc/allocation_sampling.h [4:14] tcmalloc/arena.cc [3:13] - tcmalloc/arena.h [3:13] + tcmalloc/arena.h [4:14] tcmalloc/arena_test.cc [3:13] tcmalloc/background.cc [3:13] tcmalloc/central_freelist.cc [3:13] - tcmalloc/central_freelist.h [3:13] + tcmalloc/central_freelist.h [4:14] tcmalloc/central_freelist_benchmark.cc [3:13] + tcmalloc/central_freelist_fuzz.cc [3:13] tcmalloc/central_freelist_test.cc [3:13] tcmalloc/common.cc [3:13] - tcmalloc/common.h [3:13] + tcmalloc/common.h [4:14] tcmalloc/cpu_cache.cc [3:13] - tcmalloc/cpu_cache.h [3:13] 
+ tcmalloc/cpu_cache.h [4:14] + tcmalloc/cpu_cache_activate_test.cc [3:13] tcmalloc/cpu_cache_test.cc [3:13] + tcmalloc/deallocation_profiler.cc [3:13] + tcmalloc/deallocation_profiler.h [4:14] tcmalloc/experiment.cc [3:13] - tcmalloc/experiment.h [3:13] - tcmalloc/experiment_config.h [3:13] + tcmalloc/experiment.h [4:14] + tcmalloc/experiment_config.h [4:14] tcmalloc/experiment_config_test.cc [3:13] tcmalloc/experiment_fuzz.cc [3:13] - tcmalloc/experimental_56_size_class.cc [3:13] - tcmalloc/experimental_pow2_below64_size_class.cc [3:13] tcmalloc/experimental_pow2_size_class.cc [3:13] + tcmalloc/global_stats.cc [3:13] + tcmalloc/global_stats.h [4:14] + tcmalloc/guarded_allocations.h [4:14] tcmalloc/guarded_page_allocator.cc [3:13] - tcmalloc/guarded_page_allocator.h [3:13] + tcmalloc/guarded_page_allocator.h [4:14] tcmalloc/guarded_page_allocator_benchmark.cc [3:13] + tcmalloc/guarded_page_allocator_profile_test.cc [3:13] tcmalloc/guarded_page_allocator_test.cc [3:13] - tcmalloc/heap_profiling_test.cc [3:13] + tcmalloc/hinted_tracker_lists.h [4:14] tcmalloc/huge_address_map.cc [3:13] - tcmalloc/huge_address_map.h [3:13] + tcmalloc/huge_address_map.h [4:14] tcmalloc/huge_address_map_test.cc [3:13] tcmalloc/huge_allocator.cc [3:13] - tcmalloc/huge_allocator.h [3:13] + tcmalloc/huge_allocator.h [4:14] tcmalloc/huge_allocator_test.cc [3:13] tcmalloc/huge_cache.cc [3:13] - tcmalloc/huge_cache.h [3:13] + tcmalloc/huge_cache.h [4:14] tcmalloc/huge_cache_test.cc [3:13] tcmalloc/huge_page_aware_allocator.cc [3:13] - tcmalloc/huge_page_aware_allocator.h [3:13] + tcmalloc/huge_page_aware_allocator.h [4:14] + tcmalloc/huge_page_aware_allocator_fuzz.cc [3:13] tcmalloc/huge_page_aware_allocator_test.cc [3:13] - tcmalloc/huge_page_filler.h [3:13] + tcmalloc/huge_page_filler.h [4:14] + tcmalloc/huge_page_filler_fuzz.cc [3:13] tcmalloc/huge_page_filler_test.cc [3:13] - tcmalloc/huge_pages.h [3:13] - tcmalloc/huge_region.h [3:13] + tcmalloc/huge_page_subrelease.h [4:14] + tcmalloc/huge_page_subrelease_test.cc [3:13] + tcmalloc/huge_pages.h [4:14] + tcmalloc/huge_region.h [4:14] + tcmalloc/huge_region_fuzz.cc [3:13] tcmalloc/huge_region_test.cc [3:13] - tcmalloc/internal/atomic_danger.h [3:13] - tcmalloc/internal/atomic_stats_counter.h [3:13] - tcmalloc/internal/bits.h [3:13] - tcmalloc/internal/bits_test.cc [3:13] + tcmalloc/internal/affinity.cc [3:13] + tcmalloc/internal/affinity.h [4:14] + tcmalloc/internal/affinity_test.cc [3:13] + tcmalloc/internal/allocation_guard.cc [3:13] + tcmalloc/internal/allocation_guard.h [4:14] + tcmalloc/internal/allocation_guard_test.cc [3:13] + tcmalloc/internal/atomic_danger.h [4:14] + tcmalloc/internal/atomic_stats_counter.h [4:14] tcmalloc/internal/cache_topology.cc [3:13] - tcmalloc/internal/cache_topology.h [3:13] + tcmalloc/internal/cache_topology.h [4:14] tcmalloc/internal/cache_topology_test.cc [3:13] - tcmalloc/internal/clock.h [3:13] - tcmalloc/internal/config.h [3:13] - tcmalloc/internal/declarations.h [3:13] + tcmalloc/internal/clock.h [4:14] + tcmalloc/internal/config.h [4:14] + tcmalloc/internal/config_test.cc [3:13] + tcmalloc/internal/cpu_utils.h [4:14] + tcmalloc/internal/declarations.h [4:14] tcmalloc/internal/environment.cc [3:13] - tcmalloc/internal/environment.h [3:13] + tcmalloc/internal/environment.h [4:14] tcmalloc/internal/environment_test.cc [3:13] - tcmalloc/internal/lifetime_predictions.h [3:13] - tcmalloc/internal/lifetime_predictions_test.cc [3:13] - tcmalloc/internal/lifetime_tracker.h [3:13] - tcmalloc/internal/lifetime_tracker_test.cc [3:13] 
- tcmalloc/internal/linked_list.h [3:13] + tcmalloc/internal/explicitly_constructed.h [4:14] + tcmalloc/internal/exponential_biased.h [4:14] + tcmalloc/internal/exponential_biased_test.cc [3:13] + tcmalloc/internal/fake_profile.h [4:14] + tcmalloc/internal/linked_list.h [4:14] tcmalloc/internal/linked_list_benchmark.cc [3:13] tcmalloc/internal/linked_list_test.cc [3:13] - tcmalloc/internal/linux_syscall_support.h [3:13] + tcmalloc/internal/linux_syscall_support.h [4:14] tcmalloc/internal/logging.cc [3:13] - tcmalloc/internal/logging.h [3:13] + tcmalloc/internal/logging.h [4:14] tcmalloc/internal/logging_test.cc [3:13] tcmalloc/internal/logging_test_helper.cc [3:13] tcmalloc/internal/memory_stats.cc [3:13] - tcmalloc/internal/memory_stats.h [3:13] + tcmalloc/internal/memory_stats.h [4:14] tcmalloc/internal/memory_stats_test.cc [3:13] + tcmalloc/internal/memory_tag.cc [3:13] + tcmalloc/internal/memory_tag.h [4:14] tcmalloc/internal/mincore.cc [3:13] - tcmalloc/internal/mincore.h [3:13] + tcmalloc/internal/mincore.h [4:14] tcmalloc/internal/mincore_benchmark.cc [3:13] tcmalloc/internal/mincore_test.cc [3:13] - tcmalloc/internal/mock_span.h [3:13] + tcmalloc/internal/mismatched_delete_state.h [4:14] + tcmalloc/internal/mock_span.h [4:14] tcmalloc/internal/numa.cc [3:13] - tcmalloc/internal/numa.h [3:13] + tcmalloc/internal/numa.h [4:14] tcmalloc/internal/numa_test.cc [3:13] - tcmalloc/internal/optimization.h [3:13] - tcmalloc/internal/parameter_accessors.h [3:13] + tcmalloc/internal/optimization.h [4:14] + tcmalloc/internal/overflow.h [4:14] + tcmalloc/internal/page_size.cc [3:13] + tcmalloc/internal/page_size.h [4:14] + tcmalloc/internal/pageflags.cc [3:13] + tcmalloc/internal/pageflags.h [4:14] + tcmalloc/internal/pageflags_test.cc [3:13] + tcmalloc/internal/parameter_accessors.h [4:14] tcmalloc/internal/percpu.cc [3:13] - tcmalloc/internal/percpu.h [3:13] + tcmalloc/internal/percpu.h [4:14] + tcmalloc/internal/percpu_early_test.cc [3:13] tcmalloc/internal/percpu_rseq_asm.S [3:13] tcmalloc/internal/percpu_rseq_unsupported.cc [3:13] - tcmalloc/internal/percpu_tcmalloc.h [3:13] + tcmalloc/internal/percpu_tcmalloc.h [4:14] tcmalloc/internal/percpu_tcmalloc_test.cc [3:13] + tcmalloc/internal/percpu_test.cc [3:13] + tcmalloc/internal/prefetch.h [4:14] + tcmalloc/internal/prefetch_test.cc [3:13] tcmalloc/internal/proc_maps.cc [3:13] - tcmalloc/internal/proc_maps.h [3:13] - tcmalloc/internal/range_tracker.h [3:13] + tcmalloc/internal/proc_maps.h [4:14] + tcmalloc/internal/profile_builder.cc [3:13] + tcmalloc/internal/profile_builder.h [4:14] + tcmalloc/internal/profile_builder_fuzz.cc [3:13] + tcmalloc/internal/profile_builder_no_tcmalloc_test.cc [3:13] + tcmalloc/internal/profile_builder_test.cc [3:13] + tcmalloc/internal/range_tracker.h [4:14] tcmalloc/internal/range_tracker_benchmark.cc [3:13] tcmalloc/internal/range_tracker_test.cc [3:13] - tcmalloc/internal/timeseries_tracker.h [3:13] + tcmalloc/internal/residency.cc [3:13] + tcmalloc/internal/residency.h [4:14] + tcmalloc/internal/residency_test.cc [3:13] + tcmalloc/internal/sampled_allocation.h [4:14] + tcmalloc/internal/sampled_allocation_test.cc [3:13] + tcmalloc/internal/stacktrace_filter.h [4:14] + tcmalloc/internal/stacktrace_filter_test.cc [3:13] + tcmalloc/internal/sysinfo.cc [3:13] + tcmalloc/internal/sysinfo.h [4:14] + tcmalloc/internal/sysinfo_fuzz.cc [3:13] + tcmalloc/internal/sysinfo_test.cc [3:13] + tcmalloc/internal/timeseries_tracker.h [4:14] tcmalloc/internal/timeseries_tracker_test.cc [3:13] tcmalloc/internal/util.cc [3:13] 
- tcmalloc/internal/util.h [3:13] - tcmalloc/internal_malloc_extension.h [3:13] + tcmalloc/internal/util.h [4:14] + tcmalloc/internal_malloc_extension.h [4:14] + tcmalloc/internal_malloc_tracing_extension.h [4:14] tcmalloc/legacy_size_classes.cc [3:13] - tcmalloc/libc_override.h [3:13] - tcmalloc/libc_override_gcc_and_weak.h [3:13] - tcmalloc/libc_override_glibc.h [3:13] - tcmalloc/libc_override_redefine.h [3:13] + tcmalloc/libc_override.h [4:14] tcmalloc/malloc_extension.cc [3:13] - tcmalloc/malloc_extension.h [3:13] + tcmalloc/malloc_extension.h [4:14] tcmalloc/malloc_extension_fuzz.cc [3:13] - tcmalloc/malloc_extension_system_malloc_test.cc [3:13] - tcmalloc/malloc_extension_test.cc [3:13] + tcmalloc/malloc_tracing_extension.cc [3:13] + tcmalloc/malloc_tracing_extension.h [4:14] + tcmalloc/metadata_allocator.h [4:14] + tcmalloc/metadata_object_allocator.h [4:14] tcmalloc/mock_central_freelist.cc [3:13] - tcmalloc/mock_central_freelist.h [3:13] + tcmalloc/mock_central_freelist.h [4:14] + tcmalloc/mock_huge_page_static_forwarder.cc [3:13] + tcmalloc/mock_huge_page_static_forwarder.h [4:14] + tcmalloc/mock_metadata_allocator.h [4:14] + tcmalloc/mock_static_forwarder.h [4:14] tcmalloc/mock_transfer_cache.cc [3:13] - tcmalloc/mock_transfer_cache.h [3:13] - tcmalloc/noruntime_size_classes.cc [3:13] + tcmalloc/mock_transfer_cache.h [4:14] + tcmalloc/mock_virtual_allocator.h [4:14] + tcmalloc/new_extension.h [4:14] + tcmalloc/new_extension_test.cc [3:13] tcmalloc/page_allocator.cc [3:13] - tcmalloc/page_allocator.h [3:13] + tcmalloc/page_allocator.h [4:14] tcmalloc/page_allocator_interface.cc [3:13] - tcmalloc/page_allocator_interface.h [3:13] + tcmalloc/page_allocator_interface.h [4:14] tcmalloc/page_allocator_test.cc [3:13] - tcmalloc/page_allocator_test_util.h [3:13] - tcmalloc/page_heap.cc [3:13] - tcmalloc/page_heap.h [3:13] - tcmalloc/page_heap_allocator.h [3:13] - tcmalloc/page_heap_test.cc [3:13] + tcmalloc/page_allocator_test_util.h [4:14] tcmalloc/pagemap.cc [3:13] - tcmalloc/pagemap.h [3:13] + tcmalloc/pagemap.h [5:15] tcmalloc/pagemap_test.cc [3:13] - tcmalloc/pages.h [3:13] + tcmalloc/pages.h [4:14] + tcmalloc/pages_test.cc [3:13] tcmalloc/parameters.cc [3:13] - tcmalloc/parameters.h [3:13] + tcmalloc/parameters.h [4:14] tcmalloc/peak_heap_tracker.cc [3:13] - tcmalloc/peak_heap_tracker.h [3:13] + tcmalloc/peak_heap_tracker.h [4:14] + tcmalloc/profile_marshaler.cc [3:13] + tcmalloc/profile_marshaler.h [4:14] + tcmalloc/profile_marshaler_test.cc [3:13] tcmalloc/profile_test.cc [3:13] - tcmalloc/realloc_test.cc [3:13] - tcmalloc/runtime_size_classes.cc [3:13] - tcmalloc/runtime_size_classes.h [3:13] - tcmalloc/runtime_size_classes_fuzz.cc [3:13] - tcmalloc/runtime_size_classes_test.cc [3:13] + tcmalloc/reuse_size_classes.cc [3:13] tcmalloc/sampler.cc [3:13] - tcmalloc/sampler.h [3:13] - tcmalloc/size_class_info.h [3:13] + tcmalloc/sampler.h [4:14] + tcmalloc/segv_handler.cc [3:13] + tcmalloc/segv_handler.h [4:14] + tcmalloc/segv_handler_test.cc [3:13] + tcmalloc/selsan/report_test.cc [3:13] + tcmalloc/selsan/selsan.cc [3:13] + tcmalloc/selsan/selsan.h [4:14] + tcmalloc/selsan/shadow_test.cc [3:13] + tcmalloc/size_class_info.h [4:14] tcmalloc/size_classes.cc [3:13] tcmalloc/size_classes_test.cc [3:13] - tcmalloc/size_classes_with_runtime_size_classes_test.cc [3:13] + tcmalloc/sizemap.cc [3:13] + tcmalloc/sizemap.h [4:14] + tcmalloc/sizemap_fuzz.cc [3:13] + tcmalloc/sizemap_test.cc [3:13] tcmalloc/span.cc [3:13] - tcmalloc/span.h [3:13] + tcmalloc/span.h [4:14] 
tcmalloc/span_benchmark.cc [3:13] - tcmalloc/span_stats.h [3:13] + tcmalloc/span_fuzz.cc [3:13] + tcmalloc/span_stats.h [4:14] tcmalloc/span_test.cc [3:13] tcmalloc/stack_trace_table.cc [3:13] - tcmalloc/stack_trace_table.h [3:13] + tcmalloc/stack_trace_table.h [4:14] tcmalloc/stack_trace_table_test.cc [3:13] tcmalloc/static_vars.cc [3:13] - tcmalloc/static_vars.h [3:13] + tcmalloc/static_vars.h [4:14] tcmalloc/stats.cc [3:13] - tcmalloc/stats.h [3:13] + tcmalloc/stats.h [4:14] tcmalloc/stats_test.cc [3:13] tcmalloc/system-alloc.cc [3:13] - tcmalloc/system-alloc.h [3:13] - tcmalloc/system-alloc_test.cc [3:13] + tcmalloc/system-alloc.h [4:14] tcmalloc/tcmalloc.cc [3:13] - tcmalloc/tcmalloc.h [3:13] - tcmalloc/tcmalloc_large_test.cc [3:13] - tcmalloc/tcmalloc_policy.h [3:13] + tcmalloc/tcmalloc.h [4:14] + tcmalloc/tcmalloc_policy.h [4:14] + tcmalloc/testing/aligned_new_test.cc [3:13] + tcmalloc/testing/background_test.cc [3:13] + tcmalloc/testing/benchmark_main.cc [3:13] + tcmalloc/testing/current_allocated_bytes_test.cc [3:13] + tcmalloc/testing/deallocation_profiler_test.cc [3:13] + tcmalloc/testing/default_parameters_test.cc [3:13] + tcmalloc/testing/disable_numa_test.cc [3:13] + tcmalloc/testing/frag_test.cc [3:13] + tcmalloc/testing/get_stats_test.cc [3:13] + tcmalloc/testing/heap_profiling_test.cc [3:13] + tcmalloc/testing/hello_main.cc [3:13] + tcmalloc/testing/large_alloc_size_test.cc [3:13] + tcmalloc/testing/largesmall_frag_test.cc [3:13] + tcmalloc/testing/limit_test.cc [3:13] + tcmalloc/testing/malloc_extension_system_malloc_test.cc [3:13] + tcmalloc/testing/malloc_extension_test.cc [3:13] + tcmalloc/testing/malloc_tracing_extension_test.cc [3:13] + tcmalloc/testing/markidle_test.cc [3:13] + tcmalloc/testing/memalign_test.cc [3:13] + tcmalloc/testing/memory_errors_test.cc [3:13] + tcmalloc/testing/no_deps_test.cc [3:13] + tcmalloc/testing/numa_locality_test.cc [3:13] + tcmalloc/testing/outofmemory_test.cc [3:13] + tcmalloc/testing/parallel_test.cc [3:13] + tcmalloc/testing/peak_heap_profiling_test.cc [3:13] + tcmalloc/testing/profile_drop_frames_test.cc [3:13] + tcmalloc/testing/profile_test.cc [3:13] + tcmalloc/testing/realized_fragmentation_test.cc [3:13] + tcmalloc/testing/realloc_test.cc [3:13] + tcmalloc/testing/reclaim_test.cc [3:13] + tcmalloc/testing/releasing_test.cc [3:13] + tcmalloc/testing/sample_size_class_test.cc [3:13] + tcmalloc/testing/sampler_test.cc [3:13] + tcmalloc/testing/sampling_memusage_test.cc [3:13] + tcmalloc/testing/sampling_test.cc [3:13] + tcmalloc/testing/startup_size_test.cc [3:13] + tcmalloc/testing/system-alloc_test.cc [3:13] + tcmalloc/testing/tcmalloc_benchmark.cc [3:13] + tcmalloc/testing/tcmalloc_large_test.cc [3:13] + tcmalloc/testing/tcmalloc_test.cc [3:13] + tcmalloc/testing/test_allocator_harness.h [4:14] + tcmalloc/testing/testutil.cc [3:13] + tcmalloc/testing/testutil.h [4:14] + tcmalloc/testing/thread_ctor_test.cc [3:13] + tcmalloc/testing/thread_ctor_test_lib.cc [3:13] + tcmalloc/testing/thread_manager.h [4:14] + tcmalloc/testing/threadcachesize_test.cc [3:13] + tcmalloc/testing/variants_test.cc [3:13] + tcmalloc/testing/want_disable_huge_region_more_often_test_helper.cc [3:13] + tcmalloc/testing/want_disable_tcmalloc_big_span_test_helper.cc [3:13] + tcmalloc/testing/want_hpaa_test_helper.cc [3:13] + tcmalloc/testing/want_pow2below64_size_classes_helper.cc [3:13] tcmalloc/thread_cache.cc [3:13] - tcmalloc/thread_cache.h [3:13] + tcmalloc/thread_cache.h [4:14] tcmalloc/thread_cache_test.cc [3:13] - tcmalloc/tracking.h [3:13] 
tcmalloc/transfer_cache.cc [3:13] - tcmalloc/transfer_cache.h [3:13] + tcmalloc/transfer_cache.h [4:14] tcmalloc/transfer_cache_benchmark.cc [3:13] tcmalloc/transfer_cache_fuzz.cc [3:13] - tcmalloc/transfer_cache_internals.h [3:13] - tcmalloc/transfer_cache_stats.h [3:13] + tcmalloc/transfer_cache_internals.h [4:14] + tcmalloc/transfer_cache_stats.h [4:14] tcmalloc/transfer_cache_test.cc [3:13] + tcmalloc/want_disable_dynamic_slabs.cc [3:13] + tcmalloc/want_disable_huge_region_more_often.cc [3:13] + tcmalloc/want_disable_tcmalloc_big_span.cc [3:13] tcmalloc/want_hpaa.cc [3:13] - tcmalloc/want_hpaa_subrelease.cc [3:13] - tcmalloc/want_legacy_spans.cc [3:13] - tcmalloc/want_no_hpaa.cc [3:13] + tcmalloc/want_legacy_size_classes.cc [3:13] + tcmalloc/want_legacy_size_classes_test.cc [3:13] tcmalloc/want_numa_aware.cc [3:13] + Belongs difference: + - tcmalloc/internal/ya.make KEEP Apache-2.0 24be4e5673a9c71cdba851c53ed9677b BELONGS ya.make @@ -227,6 +344,37 @@ BELONGS ya.make Links : http://www.apache.org/licenses/, http://www.apache.org/licenses/LICENSE-2.0, https://spdx.org/licenses/Apache-2.0 Files with this license: tcmalloc/internal/percpu_rseq_aarch64.S [4:14] + Belongs difference: + + ya.make + - tcmalloc/internal/ya.make + +KEEP Apache-2.0 2695f523f6550abd8506fe00ecd5fd73 +BELONGS ya.make + Note: matched license text is too long. Read it in the source files. + Scancode info: + Original SPDX id: Apache-2.0 + Score : 100.00 + Match type : NOTICE + Links : http://www.apache.org/licenses/, http://www.apache.org/licenses/LICENSE-2.0, https://spdx.org/licenses/Apache-2.0 + Files with this license: + tcmalloc/internal/profile.proto [3:13] + Belongs difference: + + ya.make + - tcmalloc/internal/ya.make + +SKIP LicenseRef-scancode-unknown-license-reference 2b7ce4d6f90a7e895f2cf83f68079656 +BELONGS ya.make +FILE_INCLUDE LICENSE found in files: README.md at line 43 + License text: + The TCMalloc library is licensed under the terms of the Apache license. See + LICENSE for more information. + Scancode info: + Original SPDX id: LicenseRef-scancode-unknown-license-reference + Score : 100.00 + Match type : REFERENCE + Links : https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/unknown-license-reference.LICENSE + Files with this license: + README.md [42:43] KEEP Apache-2.0 34ef0c6d1296bad9c0b8ea4447611e19 BELONGS ya.make @@ -237,21 +385,42 @@ BELONGS ya.make Match type : NOTICE Links : http://www.apache.org/licenses/, http://www.apache.org/licenses/LICENSE-2.0, https://spdx.org/licenses/Apache-2.0 Files with this license: + tcmalloc/.github/workflows/ci.yml [3:13] tcmalloc/BUILD [3:13] + tcmalloc/copts.bzl [3:13] + tcmalloc/internal/BUILD [3:13] + tcmalloc/selsan/BUILD [3:13] + tcmalloc/testing/BUILD [3:13] + tcmalloc/variants.bzl [3:13] + Belongs difference: + - tcmalloc/internal/ya.make + +KEEP Apache-2.0 3ea5060c4f08f5769674fbf0c0fb3992 +BELONGS ya.make + Note: matched license text is too long. Read it in the source files. 
+ Scancode info: + Original SPDX id: Apache-2.0 + Score : 100.00 + Match type : NOTICE + Links : http://www.apache.org/licenses/, http://www.apache.org/licenses/LICENSE-2.0, https://spdx.org/licenses/Apache-2.0 + Files with this license: + tcmalloc/internal/sampled_allocation_recorder.h [4:14] + tcmalloc/internal/sampled_allocation_recorder_test.cc [3:13] + Belongs difference: + + ya.make + - tcmalloc/internal/ya.make -KEEP Apache-2.0 566444825cbcc83578050639168bd08f +KEEP Apache-2.0 546d5c2ec03ff489fdf645b478946134 BELONGS ya.make -FILE_INCLUDE LICENSE found in files: README.md at line 42 License text: - The TCMalloc library is licensed under the terms of the Apache - license. See LICENSE for more information. + The TCMalloc library is licensed under the terms of the Apache license. See Scancode info: Original SPDX id: Apache-2.0 Score : 90.00 Match type : NOTICE Links : http://www.apache.org/licenses/, http://www.apache.org/licenses/LICENSE-2.0, https://spdx.org/licenses/Apache-2.0 Files with this license: - README.md [41:42] + README.md [42:42] SKIP LicenseRef-scancode-generic-cla 5d780ffa423067f23c6a123ae33e7c18 BELONGS ya.make @@ -274,8 +443,10 @@ BELONGS ya.make Match type : NOTICE Links : http://www.apache.org/licenses/, http://www.apache.org/licenses/LICENSE-2.0, https://spdx.org/licenses/Apache-2.0 Files with this license: - tcmalloc/internal/percpu_rseq_ppc.S [4:14] tcmalloc/internal/percpu_rseq_x86_64.S [4:14] + Belongs difference: + + ya.make + - tcmalloc/internal/ya.make SKIP LicenseRef-scancode-generic-cla 979d7de2e3ff119ee2c22c7efbec766d BELONGS ya.make @@ -301,31 +472,17 @@ BELONGS ya.make Files with this license: LICENSE [2:202] -SKIP LicenseRef-scancode-other-permissive cd348406a46a4c91e9edaa5be5e9c074 -BELONGS ya.make -FILE_INCLUDE LICENSE found in files: README.md at line 42 - # File LICENSES allready included - License text: - license. See LICENSE for more information. - Scancode info: - Original SPDX id: LicenseRef-scancode-unknown-license-reference - Score : 100.00 - Match type : REFERENCE - Links : https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/unknown-license-reference.LICENSE - Files with this license: - README.md [42:42] - -KEEP Apache-2.0 e0f9a998414a9ae203fd34f4452d4dbc +SKIP MIT f360ff14698610d13d4c35791f051cde BELONGS ya.make -FILE_INCLUDE LICENSE found in files: README.md at line 42 +FILE_INCLUDE LICENSE found in files: README.md at line 43 License text: \## License - The TCMalloc library is licensed under the terms of the Apache - license. See LICENSE for more information. + The TCMalloc library is licensed under the terms of the Apache license. See + LICENSE for more information. Scancode info: Original SPDX id: MIT Score : 52.63 Match type : NOTICE Links : http://opensource.org/licenses/mit-license.php, https://spdx.org/licenses/MIT Files with this license: - README.md [39:42] + README.md [40:43] diff --git a/contrib/libs/tcmalloc/.yandex_meta/licenses.list.txt b/contrib/libs/tcmalloc/.yandex_meta/licenses.list.txt index 7eb94bee02d9..9a38173d348c 100644 --- a/contrib/libs/tcmalloc/.yandex_meta/licenses.list.txt +++ b/contrib/libs/tcmalloc/.yandex_meta/licenses.list.txt @@ -244,10 +244,31 @@ ====================Apache-2.0==================== -## License +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. -The TCMalloc library is licensed under the terms of the Apache -license. See LICENSE for more information. + +====================Apache-2.0==================== +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. ====================Apache-2.0==================== @@ -265,222 +286,36 @@ license. See LICENSE for more information. ====================Apache-2.0==================== -The TCMalloc library is licensed under the terms of the Apache -license. See LICENSE for more information. +The TCMalloc library is licensed under the terms of the Apache license. See ====================COPYRIGHT==================== -// Copyright 2019 The TCMalloc Authors +// Copyright 2016 Google Inc. All Rights Reserved. ====================COPYRIGHT==================== -// Copyright 2020 The TCMalloc Authors +// Copyright 2018 The Abseil Authors. ====================COPYRIGHT==================== -// Copyright 2021 The TCMalloc Authors - - -====================File: LICENSE==================== - - Apache License - Version 2.0, January 2004 - https://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. 
- - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. +// Copyright 2019 The TCMalloc Authors - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. +====================COPYRIGHT==================== +// Copyright 2020 The TCMalloc Authors - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - END OF TERMS AND CONDITIONS +====================COPYRIGHT==================== +// Copyright 2021 The TCMalloc Authors - APPENDIX: How to apply the Apache License to your work. - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. +====================COPYRIGHT==================== +// Copyright 2022 The TCMalloc Authors - Copyright [yyyy] [name of copyright owner] - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +====================COPYRIGHT==================== +// Copyright 2023 The TCMalloc Authors - https://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +====================COPYRIGHT==================== +// Copyright 2024 The TCMalloc Authors diff --git a/contrib/libs/tcmalloc/README.md b/contrib/libs/tcmalloc/README.md index c848b4838d3f..46fc49c1fb9c 100644 --- a/contrib/libs/tcmalloc/README.md +++ b/contrib/libs/tcmalloc/README.md @@ -17,28 +17,29 @@ platform support for TCMalloc. All users of TCMalloc should consult the following documentation resources: -* The [TCMalloc Quickstart](docs/quickstart.md) covers downloading, installing, - building, and testing TCMalloc, including incorporating within your codebase. 
-* The [TCMalloc Overview](docs/overview.md) covers the basic architecture of - TCMalloc, and how that may affect configuration choices. -* The [TCMalloc Reference](docs/reference.md) covers the C and C++ TCMalloc API - endpoints. +* The [TCMalloc Quickstart](docs/quickstart.md) covers downloading, + installing, building, and testing TCMalloc, including incorporating within + your codebase. +* The [TCMalloc Overview](docs/overview.md) covers the basic architecture of + TCMalloc, and how that may affect configuration choices. +* The [TCMalloc Reference](docs/reference.md) covers the C and C++ TCMalloc + API endpoints. More advanced usages of TCMalloc may find the following documentation useful: -* The [TCMalloc Tuning Guide](docs/tuning.md) covers the configuration choices - in more depth, and also illustrates other ways to customize TCMalloc. This - also covers important operating system-level properties for improving TCMalloc - performance. -* The [TCMalloc Design Doc](docs/design.md) covers how TCMalloc works - underneath the hood, and why certain design choices were made. Most developers - will not need this level of implementation detail. -* The [TCMalloc Compatibility Guide](docs/compatibility.md) which documents our - expectations for how our APIs are used. +* The [TCMalloc Tuning Guide](docs/tuning.md) covers the configuration + choices in more depth, and also illustrates other ways to customize + TCMalloc. This also covers important operating system-level properties for + improving TCMalloc performance. +* The [TCMalloc Design Doc](docs/design.md) covers how TCMalloc works + underneath the hood, and why certain design choices were made. Most + developers will not need this level of implementation detail. +* The [TCMalloc Compatibility Guide](docs/compatibility.md) which documents + our expectations for how our APIs are used. ## License -The TCMalloc library is licensed under the terms of the Apache -license. See LICENSE for more information. +The TCMalloc library is licensed under the terms of the Apache license. See +LICENSE for more information. Disclaimer: This is not an officially supported Google product. 
diff --git a/contrib/libs/tcmalloc/common.inc b/contrib/libs/tcmalloc/common.inc index 077942c3871f..5acb463797d7 100644 --- a/contrib/libs/tcmalloc/common.inc +++ b/contrib/libs/tcmalloc/common.inc @@ -3,27 +3,33 @@ GLOBAL_SRCS( tcmalloc/tcmalloc.cc # Common Sources + tcmalloc/allocation_sample.cc + tcmalloc/allocation_sampling.cc tcmalloc/arena.cc tcmalloc/background.cc tcmalloc/central_freelist.cc tcmalloc/common.cc tcmalloc/cpu_cache.cc - tcmalloc/experimental_pow2_below64_size_class.cc + tcmalloc/deallocation_profiler.cc tcmalloc/experimental_pow2_size_class.cc - tcmalloc/legacy_size_classes.cc + tcmalloc/global_stats.cc tcmalloc/guarded_page_allocator.cc tcmalloc/huge_address_map.cc tcmalloc/huge_allocator.cc tcmalloc/huge_cache.cc tcmalloc/huge_page_aware_allocator.cc + tcmalloc/legacy_size_classes.cc tcmalloc/page_allocator.cc tcmalloc/page_allocator_interface.cc - tcmalloc/page_heap.cc tcmalloc/pagemap.cc tcmalloc/parameters.cc tcmalloc/peak_heap_tracker.cc + # tcmalloc/profile_marshaler.cc + tcmalloc/reuse_size_classes.cc tcmalloc/sampler.cc + tcmalloc/segv_handler.cc tcmalloc/size_classes.cc + tcmalloc/sizemap.cc tcmalloc/span.cc tcmalloc/stack_trace_table.cc tcmalloc/static_vars.cc @@ -34,21 +40,33 @@ GLOBAL_SRCS( # Common deps tcmalloc/experiment.cc - tcmalloc/noruntime_size_classes.cc # Internal libraries + tcmalloc/internal/affinity.cc + tcmalloc/internal/allocation_guard.cc tcmalloc/internal/cache_topology.cc tcmalloc/internal/environment.cc tcmalloc/internal/logging.cc tcmalloc/internal/memory_stats.cc + tcmalloc/internal/memory_tag.cc tcmalloc/internal/mincore.cc tcmalloc/internal/numa.cc + tcmalloc/internal/page_size.cc + tcmalloc/internal/pageflags.cc tcmalloc/internal/percpu.cc - tcmalloc/internal/percpu_rseq_asm.S tcmalloc/internal/percpu_rseq_unsupported.cc + tcmalloc/internal/percpu_rseq_asm.S + tcmalloc/internal/proc_maps.cc + # tcmalloc/internal/profile_builder.cc + tcmalloc/internal/residency.cc + tcmalloc/internal/sysinfo.cc tcmalloc/internal/util.cc ) +ADDINCL( + GLOBAL contrib/libs/tcmalloc +) + PEERDIR( contrib/restricted/abseil-cpp contrib/libs/tcmalloc/malloc_extension diff --git a/contrib/libs/tcmalloc/default/ya.make b/contrib/libs/tcmalloc/default/ya.make index 25a88b4bdeb2..1bc6ab6dacc1 100644 --- a/contrib/libs/tcmalloc/default/ya.make +++ b/contrib/libs/tcmalloc/default/ya.make @@ -5,15 +5,11 @@ WITHOUT_LICENSE_TEXTS() VERSION(2021-10-04-45c59ccbc062ac96d83710205033c656e490d376) LICENSE(Apache-2.0) + ALLOCATOR_IMPL() SRCDIR(contrib/libs/tcmalloc) INCLUDE(../common.inc) -GLOBAL_SRCS( - # Options - tcmalloc/want_hpaa_subrelease.cc -) - END() diff --git a/contrib/libs/tcmalloc/no_percpu_cache/ya.make b/contrib/libs/tcmalloc/no_percpu_cache/ya.make index a6b3ce700c4b..928d9092cf88 100644 --- a/contrib/libs/tcmalloc/no_percpu_cache/ya.make +++ b/contrib/libs/tcmalloc/no_percpu_cache/ya.make @@ -5,6 +5,7 @@ WITHOUT_LICENSE_TEXTS() VERSION(2021-10-04-45c59ccbc062ac96d83710205033c656e490d376) LICENSE(Apache-2.0) + ALLOCATOR_IMPL() SRCDIR(contrib/libs/tcmalloc) @@ -16,7 +17,9 @@ GLOBAL_SRCS( INCLUDE(../common.inc) -SRCS(aligned_alloc.c) +SRCS( + aligned_alloc.c +) CFLAGS( -DTCMALLOC_256K_PAGES diff --git a/contrib/libs/tcmalloc/numa_256k/ya.make b/contrib/libs/tcmalloc/numa_256k/ya.make index d6b3bb2df31e..5b43065230a0 100644 --- a/contrib/libs/tcmalloc/numa_256k/ya.make +++ b/contrib/libs/tcmalloc/numa_256k/ya.make @@ -5,6 +5,7 @@ WITHOUT_LICENSE_TEXTS() VERSION(2021-10-04-45c59ccbc062ac96d83710205033c656e490d376) LICENSE(Apache-2.0) + ALLOCATOR_IMPL() 
SRCDIR(contrib/libs/tcmalloc) @@ -13,7 +14,6 @@ INCLUDE(../common.inc) GLOBAL_SRCS( # Options - tcmalloc/want_hpaa_subrelease.cc tcmalloc/want_numa_aware.cc ) diff --git a/contrib/libs/tcmalloc/numa_large_pages/ya.make b/contrib/libs/tcmalloc/numa_large_pages/ya.make index 8cb48bd42319..0a94465472dd 100644 --- a/contrib/libs/tcmalloc/numa_large_pages/ya.make +++ b/contrib/libs/tcmalloc/numa_large_pages/ya.make @@ -5,6 +5,7 @@ WITHOUT_LICENSE_TEXTS() VERSION(2021-10-04-45c59ccbc062ac96d83710205033c656e490d376) LICENSE(Apache-2.0) + ALLOCATOR_IMPL() SRCDIR(contrib/libs/tcmalloc) @@ -13,7 +14,6 @@ INCLUDE(../common.inc) GLOBAL_SRCS( # Options - tcmalloc/want_hpaa_subrelease.cc tcmalloc/want_numa_aware.cc ) diff --git a/contrib/libs/tcmalloc/patches/010-fork.patch b/contrib/libs/tcmalloc/patches/010-fork.patch new file mode 100644 index 000000000000..d23b4de3d2b7 --- /dev/null +++ b/contrib/libs/tcmalloc/patches/010-fork.patch @@ -0,0 +1,400 @@ +diff --git a/tcmalloc/central_freelist.h b/tcmalloc/central_freelist.h +index 9fdcd83..90181e5 100644 +--- a/tcmalloc/central_freelist.h ++++ b/tcmalloc/central_freelist.h +@@ -131,6 +131,14 @@ class CentralFreeList { + size_t NumSpansInList(int n) ABSL_LOCKS_EXCLUDED(lock_); + SpanStats GetSpanStats() const; + ++ void AcquireInternalLocks() { ++ lock_.Lock(); ++ } ++ ++ void ReleaseInternalLocks() { ++ lock_.Unlock(); ++ } ++ + // Reports span utilization and lifetime histogram stats. + void PrintSpanUtilStats(Printer& out); + void PrintSpanLifetimeStats(Printer& out); +diff --git a/tcmalloc/cpu_cache.h b/tcmalloc/cpu_cache.h +index 164e06f..b9a6bd6 100644 +--- a/tcmalloc/cpu_cache.h ++++ b/tcmalloc/cpu_cache.h +@@ -487,6 +487,9 @@ class CpuCache { + void Print(Printer& out) const; + void PrintInPbtxt(PbtxtRegion& region) const; + ++ void AcquireInternalLocks(); ++ void ReleaseInternalLocks(); ++ + const Forwarder& forwarder() const { return forwarder_; } + + Forwarder& forwarder() { return forwarder_; } +@@ -2635,6 +2638,22 @@ inline void CpuCache::PrintInPbtxt(PbtxtRegion& region) const { + dynamic_slab_info_.madvise_failed_bytes.load(std::memory_order_relaxed)); + } + ++template ++inline void CpuCache::AcquireInternalLocks() { ++ int ncpus = absl::base_internal::NumCPUs(); ++ for (int cpu = 0; cpu < ncpus; ++cpu) { ++ resize_[cpu].lock.Lock(); ++ } ++} ++ ++template ++inline void CpuCache::ReleaseInternalLocks() { ++ int ncpus = absl::base_internal::NumCPUs(); ++ for (int cpu = 0; cpu < ncpus; ++cpu) { ++ resize_[cpu].lock.Unlock(); ++ } ++} ++ + template + inline void CpuCache::PerClassResizeInfo::Init() { + state_.store(0, std::memory_order_relaxed); +diff --git a/tcmalloc/guarded_page_allocator.cc b/tcmalloc/guarded_page_allocator.cc +index 8acfdc4..9e2a54a 100644 +--- a/tcmalloc/guarded_page_allocator.cc ++++ b/tcmalloc/guarded_page_allocator.cc +@@ -92,6 +92,14 @@ void GuardedPageAllocator::Reset() { + stacktrace_filter_.DecayAll(); + } + ++void GuardedPageAllocator::AcquireInternalLocks() { ++ guarded_page_lock_.Lock(); ++} ++ ++void GuardedPageAllocator::ReleaseInternalLocks() { ++ guarded_page_lock_.Unlock(); ++} ++ + GuardedAllocWithStatus GuardedPageAllocator::TrySample( + size_t size, size_t alignment, Length num_pages, + const StackTrace& stack_trace) { +diff --git a/tcmalloc/guarded_page_allocator.h b/tcmalloc/guarded_page_allocator.h +index 4330ab7..8bd5c9a 100644 +--- a/tcmalloc/guarded_page_allocator.h ++++ b/tcmalloc/guarded_page_allocator.h +@@ -114,6 +114,10 @@ class GuardedPageAllocator { + // and avoiding use-after-destruction 
issues for static/global instances. + void Destroy(); + ++ void AcquireInternalLocks() ABSL_LOCKS_EXCLUDED(guarded_page_lock_); ++ void ReleaseInternalLocks() ABSL_LOCKS_EXCLUDED(guarded_page_lock_); ++ ++ + // If this allocation can be guarded, and if it's time to do a guarded sample, + // returns an instance of GuardedAllocWithStatus, that includes guarded + // allocation Span and guarded status. Otherwise, returns nullptr and the +diff --git a/tcmalloc/internal/sampled_allocation_recorder.h b/tcmalloc/internal/sampled_allocation_recorder.h +index a3ef3cc..8e1ec85 100644 +--- a/tcmalloc/internal/sampled_allocation_recorder.h ++++ b/tcmalloc/internal/sampled_allocation_recorder.h +@@ -92,6 +92,9 @@ class SampleRecorder { + // Iterates over all the registered samples. + void Iterate(const absl::FunctionRef& f); + ++ void AcquireInternalLocks(); ++ void ReleaseInternalLocks(); ++ + private: + void PushNew(T* sample); + void PushDead(T* sample); +@@ -240,7 +243,17 @@ void SampleRecorder::Iterate( + } + } + +-} // namespace tcmalloc_internal ++template ++void SampleRecorder::AcquireInternalLocks() { ++ graveyard_.lock.Lock(); ++} ++ ++template ++void SampleRecorder::ReleaseInternalLocks() { ++ graveyard_.lock.Unlock(); ++} ++ ++} // namespace tcmalloc_internal + } // namespace tcmalloc + GOOGLE_MALLOC_SECTION_END + +diff --git a/tcmalloc/internal_malloc_extension.h b/tcmalloc/internal_malloc_extension.h +index 2f8b329..190d742 100644 +--- a/tcmalloc/internal_malloc_extension.h ++++ b/tcmalloc/internal_malloc_extension.h +@@ -154,6 +154,9 @@ ABSL_ATTRIBUTE_WEAK int64_t + MallocExtension_Internal_GetMaxTotalThreadCacheBytes(); + ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetMaxTotalThreadCacheBytes( + int64_t value); ++ ++ABSL_ATTRIBUTE_WEAK void ++MallocExtension_EnableForkSupport(); + } + + #endif +diff --git a/tcmalloc/malloc_extension.cc b/tcmalloc/malloc_extension.cc +index 1475faa..cee8ba3 100644 +--- a/tcmalloc/malloc_extension.cc ++++ b/tcmalloc/malloc_extension.cc +@@ -796,6 +796,14 @@ void MallocExtension::SetBackgroundReleaseRate(BytesPerSecond rate) { + #endif + } + ++void MallocExtension::EnableForkSupport() { ++#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS ++ if (&MallocExtension_EnableForkSupport != nullptr) { ++ MallocExtension_EnableForkSupport(); ++ } ++#endif ++} ++ + } // namespace tcmalloc + + // Default implementation just returns size. The expectation is that +diff --git a/tcmalloc/malloc_extension.h b/tcmalloc/malloc_extension.h +index 403520e..36fd433 100644 +--- a/tcmalloc/malloc_extension.h ++++ b/tcmalloc/malloc_extension.h +@@ -660,6 +660,10 @@ class MallocExtension final { + // Specifies the release rate from the page heap. ProcessBackgroundActions + // must be called for this to be operative. + static void SetBackgroundReleaseRate(BytesPerSecond rate); ++ ++ // Enables fork support. ++ // Allocator will continue to function correctly in the child, after calling fork(). 
++ static void EnableForkSupport(); + }; + + } // namespace tcmalloc +diff --git a/tcmalloc/static_vars.cc b/tcmalloc/static_vars.cc +index aaacbbb..464b49a 100644 +--- a/tcmalloc/static_vars.cc ++++ b/tcmalloc/static_vars.cc +@@ -123,6 +123,7 @@ ABSL_CONST_INIT MetadataObjectAllocator + Static::linked_sample_allocator_{arena_}; + ABSL_CONST_INIT std::atomic Static::inited_{false}; + ABSL_CONST_INIT std::atomic Static::cpu_cache_active_{false}; ++ABSL_CONST_INIT bool Static::fork_support_enabled_ = false; + ABSL_CONST_INIT Static::PageAllocatorStorage Static::page_allocator_; + ABSL_CONST_INIT PageMap Static::pagemap_; + ABSL_CONST_INIT GuardedPageAllocator Static::guardedpage_allocator_; +@@ -235,6 +236,14 @@ ABSL_ATTRIBUTE_COLD ABSL_ATTRIBUTE_NOINLINE void Static::SlowInitIfNecessary() { + guardedpage_allocator_.Init(/*max_allocated_pages=*/64, + /*total_pages=*/128); + inited_.store(true, std::memory_order_release); ++ ++ // TODO: this is called with inited_ = true, so it looks like a race condition ++ pageheap_lock.Unlock(); ++ pthread_atfork( ++ TCMallocPreFork, ++ TCMallocPostFork, ++ TCMallocPostFork); ++ pageheap_lock.Lock(); + } + } + +diff --git a/tcmalloc/static_vars.h b/tcmalloc/static_vars.h +index af1c14b..58f706c 100644 +--- a/tcmalloc/static_vars.h ++++ b/tcmalloc/static_vars.h +@@ -72,6 +72,9 @@ enum class SizeClassConfiguration { + + bool tcmalloc_big_span(); + ++void TCMallocPreFork(); ++void TCMallocPostFork(); ++ + class Static final { + public: + constexpr Static() = default; +@@ -172,6 +175,13 @@ class Static final { + cpu_cache_active_.store(true, std::memory_order_release); + } + ++ static bool ForkSupportEnabled() { ++ return fork_support_enabled_; ++ } ++ static void EnableForkSupport() { ++ fork_support_enabled_ = true; ++ } ++ + static bool ABSL_ATTRIBUTE_ALWAYS_INLINE HaveHooks() { + return false; + } +@@ -215,6 +225,7 @@ class Static final { + linked_sample_allocator_; + ABSL_CONST_INIT static std::atomic inited_; + ABSL_CONST_INIT static std::atomic cpu_cache_active_; ++ ABSL_CONST_INIT static bool fork_support_enabled_; + ABSL_CONST_INIT static PeakHeapTracker peak_heap_tracker_; + ABSL_CONST_INIT static NumaTopology + numa_topology_; +diff --git a/tcmalloc/system-alloc.h b/tcmalloc/system-alloc.h +index 84280cf..06392e2 100644 +--- a/tcmalloc/system-alloc.h ++++ b/tcmalloc/system-alloc.h +@@ -142,6 +142,13 @@ class SystemAllocator { + [[nodiscard]] void* MmapAligned(size_t size, size_t alignment, MemoryTag tag) + ABSL_LOCKS_EXCLUDED(spinlock_); + ++ void AcquireInternalLocks() { ++ spinlock_.Lock(); ++ } ++ void ReleaseInternalLocks() { ++ spinlock_.Unlock(); ++ } ++ + private: + const Topology& topology_; + +diff --git a/tcmalloc/tcmalloc.cc b/tcmalloc/tcmalloc.cc +index 062d257..846ab86 100644 +--- a/tcmalloc/tcmalloc.cc ++++ b/tcmalloc/tcmalloc.cc +@@ -117,6 +117,7 @@ + #include "tcmalloc/tcmalloc_policy.h" + #include "tcmalloc/thread_cache.h" + #include "tcmalloc/transfer_cache.h" ++#include "thread_cache.h" + + #if defined(TCMALLOC_HAVE_STRUCT_MALLINFO) || \ + defined(TCMALLOC_HAVE_STRUCT_MALLINFO2) +@@ -338,6 +339,44 @@ extern "C" size_t MallocExtension_Internal_ReleaseMemoryToSystem( + /*reason=*/PageReleaseReason::kReleaseMemoryToSystem); + } + ++extern "C" void MallocExtension_EnableForkSupport() { ++ Static::EnableForkSupport(); ++} ++ ++void TCMallocPreFork() { ++ if (!Static::ForkSupportEnabled()) { ++ return; ++ } ++ ++ if (Static::CpuCacheActive()) { ++ Static::cpu_cache().AcquireInternalLocks(); ++ } ++ 
Static::transfer_cache().AcquireInternalLocks(); ++ Static::guardedpage_allocator().AcquireInternalLocks(); ++ release_lock.Lock(); ++ pageheap_lock.Lock(); ++ Static::system_allocator().AcquireInternalLocks(); ++ ThreadCache::AcquireInternalLocks(); ++ Static::sampled_allocation_recorder().AcquireInternalLocks(); ++} ++ ++void TCMallocPostFork() { ++ if (!Static::ForkSupportEnabled()) { ++ return; ++ } ++ Static::system_allocator().ReleaseInternalLocks(); ++ pageheap_lock.Unlock(); ++ Static::guardedpage_allocator().ReleaseInternalLocks(); ++ release_lock.Unlock(); ++ Static::transfer_cache().ReleaseInternalLocks(); ++ if (Static::CpuCacheActive()) { ++ Static::cpu_cache().ReleaseInternalLocks(); ++ } ++ ThreadCache::ReleaseInternalLocks(); ++ Static::sampled_allocation_recorder().ReleaseInternalLocks(); ++} ++ ++ + // nallocx slow path. + // Moved to a separate function because size_class_with_alignment is not inlined + // which would cause nallocx to become non-leaf function with stack frame and +diff --git a/tcmalloc/tcmalloc.h b/tcmalloc/tcmalloc.h +index b908aff..c877168 100644 +--- a/tcmalloc/tcmalloc.h ++++ b/tcmalloc/tcmalloc.h +@@ -158,6 +158,9 @@ ABSL_ATTRIBUTE_UNUSED void TCMallocInternalDeleteArrayAlignedNothrow( + ABSL_ATTRIBUTE_SECTION(google_malloc); + #endif + ++void TCMallocInternalAcquireLocks(); ++void TCMallocInternalReleaseLocks(); ++ + } // extern "C" + + #endif // TCMALLOC_TCMALLOC_H_ +diff --git a/tcmalloc/thread_cache.cc b/tcmalloc/thread_cache.cc +index 0a6f038..ff50665 100644 +--- a/tcmalloc/thread_cache.cc ++++ b/tcmalloc/thread_cache.cc +@@ -424,6 +424,14 @@ void ThreadCache::set_overall_thread_cache_size(size_t new_size) { + RecomputePerThreadCacheSize(); + } + ++void ThreadCache::AcquireInternalLocks() { ++ threadcache_lock_.Lock(); ++} ++ ++void ThreadCache::ReleaseInternalLocks() { ++ threadcache_lock_.Unlock(); ++} ++ + } // namespace tcmalloc_internal + } // namespace tcmalloc + GOOGLE_MALLOC_SECTION_END +diff --git a/tcmalloc/thread_cache.h b/tcmalloc/thread_cache.h +index 2b94ac7..8ae5e76 100644 +--- a/tcmalloc/thread_cache.h ++++ b/tcmalloc/thread_cache.h +@@ -70,6 +70,9 @@ class ABSL_CACHELINE_ALIGNED ThreadCache { + return overall_thread_cache_size_.load(std::memory_order_relaxed); + } + ++ static void AcquireInternalLocks(); ++ static void ReleaseInternalLocks(); ++ + private: + // We inherit rather than include the list as a data structure to reduce + // compiler padding. 
Without inheritance, the compiler pads the list +diff --git a/tcmalloc/transfer_cache.h b/tcmalloc/transfer_cache.h +index b2b29d5..5af6dc8 100644 +--- a/tcmalloc/transfer_cache.h ++++ b/tcmalloc/transfer_cache.h +@@ -415,6 +415,18 @@ class TransferCacheManager : public StaticForwarder { + + void Init() { InitCaches(); } + ++ void AcquireInternalLocks() { ++ for (int i = 0; i < kNumClasses; ++i) { ++ cache_[i].tc.AcquireInternalLocks(); ++ } ++ } ++ ++ void ReleaseInternalLocks() { ++ for (int i = 0; i < kNumClasses; ++i) { ++ cache_[i].tc.ReleaseInternalLocks(); ++ } ++ } ++ + void InsertRange(int size_class, absl::Span batch) { + cache_[size_class].tc.InsertRange(size_class, batch); + } +diff --git a/tcmalloc/transfer_cache_internals.h b/tcmalloc/transfer_cache_internals.h +index 2a3bd4c..d8f1031 100644 +--- a/tcmalloc/transfer_cache_internals.h ++++ b/tcmalloc/transfer_cache_internals.h +@@ -205,6 +205,16 @@ class TransferCache { + return freelist().RemoveRange(batch); + } + ++ void AcquireInternalLocks() { ++ lock_.Lock(); ++ freelist().AcquireInternalLocks(); ++ } ++ ++ void ReleaseInternalLocks() { ++ lock_.Unlock(); ++ freelist().ReleaseInternalLocks(); ++ } ++ + // We record the lowest value of info.used in a low water mark since the last + // call to TryPlunder. We plunder all those objects to the freelist, as the + // objects not used within a full cycle are unlikely to be used again. diff --git a/contrib/libs/tcmalloc/patches/020-user-data.patch b/contrib/libs/tcmalloc/patches/020-user-data.patch new file mode 100644 index 000000000000..811d4cacd2ab --- /dev/null +++ b/contrib/libs/tcmalloc/patches/020-user-data.patch @@ -0,0 +1,269 @@ +diff --git a/tcmalloc/allocation_sampling.h b/tcmalloc/allocation_sampling.h +index 2af67c8..023263a 100644 +--- a/tcmalloc/allocation_sampling.h ++++ b/tcmalloc/allocation_sampling.h +@@ -193,6 +193,7 @@ SampleifyAllocation(Static& state, Policy policy, size_t requested_size, + stack_trace.allocation_time = absl::Now(); + stack_trace.guarded_status = alloc_with_status.status; + stack_trace.allocation_type = policy.allocation_type(); ++ stack_trace.user_data = SampleUserDataSupport::UserData::Make(); + + // How many allocations does this sample represent, given the sampling + // frequency (weight) and its size. +diff --git a/tcmalloc/internal/logging.cc b/tcmalloc/internal/logging.cc +index 1cd8d18..a53c26a 100644 +--- a/tcmalloc/internal/logging.cc ++++ b/tcmalloc/internal/logging.cc +@@ -43,6 +43,10 @@ GOOGLE_MALLOC_SECTION_BEGIN + namespace tcmalloc { + namespace tcmalloc_internal { + ++ABSL_CONST_INIT SampleUserDataSupport::CreateSampleUserDataCallback* SampleUserDataSupport::create_sample_user_data_callback_ = nullptr; ++ABSL_CONST_INIT SampleUserDataSupport::CopySampleUserDataCallback* SampleUserDataSupport::copy_sample_user_data_callback_ = nullptr; ++ABSL_CONST_INIT SampleUserDataSupport::DestroySampleUserDataCallback* SampleUserDataSupport::destroy_sample_user_data_callback_ = nullptr; ++ + // Variables for storing crash output. Allocated statically since we + // may not be able to heap-allocate while crashing. 
+ ABSL_CONST_INIT static absl::base_internal::SpinLock crash_lock( +diff --git a/tcmalloc/internal/logging.h b/tcmalloc/internal/logging.h +index 2a5c761..f2ecc1d 100644 +--- a/tcmalloc/internal/logging.h ++++ b/tcmalloc/internal/logging.h +@@ -51,6 +51,87 @@ GOOGLE_MALLOC_SECTION_BEGIN + namespace tcmalloc { + namespace tcmalloc_internal { + ++class SampleUserDataSupport { ++public: ++ using CreateSampleUserDataCallback = void*(); ++ using CopySampleUserDataCallback = void*(void*); ++ using DestroySampleUserDataCallback = void(void*); ++ ++ class UserData { ++ public: ++ static UserData Make() { ++ return UserData{CreateSampleUserData()}; ++ } ++ ++ constexpr UserData() noexcept : ptr_(nullptr) {} ++ ++ UserData(const UserData& that) noexcept : ptr_(CopySampleUserData(that.ptr_)) {} ++ UserData& operator=(const UserData& that) noexcept { ++ DestroySampleUserData(ptr_); ++ ptr_ = CopySampleUserData(that.ptr_); ++ return *this; ++ } ++ ++ UserData(UserData&& that) noexcept : ptr_(that.ptr_) { ++ that.ptr_ = nullptr; ++ } ++ UserData& operator=(UserData&& that) noexcept { ++ if (this == &that) { ++ return *this; ++ } ++ DestroySampleUserData(ptr_); ++ ptr_ = that.ptr_; ++ that.ptr_ = nullptr; ++ return *this; ++ } ++ void Reset() { ++ DestroySampleUserData(ptr_); ++ ptr_ = nullptr; ++ } ++ ++ ~UserData() { ++ DestroySampleUserData(ptr_); ++ } ++ ++ void* Get() const { return ptr_; } ++ private: ++ UserData(void* ptr) noexcept : ptr_(ptr) {} ++ private: ++ void* ptr_; ++ }; ++ ++ static void Enable(CreateSampleUserDataCallback create, ++ CopySampleUserDataCallback copy, ++ DestroySampleUserDataCallback destroy) { ++ create_sample_user_data_callback_ = create; ++ copy_sample_user_data_callback_ = copy; ++ destroy_sample_user_data_callback_ = destroy; ++ } ++private: ++ static void* CreateSampleUserData() { ++ if (create_sample_user_data_callback_ != nullptr) { ++ return create_sample_user_data_callback_(); ++ } ++ return nullptr; ++ } ++ ++ static void* CopySampleUserData(void* ptr) noexcept { ++ if (copy_sample_user_data_callback_ != nullptr) { ++ return copy_sample_user_data_callback_(ptr); ++ } ++ return nullptr; ++ } ++ ++ static void DestroySampleUserData(void* ptr) noexcept { ++ if (destroy_sample_user_data_callback_ != nullptr) { ++ destroy_sample_user_data_callback_(ptr); ++ } ++ } ++ ABSL_CONST_INIT static CreateSampleUserDataCallback* create_sample_user_data_callback_; ++ ABSL_CONST_INIT static CopySampleUserDataCallback* copy_sample_user_data_callback_; ++ ABSL_CONST_INIT static DestroySampleUserDataCallback* destroy_sample_user_data_callback_; ++}; ++ + static constexpr int kMaxStackDepth = 64; + + // An opaque handle type used to identify allocations. +@@ -84,6 +165,8 @@ struct StackTrace { + // between the previous sample and this one + size_t weight; + ++ SampleUserDataSupport::UserData user_data; ++ + // Timestamp of allocation. 
+ absl::Time allocation_time; + +diff --git a/tcmalloc/internal/sampled_allocation_recorder.h b/tcmalloc/internal/sampled_allocation_recorder.h +index 8e1ec85..7f9818f 100644 +--- a/tcmalloc/internal/sampled_allocation_recorder.h ++++ b/tcmalloc/internal/sampled_allocation_recorder.h +@@ -169,6 +169,7 @@ void SampleRecorder::PushDead(T* sample) { + if (auto* dispose = dispose_.load(std::memory_order_relaxed)) { + dispose(*sample); + } ++ sample->sampled_stack.user_data.Reset(); + + AllocationGuardSpinLockHolder graveyard_lock(&graveyard_.lock); + AllocationGuardSpinLockHolder sample_lock(&sample->lock); +diff --git a/tcmalloc/internal_malloc_extension.h b/tcmalloc/internal_malloc_extension.h +index 190d742..dc0c0e0 100644 +--- a/tcmalloc/internal_malloc_extension.h ++++ b/tcmalloc/internal_malloc_extension.h +@@ -157,6 +157,12 @@ ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetMaxTotalThreadCacheBytes( + + ABSL_ATTRIBUTE_WEAK void + MallocExtension_EnableForkSupport(); ++ ++ABSL_ATTRIBUTE_WEAK void ++MallocExtension_SetSampleUserDataCallbacks( ++ tcmalloc::MallocExtension::CreateSampleUserDataCallback create, ++ tcmalloc::MallocExtension::CopySampleUserDataCallback copy, ++ tcmalloc::MallocExtension::DestroySampleUserDataCallback destroy); + } + + #endif +diff --git a/tcmalloc/malloc_extension.cc b/tcmalloc/malloc_extension.cc +index cee8ba3..b7ca15a 100644 +--- a/tcmalloc/malloc_extension.cc ++++ b/tcmalloc/malloc_extension.cc +@@ -804,6 +804,21 @@ void MallocExtension::EnableForkSupport() { + #endif + } + ++void MallocExtension::SetSampleUserDataCallbacks( ++ CreateSampleUserDataCallback create, ++ CopySampleUserDataCallback copy, ++ DestroySampleUserDataCallback destroy) { ++#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS ++ if (&MallocExtension_SetSampleUserDataCallbacks != nullptr) { ++ MallocExtension_SetSampleUserDataCallbacks(create, copy, destroy); ++ } ++#else ++ (void)create; ++ (void)copy; ++ (void)destroy; ++#endif ++} ++ + } // namespace tcmalloc + + // Default implementation just returns size. The expectation is that +diff --git a/tcmalloc/malloc_extension.h b/tcmalloc/malloc_extension.h +index 36fd433..702baa8 100644 +--- a/tcmalloc/malloc_extension.h ++++ b/tcmalloc/malloc_extension.h +@@ -214,6 +214,8 @@ class Profile final { + int depth; + void* stack[kMaxStackDepth]; + ++ void* user_data; ++ + // The following vars are used by the lifetime (deallocation) profiler. + uint64_t profile_id; + +@@ -664,6 +666,16 @@ class MallocExtension final { + // Enables fork support. + // Allocator will continue to function correctly in the child, after calling fork(). 
+ static void EnableForkSupport(); ++ ++ using CreateSampleUserDataCallback = void*(); ++ using CopySampleUserDataCallback = void*(void*); ++ using DestroySampleUserDataCallback = void(void*); ++ ++ // Sets callbacks for lifetime control of custom user data attached to allocation samples ++ static void SetSampleUserDataCallbacks( ++ CreateSampleUserDataCallback create, ++ CopySampleUserDataCallback copy, ++ DestroySampleUserDataCallback destroy); + }; + + } // namespace tcmalloc +diff --git a/tcmalloc/stack_trace_table.cc b/tcmalloc/stack_trace_table.cc +index cf57148..2de1a25 100644 +--- a/tcmalloc/stack_trace_table.cc ++++ b/tcmalloc/stack_trace_table.cc +@@ -88,6 +88,7 @@ void StackTraceTable::AddTrace(double sample_weight, const StackTrace& t) { + s->sample.span_start_address = t.span_start_address; + s->sample.guarded_status = t.guarded_status; + s->sample.type = t.allocation_type; ++ s->sample.user_data = t.user_data.Get(); + + static_assert(kMaxStackDepth <= Profile::Sample::kMaxStackDepth, + "Profile stack size smaller than internal stack sizes"); +diff --git a/tcmalloc/static_vars.h b/tcmalloc/static_vars.h +index 58f706c..010ceed 100644 +--- a/tcmalloc/static_vars.h ++++ b/tcmalloc/static_vars.h +@@ -26,6 +26,7 @@ + #include "absl/base/attributes.h" + #include "absl/base/optimization.h" + #include "absl/base/thread_annotations.h" ++#include "internal/logging.h" + #include "tcmalloc/allocation_sample.h" + #include "tcmalloc/arena.h" + #include "tcmalloc/central_freelist.h" +@@ -182,6 +183,14 @@ class Static final { + fork_support_enabled_ = true; + } + ++ ++ static void SetSampleUserDataCallbacks( ++ SampleUserDataSupport::CreateSampleUserDataCallback create, ++ SampleUserDataSupport::CopySampleUserDataCallback copy, ++ SampleUserDataSupport::DestroySampleUserDataCallback destroy) { ++ SampleUserDataSupport::Enable(create, copy, destroy); ++ } ++ + static bool ABSL_ATTRIBUTE_ALWAYS_INLINE HaveHooks() { + return false; + } +diff --git a/tcmalloc/tcmalloc.cc b/tcmalloc/tcmalloc.cc +index 846ab86..d4d4169 100644 +--- a/tcmalloc/tcmalloc.cc ++++ b/tcmalloc/tcmalloc.cc +@@ -376,6 +376,12 @@ void TCMallocPostFork() { + Static::sampled_allocation_recorder().ReleaseInternalLocks(); + } + ++extern "C" void MallocExtension_SetSampleUserDataCallbacks( ++ MallocExtension::CreateSampleUserDataCallback create, ++ MallocExtension::CopySampleUserDataCallback copy, ++ MallocExtension::DestroySampleUserDataCallback destroy) { ++ Static::SetSampleUserDataCallbacks(create, copy, destroy); ++} + + // nallocx slow path. 
+ // Moved to a separate function because size_class_with_alignment is not inlined diff --git a/contrib/libs/tcmalloc/patches/handler.patch b/contrib/libs/tcmalloc/patches/030-soft-limit-handler.patch similarity index 58% rename from contrib/libs/tcmalloc/patches/handler.patch rename to contrib/libs/tcmalloc/patches/030-soft-limit-handler.patch index ce30d7e1c2f8..77c54cbff8a3 100644 --- a/contrib/libs/tcmalloc/patches/handler.patch +++ b/contrib/libs/tcmalloc/patches/030-soft-limit-handler.patch @@ -1,6 +1,8 @@ ---- contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (index) -+++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (working tree) -@@ -468,6 +468,20 @@ void MallocExtension::EnableForkSupport() { +diff --git a/tcmalloc/malloc_extension.cc b/tcmalloc/malloc_extension.cc +index b7ca15a..dc4aeb5 100644 +--- a/tcmalloc/malloc_extension.cc ++++ b/tcmalloc/malloc_extension.cc +@@ -804,6 +804,20 @@ void MallocExtension::EnableForkSupport() { #endif } @@ -21,9 +23,11 @@ void MallocExtension::SetSampleUserDataCallbacks( CreateSampleUserDataCallback create, CopySampleUserDataCallback copy, ---- contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (index) -+++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (working tree) -@@ -475,6 +475,10 @@ class MallocExtension final { +diff --git a/tcmalloc/malloc_extension.h b/tcmalloc/malloc_extension.h +index 702baa8..0505971 100644 +--- a/tcmalloc/malloc_extension.h ++++ b/tcmalloc/malloc_extension.h +@@ -667,6 +667,10 @@ class MallocExtension final { // Allocator will continue to function correctly in the child, after calling fork(). static void EnableForkSupport(); @@ -34,16 +38,18 @@ using CreateSampleUserDataCallback = void*(); using CopySampleUserDataCallback = void*(void*); using DestroySampleUserDataCallback = void(void*); ---- contrib/libs/tcmalloc/tcmalloc/page_allocator.cc (index) -+++ contrib/libs/tcmalloc/tcmalloc/page_allocator.cc (working tree) -@@ -151,6 +151,10 @@ void PageAllocator::ShrinkToUsageLimit() { +diff --git a/tcmalloc/page_allocator.cc b/tcmalloc/page_allocator.cc +index 2f450d8..f723297 100644 +--- a/tcmalloc/page_allocator.cc ++++ b/tcmalloc/page_allocator.cc +@@ -138,6 +138,10 @@ void PageAllocator::ShrinkToUsageLimit(Length n) { warned = true; - Log(kLogWithStack, __FILE__, __LINE__, "Couldn't respect usage limit of ", - limit_, "and OOM is likely to follow."); + TC_LOG("Couldn't respect usage limit of %v and OOM is likely to follow.", + limits_[kSoft]); + + if (auto* handler = MallocExtension::GetSoftMemoryLimitHandler()) { + (*handler)(); + } } - bool PageAllocator::ShrinkHardBy(Length pages) { + bool PageAllocator::ShrinkHardBy(Length pages, LimitKind limit_kind) { diff --git a/contrib/libs/tcmalloc/patches/040-remove-conflicting-noexcept.patch b/contrib/libs/tcmalloc/patches/040-remove-conflicting-noexcept.patch new file mode 100644 index 000000000000..bd80a5d6bee6 --- /dev/null +++ b/contrib/libs/tcmalloc/patches/040-remove-conflicting-noexcept.patch @@ -0,0 +1,13 @@ +diff --git a/tcmalloc/libc_override.h b/tcmalloc/libc_override.h +index 424e6ee..9674e9b 100644 +--- a/tcmalloc/libc_override.h ++++ b/tcmalloc/libc_override.h +@@ -174,7 +174,7 @@ void sdallocx(void* ptr, size_t size, int flags) noexcept + TCMALLOC_ALIAS(TCMallocInternalSdallocx); + void* realloc(void* ptr, size_t size) TCMALLOC_NOTHROW + TCMALLOC_ALIAS(TCMallocInternalRealloc); +-void* reallocarray(void* ptr, size_t n, size_t size) TCMALLOC_NOTHROW ++void* reallocarray(void* ptr, size_t n, size_t size) + 
TCMALLOC_ALIAS(TCMallocInternalReallocArray); + void* calloc(size_t n, size_t size) TCMALLOC_NOTHROW + TCMALLOC_ALIAS(TCMallocInternalCalloc); diff --git a/contrib/libs/tcmalloc/patches/050-avoid-cycle.patch b/contrib/libs/tcmalloc/patches/050-avoid-cycle.patch new file mode 100644 index 000000000000..aa7d649be6ab --- /dev/null +++ b/contrib/libs/tcmalloc/patches/050-avoid-cycle.patch @@ -0,0 +1,13 @@ +diff --git a/tcmalloc/internal/profile_builder.h b/tcmalloc/internal/profile_builder.h +index 06f2abe..d50992e 100644 +--- a/tcmalloc/internal/profile_builder.h ++++ b/tcmalloc/internal/profile_builder.h +@@ -24,7 +24,7 @@ + #include + #include + +-#include "tcmalloc/internal/profile.pb.h" ++// #include "tcmalloc/internal/profile.pb.h" + #include "absl/container/btree_map.h" + #include "absl/container/flat_hash_map.h" + #include "absl/status/status.h" diff --git a/contrib/libs/tcmalloc/patches/060-system-headers.sh b/contrib/libs/tcmalloc/patches/060-system-headers.sh new file mode 100644 index 000000000000..7f5b4b26deaa --- /dev/null +++ b/contrib/libs/tcmalloc/patches/060-system-headers.sh @@ -0,0 +1,5 @@ +set -eux +# DTCC-1856 +find . -type f -name '*.h' | while read l; do + sed -i '1s/^/#pragma clang system_header\n/' ${l} +done \ No newline at end of file diff --git a/contrib/libs/tcmalloc/patches/900-undeprecate-rate-interval.patch b/contrib/libs/tcmalloc/patches/900-undeprecate-rate-interval.patch new file mode 100644 index 000000000000..cdb0e19dae1a --- /dev/null +++ b/contrib/libs/tcmalloc/patches/900-undeprecate-rate-interval.patch @@ -0,0 +1,24 @@ +diff --git a/tcmalloc/malloc_extension.h b/tcmalloc/malloc_extension.h +index 0505971..26e15b4 100644 +--- a/tcmalloc/malloc_extension.h ++++ b/tcmalloc/malloc_extension.h +@@ -476,11 +476,9 @@ class MallocExtension final { + + // The old names to get and set profile sampling intervals used "rate" to + // refer to intervals. Use of the below is deprecated to avoid confusion. +- ABSL_DEPRECATE_AND_INLINE() + static int64_t GetProfileSamplingRate() { + return GetProfileSamplingInterval(); + } +- ABSL_DEPRECATE_AND_INLINE() + static void SetProfileSamplingRate(int64_t rate) { + SetProfileSamplingInterval(rate); + } +@@ -488,7 +486,6 @@ class MallocExtension final { + static int64_t GetGuardedSamplingRate() { + return GetGuardedSamplingInterval(); + } +- ABSL_DEPRECATE_AND_INLINE() + static void SetGuardedSamplingRate(int64_t rate) { + SetGuardedSamplingInterval(rate); + } diff --git a/contrib/libs/tcmalloc/patches/fork.patch b/contrib/libs/tcmalloc/patches/fork.patch deleted file mode 100644 index 250339443190..000000000000 --- a/contrib/libs/tcmalloc/patches/fork.patch +++ /dev/null @@ -1,310 +0,0 @@ ---- contrib/libs/tcmalloc/tcmalloc/central_freelist.h (index) -+++ contrib/libs/tcmalloc/tcmalloc/central_freelist.h (working tree) -@@ -70,6 +70,14 @@ class CentralFreeList { - - SpanStats GetSpanStats() const; - -+ void AcquireInternalLocks() { -+ lock_.Lock(); -+ } -+ -+ void ReleaseInternalLocks() { -+ lock_.Unlock(); -+ } -+ - private: - // Release an object to spans. - // Returns object's span if it become completely free. 
---- contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc (index) -+++ contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc (working tree) -@@ -1031,6 +1031,20 @@ void CPUCache::PrintInPbtxt(PbtxtRegion *region) const { - } - } - -+void CPUCache::AcquireInternalLocks() { -+ for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus; -+ ++cpu) { -+ resize_[cpu].lock.Lock(); -+ } -+} -+ -+void CPUCache::ReleaseInternalLocks() { -+ for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus; -+ ++cpu) { -+ resize_[cpu].lock.Unlock(); -+ } -+} -+ - void CPUCache::PerClassResizeInfo::Init() { - state_.store(0, std::memory_order_relaxed); - } ---- contrib/libs/tcmalloc/tcmalloc/cpu_cache.h (index) -+++ contrib/libs/tcmalloc/tcmalloc/cpu_cache.h (working tree) -@@ -164,6 +164,9 @@ class CPUCache { - void Print(Printer* out) const; - void PrintInPbtxt(PbtxtRegion* region) const; - -+ void AcquireInternalLocks(); -+ void ReleaseInternalLocks(); -+ - private: - // Per-size-class freelist resizing info. - class PerClassResizeInfo { ---- contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h (index) -+++ contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h (working tree) -@@ -116,6 +116,10 @@ ABSL_ATTRIBUTE_WEAK int64_t - MallocExtension_Internal_GetMaxTotalThreadCacheBytes(); - ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetMaxTotalThreadCacheBytes( - int64_t value); -+ -+ABSL_ATTRIBUTE_WEAK void -+MallocExtension_EnableForkSupport(); -+ - } - - #endif ---- contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (index) -+++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (working tree) -@@ -460,6 +460,14 @@ void MallocExtension::SetBackgroundReleaseRate(BytesPerSecond rate) { - #endif - } - -+void MallocExtension::EnableForkSupport() { -+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS -+ if (&MallocExtension_EnableForkSupport != nullptr) { -+ MallocExtension_EnableForkSupport(); -+ } -+#endif -+} -+ - } // namespace tcmalloc - - // Default implementation just returns size. The expectation is that ---- contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (index) -+++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (working tree) -@@ -468,6 +468,10 @@ class MallocExtension final { - // Specifies the release rate from the page heap. ProcessBackgroundActions - // must be called for this to be operative. - static void SetBackgroundReleaseRate(BytesPerSecond rate); -+ -+ // Enables fork support. -+ // Allocator will continue to function correctly in the child, after calling fork(). 
-+ static void EnableForkSupport(); - }; - - } // namespace tcmalloc ---- contrib/libs/tcmalloc/tcmalloc/static_vars.cc (index) -+++ contrib/libs/tcmalloc/tcmalloc/static_vars.cc (working tree) -@@ -59,6 +59,7 @@ ABSL_CONST_INIT PageHeapAllocator - Static::bucket_allocator_; - ABSL_CONST_INIT std::atomic Static::inited_{false}; - ABSL_CONST_INIT bool Static::cpu_cache_active_ = false; -+ABSL_CONST_INIT bool Static::fork_support_enabled_ = false; - ABSL_CONST_INIT Static::PageAllocatorStorage Static::page_allocator_; - ABSL_CONST_INIT PageMap Static::pagemap_; - ABSL_CONST_INIT absl::base_internal::SpinLock guarded_page_lock( -@@ -116,6 +117,13 @@ ABSL_ATTRIBUTE_COLD ABSL_ATTRIBUTE_NOINLINE void Static::SlowInitIfNecessary() { - pagemap_.MapRootWithSmallPages(); - guardedpage_allocator_.Init(/*max_alloced_pages=*/64, /*total_pages=*/128); - inited_.store(true, std::memory_order_release); -+ -+ pageheap_lock.Unlock(); -+ pthread_atfork( -+ TCMallocPreFork, -+ TCMallocPostFork, -+ TCMallocPostFork); -+ pageheap_lock.Lock(); - } - } - ---- contrib/libs/tcmalloc/tcmalloc/static_vars.h (index) -+++ contrib/libs/tcmalloc/tcmalloc/static_vars.h (working tree) -@@ -50,6 +50,9 @@ class CPUCache; - class PageMap; - class ThreadCache; - -+void TCMallocPreFork(); -+void TCMallocPostFork(); -+ - class Static { - public: - // True if InitIfNecessary() has run to completion. -@@ -124,6 +127,9 @@ class Static { - static void ActivateCPUCache() { cpu_cache_active_ = true; } - static void DeactivateCPUCache() { cpu_cache_active_ = false; } - -+ static bool ForkSupportEnabled() { return fork_support_enabled_; } -+ static void EnableForkSupport() { fork_support_enabled_ = true; } -+ - static bool ABSL_ATTRIBUTE_ALWAYS_INLINE IsOnFastPath() { - return - #ifndef TCMALLOC_DEPRECATED_PERTHREAD -@@ -169,6 +175,7 @@ class Static { - static PageHeapAllocator bucket_allocator_; - ABSL_CONST_INIT static std::atomic inited_; - static bool cpu_cache_active_; -+ static bool fork_support_enabled_; - ABSL_CONST_INIT static PeakHeapTracker peak_heap_tracker_; - ABSL_CONST_INIT static NumaTopology - numa_topology_; ---- contrib/libs/tcmalloc/tcmalloc/system-alloc.cc (index) -+++ contrib/libs/tcmalloc/tcmalloc/system-alloc.cc (working tree) -@@ -354,6 +354,14 @@ ABSL_CONST_INIT std::atomic system_release_errors = ATOMIC_VAR_INIT(0); - - } // namespace - -+void AcquireSystemAllocLock() { -+ spinlock.Lock(); -+} -+ -+void ReleaseSystemAllocLock() { -+ spinlock.Unlock(); -+} -+ - void* SystemAlloc(size_t bytes, size_t* actual_bytes, size_t alignment, - const MemoryTag tag) { - // If default alignment is set request the minimum alignment provided by ---- contrib/libs/tcmalloc/tcmalloc/system-alloc.h (index) -+++ contrib/libs/tcmalloc/tcmalloc/system-alloc.h (working tree) -@@ -50,6 +50,9 @@ void *SystemAlloc(size_t bytes, size_t *actual_bytes, size_t alignment, - // call to SystemRelease. - int SystemReleaseErrors(); - -+void AcquireSystemAllocLock(); -+void ReleaseSystemAllocLock(); -+ - // This call is a hint to the operating system that the pages - // contained in the specified range of memory will not be used for a - // while, and can be released for use by other processes or the OS. 
---- contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (index) -+++ contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (working tree) -@@ -1117,6 +1117,40 @@ extern "C" void MallocExtension_Internal_ReleaseMemoryToSystem( - } - } - -+extern "C" void MallocExtension_EnableForkSupport() { -+ Static::EnableForkSupport(); -+} -+ -+void TCMallocPreFork() { -+ if (!Static::ForkSupportEnabled()) { -+ return; -+ } -+ -+ if (Static::CPUCacheActive()) { -+ Static::cpu_cache().AcquireInternalLocks(); -+ } -+ Static::transfer_cache().AcquireInternalLocks(); -+ guarded_page_lock.Lock(); -+ release_lock.Lock(); -+ pageheap_lock.Lock(); -+ AcquireSystemAllocLock(); -+} -+ -+void TCMallocPostFork() { -+ if (!Static::ForkSupportEnabled()) { -+ return; -+ } -+ -+ ReleaseSystemAllocLock(); -+ pageheap_lock.Unlock(); -+ guarded_page_lock.Unlock(); -+ release_lock.Unlock(); -+ Static::transfer_cache().ReleaseInternalLocks(); -+ if (Static::CPUCacheActive()) { -+ Static::cpu_cache().ReleaseInternalLocks(); -+ } -+} -+ - // nallocx slow path. - // Moved to a separate function because size_class_with_alignment is not inlined - // which would cause nallocx to become non-leaf function with stack frame and ---- contrib/libs/tcmalloc/tcmalloc/tcmalloc.h (index) -+++ contrib/libs/tcmalloc/tcmalloc/tcmalloc.h (working tree) -@@ -120,4 +120,7 @@ void TCMallocInternalDeleteArrayNothrow(void* p, const std::nothrow_t&) __THROW - } - #endif - -+void TCMallocInternalAcquireLocks(); -+void TCMallocInternalReleaseLocks(); -+ - #endif // TCMALLOC_TCMALLOC_H_ ---- contrib/libs/tcmalloc/tcmalloc/transfer_cache.h (index) -+++ contrib/libs/tcmalloc/tcmalloc/transfer_cache.h (working tree) -@@ -176,6 +176,26 @@ class TransferCacheManager : public StaticForwarder { - } - } - -+ void AcquireInternalLocks() { -+ for (int i = 0; i < kNumClasses; ++i) { -+ if (implementation_ == TransferCacheImplementation::Ring) { -+ cache_[i].rbtc.AcquireInternalLocks(); -+ } else { -+ cache_[i].tc.AcquireInternalLocks(); -+ } -+ } -+ } -+ -+ void ReleaseInternalLocks() { -+ for (int i = 0; i < kNumClasses; ++i) { -+ if (implementation_ == TransferCacheImplementation::Ring) { -+ cache_[i].rbtc.ReleaseInternalLocks(); -+ } else { -+ cache_[i].tc.ReleaseInternalLocks(); -+ } -+ } -+ } -+ - void InsertRange(int size_class, absl::Span batch) { - if (implementation_ == TransferCacheImplementation::Ring) { - cache_[size_class].rbtc.InsertRange(size_class, batch); -@@ -295,6 +315,9 @@ class TransferCacheManager { - return TransferCacheImplementation::None; - } - -+ void AcquireInternalLocks() {} -+ void ReleaseInternalLocks() {} -+ - private: - CentralFreeList freelist_[kNumClasses]; - } ABSL_CACHELINE_ALIGNED; ---- contrib/libs/tcmalloc/tcmalloc/transfer_cache_internals.h (index) -+++ contrib/libs/tcmalloc/tcmalloc/transfer_cache_internals.h (working tree) -@@ -366,6 +366,18 @@ class TransferCache { - return freelist_do_not_access_directly_; - } - -+ void AcquireInternalLocks() -+ { -+ freelist().AcquireInternalLocks(); -+ lock_.Lock(); -+ } -+ -+ void ReleaseInternalLocks() -+ { -+ lock_.Unlock(); -+ freelist().ReleaseInternalLocks(); -+ } -+ - private: - // Returns first object of the i-th slot. - void **GetSlot(size_t i) ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { -@@ -468,6 +480,18 @@ class RingBufferTransferCache { - - // These methods all do internal locking. 
- -+ void AcquireInternalLocks() -+ { -+ freelist().AcquireInternalLocks(); -+ lock_.Lock(); -+ } -+ -+ void ReleaseInternalLocks() -+ { -+ lock_.Unlock(); -+ freelist().ReleaseInternalLocks(); -+ } -+ - // Insert the specified batch into the transfer cache. N is the number of - // elements in the range. RemoveRange() is the opposite operation. - void InsertRange(int size_class, absl::Span batch) diff --git a/contrib/libs/tcmalloc/patches/userdata.patch b/contrib/libs/tcmalloc/patches/userdata.patch deleted file mode 100644 index 83373cebfe0a..000000000000 --- a/contrib/libs/tcmalloc/patches/userdata.patch +++ /dev/null @@ -1,220 +0,0 @@ ---- contrib/libs/tcmalloc/tcmalloc/internal/logging.h (index) -+++ contrib/libs/tcmalloc/tcmalloc/internal/logging.h (working tree) -@@ -67,6 +67,8 @@ struct StackTrace { - // between the previous sample and this one - size_t weight; - -+ void* user_data; -+ - template - friend H AbslHashValue(H h, const StackTrace& t) { - // As we use StackTrace as a key-value node in StackTraceTable, we only ---- contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h (index) -+++ contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h (working tree) -@@ -120,6 +120,12 @@ ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetMaxTotalThreadCacheBytes( - ABSL_ATTRIBUTE_WEAK void - MallocExtension_EnableForkSupport(); - -+ABSL_ATTRIBUTE_WEAK void -+MallocExtension_SetSampleUserDataCallbacks( -+ tcmalloc::MallocExtension::CreateSampleUserDataCallback create, -+ tcmalloc::MallocExtension::CopySampleUserDataCallback copy, -+ tcmalloc::MallocExtension::DestroySampleUserDataCallback destroy); -+ - } - - #endif ---- contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (index) -+++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (working tree) -@@ -468,6 +468,21 @@ void MallocExtension::EnableForkSupport() { - #endif - } - -+void MallocExtension::SetSampleUserDataCallbacks( -+ CreateSampleUserDataCallback create, -+ CopySampleUserDataCallback copy, -+ DestroySampleUserDataCallback destroy) { -+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS -+ if (&MallocExtension_SetSampleUserDataCallbacks != nullptr) { -+ MallocExtension_SetSampleUserDataCallbacks(create, copy, destroy); -+ } -+#else -+ (void)create; -+ (void)copy; -+ (void)destroy; -+#endif -+} -+ - } // namespace tcmalloc - - // Default implementation just returns size. The expectation is that ---- contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (index) -+++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (working tree) -@@ -94,6 +94,8 @@ class Profile final { - - int depth; - void* stack[kMaxStackDepth]; -+ -+ void* user_data; - }; - - void Iterate(absl::FunctionRef f) const; -@@ -472,6 +474,16 @@ class MallocExtension final { - // Enables fork support. - // Allocator will continue to function correctly in the child, after calling fork(). 
- static void EnableForkSupport(); -+ -+ using CreateSampleUserDataCallback = void*(); -+ using CopySampleUserDataCallback = void*(void*); -+ using DestroySampleUserDataCallback = void(void*); -+ -+ // Sets callbacks for lifetime control of custom user data attached to allocation samples -+ static void SetSampleUserDataCallbacks( -+ CreateSampleUserDataCallback create, -+ CopySampleUserDataCallback copy, -+ DestroySampleUserDataCallback destroy); - }; - - } // namespace tcmalloc ---- contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.cc (index) -+++ contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.cc (working tree) -@@ -55,6 +55,7 @@ void PeakHeapTracker::MaybeSaveSample() { - StackTrace *t = peak_sampled_span_stacks_, *next = nullptr; - while (t != nullptr) { - next = reinterpret_cast(t->stack[kMaxStackDepth - 1]); -+ Static::DestroySampleUserData(t->user_data); - Static::stacktrace_allocator().Delete(t); - t = next; - } -@@ -63,7 +64,9 @@ void PeakHeapTracker::MaybeSaveSample() { - for (Span* s : Static::sampled_objects_) { - t = Static::stacktrace_allocator().New(); - -- *t = *s->sampled_stack(); -+ StackTrace* sampled_stack = s->sampled_stack(); -+ *t = *sampled_stack; -+ t->user_data = Static::CopySampleUserData(sampled_stack->user_data); - if (t->depth == kMaxStackDepth) { - t->depth = kMaxStackDepth - 1; - } ---- contrib/libs/tcmalloc/tcmalloc/stack_trace_table.cc (index) -+++ contrib/libs/tcmalloc/tcmalloc/stack_trace_table.cc (working tree) -@@ -73,6 +73,7 @@ StackTraceTable::~StackTraceTable() { - Bucket* b = table_[i]; - while (b != nullptr) { - Bucket* next = b->next; -+ Static::DestroySampleUserData(b->trace.user_data); - Static::bucket_allocator().Delete(b); - b = next; - } -@@ -104,6 +105,7 @@ void StackTraceTable::AddTrace(double count, const StackTrace& t) { - b = Static::bucket_allocator().New(); - b->hash = h; - b->trace = t; -+ b->trace.user_data = Static::CopySampleUserData(t.user_data); - b->count = count; - b->total_weight = t.weight * count; - b->next = table_[idx]; -@@ -135,6 +137,8 @@ void StackTraceTable::Iterate( - e.requested_alignment = b->trace.requested_alignment; - e.allocated_size = allocated_size; - -+ e.user_data = b->trace.user_data; -+ - e.depth = b->trace.depth; - static_assert(kMaxStackDepth <= Profile::Sample::kMaxStackDepth, - "Profile stack size smaller than internal stack sizes"); ---- contrib/libs/tcmalloc/tcmalloc/static_vars.cc (index) -+++ contrib/libs/tcmalloc/tcmalloc/static_vars.cc (working tree) -@@ -60,6 +60,12 @@ ABSL_CONST_INIT PageHeapAllocator - ABSL_CONST_INIT std::atomic Static::inited_{false}; - ABSL_CONST_INIT bool Static::cpu_cache_active_ = false; - ABSL_CONST_INIT bool Static::fork_support_enabled_ = false; -+ABSL_CONST_INIT Static::CreateSampleUserDataCallback* -+ Static::create_sample_user_data_callback_ = nullptr; -+ABSL_CONST_INIT Static::CopySampleUserDataCallback* -+ Static::copy_sample_user_data_callback_ = nullptr; -+ABSL_CONST_INIT Static::DestroySampleUserDataCallback* -+ Static::destroy_sample_user_data_callback_ = nullptr; - ABSL_CONST_INIT Static::PageAllocatorStorage Static::page_allocator_; - ABSL_CONST_INIT PageMap Static::pagemap_; - ABSL_CONST_INIT absl::base_internal::SpinLock guarded_page_lock( ---- contrib/libs/tcmalloc/tcmalloc/static_vars.h (index) -+++ contrib/libs/tcmalloc/tcmalloc/static_vars.h (working tree) -@@ -130,6 +130,34 @@ class Static { - static bool ForkSupportEnabled() { return fork_support_enabled_; } - static void EnableForkSupport() { fork_support_enabled_ = true; } - -+ using 
CreateSampleUserDataCallback = void*(); -+ using CopySampleUserDataCallback = void*(void*); -+ using DestroySampleUserDataCallback = void(void*); -+ -+ static void SetSampleUserDataCallbacks( -+ CreateSampleUserDataCallback create, -+ CopySampleUserDataCallback copy, -+ DestroySampleUserDataCallback destroy) { -+ create_sample_user_data_callback_ = create; -+ copy_sample_user_data_callback_ = copy; -+ destroy_sample_user_data_callback_ = destroy; -+ } -+ -+ static void* CreateSampleUserData() { -+ if (create_sample_user_data_callback_) -+ return create_sample_user_data_callback_(); -+ return nullptr; -+ } -+ static void* CopySampleUserData(void* user_data) { -+ if (copy_sample_user_data_callback_) -+ return copy_sample_user_data_callback_(user_data); -+ return nullptr; -+ } -+ static void DestroySampleUserData(void* user_data) { -+ if (destroy_sample_user_data_callback_) -+ destroy_sample_user_data_callback_(user_data); -+ } -+ - static bool ABSL_ATTRIBUTE_ALWAYS_INLINE IsOnFastPath() { - return - #ifndef TCMALLOC_DEPRECATED_PERTHREAD -@@ -176,6 +204,9 @@ class Static { - ABSL_CONST_INIT static std::atomic inited_; - static bool cpu_cache_active_; - static bool fork_support_enabled_; -+ static CreateSampleUserDataCallback* create_sample_user_data_callback_; -+ static CopySampleUserDataCallback* copy_sample_user_data_callback_; -+ static DestroySampleUserDataCallback* destroy_sample_user_data_callback_; - ABSL_CONST_INIT static PeakHeapTracker peak_heap_tracker_; - ABSL_CONST_INIT static NumaTopology - numa_topology_; ---- contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (index) -+++ contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (working tree) -@@ -1151,6 +1151,13 @@ void TCMallocPostFork() { - } - } - -+extern "C" void MallocExtension_SetSampleUserDataCallbacks( -+ MallocExtension::CreateSampleUserDataCallback create, -+ MallocExtension::CopySampleUserDataCallback copy, -+ MallocExtension::DestroySampleUserDataCallback destroy) { -+ Static::SetSampleUserDataCallbacks(create, copy, destroy); -+} -+ - // nallocx slow path. 
- // Moved to a separate function because size_class_with_alignment is not inlined - // which would cause nallocx to become non-leaf function with stack frame and -@@ -1500,6 +1507,7 @@ static void* SampleifyAllocation(size_t requested_size, size_t weight, - tmp.requested_alignment = requested_alignment; - tmp.allocated_size = allocated_size; - tmp.weight = weight; -+ tmp.user_data = Static::CreateSampleUserData(); - - { - absl::base_internal::SpinLockHolder h(&pageheap_lock); -@@ -1629,6 +1637,7 @@ static void do_free_pages(void* ptr, const PageId p) { - 1); - } - notify_sampled_alloc = true; -+ Static::DestroySampleUserData(st->user_data); - Static::stacktrace_allocator().Delete(st); - } - if (IsSampledMemory(ptr)) { diff --git a/contrib/libs/tcmalloc/patches/yandex.patch b/contrib/libs/tcmalloc/patches/yandex.patch deleted file mode 100644 index 12d11f2dadf6..000000000000 --- a/contrib/libs/tcmalloc/patches/yandex.patch +++ /dev/null @@ -1,91 +0,0 @@ -commit ab4069ebdd376db4d32c29e1a2414565ec849249 -author: prime -date: 2021-10-07T14:52:42+03:00 - - Apply yandex patches - ---- contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc (5096009d22199137186c9a972bc88409d8ebd513) -+++ contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc (ab4069ebdd376db4d32c29e1a2414565ec849249) -@@ -1112,6 +1112,11 @@ extern "C" bool MallocExtension_Internal_GetPerCpuCachesActive() { - return tcmalloc::tcmalloc_internal::Static::CPUCacheActive(); - } - -+extern "C" void MallocExtension_Internal_DeactivatePerCpuCaches() { -+ tcmalloc::tcmalloc_internal::Parameters::set_per_cpu_caches(false); -+ tcmalloc::tcmalloc_internal::Static::DeactivateCPUCache(); -+} -+ - extern "C" int32_t MallocExtension_Internal_GetMaxPerCpuCacheSize() { - return tcmalloc::tcmalloc_internal::Parameters::max_per_cpu_cache_size(); - } ---- contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h (5096009d22199137186c9a972bc88409d8ebd513) -+++ contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h (ab4069ebdd376db4d32c29e1a2414565ec849249) -@@ -75,6 +75,7 @@ ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetMemoryLimit( - ABSL_ATTRIBUTE_WEAK bool MallocExtension_Internal_GetNumericProperty( - const char* name_data, size_t name_size, size_t* value); - ABSL_ATTRIBUTE_WEAK bool MallocExtension_Internal_GetPerCpuCachesActive(); -+ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_DeactivatePerCpuCaches(); - ABSL_ATTRIBUTE_WEAK int32_t MallocExtension_Internal_GetMaxPerCpuCacheSize(); - ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetSkipSubreleaseInterval( - absl::Duration* ret); ---- contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (5096009d22199137186c9a972bc88409d8ebd513) -+++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (ab4069ebdd376db4d32c29e1a2414565ec849249) -@@ -287,6 +287,16 @@ bool MallocExtension::PerCpuCachesActive() { - #endif - } - -+void MallocExtension::DeactivatePerCpuCaches() { -+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS -+ if (MallocExtension_Internal_DeactivatePerCpuCaches == nullptr) { -+ return; -+ } -+ -+ MallocExtension_Internal_DeactivatePerCpuCaches(); -+#endif -+} -+ - int32_t MallocExtension::GetMaxPerCpuCacheSize() { - #if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS - if (MallocExtension_Internal_GetMaxPerCpuCacheSize == nullptr) { ---- contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (5096009d22199137186c9a972bc88409d8ebd513) -+++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (ab4069ebdd376db4d32c29e1a2414565ec849249) -@@ -329,6 +329,11 @@ class MallocExtension final { - // Gets whether TCMalloc 
is using per-CPU caches. - static bool PerCpuCachesActive(); - -+ // Extension for unified agent. -+ // -+ // Should be removed in the future https://st.yandex-team.ru/UNIFIEDAGENT-321 -+ static void DeactivatePerCpuCaches(); -+ - // Gets the current maximum cache size per CPU cache. - static int32_t GetMaxPerCpuCacheSize(); - // Sets the maximum cache size per CPU cache. This is a per-core limit. ---- contrib/libs/tcmalloc/tcmalloc/static_vars.h (5096009d22199137186c9a972bc88409d8ebd513) -+++ contrib/libs/tcmalloc/tcmalloc/static_vars.h (ab4069ebdd376db4d32c29e1a2414565ec849249) -@@ -122,6 +122,7 @@ class Static { - return cpu_cache_active_; - } - static void ActivateCPUCache() { cpu_cache_active_ = true; } -+ static void DeactivateCPUCache() { cpu_cache_active_ = false; } - - static bool ABSL_ATTRIBUTE_ALWAYS_INLINE IsOnFastPath() { - return ---- contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (5096009d22199137186c9a972bc88409d8ebd513) -+++ contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (ab4069ebdd376db4d32c29e1a2414565ec849249) -@@ -2210,14 +2210,7 @@ extern "C" void* TCMallocInternalNewArray(size_t size) - TCMALLOC_ALIAS(TCMallocInternalNew); - #else - { -- void* p = fast_alloc(CppPolicy().WithoutHooks(), size); -- // We keep this next instruction out of fast_alloc for a reason: when -- // it's in, and new just calls fast_alloc, the optimizer may fold the -- // new call into fast_alloc, which messes up our whole section-based -- // stacktracing (see ABSL_ATTRIBUTE_SECTION, above). This ensures fast_alloc -- // isn't the last thing this fn calls, and prevents the folding. -- MallocHook::InvokeNewHook(p, size); -- return p; -+ return fast_alloc(CppPolicy().WithoutHooks(), size); - } - #endif // TCMALLOC_ALIAS - diff --git a/contrib/libs/tcmalloc/small_but_slow/ya.make b/contrib/libs/tcmalloc/small_but_slow/ya.make index 6a21988172f3..b0259ab095d9 100644 --- a/contrib/libs/tcmalloc/small_but_slow/ya.make +++ b/contrib/libs/tcmalloc/small_but_slow/ya.make @@ -5,6 +5,7 @@ WITHOUT_LICENSE_TEXTS() VERSION(2021-10-04-45c59ccbc062ac96d83710205033c656e490d376) LICENSE(Apache-2.0) + ALLOCATOR_IMPL() SRCDIR(contrib/libs/tcmalloc) diff --git a/contrib/libs/tcmalloc/tcmalloc/.github/CODEOWNERS b/contrib/libs/tcmalloc/tcmalloc/.github/CODEOWNERS new file mode 100644 index 000000000000..df21f0476bfb --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/.github/CODEOWNERS @@ -0,0 +1,5 @@ +# Default owners +* @ckennelly + +# Documentation +docs/* @manshreck diff --git a/contrib/libs/tcmalloc/tcmalloc/.github/workflows/ci.yml b/contrib/libs/tcmalloc/tcmalloc/.github/workflows/ci.yml new file mode 100644 index 000000000000..e4a93a4077fd --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/.github/workflows/ci.yml @@ -0,0 +1,63 @@ +# Copyright 2022 The TCMalloc Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+name: ci + +on: + push: + branches: + - master + + pull_request: + +jobs: + Linux: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + compiler: + - g++ + - clang++ + + name: "Build/Test ${{matrix.compiler}}" + steps: + - name: Cancel previous + uses: styfle/cancel-workflow-action@0.8.0 + with: + access_token: ${{ github.token }} + + - name: Prepare + run: | + sudo apt-get update -qq + sudo apt install -y g++ clang + + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Create Cache Timestamp + id: cache_timestamp + uses: nanzm/get-time-action@v1.1 + with: + format: 'YYYY-MM-DD-HH-mm-ss' + + - name: Mount bazel cache + uses: actions/cache@v2 + with: + path: "/home/runner/.cache/bazel" + key: bazelcache_${{matrix.compiler}}_${{ steps.cache_timestamp.outputs.time }} + restore-keys: bazelcache_${{matrix.compiler}}_ + + - name: Tests + run: CXX=${{matrix.compiler}} bazel test --test_output=errors //... diff --git a/contrib/libs/tcmalloc/tcmalloc/BUILD b/contrib/libs/tcmalloc/tcmalloc/BUILD index e618b85eec2a..feaf22b3c01b 100644 --- a/contrib/libs/tcmalloc/tcmalloc/BUILD +++ b/contrib/libs/tcmalloc/tcmalloc/BUILD @@ -18,9 +18,8 @@ # https://github.com/google/tcmalloc/tree/master/docs/design.md for a high-level description of # how this malloc works. -load("@rules_fuzzing//fuzzing:cc_defs.bzl", "cc_fuzz_test") load("//tcmalloc:copts.bzl", "TCMALLOC_DEFAULT_COPTS") -load("//tcmalloc:variants.bzl", "create_tcmalloc_benchmark", "create_tcmalloc_testsuite") +load("//tcmalloc:variants.bzl", "create_tcmalloc_benchmark", "create_tcmalloc_libraries", "create_tcmalloc_testsuite") package(default_visibility = ["//visibility:private"]) @@ -33,10 +32,7 @@ config_setting( flag_values = { "@bazel_tools//tools/cpp:compiler": "clang", }, - visibility = [ - "//tcmalloc/internal:__subpackages__", - "//tcmalloc/testing:__subpackages__", - ], + visibility = ["//tcmalloc:__subpackages__"], ) cc_library( @@ -47,11 +43,18 @@ cc_library( "experiment_config.h", ], copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc/testing:__pkg__", + ], deps = [ ":malloc_extension", + "//tcmalloc/internal:config", "//tcmalloc/internal:environment", "//tcmalloc/internal:logging", + "@com_google_absl//absl/base", "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/functional:function_ref", + "@com_google_absl//absl/hash", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", ], @@ -62,23 +65,28 @@ cc_library( tcmalloc_deps = [ ":experiment", ":malloc_extension", + ":malloc_tracing_extension", "@com_google_absl//absl/base", "@com_google_absl//absl/base:config", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:dynamic_annotations", - "@com_google_absl//absl/debugging:leak_check", "@com_google_absl//absl/debugging:stacktrace", "@com_google_absl//absl/debugging:symbolize", + "//tcmalloc/selsan", "@com_google_absl//absl/memory", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/numeric:bits", "//tcmalloc/internal:config", "//tcmalloc/internal:declarations", "//tcmalloc/internal:linked_list", "//tcmalloc/internal:logging", - "//tcmalloc/internal:memory_stats", + "//tcmalloc/internal:memory_tag", "//tcmalloc/internal:optimization", "//tcmalloc/internal:percpu", + "//tcmalloc/internal:sampled_allocation", ] # This library provides tcmalloc always @@ -86,66 +94,65 @@ cc_library( name = "tcmalloc", srcs = [ 
"libc_override.h", - "libc_override_gcc_and_weak.h", - "libc_override_glibc.h", - "libc_override_redefine.h", "tcmalloc.cc", "tcmalloc.h", ], - copts = TCMALLOC_DEFAULT_COPTS, + copts = ["-DTCMALLOC_INTERNAL_8K_PAGES"] + TCMALLOC_DEFAULT_COPTS, linkstatic = 1, visibility = ["//visibility:public"], deps = tcmalloc_deps + [ - ":common", + ":common_8k_pages", + "//tcmalloc/internal:allocation_guard", + "//tcmalloc/internal:overflow", + "//tcmalloc/internal:page_size", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:span", ], alwayslink = 1, ) -# Provides tcmalloc always; use per-thread mode. cc_library( - name = "tcmalloc_deprecated_perthread", + name = "tcmalloc_internal_methods_only", srcs = [ - "libc_override.h", - "libc_override_gcc_and_weak.h", - "libc_override_glibc.h", - "libc_override_redefine.h", "tcmalloc.cc", "tcmalloc.h", ], - copts = ["-DTCMALLOC_DEPRECATED_PERTHREAD"] + TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - visibility = [ - "//tcmalloc/internal:__pkg__", - "//tcmalloc/testing:__pkg__", + copts = TCMALLOC_DEFAULT_COPTS + [ + "-DTCMALLOC_INTERNAL_METHODS_ONLY", ], + linkstatic = 1, + visibility = ["//tcmalloc:__subpackages__"], deps = tcmalloc_deps + [ - ":common_deprecated_perthread", + ":common_8k_pages", + "//tcmalloc/internal:allocation_guard", + "//tcmalloc/internal:overflow", + "//tcmalloc/internal:page_size", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:span", ], alwayslink = 1, ) -# An opt tcmalloc build with ASSERTs forced on (by turning off -# NDEBUG). Useful for tracking down crashes in production binaries. -# To use add malloc = "//tcmalloc:opt_with_assertions" in your -# target's build rule. +# Provides tcmalloc always; use per-thread mode. cc_library( - name = "opt_with_assertions", + name = "tcmalloc_deprecated_perthread", srcs = [ "libc_override.h", - "libc_override_gcc_and_weak.h", - "libc_override_glibc.h", - "libc_override_redefine.h", "tcmalloc.cc", "tcmalloc.h", ], - copts = [ - "-O2", - "-UNDEBUG", - ] + TCMALLOC_DEFAULT_COPTS, + copts = ["-DTCMALLOC_DEPRECATED_PERTHREAD"] + TCMALLOC_DEFAULT_COPTS, linkstatic = 1, - visibility = ["//visibility:public"], + visibility = [ + ":tcmalloc_tests", + ], deps = tcmalloc_deps + [ - ":common", + ":common_deprecated_perthread", + "//tcmalloc/internal:allocation_guard", + "//tcmalloc/internal:overflow", + "//tcmalloc/internal:page_size", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:span", ], alwayslink = 1, ) @@ -155,188 +162,185 @@ cc_library( hdrs = ["size_class_info.h"], copts = TCMALLOC_DEFAULT_COPTS, deps = [ - "//tcmalloc/internal:logging", + "//tcmalloc/internal:config", + "@com_google_absl//absl/types:span", ], ) -# List of common source files used by the various tcmalloc libraries. 
-common_srcs = [ - "arena.cc", - "arena.h", - "background.cc", - "central_freelist.cc", - "central_freelist.h", - "common.cc", - "common.h", - "cpu_cache.cc", - "cpu_cache.h", - "experimental_pow2_below64_size_class.cc", - "experimental_pow2_size_class.cc", - "legacy_size_classes.cc", - "guarded_page_allocator.h", - "guarded_page_allocator.cc", - "huge_address_map.cc", - "huge_allocator.cc", - "huge_allocator.h", - "huge_cache.cc", - "huge_cache.h", - "huge_region.h", - "huge_page_aware_allocator.cc", - "huge_page_aware_allocator.h", - "huge_page_filler.h", - "huge_pages.h", - "page_allocator.cc", - "page_allocator.h", - "page_allocator_interface.cc", - "page_allocator_interface.h", - "page_heap.cc", - "page_heap.h", - "page_heap_allocator.h", - "pagemap.cc", - "pagemap.h", - "parameters.cc", - "peak_heap_tracker.cc", - "sampler.cc", - "sampler.h", - "size_classes.cc", - "span.cc", - "span.h", - "span_stats.h", - "stack_trace_table.cc", - "stack_trace_table.h", - "static_vars.cc", - "static_vars.h", - "stats.cc", - "system-alloc.cc", - "system-alloc.h", - "thread_cache.cc", - "thread_cache.h", - "tracking.h", - "transfer_cache_stats.h", - "transfer_cache.cc", - "transfer_cache.h", - "transfer_cache_internals.h", -] - -common_hdrs = [ - "arena.h", - "central_freelist.h", - "common.h", - "cpu_cache.h", - "guarded_page_allocator.h", - "huge_address_map.h", - "huge_allocator.h", - "tcmalloc_policy.h", - "huge_cache.h", - "huge_page_filler.h", - "huge_pages.h", - "huge_region.h", - "huge_page_aware_allocator.h", - "page_allocator.h", - "page_allocator_interface.h", - "page_heap.h", - "page_heap_allocator.h", - "pages.h", - "pagemap.h", - "parameters.h", - "peak_heap_tracker.h", - "sampler.h", - "span.h", - "span_stats.h", - "stack_trace_table.h", - "stats.h", - "static_vars.h", - "system-alloc.h", - "thread_cache.h", - "tracking.h", - "transfer_cache_stats.h", - "transfer_cache.h", - "transfer_cache_internals.h", -] - -common_deps = [ - ":experiment", - ":malloc_extension", - ":noruntime_size_classes", - ":size_class_info", - "@com_google_absl//absl/algorithm:container", - "@com_google_absl//absl/base", - "@com_google_absl//absl/base:config", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/base:dynamic_annotations", - "@com_google_absl//absl/container:fixed_array", - "@com_google_absl//absl/debugging:debugging_internal", - "@com_google_absl//absl/debugging:stacktrace", - "@com_google_absl//absl/debugging:symbolize", - "@com_google_absl//absl/synchronization", - "@com_google_absl//absl/hash:hash", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/strings:str_format", - "@com_google_absl//absl/time", - "@com_google_absl//absl/types:optional", - "@com_google_absl//absl/types:span", - "//tcmalloc/internal:atomic_stats_counter", - "@com_google_absl//absl/numeric:bits", - "//tcmalloc/internal:config", - "//tcmalloc/internal:declarations", - "//tcmalloc/internal:environment", - "//tcmalloc/internal:linked_list", - "//tcmalloc/internal:logging", - "//tcmalloc/internal:mincore", - "//tcmalloc/internal:numa", - "//tcmalloc/internal:cache_topology", - "//tcmalloc/internal:optimization", - "//tcmalloc/internal:parameter_accessors", - "//tcmalloc/internal:percpu", - "//tcmalloc/internal:percpu_tcmalloc", - "//tcmalloc/internal:range_tracker", - "//tcmalloc/internal:timeseries_tracker", - "//tcmalloc/internal:util", -] - -cc_library( +create_tcmalloc_libraries( name = "common", - srcs = common_srcs, - hdrs = common_hdrs, + srcs = [ + 
"allocation_sample.cc", + "allocation_sampling.cc", + "arena.cc", + "arena.h", + "background.cc", + "central_freelist.cc", + "central_freelist.h", + "common.cc", + "common.h", + "cpu_cache.cc", + "cpu_cache.h", + "deallocation_profiler.cc", + "experimental_pow2_size_class.cc", + "global_stats.cc", + "guarded_allocations.h", + "guarded_page_allocator.cc", + "guarded_page_allocator.h", + "hinted_tracker_lists.h", + "huge_address_map.cc", + "huge_allocator.cc", + "huge_allocator.h", + "huge_cache.cc", + "huge_cache.h", + "huge_page_aware_allocator.cc", + "huge_page_aware_allocator.h", + "huge_page_filler.h", + "huge_page_subrelease.h", + "huge_pages.h", + "huge_region.h", + "legacy_size_classes.cc", + "metadata_object_allocator.h", + "page_allocator.cc", + "page_allocator.h", + "page_allocator_interface.cc", + "page_allocator_interface.h", + "pagemap.cc", + "pagemap.h", + "parameters.cc", + "peak_heap_tracker.cc", + "reuse_size_classes.cc", + "sampler.cc", + "sampler.h", + "segv_handler.cc", + "segv_handler.h", + "size_classes.cc", + "sizemap.cc", + "span.cc", + "span.h", + "span_stats.h", + "stack_trace_table.cc", + "stack_trace_table.h", + "static_vars.cc", + "static_vars.h", + "stats.cc", + "system-alloc.cc", + "system-alloc.h", + "thread_cache.cc", + "thread_cache.h", + "transfer_cache.cc", + "transfer_cache.h", + "transfer_cache_internals.h", + "transfer_cache_stats.h", + ], + hdrs = [ + "allocation_sample.h", + "allocation_sampling.h", + "arena.h", + "central_freelist.h", + "common.h", + "cpu_cache.h", + "deallocation_profiler.h", + "global_stats.h", + "guarded_allocations.h", + "guarded_page_allocator.h", + "hinted_tracker_lists.h", + "huge_address_map.h", + "huge_allocator.h", + "huge_cache.h", + "huge_page_aware_allocator.h", + "huge_page_filler.h", + "huge_page_subrelease.h", + "huge_pages.h", + "huge_region.h", + "metadata_object_allocator.h", + "page_allocator.h", + "page_allocator_interface.h", + "pagemap.h", + "pages.h", + "parameters.h", + "peak_heap_tracker.h", + "sampler.h", + "segv_handler.h", + "sizemap.h", + "span.h", + "span_stats.h", + "stack_trace_table.h", + "static_vars.h", + "stats.h", + "system-alloc.h", + "tcmalloc_policy.h", + "thread_cache.h", + "transfer_cache.h", + "transfer_cache_internals.h", + "transfer_cache_stats.h", + ], copts = TCMALLOC_DEFAULT_COPTS, linkstatic = 1, - visibility = ["//tcmalloc:tcmalloc_tests"], - deps = common_deps, - alwayslink = 1, -) - -cc_library( - name = "common_deprecated_perthread", - srcs = common_srcs, - hdrs = common_hdrs, - copts = ["-DTCMALLOC_DEPRECATED_PERTHREAD"] + TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - deps = common_deps, - alwayslink = 1, -) - -# TEMPORARY. WILL BE REMOVED. -# Add a dep to this if you want your binary to use hugepage-aware -# allocator. 
-cc_library( - name = "want_hpaa", - srcs = ["want_hpaa.cc"], - copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS, - visibility = ["//visibility:public"], + visibility = [":tcmalloc_tests"], deps = [ + ":experiment", + ":malloc_extension", + ":malloc_tracing_extension", + ":metadata_allocator", + ":size_class_info", + "//tcmalloc/internal:allocation_guard", + "//tcmalloc/internal:atomic_stats_counter", + "//tcmalloc/internal:cache_topology", + "//tcmalloc/internal:clock", "//tcmalloc/internal:config", + "//tcmalloc/internal:cpu_utils", + "//tcmalloc/internal:environment", + "//tcmalloc/internal:explicitly_constructed", + "//tcmalloc/internal:exponential_biased", + "//tcmalloc/internal:linked_list", + "//tcmalloc/internal:logging", + "//tcmalloc/internal:memory_stats", + "//tcmalloc/internal:memory_tag", + "//tcmalloc/internal:mincore", + "//tcmalloc/internal:mismatched_delete_state", + "//tcmalloc/internal:numa", + "//tcmalloc/internal:optimization", + "//tcmalloc/internal:page_size", + "//tcmalloc/internal:pageflags", + "//tcmalloc/internal:parameter_accessors", + "//tcmalloc/internal:percpu", + "//tcmalloc/internal:percpu_tcmalloc", + "//tcmalloc/internal:prefetch", + "//tcmalloc/internal:range_tracker", + "//tcmalloc/internal:sampled_allocation", + "//tcmalloc/internal:sampled_allocation_recorder", + "//tcmalloc/internal:stacktrace_filter", + "//tcmalloc/internal:sysinfo", + "//tcmalloc/internal:timeseries_tracker", + "//tcmalloc/internal:util", + "//tcmalloc/selsan", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/base", "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/base:dynamic_annotations", + "@com_google_absl//absl/container:fixed_array", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/debugging:stacktrace", + "@com_google_absl//absl/functional:function_ref", + "@com_google_absl//absl/hash", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/numeric:bits", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:optional", + "@com_google_absl//absl/types:span", ], alwayslink = 1, ) # TEMPORARY. WILL BE REMOVED. # Add a dep to this if you want your binary to use hugepage-aware -# allocator with hpaa_subrelease=true. +# allocator. cc_library( - name = "want_hpaa_subrelease", - srcs = ["want_hpaa_subrelease.cc"], + name = "want_hpaa", + srcs = ["want_hpaa.cc"], copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS, visibility = ["//visibility:public"], deps = [ @@ -346,32 +350,25 @@ cc_library( alwayslink = 1, ) -# TEMPORARY. WILL BE REMOVED. -# Add a dep to this if you want your binary to not use hugepage-aware -# allocator. cc_library( - name = "want_no_hpaa", - srcs = ["want_no_hpaa.cc"], + # TODO(b/304135905): Remove this opt-out. + name = "want_disable_tcmalloc_big_span", + srcs = ["want_disable_tcmalloc_big_span.cc"], copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS, - visibility = ["//tcmalloc/testing:__pkg__"], - deps = [ - "//tcmalloc/internal:config", - "@com_google_absl//absl/base:core_headers", - ], + visibility = ["//visibility:public"], + deps = ["@com_google_absl//absl/base:core_headers"], alwayslink = 1, ) -# TEMPORARY. WILL BE REMOVED. -# Add a dep to this if you want your binary to use old span sizes. cc_library( - name = "want_legacy_spans", - srcs = ["want_legacy_spans.cc"], + # TODO(b/199203282, b/296281171): Remove this opt-out. 
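 #
 # Illustrative only: a binary would opt into one of these page-size variants
 # through Bazel's standard `malloc` attribute, for example:
 #
 #   cc_binary(
 #       name = "my_server",  # hypothetical target
 #       srcs = ["main.cc"],
 #       malloc = "//tcmalloc:tcmalloc_256k_pages",
 #   )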
+ name = "want_disable_huge_region_more_often", + srcs = ["want_disable_huge_region_more_often.cc"], copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS, - visibility = ["//tcmalloc/testing:__pkg__"], - deps = [ - "//tcmalloc/internal:config", - "@com_google_absl//absl/base:core_headers", + visibility = [ + "//tcmalloc/testing:__pkg__", ], + deps = ["@com_google_absl//absl/base:core_headers"], alwayslink = 1, ) @@ -393,135 +390,78 @@ cc_library( alwayslink = 1, ) -cc_library( - name = "runtime_size_classes", - srcs = ["runtime_size_classes.cc"], - hdrs = ["runtime_size_classes.h"], - copts = TCMALLOC_DEFAULT_COPTS, - visibility = ["//visibility:private"], - deps = [ - ":size_class_info", - "//tcmalloc/internal:environment", - "//tcmalloc/internal:logging", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/strings", - ], - alwayslink = 1, -) - -cc_library( - name = "noruntime_size_classes", - srcs = ["noruntime_size_classes.cc"], - hdrs = ["runtime_size_classes.h"], - copts = TCMALLOC_DEFAULT_COPTS, - deps = [ - ":size_class_info", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/strings", - ], - alwayslink = 1, -) - # TCMalloc with large pages is usually faster but fragmentation is higher. See # https://github.com/google/tcmalloc/tree/master/docs/tuning.md for more details. cc_library( name = "tcmalloc_large_pages", srcs = [ "libc_override.h", - "libc_override_gcc_and_weak.h", - "libc_override_glibc.h", - "libc_override_redefine.h", "tcmalloc.cc", "tcmalloc.h", ], - copts = ["-DTCMALLOC_LARGE_PAGES"] + TCMALLOC_DEFAULT_COPTS, + copts = ["-DTCMALLOC_INTERNAL_32K_PAGES"] + TCMALLOC_DEFAULT_COPTS, linkstatic = 1, visibility = ["//visibility:public"], deps = tcmalloc_deps + [ ":common_large_pages", + "//tcmalloc/internal:allocation_guard", + "//tcmalloc/internal:overflow", + "//tcmalloc/internal:page_size", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:span", ], alwayslink = 1, ) -cc_library( - name = "common_large_pages", - srcs = common_srcs, - hdrs = common_hdrs, - copts = ["-DTCMALLOC_LARGE_PAGES"] + TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - visibility = ["//tcmalloc:tcmalloc_tests"], - deps = common_deps, - alwayslink = 1, -) - # TCMalloc with 256k pages is usually faster but fragmentation is higher. See # https://github.com/google/tcmalloc/tree/master/docs/tuning.md for more details. 
cc_library( name = "tcmalloc_256k_pages", srcs = [ "libc_override.h", - "libc_override_gcc_and_weak.h", - "libc_override_glibc.h", - "libc_override_redefine.h", "tcmalloc.cc", "tcmalloc.h", ], - copts = ["-DTCMALLOC_256K_PAGES"] + TCMALLOC_DEFAULT_COPTS, + copts = ["-DTCMALLOC_INTERNAL_256K_PAGES"] + TCMALLOC_DEFAULT_COPTS, linkstatic = 1, visibility = ["//visibility:public"], deps = tcmalloc_deps + [ ":common_256k_pages", + "//tcmalloc/internal:allocation_guard", + "//tcmalloc/internal:overflow", + "//tcmalloc/internal:page_size", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:span", ], alwayslink = 1, ) cc_library( - name = "common_256k_pages", - srcs = common_srcs, - hdrs = common_hdrs, - copts = ["-DTCMALLOC_256K_PAGES"] + TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - visibility = ["//tcmalloc:tcmalloc_tests"], - deps = common_deps, - alwayslink = 1, -) - -cc_library( - name = "tcmalloc_256k_pages_and_numa", + name = "tcmalloc_256k_pages_numa_aware", srcs = [ "libc_override.h", - "libc_override_gcc_and_weak.h", - "libc_override_glibc.h", - "libc_override_redefine.h", "tcmalloc.cc", "tcmalloc.h", ], copts = [ - "-DTCMALLOC_256K_PAGES", - "-DTCMALLOC_NUMA_AWARE", + "-DTCMALLOC_INTERNAL_256K_PAGES", + "-DTCMALLOC_INTERNAL_NUMA_AWARE", ] + TCMALLOC_DEFAULT_COPTS, linkstatic = 1, - visibility = ["//tcmalloc/testing:__pkg__"], + visibility = [ + ":tcmalloc_tests", + ], deps = tcmalloc_deps + [ - ":common_256k_pages_and_numa", + ":common_256k_pages_numa_aware", + "//tcmalloc/internal:allocation_guard", + "//tcmalloc/internal:overflow", + "//tcmalloc/internal:page_size", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:span", ], alwayslink = 1, ) -cc_library( - name = "common_256k_pages_and_numa", - srcs = common_srcs, - hdrs = common_hdrs, - copts = [ - "-DTCMALLOC_256K_PAGES", - "-DTCMALLOC_NUMA_AWARE", - ] + TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - visibility = ["//tcmalloc:tcmalloc_tests"], - deps = common_deps, - alwayslink = 1, -) - # TCMalloc small-but-slow is a a version of TCMalloc that chooses to minimize # fragmentation at a *severe* cost to performance. It should be used by # applications that have significant memory constraints, but don't need to @@ -532,32 +472,23 @@ cc_library( name = "tcmalloc_small_but_slow", srcs = [ "libc_override.h", - "libc_override_gcc_and_weak.h", - "libc_override_glibc.h", - "libc_override_redefine.h", "tcmalloc.cc", "tcmalloc.h", ], - copts = ["-DTCMALLOC_SMALL_BUT_SLOW"] + TCMALLOC_DEFAULT_COPTS, + copts = ["-DTCMALLOC_INTERNAL_SMALL_BUT_SLOW"] + TCMALLOC_DEFAULT_COPTS, linkstatic = 1, - visibility = ["//visibility:public"], + visibility = ["//tcmalloc:__subpackages__"], deps = tcmalloc_deps + [ ":common_small_but_slow", + "//tcmalloc/internal:allocation_guard", + "//tcmalloc/internal:overflow", + "//tcmalloc/internal:page_size", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:span", ], alwayslink = 1, ) -cc_library( - name = "common_small_but_slow", - srcs = common_srcs, - hdrs = common_hdrs, - copts = ["-DTCMALLOC_SMALL_BUT_SLOW"] + TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - visibility = ["//tcmalloc:tcmalloc_tests"], - deps = common_deps, - alwayslink = 1, -) - # TCMalloc with NUMA awareness compiled in. 
Note that by default NUMA awareness # will still be disabled at runtime - this default can be changed by adding a # dependency upon want_numa_aware, or overridden by setting the @@ -566,29 +497,44 @@ cc_library( name = "tcmalloc_numa_aware", srcs = [ "libc_override.h", - "libc_override_gcc_and_weak.h", - "libc_override_glibc.h", - "libc_override_redefine.h", "tcmalloc.cc", "tcmalloc.h", ], - copts = ["-DTCMALLOC_NUMA_AWARE"] + TCMALLOC_DEFAULT_COPTS, + copts = ["-DTCMALLOC_INTERNAL_NUMA_AWARE"] + TCMALLOC_DEFAULT_COPTS, linkstatic = 1, visibility = ["//tcmalloc/testing:__pkg__"], deps = tcmalloc_deps + [ ":common_numa_aware", + "//tcmalloc/internal:allocation_guard", + "//tcmalloc/internal:overflow", + "//tcmalloc/internal:page_size", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:span", ], alwayslink = 1, ) cc_library( - name = "common_numa_aware", - srcs = common_srcs, - hdrs = common_hdrs, - copts = ["-DTCMALLOC_NUMA_AWARE"] + TCMALLOC_DEFAULT_COPTS, + name = "tcmalloc_legacy_locking", + srcs = [ + "libc_override.h", + "tcmalloc.cc", + "tcmalloc.h", + ], + copts = [ + "-DTCMALLOC_INTERNAL_8K_PAGES", + "-DTCMALLOC_INTERNAL_LEGACY_LOCKING", + ] + TCMALLOC_DEFAULT_COPTS, linkstatic = 1, - visibility = ["//tcmalloc:tcmalloc_tests"], - deps = common_deps, + visibility = [":tcmalloc_tests"], + deps = tcmalloc_deps + [ + ":common_legacy_locking", + "//tcmalloc/internal:allocation_guard", + "//tcmalloc/internal:overflow", + "//tcmalloc/internal:page_size", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:span", + ], alwayslink = 1, ) @@ -600,48 +546,6 @@ package_group( ], ) -cc_library( - name = "headers_for_tests", - srcs = [ - "arena.h", - "central_freelist.h", - "guarded_page_allocator.h", - "huge_address_map.h", - "huge_allocator.h", - "huge_cache.h", - "huge_page_aware_allocator.h", - "huge_page_filler.h", - "huge_pages.h", - "huge_region.h", - "page_allocator.h", - "page_allocator_interface.h", - "page_heap.h", - "page_heap_allocator.h", - "pagemap.h", - "parameters.h", - "peak_heap_tracker.h", - "span_stats.h", - "stack_trace_table.h", - "tracking.h", - "transfer_cache.h", - "transfer_cache_internals.h", - "transfer_cache_stats.h", - ], - hdrs = [ - "common.h", - "pages.h", - "sampler.h", - "size_class_info.h", - "span.h", - "static_vars.h", - "stats.h", - "system-alloc.h", - ], - copts = TCMALLOC_DEFAULT_COPTS, - visibility = ["//tcmalloc:tcmalloc_tests"], - deps = common_deps, -) - cc_library( name = "mock_central_freelist", testonly = 1, @@ -649,7 +553,7 @@ cc_library( hdrs = ["mock_central_freelist.h"], copts = TCMALLOC_DEFAULT_COPTS, deps = [ - ":common", + ":common_8k_pages", "//tcmalloc/internal:logging", "@com_google_absl//absl/base", "@com_google_absl//absl/types:span", @@ -658,30 +562,46 @@ cc_library( ) cc_library( - name = "page_allocator_test_util", + name = "mock_static_forwarder", testonly = 1, - srcs = [ - "page_allocator_test_util.h", + hdrs = ["mock_static_forwarder.h"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common_8k_pages", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:span", + "@com_google_googletest//:gtest", ], - hdrs = ["page_allocator_test_util.h"], +) + +cc_library( + name = "mock_virtual_allocator", + testonly = 1, + hdrs = ["mock_virtual_allocator.h"], copts = TCMALLOC_DEFAULT_COPTS, - visibility = ["//tcmalloc:tcmalloc_tests"], deps = [ - ":common", - ":malloc_extension", + ":common_8k_pages", + "//tcmalloc/internal:config", + 
"//tcmalloc/internal:logging", + "@com_google_absl//absl/base:core_headers", ], ) -cc_test( - name = "page_heap_test", - srcs = ["page_heap_test.cc"], +cc_library( + name = "page_allocator_test_util", + testonly = 1, + srcs = [ + "page_allocator_test_util.h", + ], + hdrs = ["page_allocator_test_util.h"], copts = TCMALLOC_DEFAULT_COPTS, deps = [ - ":common", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/base", - "@com_google_absl//absl/memory", - "@com_google_googletest//:gtest_main", + ":common_8k_pages", + ":malloc_extension", + "//tcmalloc/internal:config", + "//tcmalloc/internal:logging", + "@com_google_absl//absl/types:span", ], ) @@ -692,7 +612,7 @@ cc_library( hdrs = ["mock_transfer_cache.h"], copts = TCMALLOC_DEFAULT_COPTS, deps = [ - ":common", + ":common_8k_pages", ":mock_central_freelist", "@com_google_absl//absl/random", "@com_google_absl//absl/random:distributions", @@ -700,31 +620,156 @@ cc_library( ], ) -cc_fuzz_test( - name = "transfer_cache_fuzz", +cc_library( + name = "mock_huge_page_static_forwarder", testonly = 1, - srcs = ["transfer_cache_fuzz.cc"], + srcs = ["mock_huge_page_static_forwarder.cc"], + hdrs = ["mock_huge_page_static_forwarder.h"], + deps = [ + ":common_8k_pages", + "//tcmalloc/internal:config", + "//tcmalloc/internal:logging", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/base:malloc_internal", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/hash", + "@com_google_absl//absl/numeric:bits", + "@com_google_absl//absl/time", + ], +) + +cc_test( + name = "huge_page_aware_allocator_fuzz", + srcs = ["huge_page_aware_allocator_fuzz.cc"], copts = TCMALLOC_DEFAULT_COPTS, - tags = [ - "noasan", - "nomsan", - "notsan", + data = glob(["testdata/huge_page_aware_allocator_fuzz/*"]), + deps = [ + ":common_8k_pages", + ":mock_huge_page_static_forwarder", + "//tcmalloc/internal:logging", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/time", + "@com_google_fuzztest//fuzztest", + "@com_google_fuzztest//fuzztest:fuzztest_gtest_main", + "@com_google_googletest//:gtest", + ], +) + +cc_test( + name = "huge_region_fuzz", + srcs = ["huge_region_fuzz.cc"], + args = ["--fuzztest_stack_limit_kb=2048"], + copts = TCMALLOC_DEFAULT_COPTS, + data = glob(["testdata/huge_region_fuzz/*"]), + deps = [ + ":common_8k_pages", + "//tcmalloc/internal:logging", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/log:check", + "@com_google_fuzztest//fuzztest", + "@com_google_fuzztest//fuzztest:fuzztest_gtest_main", + "@com_google_googletest//:gtest", + ], +) + +cc_test( + name = "huge_page_filler_fuzz", + srcs = ["huge_page_filler_fuzz.cc"], + args = ["--fuzztest_stack_limit_kb=2048"], + copts = TCMALLOC_DEFAULT_COPTS, + data = glob(["testdata/huge_page_filler_fuzz/*"]), + deps = [ + ":common_8k_pages", + "//tcmalloc/internal:allocation_guard", + "//tcmalloc/internal:clock", + "//tcmalloc/internal:logging", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/time", + "@com_google_fuzztest//fuzztest", + "@com_google_fuzztest//fuzztest:fuzztest_gtest_main", + "@com_google_googletest//:gtest", + ], +) + +cc_test( + name = "span_fuzz", + srcs = ["span_fuzz.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + 
data = glob(["testdata/span_fuzz/*"]), + deps = [ + ":common_8k_pages", + "//tcmalloc/internal:logging", + "@com_google_absl//absl/types:span", + "@com_google_fuzztest//fuzztest", + "@com_google_fuzztest//fuzztest:fuzztest_gtest_main", + "@com_google_googletest//:gtest", + ], +) + +cc_test( + name = "sizemap_fuzz", + srcs = ["sizemap_fuzz.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + data = glob(["testdata/sizemap_fuzz/*"]), + deps = [ + ":common_8k_pages", + ":size_class_info", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/types:span", + "@com_google_fuzztest//fuzztest", + "@com_google_fuzztest//fuzztest:fuzztest_gtest_main", + "@com_google_googletest//:gtest", + ], +) + +cc_test( + name = "central_freelist_fuzz", + srcs = ["central_freelist_fuzz.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + data = glob(["testdata/central_freelist_fuzz/*"]), + deps = [ + ":common_8k_pages", + ":mock_static_forwarder", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/types:span", + "@com_google_fuzztest//fuzztest", + "@com_google_fuzztest//fuzztest:fuzztest_gtest_main", + "@com_google_googletest//:gtest", ], +) + +cc_test( + name = "transfer_cache_fuzz", + srcs = ["transfer_cache_fuzz.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + data = glob(["testdata/transfer_cache_fuzz/*"]), deps = [ - ":common", + ":common_8k_pages", ":mock_central_freelist", ":mock_transfer_cache", + "//tcmalloc/internal:config", + "@com_google_absl//absl/log:check", + "@com_google_fuzztest//fuzztest", + "@com_google_fuzztest//fuzztest:fuzztest_gtest_main", + "@com_google_googletest//:gtest", ], ) -cc_test( +create_tcmalloc_testsuite( name = "arena_test", timeout = "moderate", srcs = ["arena_test.cc"], copts = TCMALLOC_DEFAULT_COPTS, + tags = [ + ], deps = [ - ":common", - "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", "@com_google_googletest//:gtest_main", ], ) @@ -734,16 +779,15 @@ cc_test( timeout = "moderate", srcs = ["transfer_cache_test.cc"], copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", shard_count = 3, deps = [ - ":common", + ":common_8k_pages", ":mock_central_freelist", ":mock_transfer_cache", + "//tcmalloc/internal:logging", + "//tcmalloc/internal:percpu", "//tcmalloc/testing:thread_manager", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/base", - "@com_google_absl//absl/random", - "@com_google_absl//absl/random:distributions", "@com_google_absl//absl/time", "@com_google_absl//absl/types:span", "@com_google_googletest//:gtest_main", @@ -756,10 +800,13 @@ create_tcmalloc_benchmark( copts = TCMALLOC_DEFAULT_COPTS, malloc = "//tcmalloc", deps = [ - ":common", + ":common_8k_pages", ":mock_central_freelist", ":mock_transfer_cache", + "//tcmalloc/internal:config", "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/random", + "@com_google_absl//absl/random:distributions", "@com_google_absl//absl/types:optional", ], ) @@ -769,10 +816,12 @@ cc_test( srcs = ["huge_cache_test.cc"], copts = TCMALLOC_DEFAULT_COPTS, deps = [ - ":common", + ":common_8k_pages", + ":mock_metadata_allocator", + ":mock_virtual_allocator", + "//tcmalloc/internal:clock", + "//tcmalloc/internal:config", "//tcmalloc/internal:logging", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/base", "@com_google_absl//absl/memory", "@com_google_absl//absl/random", "@com_google_absl//absl/strings", @@ -786,12 +835,36 @@ cc_test( srcs = ["huge_allocator_test.cc"], copts = TCMALLOC_DEFAULT_COPTS, deps = [ - ":common", + ":common_8k_pages", + 
":mock_metadata_allocator", + ":mock_virtual_allocator", + "//tcmalloc/internal:config", "//tcmalloc/internal:logging", - "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/random", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "huge_page_subrelease_test", + srcs = ["huge_page_subrelease_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common_8k_pages", + "//tcmalloc/internal:clock", + "//tcmalloc/internal:config", + "//tcmalloc/internal:logging", + "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/base", "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/memory", "@com_google_absl//absl/random", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", "@com_google_absl//absl/time", "@com_google_googletest//:gtest_main", ], @@ -799,23 +872,24 @@ cc_test( cc_test( name = "huge_page_filler_test", - timeout = "long", + timeout = "eternal", srcs = ["huge_page_filler_test.cc"], copts = TCMALLOC_DEFAULT_COPTS, linkstatic = 1, + shard_count = 3, deps = [ - ":common", + ":common_8k_pages", + "//tcmalloc/internal:clock", + "//tcmalloc/internal:config", "//tcmalloc/internal:logging", "@com_github_google_benchmark//:benchmark", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/base", "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/flags:flag", "@com_google_absl//absl/memory", "@com_google_absl//absl/random", - "@com_google_absl//absl/random:distributions", "@com_google_absl//absl/strings", "@com_google_absl//absl/synchronization", "@com_google_absl//absl/time", @@ -829,20 +903,24 @@ cc_test( copts = TCMALLOC_DEFAULT_COPTS, linkstatic = 1, malloc = "//tcmalloc", - tags = [ - ], deps = [ - ":common", + ":common_8k_pages", ":malloc_extension", + ":mock_huge_page_static_forwarder", ":page_allocator_test_util", + "//tcmalloc/internal:config", "//tcmalloc/internal:logging", + "//tcmalloc/internal:page_size", "//tcmalloc/testing:thread_manager", "@com_github_google_benchmark//:benchmark", "@com_google_absl//absl/base", "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/base:nullability", "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/meta:type_traits", "@com_google_absl//absl/random", + "@com_google_absl//absl/random:bit_gen_ref", + "@com_google_absl//absl/random:distributions", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/synchronization", @@ -856,12 +934,17 @@ cc_test( srcs = ["huge_region_test.cc"], copts = TCMALLOC_DEFAULT_COPTS, deps = [ - ":common", + ":common_8k_pages", + "//tcmalloc/internal:clock", "//tcmalloc/internal:logging", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/base", + "//tcmalloc/testing:thread_manager", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/container:fixed_array", + "@com_google_absl//absl/memory", "@com_google_absl//absl/random", + "@com_google_absl//absl/synchronization", "@com_google_absl//absl/time", + "@com_google_absl//absl/types:span", "@com_google_googletest//:gtest_main", ], ) @@ -872,10 +955,12 @@ create_tcmalloc_benchmark( copts = TCMALLOC_DEFAULT_COPTS, malloc = 
"//tcmalloc", deps = [ - ":common", + ":common_8k_pages", + ":malloc_extension", + "//tcmalloc/internal:config", "//tcmalloc/internal:logging", + "//tcmalloc/internal:page_size", "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/base", ], ) @@ -885,55 +970,58 @@ cc_test( copts = TCMALLOC_DEFAULT_COPTS, malloc = "//tcmalloc", deps = [ - ":common", + ":common_8k_pages", + ":malloc_extension", "//tcmalloc/internal:logging", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/base", + "//tcmalloc/internal:page_size", + "//tcmalloc/internal:sysinfo", + "//tcmalloc/testing:testutil", + "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/container:flat_hash_set", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/numeric:bits", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/time", "@com_google_googletest//:gtest_main", ], ) cc_test( - name = "pagemap_test", - srcs = ["pagemap_test.cc"], + name = "guarded_page_allocator_profile_test", + srcs = ["guarded_page_allocator_profile_test.cc"], copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + tags = [ + "noasan", + "nomsan", + "notsan", + ], deps = [ - ":common", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/random", + ":common_8k_pages", + ":malloc_extension", + "//tcmalloc/testing:testutil", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/functional:function_ref", + "@com_google_absl//absl/log:check", "@com_google_googletest//:gtest_main", ], ) -cc_test( - name = "realloc_test", - srcs = ["realloc_test.cc"], +create_tcmalloc_testsuite( + name = "pagemap_test", + srcs = ["pagemap_test.cc"], copts = TCMALLOC_DEFAULT_COPTS, - malloc = "//tcmalloc", deps = [ - "@com_github_google_benchmark//:benchmark", + "//tcmalloc/internal:config", "@com_google_absl//absl/random", - "@com_google_absl//absl/random:distributions", "@com_google_googletest//:gtest_main", ], ) -cc_test( +create_tcmalloc_testsuite( name = "stack_trace_table_test", srcs = ["stack_trace_table_test.cc"], copts = TCMALLOC_DEFAULT_COPTS, deps = [ - ":common", + ":malloc_extension", "//tcmalloc/internal:logging", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/base", "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/debugging:stacktrace", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@com_google_googletest//:gtest_main", @@ -941,88 +1029,36 @@ cc_test( ) cc_test( - name = "system-alloc_test", - srcs = ["system-alloc_test.cc"], + name = "malloc_extension_fuzz", + srcs = ["malloc_extension_fuzz.cc"], copts = TCMALLOC_DEFAULT_COPTS, - malloc = "//tcmalloc", - tags = ["nosan"], + data = glob(["testdata/malloc_extension_fuzz/*"]), deps = [ - ":common", ":malloc_extension", - "//tcmalloc/internal:logging", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/strings:str_format", - "@com_google_googletest//:gtest_main", + "@com_google_absl//absl/types:optional", + "@com_google_fuzztest//fuzztest", + "@com_google_fuzztest//fuzztest:fuzztest_gtest_main", + "@com_google_googletest//:gtest", ], ) -# This test has been named "large" since before tests were s/m/l. -# The "large" refers to large allocation sizes. 
-cc_test( - name = "tcmalloc_large_test", - size = "small", - timeout = "moderate", - srcs = ["tcmalloc_large_test.cc"], +cc_library( + name = "metadata_allocator", + hdrs = ["metadata_allocator.h"], copts = TCMALLOC_DEFAULT_COPTS, - malloc = "//tcmalloc", - tags = [ - "noasan", - "noubsan", - ], deps = [ - ":common", - ":malloc_extension", - "//tcmalloc/internal:logging", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/container:flat_hash_set", - "@com_google_absl//absl/container:node_hash_set", - "@com_google_googletest//:gtest_main", + "@com_google_absl//absl/base:core_headers", ], ) -cc_test( - name = "malloc_extension_system_malloc_test", - srcs = ["malloc_extension_system_malloc_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - malloc = "//tcmalloc/internal:system_malloc", - deps = [ - ":malloc_extension", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/random", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "malloc_extension_test", - srcs = ["malloc_extension_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - malloc = "//tcmalloc", - tags = [ - "nosan", - ], - deps = [ - ":malloc_extension", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest_main", - ], -) - -cc_fuzz_test( - name = "malloc_extension_fuzz", +cc_library( + name = "mock_metadata_allocator", testonly = 1, - srcs = ["malloc_extension_fuzz.cc"], + hdrs = ["mock_metadata_allocator.h"], copts = TCMALLOC_DEFAULT_COPTS, - tags = [ - "noasan", - "nomsan", - "notsan", - ], deps = [ - ":malloc_extension", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/types:optional", + ":metadata_allocator", + "@com_google_absl//absl/base:core_headers", ], ) @@ -1032,16 +1068,12 @@ cc_test( copts = TCMALLOC_DEFAULT_COPTS, linkstatic = 1, deps = [ - ":common", + ":common_8k_pages", ":malloc_extension", ":page_allocator_test_util", + "//tcmalloc/internal:config", "//tcmalloc/internal:logging", - "@com_github_google_benchmark//:benchmark", "@com_google_absl//absl/base", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/time", "@com_google_googletest//:gtest_main", ], ) @@ -1052,7 +1084,6 @@ cc_test( timeout = "long", srcs = ["profile_test.cc"], copts = TCMALLOC_DEFAULT_COPTS, - flaky = 1, # TODO(b/134690164) linkstatic = 1, malloc = "//tcmalloc", shard_count = 2, @@ -1066,9 +1097,24 @@ cc_test( "//tcmalloc/internal:declarations", "//tcmalloc/internal:linked_list", "//tcmalloc/testing:testutil", - "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/container:btree", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:optional", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "segv_handler_test", + srcs = ["segv_handler_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + ":common_8k_pages", + ":malloc_extension", + "//tcmalloc/internal:logging", "@com_google_googletest//:gtest_main", ], ) @@ -1081,7 +1127,10 @@ cc_test( linkstatic = 1, malloc = "//tcmalloc:tcmalloc_deprecated_perthread", tags = [ - "nosan", + "noasan", + "nomsan", + "notsan", + "noubsan", ], deps = [ ":malloc_extension", @@ -1089,8 +1138,8 @@ cc_test( "//tcmalloc/internal:memory_stats", "//tcmalloc/internal:parameter_accessors", "@com_github_google_benchmark//:benchmark", - 
"@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", "@com_google_googletest//:gtest_main", ], ) @@ -1101,70 +1150,28 @@ create_tcmalloc_testsuite( copts = TCMALLOC_DEFAULT_COPTS, deps = [ ":size_class_info", - "@com_github_google_benchmark//:benchmark", + "//tcmalloc/internal:config", + "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/random", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "size_classes_test_with_runtime_size_classes", - srcs = ["size_classes_with_runtime_size_classes_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - malloc = "//tcmalloc", - deps = [ - ":common", - ":runtime_size_classes", - ":size_class_info", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/strings:str_format", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "heap_profiling_test", - srcs = ["heap_profiling_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - malloc = "//tcmalloc", - tags = [ - "nosan", - ], - deps = [ - ":common", - ":malloc_extension", - "//tcmalloc/internal:logging", - "//tcmalloc/internal:parameter_accessors", - "@com_github_google_benchmark//:benchmark", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "runtime_size_classes_test", - srcs = ["runtime_size_classes_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - malloc = "//tcmalloc", - deps = [ - ":runtime_size_classes", - "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/types:span", "@com_google_googletest//:gtest_main", ], ) create_tcmalloc_testsuite( name = "span_test", + timeout = "long", srcs = ["span_test.cc"], copts = TCMALLOC_DEFAULT_COPTS, deps = [ + ":experiment", "//tcmalloc/internal:logging", - "@com_github_google_benchmark//:benchmark", "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/random", + "@com_google_absl//absl/types:span", "@com_google_googletest//:gtest_main", ], ) @@ -1175,11 +1182,13 @@ create_tcmalloc_benchmark( copts = TCMALLOC_DEFAULT_COPTS, malloc = ":tcmalloc", deps = [ - ":common", + ":common_8k_pages", + "//tcmalloc/internal:allocation_guard", + "//tcmalloc/internal:config", "//tcmalloc/internal:logging", "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/base", "@com_google_absl//absl/random", + "@com_google_absl//absl/types:span", ], ) @@ -1189,8 +1198,8 @@ cc_test( copts = TCMALLOC_DEFAULT_COPTS, malloc = "//tcmalloc", deps = [ - ":common", - "@com_github_google_benchmark//:benchmark", + ":common_8k_pages", + "//tcmalloc/internal:logging", "@com_google_absl//absl/base", "@com_google_absl//absl/time", "@com_google_googletest//:gtest_main", @@ -1202,8 +1211,8 @@ cc_test( srcs = ["huge_address_map_test.cc"], copts = TCMALLOC_DEFAULT_COPTS, deps = [ - ":common", - "@com_github_google_benchmark//:benchmark", + ":common_8k_pages", + ":mock_metadata_allocator", "@com_google_googletest//:gtest_main", ], ) @@ -1220,12 +1229,10 @@ cc_library( "//visibility:public", ], deps = [ - "//tcmalloc/internal:parameter_accessors", - "@com_google_absl//absl/base:config", + "@com_google_absl//absl/base", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:malloc_internal", "@com_google_absl//absl/functional:function_ref", - "@com_google_absl//absl/memory", 
"@com_google_absl//absl/strings", "@com_google_absl//absl/time", "@com_google_absl//absl/types:optional", @@ -1233,70 +1240,152 @@ cc_library( ], ) +cc_library( + name = "malloc_tracing_extension", + srcs = ["malloc_tracing_extension.cc"], + hdrs = [ + "internal_malloc_tracing_extension.h", + "malloc_tracing_extension.h", + ], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + ":__subpackages__", + ], + deps = [ + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + ], +) + +cc_library( + name = "new_extension", + hdrs = ["new_extension.h"], + copts = TCMALLOC_DEFAULT_COPTS, + deprecation = "Use :malloc_extension directly", + visibility = [":__subpackages__"], + deps = [ + ":malloc_extension", + "@com_google_absl//absl/base:core_headers", + ], +) + +create_tcmalloc_testsuite( + name = "new_extension_test", + srcs = ["new_extension_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":malloc_extension", + "//tcmalloc/internal:page_size", + "//tcmalloc/testing:testutil", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/numeric:bits", + "@com_google_absl//absl/random", + "@com_google_googletest//:gtest_main", + ], +) + cc_test( name = "experiment_config_test", srcs = ["experiment_config_test.cc"], copts = TCMALLOC_DEFAULT_COPTS, deps = [ ":experiment", - "@com_github_google_benchmark//:benchmark", "@com_google_googletest//:gtest_main", ], ) -cc_fuzz_test( +cc_test( name = "experiment_fuzz", - testonly = 1, srcs = ["experiment_fuzz.cc"], copts = TCMALLOC_DEFAULT_COPTS, deps = [ ":experiment", "@com_google_absl//absl/strings", + "@com_google_fuzztest//fuzztest", + "@com_google_fuzztest//fuzztest:fuzztest_gtest_main", + "@com_google_googletest//:gtest", ], ) -cc_fuzz_test( - name = "runtime_size_classes_fuzz", - testonly = 1, - srcs = ["runtime_size_classes_fuzz.cc"], +cc_test( + name = "cpu_cache_test", + timeout = "long", + srcs = ["cpu_cache_test.cc"], copts = TCMALLOC_DEFAULT_COPTS, + # Some experiments change expected size class capacities. + env = {"BORG_EXPERIMENTS": ""}, + # There can be only one CpuCache due to slab offset caching in rseq. 
+ malloc = "//tcmalloc/internal:system_malloc", + shard_count = 3, + tags = ["noubsan"], deps = [ - ":common", - ":runtime_size_classes", + ":common_8k_pages", + ":mock_transfer_cache", ":size_class_info", - "@com_google_absl//absl/strings", + "//tcmalloc/internal:affinity", + "//tcmalloc/internal:logging", + "//tcmalloc/internal:optimization", + "//tcmalloc/internal:percpu", + "//tcmalloc/internal:percpu_tcmalloc", + "//tcmalloc/internal:sysinfo", + "//tcmalloc/testing:testutil", + "//tcmalloc/testing:thread_manager", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/random", + "@com_google_absl//absl/random:bit_gen_ref", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:span", + "@com_google_googletest//:gtest_main", ], ) cc_test( - name = "cpu_cache_test", - srcs = ["cpu_cache_test.cc"], + name = "cpu_cache_activate_test", + srcs = ["cpu_cache_activate_test.cc"], copts = TCMALLOC_DEFAULT_COPTS, malloc = ":tcmalloc_deprecated_perthread", - tags = [ - # TODO(b/193887621): Add TSan annotations to CPUCache and/or add - # atomics to PageMap - "notsan", - ], + tags = ["noubsan"], deps = [ ":common_deprecated_perthread", - "//tcmalloc/internal:optimization", - "//tcmalloc/internal:util", - "//tcmalloc/testing:testutil", + ":malloc_extension", + "//tcmalloc/internal:percpu", + "//tcmalloc/internal:sysinfo", "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", "@com_google_absl//absl/random", - "@com_google_absl//absl/random:seed_sequences", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", "@com_google_googletest//:gtest_main", ], ) -create_tcmalloc_testsuite( +cc_test( name = "central_freelist_test", srcs = ["central_freelist_test.cc"], copts = TCMALLOC_DEFAULT_COPTS, + tags = [ + ], deps = [ + ":common_8k_pages", + ":mock_static_forwarder", + ":size_class_info", + "//tcmalloc/internal:logging", + "//tcmalloc/testing:thread_manager", "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/container:fixed_array", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/numeric:bits", "@com_google_absl//absl/random", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:span", "@com_google_googletest//:gtest_main", ], ) @@ -1307,10 +1396,106 @@ create_tcmalloc_benchmark( copts = TCMALLOC_DEFAULT_COPTS, malloc = "//tcmalloc", deps = [ - ":common", + ":common_8k_pages", "@com_github_google_benchmark//:benchmark", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/random", - "@com_google_absl//absl/types:optional", + "@com_google_absl//absl/types:span", + ], +) + +cc_library( + name = "profile_marshaler", + srcs = ["profile_marshaler.cc"], + hdrs = ["profile_marshaler.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = ["//visibility:public"], + deps = [ + ":malloc_extension", + "//tcmalloc/internal:profile_builder", + "@com_google_absl//absl/status:statusor", + "@com_google_protobuf//:protobuf", + ], +) + +cc_test( + name = "profile_marshaler_test", + srcs = ["profile_marshaler_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":malloc_extension", + ":profile_marshaler", + "//tcmalloc/internal:fake_profile", + "//tcmalloc/internal:profile_cc_proto", + "@com_google_absl//absl/memory", + 
"@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + "@com_google_protobuf//:protobuf", + ], +) + +# TEMPORARY. WILL BE REMOVED. +# Add a dep to this if you want your binary to use old size classes. +# +# TODO(b/242710633): Remove this. +cc_library( + name = "want_legacy_size_classes", + srcs = ["want_legacy_size_classes.cc"], + copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS, + visibility = ["//visibility:public"], + deps = [ + "@com_google_absl//absl/base:core_headers", + ], + alwayslink = 1, +) + +cc_test( + name = "want_legacy_size_classes_test", + srcs = ["want_legacy_size_classes_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + env = {"BORG_DISABLE_EXPERIMENTS": "all"}, + malloc = "//tcmalloc", + deps = [ + ":common_8k_pages", + ":want_legacy_size_classes", + "@com_google_absl//absl/types:span", + "@com_google_googletest//:gtest_main", + ], +) + +create_tcmalloc_testsuite( + name = "allocation_sample_test", + srcs = ["allocation_sample_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + "//tcmalloc/internal:logging", + "//tcmalloc/testing:thread_manager", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/random", + "@com_google_absl//absl/random:bit_gen_ref", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +create_tcmalloc_testsuite( + name = "pages_test", + srcs = ["pages_test.cc"], + deps = [ + "@com_github_google_benchmark//:benchmark", + "@com_google_googletest//:gtest_main", + ], +) + +create_tcmalloc_testsuite( + name = "sizemap_test", + srcs = ["sizemap_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":size_class_info", + "@com_google_googletest//:gtest_main", ], ) diff --git a/contrib/libs/tcmalloc/tcmalloc/allocation_sample.cc b/contrib/libs/tcmalloc/tcmalloc/allocation_sample.cc new file mode 100644 index 000000000000..219eba32b5a8 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/allocation_sample.cc @@ -0,0 +1,59 @@ +// Copyright 2022 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "tcmalloc/allocation_sample.h"
+
+#include <memory>
+#include <utility>
+
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "tcmalloc/internal/config.h"
+#include "tcmalloc/internal_malloc_extension.h"
+#include "tcmalloc/malloc_extension.h"
+#include "tcmalloc/stack_trace_table.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc::tcmalloc_internal {
+
+AllocationSample::AllocationSample(AllocationSampleList* list, absl::Time start)
+    : list_(list), start_(start) {
+  mallocs_ = std::make_unique<StackTraceTable>(ProfileType::kAllocations);
+  mallocs_->SetStartTime(start_);
+  list->Add(this);
+}
+
+AllocationSample::~AllocationSample() {
+  if (mallocs_ == nullptr) {
+    return;
+  }
+
+  // deleted before ending profile, do it for them
+  list_->Remove(this);
+}
+
+Profile AllocationSample::Stop() && {
+  // We need to remove ourselves from list_ before we mutate mallocs_;
+  //
+  // A concurrent call to AllocationSampleList::ReportMalloc can access mallocs_
+  // until we remove it from list_.
+  if (mallocs_) {
+    list_->Remove(this);
+    mallocs_->SetDuration(absl::Now() - start_);
+  }
+  return ProfileAccessor::MakeProfile(std::move(mallocs_));
+}
+
+}  // namespace tcmalloc::tcmalloc_internal
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/allocation_sample.h b/contrib/libs/tcmalloc/tcmalloc/allocation_sample.h
new file mode 100644
index 000000000000..69f15e13a5cc
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/allocation_sample.h
@@ -0,0 +1,93 @@
+#pragma clang system_header
+// Copyright 2022 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
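+//
+// AllocationSample is one in-progress allocation profiling session: on
+// construction it registers itself with an AllocationSampleList and
+// accumulates sampled allocations into a StackTraceTable; Stop() (or the
+// destructor) detaches it again. AllocationSampleList fans each sampled
+// malloc out to every registered session under a spin lock.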
+
+#ifndef TCMALLOC_ALLOCATION_SAMPLE_H_
+#define TCMALLOC_ALLOCATION_SAMPLE_H_
+
+#include <memory>
+
+#include "absl/base/const_init.h"
+#include "absl/base/internal/spinlock.h"
+#include "absl/base/thread_annotations.h"
+#include "absl/time/time.h"
+#include "tcmalloc/internal/config.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/malloc_extension.h"
+#include "tcmalloc/stack_trace_table.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc::tcmalloc_internal {
+
+class AllocationSampleList;
+
+class AllocationSample final : public AllocationProfilingTokenBase {
+ public:
+  AllocationSample(AllocationSampleList* list, absl::Time start);
+  ~AllocationSample() override;
+
+  Profile Stop() && override;
+
+ private:
+  AllocationSampleList* list_;
+  std::unique_ptr<StackTraceTable> mallocs_;
+  absl::Time start_;
+  AllocationSample* next_ = nullptr;
+  friend class AllocationSampleList;
+};
+
+class AllocationSampleList {
+ public:
+  constexpr AllocationSampleList() = default;
+
+  void Add(AllocationSample* as) {
+    AllocationGuardSpinLockHolder h(&lock_);
+    as->next_ = first_;
+    first_ = as;
+  }
+
+  // This list is very short and we're nowhere near a hot path, just walk
+  void Remove(AllocationSample* as) {
+    AllocationGuardSpinLockHolder h(&lock_);
+    AllocationSample** link = &first_;
+    AllocationSample* cur = first_;
+    while (cur != as) {
+      TC_CHECK_NE(cur, nullptr);
+      link = &cur->next_;
+      cur = cur->next_;
+    }
+    *link = as->next_;
+  }
+
+  void ReportMalloc(const struct StackTrace& sample) {
+    AllocationGuardSpinLockHolder h(&lock_);
+    AllocationSample* cur = first_;
+    while (cur != nullptr) {
+      cur->mallocs_->AddTrace(1.0, sample);
+      cur = cur->next_;
+    }
+  }
+
+ private:
+  // Guard against any concurrent modifications on the list of allocation
+  // samples. Invoking `new` while holding this lock can lead to deadlock.
+  absl::base_internal::SpinLock lock_{
+      absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY};
+  AllocationSample* first_ ABSL_GUARDED_BY(lock_) = nullptr;
+};
+
+}  // namespace tcmalloc::tcmalloc_internal
+GOOGLE_MALLOC_SECTION_END
+
+#endif  // TCMALLOC_ALLOCATION_SAMPLE_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/allocation_sample_test.cc b/contrib/libs/tcmalloc/tcmalloc/allocation_sample_test.cc
new file mode 100644
index 000000000000..7290154478ac
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/allocation_sample_test.cc
@@ -0,0 +1,132 @@
+// Copyright 2022 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/allocation_sample.h"
+
+#include <stddef.h>
+
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/base/thread_annotations.h"
+#include "absl/random/bit_gen_ref.h"
+#include "absl/random/random.h"
+#include "absl/synchronization/mutex.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/static_vars.h"
+#include "tcmalloc/testing/thread_manager.h"
+
+namespace tcmalloc::tcmalloc_internal {
+namespace {
+
+TEST(AllocationSample, Threaded) {
+  // StackTraceTable uses a global allocator. It must be initialized.
+  tc_globals.InitIfNecessary();
+
+  // This test exercises b/143623146 by ensuring that the state of the sample is
+  // not modified before it is removed from the linked list.
+  AllocationSampleList list;
+
+  const int kThreads = 5;
+  const int kMaxSamplers = 3;
+  const int kMaxAllocations = 100;
+  ThreadManager m;
+  std::vector<absl::BitGen> thread_states(kThreads);
+
+  struct GlobalState {
+    absl::Mutex mu;
+    std::vector<std::unique_ptr<AllocationSample>> samplers ABSL_GUARDED_BY(mu);
+  } global;
+
+  auto PopSample = [&](absl::BitGenRef rng) {
+    std::unique_ptr<AllocationSample> ret;
+
+    // Do our test bookkeeping separately, so we don't synchronize list
+    // externally.
+    absl::MutexLock l(&global.mu);
+    if (global.samplers.empty()) {
+      return ret;
+    }
+    size_t index = absl::Uniform(rng, 0, global.samplers.size() - 1u);
+    std::swap(global.samplers[index], global.samplers.back());
+    ret = std::move(global.samplers.back());
+    global.samplers.pop_back();
+
+    TC_CHECK_NE(ret.get(), nullptr);
+    return ret;
+  };
+
+  m.Start(kThreads, [&](int thread) {
+    auto& state = thread_states[thread];
+    const double coin = absl::Uniform(state, 0., 1.0);
+
+    if (coin < 0.1) {
+      // Add a sampler. This occurs implicitly in the AllocationSample
+      // constructor.
+      auto sampler = std::make_unique<AllocationSample>(&list, absl::Now());
+
+      // Do our test bookkeeping separately, so we don't synchronize list
+      // externally.
+      {
+        absl::MutexLock l(&global.mu);
+        if (global.samplers.size() < kMaxSamplers) {
+          // Add to the list.
+          global.samplers.push_back(std::move(sampler));
+        }
+      }
+
+      // If we didn't push it, we will unregister in ~AllocationSample.
+    } else if (coin < 0.2) {
+      std::unique_ptr<AllocationSample> sampler = PopSample(state);
+
+      // Remove a sample and allow its destructor to handle unregistering.
+      sampler.reset();
+    } else if (coin < 0.25) {
+      // Call Stop occasionally.
+      std::unique_ptr<AllocationSample> sampler = PopSample(state);
+
+      if (sampler) {
+        std::move(*sampler).Stop();
+      }
+    } else {
+      int allocations;
+      {
+        // StackTraceTable uses a global allocator, rather than one that is
+        // injected. Consult the global state to see how many allocations are
+        // active.
+        PageHeapSpinLockHolder l;
+        allocations = tc_globals.linked_sample_allocator().stats().in_use;
+      }
+      if (allocations >= kMaxAllocations) {
+        return;
+      }
+
+      StackTrace s{};
+      s.requested_size = 16;
+      s.allocated_size = 32;
+      list.ReportMalloc(s);
+    }
+  });
+
+  absl::SleepFor(absl::Milliseconds(1));
+
+  m.Stop();
+}
+
+}  // namespace
+}  // namespace tcmalloc::tcmalloc_internal
diff --git a/contrib/libs/tcmalloc/tcmalloc/allocation_sampling.cc b/contrib/libs/tcmalloc/tcmalloc/allocation_sampling.cc
new file mode 100644
index 000000000000..d0b60347e2d4
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/allocation_sampling.cc
@@ -0,0 +1,270 @@
+// Copyright 2022 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/allocation_sampling.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "absl/base/attributes.h" +#include "absl/base/optimization.h" +#include "absl/debugging/stacktrace.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "absl/types/span.h" +#include "tcmalloc/common.h" +#include "tcmalloc/cpu_cache.h" +#include "tcmalloc/guarded_allocations.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/exponential_biased.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/sampled_allocation.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/pagemap.h" +#include "tcmalloc/pages.h" +#include "tcmalloc/parameters.h" +#include "tcmalloc/span.h" +#include "tcmalloc/stack_trace_table.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/tcmalloc_policy.h" +#include "tcmalloc/thread_cache.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc::tcmalloc_internal { + +std::unique_ptr DumpFragmentationProfile(Static& state) { + auto profile = std::make_unique(ProfileType::kFragmentation); + state.sampled_allocation_recorder().Iterate( + [&state, &profile](const SampledAllocation& sampled_allocation) { + // Compute fragmentation to charge to this sample: + const StackTrace& t = sampled_allocation.sampled_stack; + if (t.proxy == nullptr) { + // There is just one object per-span, and neighboring spans + // can be released back to the system, so we charge no + // fragmentation to this sampled object. + return; + } + + // Fetch the span on which the proxy lives so we can examine its + // co-residents. + const PageId p = PageIdContaining(t.proxy); + Span* span = state.pagemap().GetDescriptor(p); + if (span == nullptr) { + // Avoid crashes in production mode code, but report in tests. + TC_ASSERT_NE(span, nullptr); + return; + } + + const double frag = span->Fragmentation(t.allocated_size); + if (frag > 0) { + // Associate the memory warmth with the actual object, not the proxy. + // The residency information (t.span_start_address) is likely not very + // useful, but we might as well pass it along. + profile->AddTrace(frag, t); + } + }); + return profile; +} + +std::unique_ptr DumpHeapProfile(Static& state) { + auto profile = std::make_unique(ProfileType::kHeap); + profile->SetStartTime(absl::Now()); + state.sampled_allocation_recorder().Iterate( + [&](const SampledAllocation& sampled_allocation) { + profile->AddTrace(1.0, sampled_allocation.sampled_stack); + }); + return profile; +} + +ABSL_ATTRIBUTE_NOINLINE void FreeProxyObject(Static& state, void* ptr, + size_t size_class) { + if (ABSL_PREDICT_TRUE(UsePerCpuCache(state))) { + state.cpu_cache().Deallocate(ptr, size_class); + } else if (ThreadCache* cache = ThreadCache::GetCacheIfPresent(); + ABSL_PREDICT_TRUE(cache)) { + cache->Deallocate(ptr, size_class); + } else { + // This thread doesn't have thread-cache yet or already. Delete directly + // into transfer cache. 
+ state.transfer_cache().InsertRange(size_class, absl::Span(&ptr, 1)); + } +} + +ABSL_ATTRIBUTE_NOINLINE +static void ReportMismatchedDelete(Static& state, + const SampledAllocation& alloc, size_t size, + size_t requested_size, + std::optional allocated_size) { + TC_LOG("*** GWP-ASan (https://google.github.io/tcmalloc/gwp-asan.html) has detected a memory error ***"); + TC_LOG("Error originates from memory allocated at:"); + PrintStackTrace(alloc.sampled_stack.stack, alloc.sampled_stack.depth); + + size_t maximum_size; + if (allocated_size.value_or(requested_size) != requested_size) { + TC_LOG( + "Mismatched-size-delete " + "(https://github.com/google/tcmalloc/tree/master/docs/mismatched-sized-delete.md) " + "of %v bytes (expected %v - %v bytes) at:", + size, requested_size, *allocated_size); + + maximum_size = *allocated_size; + } else { + TC_LOG( + "Mismatched-size-delete " + "(https://github.com/google/tcmalloc/tree/master/docs/mismatched-sized-delete.md) " + "of %v bytes (expected %v bytes) at:", + size, requested_size); + + maximum_size = requested_size; + } + static void* stack[kMaxStackDepth]; + const size_t depth = absl::GetStackTrace(stack, kMaxStackDepth, 1); + PrintStackTrace(stack, depth); + + RecordCrash("GWP-ASan", "mismatched-size-delete"); + state.mismatched_delete_state().Record( + size, size, requested_size, maximum_size, + absl::MakeSpan(alloc.sampled_stack.stack, alloc.sampled_stack.depth), + absl::MakeSpan(stack, depth)); + abort(); +} + +ABSL_ATTRIBUTE_NOINLINE +static void ReportMismatchedDelete(Static& state, void* ptr, size_t size, + size_t minimum_size, size_t maximum_size) { + // Try to refine the maximum possible size. + const PageId p = PageIdContainingTagged(ptr); + size_t size_class = state.pagemap().sizeclass(p); + if (size_class != 0) { + maximum_size = state.sizemap().class_to_size(size_class); + if (maximum_size < minimum_size) { + // Our size class refinement may have made the bounds inconsistent. + // Consult the size map to find the correct bounds. + minimum_size = state.sizemap().class_to_size_range(size_class).first; + } + } + + TC_LOG("*** GWP-ASan (https://google.github.io/tcmalloc/gwp-asan.html) has detected a memory error ***"); + + TC_LOG( + "Mismatched-size-delete " + "(https://github.com/google/tcmalloc/tree/master/docs/mismatched-sized-delete.md) " + "of %v bytes (expected between [%v, %v] bytes) for %p at:", + size, minimum_size, maximum_size, ptr); + + static void* stack[kMaxStackDepth]; + const size_t depth = absl::GetStackTrace(stack, kMaxStackDepth, 1); + PrintStackTrace(stack, depth); + + RecordCrash("GWP-ASan", "mismatched-size-delete"); + state.mismatched_delete_state().Record(/*provided_min=*/size, + /*provided_max=*/size, minimum_size, + maximum_size, std::nullopt, + absl::MakeSpan(stack, depth)); + abort(); +} + +void MaybeUnsampleAllocation(Static& state, void* ptr, + std::optional size, Span& span) { + // No pageheap_lock required. The sampled span should be unmarked and have its + // state cleared only once. External synchronization when freeing is required; + // otherwise, concurrent writes here would likely report a double-free. 
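  // Assumption spelled out for readability: Unsample() hands back the
  // SampledAllocation attached to this span at allocation time and clears the
  // span's sampled state, or returns nullptr if the span was never sampled;
  // the nullptr branch below therefore only performs size sanity checks.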
+ SampledAllocation* sampled_allocation = span.Unsample(); + if (sampled_allocation == nullptr) { + if (ABSL_PREDICT_TRUE(size.has_value())) { + const size_t maximum_size = span.bytes_in_span(); + const size_t minimum_size = maximum_size - (kPageSize - 1u); + + if (ABSL_PREDICT_FALSE(*size < minimum_size || *size > maximum_size)) { + // While we don't have precise allocation-time information because this + // span was not sampled, the deallocated object's purported size exceeds + // the span it is on. This is impossible and indicates corruption. + ReportMismatchedDelete(state, ptr, *size, minimum_size, maximum_size); + } + } + + return; + } + + TC_ASSERT_EQ(state.pagemap().sizeclass(PageIdContainingTagged(ptr)), 0); + + void* const proxy = sampled_allocation->sampled_stack.proxy; + const size_t weight = sampled_allocation->sampled_stack.weight; + const size_t requested_size = + sampled_allocation->sampled_stack.requested_size; + const size_t allocated_size = + sampled_allocation->sampled_stack.allocated_size; + if (size.has_value()) { + if (sampled_allocation->sampled_stack.requested_size_returning) { + if (ABSL_PREDICT_FALSE( + !(requested_size <= *size && *size <= allocated_size))) { + ReportMismatchedDelete(state, *sampled_allocation, *size, + requested_size, allocated_size); + } + } else if (ABSL_PREDICT_FALSE(size != requested_size)) { + ReportMismatchedDelete(state, *sampled_allocation, *size, requested_size, + std::nullopt); + } + } + // SampleifyAllocation turns alignment 1 into 0, turn it back for + // SizeMap::SizeClass. + const size_t alignment = + sampled_allocation->sampled_stack.requested_alignment != 0 + ? sampled_allocation->sampled_stack.requested_alignment + : 1; + // How many allocations does this sample represent, given the sampling + // frequency (weight) and its size. + const double allocation_estimate = + static_cast(weight) / (requested_size + 1); + AllocHandle sampled_alloc_handle = + sampled_allocation->sampled_stack.sampled_alloc_handle; + state.sampled_allocation_recorder().Unregister(sampled_allocation); + + // Adjust our estimate of internal fragmentation. 
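  // Worked example (illustrative numbers only): with weight = 2097152 bytes of
  // sampled traffic, requested_size = 1023 and allocated_size = 1024, the
  // allocation_estimate above is 2097152 / (1023 + 1) = 2048 allocations, so
  // the adjustment below removes 2048 * (1024 - 1023) = 2048 bytes from the
  // running fragmentation estimate.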
+ TC_ASSERT_LE(requested_size, allocated_size); + if (requested_size < allocated_size) { + const size_t sampled_fragmentation = + allocation_estimate * (allocated_size - requested_size); + + // Check against wraparound + TC_ASSERT_GE(state.sampled_internal_fragmentation_.value(), + sampled_fragmentation); + state.sampled_internal_fragmentation_.Add(-sampled_fragmentation); + } + + state.deallocation_samples.ReportFree(sampled_alloc_handle); + + if (proxy) { + const auto policy = CppPolicy().InSameNumaPartitionAs(proxy); + size_t size_class; + if (AccessFromPointer(proxy) == AllocationAccess::kCold) { + size_class = state.sizemap().SizeClass( + policy.AccessAsCold().AlignAs(alignment), allocated_size); + } else { + size_class = state.sizemap().SizeClass( + policy.AccessAsHot().AlignAs(alignment), allocated_size); + } + TC_ASSERT_EQ(size_class, + state.pagemap().sizeclass(PageIdContainingTagged(proxy))); + FreeProxyObject(state, proxy, size_class); + } +} + +} // namespace tcmalloc::tcmalloc_internal +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/allocation_sampling.h b/contrib/libs/tcmalloc/tcmalloc/allocation_sampling.h new file mode 100644 index 000000000000..552150db46a5 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/allocation_sampling.h @@ -0,0 +1,259 @@ +#pragma clang system_header +// Copyright 2022 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_ALLOCATION_SAMPLING_H_ +#define TCMALLOC_ALLOCATION_SAMPLING_H_ + +#include +#include +#include + +#include "absl/base/attributes.h" +#include "absl/debugging/stacktrace.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/percpu.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/pagemap.h" +#include "tcmalloc/sampler.h" +#include "tcmalloc/span.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc::tcmalloc_internal { + +class Static; + +// This function computes a profile that maps a live stack trace to +// the number of bytes of central-cache memory pinned by an allocation +// at that stack trace. +// In the case when span is hosting >= 1 number of small objects (t.proxy != +// nullptr), we call span::Fragmentation() and read `span->allocated_`. It is +// safe to do so since we hold the per-sample lock while iterating over sampled +// allocations. It prevents the sampled allocation that has the proxy object to +// complete deallocation, thus `proxy` can not be returned to the span yet. It +// thus prevents the central free list to return the span to the page heap. +std::unique_ptr DumpFragmentationProfile(Static& state); + +std::unique_ptr DumpHeapProfile(Static& state); + +extern "C" ABSL_CONST_INIT thread_local Sampler tcmalloc_sampler + ABSL_ATTRIBUTE_INITIAL_EXEC; + +// Compiler needs to see definition of this variable to generate more +// efficient code for -fPIE/PIC. If the compiler does not see the definition +// it considers it may come from another dynamic library. 
So even for +// initial-exec model, it need to emit an access via GOT (GOTTPOFF). +// When it sees the definition, it can emit direct %fs:TPOFF access. +// So we provide a weak definition here, but the actual definition is in +// percpu_rseq_asm.S. +ABSL_CONST_INIT ABSL_ATTRIBUTE_WEAK thread_local Sampler tcmalloc_sampler + ABSL_ATTRIBUTE_INITIAL_EXEC; + +inline Sampler* GetThreadSampler() { + static_assert(sizeof(Sampler) == TCMALLOC_SAMPLER_SIZE, + "update TCMALLOC_SAMPLER_SIZE"); + static_assert(alignof(Sampler) == TCMALLOC_SAMPLER_ALIGN, + "update TCMALLOC_SAMPLER_ALIGN"); + static_assert(Sampler::HotDataOffset() == TCMALLOC_SAMPLER_HOT_OFFSET, + "update TCMALLOC_SAMPLER_HOT_OFFSET"); + return &tcmalloc_sampler; +} + +void FreeProxyObject(Static& state, void* ptr, size_t size_class); + +// Performs sampling for already occurred allocation of object. +// +// For very small object sizes, object is used as 'proxy' and full +// page with sampled marked is allocated instead. +// +// For medium-sized objects that have single instance per span, +// they're simply freed and fresh page span is allocated to represent +// sampling. +// +// For large objects (i.e. allocated with do_malloc_pages) they are +// also fully reused and their span is marked as sampled. +// +// Note that do_free_with_size assumes sampled objects have +// page-aligned addresses. Please change both functions if need to +// invalidate the assumption. +// +// Note that size_class might not match requested_size in case of +// memalign. I.e. when larger than requested allocation is done to +// satisfy alignment constraint. +// +// In case of out-of-memory condition when allocating span or +// stacktrace struct, this function simply cheats and returns original +// object. As if no sampling was requested. +template +ABSL_ATTRIBUTE_NOINLINE sized_ptr_t +SampleifyAllocation(Static& state, Policy policy, size_t requested_size, + size_t weight, size_t size_class, void* obj, Span* span) { + TC_CHECK((size_class != 0 && obj != nullptr && span == nullptr) || + (size_class == 0 && obj == nullptr && span != nullptr)); + + StackTrace stack_trace; + stack_trace.proxy = nullptr; + stack_trace.requested_size = requested_size; + // Grab the stack trace outside the heap lock. 
+ stack_trace.depth = absl::GetStackTrace(stack_trace.stack, kMaxStackDepth, 0); + + // requested_alignment = 1 means 'small size table alignment was used' + // Historically this is reported as requested_alignment = 0 + stack_trace.requested_alignment = policy.align(); + if (stack_trace.requested_alignment == 1) { + stack_trace.requested_alignment = 0; + } + + stack_trace.requested_size_returning = policy.size_returning(); + stack_trace.access_hint = static_cast(policy.access()); + stack_trace.weight = weight; + + GuardedAllocWithStatus alloc_with_status{ + nullptr, Profile::Sample::GuardedStatus::NotAttempted}; + + size_t capacity = 0; + if (size_class != 0) { + TC_ASSERT_EQ(size_class, + state.pagemap().sizeclass(PageIdContainingTagged(obj))); + + stack_trace.allocated_size = state.sizemap().class_to_size(size_class); + stack_trace.cold_allocated = IsExpandedSizeClass(size_class); + + Length num_pages = BytesToLengthCeil(stack_trace.allocated_size); + alloc_with_status = state.guardedpage_allocator().TrySample( + requested_size, stack_trace.requested_alignment, num_pages, + stack_trace); + if (alloc_with_status.status == Profile::Sample::GuardedStatus::Guarded) { + TC_ASSERT(!IsNormalMemory(alloc_with_status.alloc)); + const PageId p = PageIdContaining(alloc_with_status.alloc); +#ifdef TCMALLOC_INTERNAL_LEGACY_LOCKING + PageHeapSpinLockHolder l; +#endif // TCMALLOC_INTERNAL_LEGACY_LOCKING + span = Span::New(Range(p, num_pages)); + state.pagemap().Set(p, span); + // If we report capacity back from a size returning allocation, we can not + // report the stack_trace.allocated_size, as we guard the size to + // 'requested_size', and we maintain the invariant that GetAllocatedSize() + // must match the returned size from size returning allocations. So in + // that case, we report the requested size for both capacity and + // GetAllocatedSize(). + if (policy.size_returning()) { + stack_trace.allocated_size = requested_size; + } + capacity = requested_size; + } else if ((span = state.page_allocator().New( + num_pages, {1, AccessDensityPrediction::kSparse}, + MemoryTag::kSampled)) == nullptr) { + capacity = stack_trace.allocated_size; + return {obj, capacity}; + } else { + capacity = stack_trace.allocated_size; + } + + size_t span_size = + Length(state.sizemap().class_to_pages(size_class)).in_bytes(); + size_t objects_per_span = span_size / stack_trace.allocated_size; + + if (objects_per_span != 1) { + TC_ASSERT_GT(objects_per_span, 1); + stack_trace.proxy = obj; + obj = nullptr; + } + } else { + // Set stack_trace.allocated_size to the exact size for a page allocation. + // NOTE: if we introduce gwp-asan sampling / guarded allocations + // for page allocations, then we need to revisit do_malloc_pages as + // the current assumption is that only class sized allocs are sampled + // for gwp-asan. + stack_trace.allocated_size = span->bytes_in_span(); + stack_trace.cold_allocated = + GetMemoryTag(span->start_address()) == MemoryTag::kCold; + capacity = stack_trace.allocated_size; + } + + // A span must be provided or created by this point. 
+ TC_ASSERT_NE(span, nullptr); + + stack_trace.sampled_alloc_handle = + state.sampled_alloc_handle_generator.fetch_add( + 1, std::memory_order_relaxed) + + 1; + stack_trace.span_start_address = span->start_address(); + stack_trace.allocation_time = absl::Now(); + stack_trace.guarded_status = alloc_with_status.status; + stack_trace.allocation_type = policy.allocation_type(); + stack_trace.user_data = SampleUserDataSupport::UserData::Make(); + + // How many allocations does this sample represent, given the sampling + // frequency (weight) and its size. + const double allocation_estimate = + static_cast(weight) / (requested_size + 1); + + // Adjust our estimate of internal fragmentation. + TC_ASSERT_LE(requested_size, stack_trace.allocated_size); + if (requested_size < stack_trace.allocated_size) { + state.sampled_internal_fragmentation_.Add( + allocation_estimate * (stack_trace.allocated_size - requested_size)); + } + + state.allocation_samples.ReportMalloc(stack_trace); + + state.deallocation_samples.ReportMalloc(stack_trace); + + // The SampledAllocation object is visible to readers after this. Readers only + // care about its various metadata (e.g. stack trace, weight) to generate the + // heap profile, and won't need any information from Span::Sample() next. + SampledAllocation* sampled_allocation = + state.sampled_allocation_recorder().Register(std::move(stack_trace)); + // No pageheap_lock required. The span is freshly allocated and no one else + // can access it. It is visible after we return from this allocation path. + span->Sample(sampled_allocation); + + state.peak_heap_tracker().MaybeSaveSample(); + + if (obj != nullptr) { + // We are not maintaining precise statistics on malloc hit/miss rates at our + // cache tiers. We can deallocate into our ordinary cache. + TC_ASSERT_NE(size_class, 0); + FreeProxyObject(state, obj, size_class); + } + TC_ASSERT_EQ(state.pagemap().sizeclass(span->first_page()), 0); + return {(alloc_with_status.alloc != nullptr) ? 
alloc_with_status.alloc + : span->start_address(), + capacity}; +} + +void MaybeUnsampleAllocation(Static& state, void* ptr, + std::optional size, Span& span); + +template +static sized_ptr_t SampleLargeAllocation(Static& state, Policy policy, + size_t requested_size, size_t weight, + Span* span) { + return SampleifyAllocation(state, policy, requested_size, weight, 0, nullptr, + span); +} + +template +static sized_ptr_t SampleSmallAllocation(Static& state, Policy policy, + size_t requested_size, size_t weight, + size_t size_class, sized_ptr_t res) { + return SampleifyAllocation(state, policy, requested_size, weight, size_class, + res.p, nullptr); +} +} // namespace tcmalloc::tcmalloc_internal +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_ALLOCATION_SAMPLING_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/arena.cc b/contrib/libs/tcmalloc/tcmalloc/arena.cc index 5ba1a65bf374..05d505805460 100644 --- a/contrib/libs/tcmalloc/tcmalloc/arena.cc +++ b/contrib/libs/tcmalloc/tcmalloc/arena.cc @@ -14,7 +14,16 @@ #include "tcmalloc/arena.h" +#include +#include +#include + +#include "absl/base/optimization.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/allocation_guard.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/memory_tag.h" #include "tcmalloc/static_vars.h" #include "tcmalloc/system-alloc.h" @@ -22,50 +31,44 @@ GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { -void* Arena::Alloc(size_t bytes, int alignment) { - ASSERT(alignment > 0); +void* Arena::Alloc(size_t bytes, std::align_val_t alignment) { + size_t align = static_cast(alignment); + TC_ASSERT_GT(align, 0); + + AllocationGuardSpinLockHolder l(&arena_lock_); + { // First we need to move up to the correct alignment. - const int misalignment = - reinterpret_cast(free_area_) % alignment; - const int alignment_bytes = - misalignment != 0 ? alignment - misalignment : 0; + const int misalignment = reinterpret_cast(free_area_) % align; + const int alignment_bytes = misalignment != 0 ? align - misalignment : 0; free_area_ += alignment_bytes; free_avail_ -= alignment_bytes; bytes_allocated_ += alignment_bytes; } char* result; + auto& system_allocator = tc_globals.system_allocator(); if (free_avail_ < bytes) { size_t ask = bytes > kAllocIncrement ? bytes : kAllocIncrement; - size_t actual_size; - // TODO(b/171081864): Arena allocations should be made relatively - // infrequently. Consider tagging this memory with sampled objects which - // are also infrequently allocated. - // - // In the meantime it is important that we use the current NUMA partition - // rather than always using a particular one because it's possible that any - // single partition we choose might only contain nodes that the process is - // unable to allocate from due to cgroup restrictions. 
- MemoryTag tag; - const auto& numa_topology = Static::numa_topology(); - if (numa_topology.numa_aware()) { - tag = NumaNormalTag(numa_topology.GetCurrentPartition()); - } else { - tag = MemoryTag::kNormal; - } - free_area_ = - reinterpret_cast(SystemAlloc(ask, &actual_size, kPageSize, tag)); + auto [ptr, actual_size] = + system_allocator.Allocate(ask, kPageSize, MemoryTag::kMetadata); + free_area_ = reinterpret_cast(ptr); if (ABSL_PREDICT_FALSE(free_area_ == nullptr)) { - Crash(kCrash, __FILE__, __LINE__, - "FATAL ERROR: Out of memory trying to allocate internal tcmalloc " - "data (bytes, object-size); is something preventing mmap from " - "succeeding (sandbox, VSS limitations)?", - kAllocIncrement, bytes); + TC_BUG( + "FATAL ERROR: Out of memory trying to allocate internal tcmalloc " + "data (bytes=%v, object-size=%v); is something preventing mmap from " + "succeeding (sandbox, VSS limitations)?", + kAllocIncrement, bytes); } - SystemBack(free_area_, actual_size); + system_allocator.Back(free_area_, actual_size); + + // We've discarded the previous free_area_, so any bytes that were + // unallocated are effectively inaccessible to future allocations. + bytes_unavailable_ += free_avail_; + blocks_++; + free_avail_ = actual_size; } - ASSERT(reinterpret_cast(free_area_) % alignment == 0); + TC_ASSERT_EQ(reinterpret_cast(free_area_) % align, 0); result = free_area_; free_area_ += bytes; free_avail_ -= bytes; diff --git a/contrib/libs/tcmalloc/tcmalloc/arena.h b/contrib/libs/tcmalloc/tcmalloc/arena.h index 06552535405a..b70fefff213e 100644 --- a/contrib/libs/tcmalloc/tcmalloc/arena.h +++ b/contrib/libs/tcmalloc/tcmalloc/arena.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,49 +19,100 @@ #include #include +#include + #include "absl/base/attributes.h" +#include "absl/base/optimization.h" #include "absl/base/thread_annotations.h" #include "tcmalloc/common.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/logging.h" GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { +struct ArenaStats { + // The number of bytes allocated and in-use by calls to Alloc(). + size_t bytes_allocated; + // The number of bytes currently reserved for future calls to Alloc(). + size_t bytes_unallocated; + // The number of bytes lost and unavailable to calls to Alloc() due to + // inefficiencies in Arena. + size_t bytes_unavailable; + // The number of allocated bytes that have subsequently become non-resident, + // e.g. due to the slab being resized. Note that these bytes are disjoint from + // the ones counted in `bytes_allocated`. + size_t bytes_nonresident; + + // The number of blocks allocated by the Arena. + size_t blocks; +}; + // Arena allocation; designed for use by tcmalloc internal data structures like // spans, profiles, etc. Always expands. -class Arena { +// +// Thread-safe. +class ABSL_CACHELINE_ALIGNED Arena { public: - constexpr Arena() - : free_area_(nullptr), free_avail_(0), bytes_allocated_(0) {} - - // Return a properly aligned byte array of length "bytes". Crashes if - // allocation fails. Requires pageheap_lock is held. - ABSL_ATTRIBUTE_RETURNS_NONNULL void* Alloc(size_t bytes, - int alignment = kAlignment) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); - - // Returns the total number of bytes allocated from this arena. Requires - // pageheap_lock is held. 
- uint64_t bytes_allocated() const - ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { - return bytes_allocated_; + constexpr Arena() = default; + + // Returns a properly aligned byte array of length "bytes". Crashes if + // allocation fails. + ABSL_ATTRIBUTE_RETURNS_NONNULL void* Alloc( + size_t bytes, std::align_val_t alignment = kAlignment); + + // Updates the stats for allocated and non-resident bytes. + void UpdateAllocatedAndNonresident(int64_t allocated, int64_t nonresident) { + AllocationGuardSpinLockHolder l(&arena_lock_); + TC_ASSERT_GE(static_cast(bytes_allocated_) + allocated, 0); + bytes_allocated_ += allocated; + TC_ASSERT_GE(static_cast(bytes_nonresident_) + nonresident, 0); + bytes_nonresident_ += nonresident; + } + + // Returns statistics about memory allocated and managed by this Arena. + ArenaStats stats() const { + AllocationGuardSpinLockHolder l(&arena_lock_); + + ArenaStats s; + s.bytes_allocated = bytes_allocated_; + s.bytes_unallocated = free_avail_; + s.bytes_unavailable = bytes_unavailable_; + s.bytes_nonresident = bytes_nonresident_; + s.blocks = blocks_; + return s; } private: // How much to allocate from system at a time static constexpr int kAllocIncrement = 128 << 10; + mutable absl::base_internal::SpinLock arena_lock_{ + absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY}; + // Free area from which to carve new objects - char* free_area_ ABSL_GUARDED_BY(pageheap_lock); - size_t free_avail_ ABSL_GUARDED_BY(pageheap_lock); + char* free_area_ ABSL_GUARDED_BY(arena_lock_) = nullptr; + size_t free_avail_ ABSL_GUARDED_BY(arena_lock_) = 0; // Total number of bytes allocated from this arena - uint64_t bytes_allocated_ ABSL_GUARDED_BY(pageheap_lock); + size_t bytes_allocated_ ABSL_GUARDED_BY(arena_lock_) = 0; + // The number of bytes that are unused and unavailable for future allocations + // because they are at the end of a discarded arena block. + size_t bytes_unavailable_ ABSL_GUARDED_BY(arena_lock_) = 0; + // The number of bytes on the arena that have been MADV_DONTNEEDed away. Note + // that these bytes are disjoint from the ones counted in `bytes_allocated`. + size_t bytes_nonresident_ ABSL_GUARDED_BY(arena_lock_) = 0; + // Total number of blocks/free areas managed by this Arena. 
+ size_t blocks_ ABSL_GUARDED_BY(arena_lock_) = 0; Arena(const Arena&) = delete; Arena& operator=(const Arena&) = delete; }; +static_assert(sizeof(Arena) <= ABSL_CACHELINE_SIZE, + "Arena is unexpectedly large"); + } // namespace tcmalloc_internal } // namespace tcmalloc GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/arena_test.cc b/contrib/libs/tcmalloc/tcmalloc/arena_test.cc index 2fb728cac9c9..43bfec995912 100644 --- a/contrib/libs/tcmalloc/tcmalloc/arena_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/arena_test.cc @@ -14,25 +14,113 @@ #include "tcmalloc/arena.h" -#include "gmock/gmock.h" +#include + +#include + #include "gtest/gtest.h" +#include "absl/base/internal/spinlock.h" +#include "tcmalloc/common.h" namespace tcmalloc { namespace tcmalloc_internal { namespace { +std::align_val_t Align(int align) { + return static_cast(align); +} + TEST(Arena, AlignedAlloc) { Arena arena; - absl::base_internal::SpinLockHolder h(&pageheap_lock); - EXPECT_EQ(reinterpret_cast(arena.Alloc(64, 64)) % 64, 0); + EXPECT_EQ(reinterpret_cast(arena.Alloc(64, Align(64))) % 64, 0); EXPECT_EQ(reinterpret_cast(arena.Alloc(7)) % 8, 0); - EXPECT_EQ(reinterpret_cast(arena.Alloc(128, 64)) % 64, 0); + EXPECT_EQ(reinterpret_cast(arena.Alloc(128, Align(64))) % 64, 0); for (int alignment = 1; alignment < 100; ++alignment) { - EXPECT_EQ( - reinterpret_cast(arena.Alloc(7, alignment)) % alignment, 0); + EXPECT_EQ(reinterpret_cast(arena.Alloc(7, Align(alignment))) % + alignment, + 0); } } +TEST(Arena, Stats) { + Arena arena; + + ArenaStats stats = arena.stats(); + EXPECT_EQ(stats.bytes_allocated, 0); + EXPECT_EQ(stats.bytes_unallocated, 0); + EXPECT_EQ(stats.bytes_unavailable, 0); + EXPECT_EQ(stats.bytes_nonresident, 0); + EXPECT_EQ(stats.blocks, 0); + + // Trigger an allocation and grab new stats. + void* ptr = arena.Alloc(1, Align(1)); + ArenaStats stats_after_alloc = arena.stats(); + + EXPECT_NE(ptr, nullptr); + + EXPECT_EQ(stats_after_alloc.bytes_allocated, 1); + EXPECT_GE(stats_after_alloc.bytes_unallocated, 0); + EXPECT_EQ(stats_after_alloc.bytes_unavailable, 0); + EXPECT_EQ(stats_after_alloc.bytes_nonresident, 0); + EXPECT_EQ(stats_after_alloc.blocks, 1); + + // Trigger an allocation that is larger than the remaining free bytes. + // + // TODO(b/201694482): Optimize this. 
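  // The expectations below are plain accounting: the request for
  // bytes_unallocated + 1 bytes cannot fit in the first block, so a second
  // block is fetched, the first block's unused bytes_unallocated bytes are
  // counted as bytes_unavailable, and bytes_allocated becomes the 1 byte
  // already handed out plus the bytes_unallocated + 1 bytes requested here.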
+ ptr = arena.Alloc(stats_after_alloc.bytes_unallocated + 1, Align(1)); + ArenaStats stats_after_alloc2 = arena.stats(); + EXPECT_NE(ptr, nullptr); + + EXPECT_EQ(stats_after_alloc2.bytes_allocated, + stats_after_alloc.bytes_unallocated + 2); + EXPECT_GE(stats_after_alloc2.bytes_unallocated, 0); + EXPECT_EQ(stats_after_alloc2.bytes_unavailable, + stats_after_alloc.bytes_unallocated); + EXPECT_EQ(stats_after_alloc.bytes_nonresident, 0); + EXPECT_EQ(stats_after_alloc2.blocks, 2); +} + +TEST(Arena, ReportUnmapped) { + Arena arena; + void* ptr = arena.Alloc(10, Align(1)); + ArenaStats stats_after_alloc = arena.stats(); + EXPECT_NE(ptr, nullptr); + + EXPECT_EQ(stats_after_alloc.bytes_allocated, 10); + EXPECT_EQ(stats_after_alloc.bytes_nonresident, 0); + + arena.UpdateAllocatedAndNonresident(-5, 5); + stats_after_alloc = arena.stats(); + + EXPECT_EQ(stats_after_alloc.bytes_allocated, 5); + EXPECT_EQ(stats_after_alloc.bytes_nonresident, 5); + + arena.UpdateAllocatedAndNonresident(3, -3); + stats_after_alloc = arena.stats(); + + EXPECT_EQ(stats_after_alloc.bytes_allocated, 8); + EXPECT_EQ(stats_after_alloc.bytes_nonresident, 2); +} + +TEST(Arena, BytesImpending) { + Arena arena; + + ArenaStats stats = arena.stats(); + EXPECT_EQ(stats.bytes_allocated, 0); + + arena.UpdateAllocatedAndNonresident(100, 0); + stats = arena.stats(); + + EXPECT_EQ(stats.bytes_allocated, 100); + + arena.UpdateAllocatedAndNonresident(-100, 0); + void* ptr = arena.Alloc(100, Align(1)); + stats = arena.stats(); + + EXPECT_NE(ptr, nullptr); + EXPECT_EQ(stats.bytes_allocated, 100); +} + } // namespace } // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/background.cc b/contrib/libs/tcmalloc/tcmalloc/background.cc index ec57c039016e..762214cedd8f 100644 --- a/contrib/libs/tcmalloc/tcmalloc/background.cc +++ b/contrib/libs/tcmalloc/tcmalloc/background.cc @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include +#include -#include "absl/base/internal/sysinfo.h" #include "absl/time/clock.h" #include "absl/time/time.h" #include "tcmalloc/cpu_cache.h" @@ -24,159 +24,146 @@ #include "tcmalloc/malloc_extension.h" #include "tcmalloc/parameters.h" #include "tcmalloc/static_vars.h" +#include "tcmalloc/stats.h" -GOOGLE_MALLOC_SECTION_BEGIN -namespace tcmalloc { -namespace tcmalloc_internal { -namespace { - -// Called by MallocExtension_Internal_ProcessBackgroundActions. -// -// We use a simple heuristic here: -// We keep track of the set of CPUs that we are allowed to run on. Whenever a -// CPU is removed from this list, the next call to this routine will detect the -// disappearance and call ReleaseCpuMemory on it. -// -// Note that this heuristic _explicitly_ does not reclaim from isolated cores -// that this process may have set up specific affinities for -- as this thread -// will never have been allowed to run there. -cpu_set_t prev_allowed_cpus; -void ReleasePerCpuMemoryToOS() { - cpu_set_t allowed_cpus; - - // Only attempt reclaim when per-CPU caches are in use. While - // ReleaseCpuMemory() itself is usually a no-op otherwise, we are experiencing - // failures in non-permissive sandboxes due to calls made to - // sched_getaffinity() below. It is expected that a runtime environment - // supporting per-CPU allocations supports sched_getaffinity(). - // See b/27247854. 
- if (!MallocExtension::PerCpuCachesActive()) { - return; - } - - if (subtle::percpu::UsingFlatVirtualCpus()) { - // Our (real) CPU mask does not provide useful information about the state - // of our virtual CPU set. - return; - } - - // This can only fail due to a sandbox or similar intercepting the syscall. - if (sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus)) { - // We log periodically as start-up errors are frequently ignored and this is - // something we do want clients to fix if they are experiencing it. - Log(kLog, __FILE__, __LINE__, - "Unexpected sched_getaffinity() failure; errno ", errno); - return; - } +// Release memory to the system at a constant rate. +void MallocExtension_Internal_ProcessBackgroundActions() { + using ::tcmalloc::tcmalloc_internal::Parameters; + using ::tcmalloc::tcmalloc_internal::tc_globals; - // Note: This is technically not correct in the presence of hotplug (it is - // not guaranteed that NumCPUs() is an upper bound on CPU-number). It is - // currently safe for Google systems. - const int num_cpus = absl::base_internal::NumCPUs(); - for (int cpu = 0; cpu < num_cpus; cpu++) { - if (CPU_ISSET(cpu, &prev_allowed_cpus) && !CPU_ISSET(cpu, &allowed_cpus)) { - // This is a CPU present in the old mask, but not the new. Reclaim. - MallocExtension::ReleaseCpuMemory(cpu); - } - } + tcmalloc::MallocExtension::MarkThreadIdle(); - // Update cached runnable CPUs for next iteration. - memcpy(&prev_allowed_cpus, &allowed_cpus, sizeof(cpu_set_t)); -} + absl::Time prev_time = absl::Now(); + absl::Time last_reclaim = prev_time; + absl::Time last_shuffle = prev_time; + absl::Time last_size_class_resize = prev_time; + absl::Time last_size_class_max_capacity_resize = prev_time; + absl::Time last_slab_resize_check = prev_time; + +#ifndef TCMALLOC_INTERNAL_SMALL_BUT_SLOW + absl::Time last_transfer_cache_plunder_check = prev_time; + absl::Time last_transfer_cache_resize_check = prev_time; +#endif + + // We use a separate release rate smoother from the one used by + // ReleaseMemoryToSystem because a) we want to maintain a constant background + // release rate, regardless of whether the user is releasing memory; and b) we + // want to separately account for pages released by ProcessBackgroundActions. + tcmalloc::tcmalloc_internal::ConstantRatePageAllocatorReleaser releaser; + + while (tcmalloc::MallocExtension::GetBackgroundProcessActionsEnabled()) { + const absl::Duration sleep_time = + tcmalloc::MallocExtension::GetBackgroundProcessSleepInterval(); + + // Reclaim inactive per-cpu caches once per cpu_cache_shuffle_period. + // + // We use a longer 30 sleep cycle reclaim period to make sure that caches + // are indeed idle. Reclaim drains entire cache, as opposed to cache shuffle + // for instance that only shrinks a cache by a few objects at a time. So, we + // might have larger performance degradation if we use a shorter reclaim + // interval and drain caches that weren't supposed to. + const absl::Duration cpu_cache_reclaim_period = 30 * sleep_time; + + // Shuffle per-cpu caches once per cpu_cache_shuffle_period. + const absl::Duration cpu_cache_shuffle_period = 5 * sleep_time; + + const absl::Duration size_class_resize_period = 2 * sleep_time; + const absl::Duration size_class_max_capacity_resize_period = + 29 * sleep_time; + + // See if we should resize the slab once per cpu_cache_slab_resize_period. + // This period is coprime to cpu_cache_shuffle_period and + // cpu_cache_shuffle_period. 
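    // For example, with a 1 s sleep interval the reclaim, shuffle and
    // size-class-resize periods above come out to 30 s, 5 s and 2 s, while the
    // slab-resize period below is 29 s; 29 shares no common factor with 30, 5
    // or 2, so the slab-resize check rarely lands on the same iteration as
    // that other periodic work.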
+ const absl::Duration cpu_cache_slab_resize_period = 29 * sleep_time; + +#ifndef TCMALLOC_INTERNAL_SMALL_BUT_SLOW + // We reclaim unused objects from the transfer caches once per + // transfer_cache_plunder_period. + const absl::Duration transfer_cache_plunder_period = 5 * sleep_time; + // Resize transfer caches once per transfer_cache_resize_period. + const absl::Duration transfer_cache_resize_period = 2 * sleep_time; +#endif -void ShuffleCpuCaches() { - if (!MallocExtension::PerCpuCachesActive()) { - return; - } + absl::Time now = absl::Now(); - // Shuffle per-cpu caches - Static::cpu_cache().ShuffleCpuCaches(); -} + // We follow the cache hierarchy in TCMalloc from outermost (per-CPU) to + // innermost (the page heap). Freeing up objects at one layer can help aid + // memory coalescing for inner caches. -// Reclaims per-cpu caches. The CPU mask used in ReleasePerCpuMemoryToOS does -// not provide useful information about virtual CPU state and hence, does not -// reclaim memory when virtual CPUs are enabled. -// -// Here, we use heuristics that are based on cache usage and misses, to -// determine if the caches have been recently inactive and if they may be -// reclaimed. -void ReclaimIdleCpuCaches() { - // Attempts reclaim only when per-CPU caches are in use. - if (!MallocExtension::PerCpuCachesActive()) { - return; - } + if (tcmalloc::MallocExtension::PerCpuCachesActive()) { + // Accelerate fences as part of this operation by registering this thread + // with rseq. While this is not strictly required to succeed, we do not + // expect an inconsistent state for rseq (some threads registered and some + // threads unable to). + TC_CHECK(tcmalloc::tcmalloc_internal::subtle::percpu::IsFast()); - Static::cpu_cache().TryReclaimingCaches(); -} + // Try to reclaim per-cpu caches once every cpu_cache_reclaim_period + // when enabled. + if (now - last_reclaim >= cpu_cache_reclaim_period) { + tc_globals.cpu_cache().TryReclaimingCaches(); + last_reclaim = now; + } -} // namespace -} // namespace tcmalloc_internal -} // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END + if (now - last_shuffle >= cpu_cache_shuffle_period) { + tc_globals.cpu_cache().ShuffleCpuCaches(); + last_shuffle = now; + } -// Release memory to the system at a constant rate. -void MallocExtension_Internal_ProcessBackgroundActions() { - tcmalloc::MallocExtension::MarkThreadIdle(); + if (now - last_size_class_resize >= size_class_resize_period) { + tc_globals.cpu_cache().ResizeSizeClasses(); + last_size_class_resize = now; + } - // Initialize storage for ReleasePerCpuMemoryToOS(). - CPU_ZERO(&tcmalloc::tcmalloc_internal::prev_allowed_cpus); + if (Parameters::resize_size_class_max_capacity() && + now - last_size_class_max_capacity_resize >= + size_class_max_capacity_resize_period) { + tc_globals.cpu_cache().ResizeSizeClassMaxCapacities(); + last_size_class_max_capacity_resize = now; + } - absl::Time prev_time = absl::Now(); - constexpr absl::Duration kSleepTime = absl::Seconds(1); - - // Reclaim inactive per-cpu caches once per kCpuCacheReclaimPeriod. - // - // We use a longer 30 sec reclaim period to make sure that caches are indeed - // idle. Reclaim drains entire cache, as opposed to cache shuffle for instance - // that only shrinks a cache by a few objects at a time. So, we might have - // larger performance degradation if we use a shorter reclaim interval and - // drain caches that weren't supposed to. 
- constexpr absl::Duration kCpuCacheReclaimPeriod = absl::Seconds(30); - absl::Time last_reclaim = absl::Now(); - - // Shuffle per-cpu caches once per kCpuCacheShufflePeriod secs. - constexpr absl::Duration kCpuCacheShufflePeriod = absl::Seconds(5); - absl::Time last_shuffle = absl::Now(); - - while (true) { - absl::Time now = absl::Now(); - const ssize_t bytes_to_release = - static_cast(tcmalloc::tcmalloc_internal::Parameters:: - background_release_rate()) * - absl::ToDoubleSeconds(now - prev_time); - if (bytes_to_release > 0) { // may be negative if time goes backwards - tcmalloc::MallocExtension::ReleaseMemoryToSystem(bytes_to_release); + // See if we need to grow the slab once every kCpuCacheSlabResizePeriod + // when enabled. + if (Parameters::per_cpu_caches_dynamic_slab_enabled() && + now - last_slab_resize_check >= cpu_cache_slab_resize_period) { + tc_globals.cpu_cache().ResizeSlabIfNeeded(); + last_slab_resize_check = now; + } } - const bool reclaim_idle_per_cpu_caches = - tcmalloc::tcmalloc_internal::Parameters::reclaim_idle_per_cpu_caches(); + tc_globals.sharded_transfer_cache().Plunder(); - // If enabled, we use heuristics to determine if the per-cpu caches are - // inactive. If disabled, we use a more conservative approach, that uses - // allowed cpu masks, to reclaim cpu caches. - if (reclaim_idle_per_cpu_caches) { - // Try to reclaim per-cpu caches once every kCpuCacheReclaimPeriod - // when enabled. - if (now - last_reclaim >= kCpuCacheReclaimPeriod) { - tcmalloc::tcmalloc_internal::ReclaimIdleCpuCaches(); - last_reclaim = now; - } - } else { - tcmalloc::tcmalloc_internal::ReleasePerCpuMemoryToOS(); +#ifndef TCMALLOC_INTERNAL_SMALL_BUT_SLOW + // Try to plunder and reclaim unused objects from transfer caches. + if (now - last_transfer_cache_plunder_check >= + transfer_cache_plunder_period) { + tc_globals.transfer_cache().TryPlunder(); + last_transfer_cache_plunder_check = now; } - const bool shuffle_per_cpu_caches = - tcmalloc::tcmalloc_internal::Parameters::shuffle_per_cpu_caches(); + if (now - last_transfer_cache_resize_check >= + transfer_cache_resize_period) { + tc_globals.transfer_cache().TryResizingCaches(); + last_transfer_cache_resize_check = now; + } +#endif - if (shuffle_per_cpu_caches) { - if (now - last_shuffle >= kCpuCacheShufflePeriod) { - tcmalloc::tcmalloc_internal::ShuffleCpuCaches(); - last_shuffle = now; - } + // If time goes backwards, we would like to cap the release rate at 0. + ssize_t bytes_to_release = + static_cast(Parameters::background_release_rate()) * + absl::ToDoubleSeconds(now - prev_time); + bytes_to_release = std::max(bytes_to_release, 0); + + // If release rate is set to 0, do not release memory to system. However, if + // we want to release free and backed hugepages from HugeRegion, + // ReleaseMemoryToSystem should be able to release those pages to the + // system even with bytes_to_release = 0. 
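    // For example (illustrative numbers): with background_release_rate() set
    // to 8 MiB/s and roughly one second between iterations, bytes_to_release
    // comes out to about 8 MiB per pass; a negative value from a backwards
    // clock step has already been clamped to 0 above.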
+ if (bytes_to_release > 0 || Parameters::release_pages_from_huge_region()) { + releaser.Release(bytes_to_release, + /*reason=*/tcmalloc::tcmalloc_internal:: + PageReleaseReason::kProcessBackgroundActions); } - tcmalloc::tcmalloc_internal::Static().sharded_transfer_cache().Plunder(); prev_time = now; - absl::SleepFor(kSleepTime); + absl::SleepFor(sleep_time); } } diff --git a/contrib/libs/tcmalloc/tcmalloc/central_freelist.cc b/contrib/libs/tcmalloc/tcmalloc/central_freelist.cc index 8620e228a133..cfcf1353e799 100644 --- a/contrib/libs/tcmalloc/tcmalloc/central_freelist.cc +++ b/contrib/libs/tcmalloc/tcmalloc/central_freelist.cc @@ -14,205 +14,193 @@ #include "tcmalloc/central_freelist.h" -#include - -#include "tcmalloc/internal/linked_list.h" +#include +#include +#include + +#include "absl/base/attributes.h" +#include "absl/base/optimization.h" +#include "absl/base/thread_annotations.h" +#include "absl/types/span.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/allocation_guard.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" -#include "tcmalloc/internal/optimization.h" -#include "tcmalloc/page_heap.h" +#include "tcmalloc/internal/prefetch.h" +#include "tcmalloc/page_allocator_interface.h" #include "tcmalloc/pagemap.h" #include "tcmalloc/pages.h" +#include "tcmalloc/selsan/selsan.h" +#include "tcmalloc/span.h" #include "tcmalloc/static_vars.h" GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { +namespace central_freelist_internal { -static MemoryTag MemoryTagFromSizeClass(size_t cl) { - if (!Static::numa_topology().numa_aware()) { +static MemoryTag MemoryTagFromSizeClass(size_t size_class) { + if (IsExpandedSizeClass(size_class)) { + return MemoryTag::kCold; + } + if (selsan::IsEnabled()) { + return MemoryTag::kSelSan; + } + if (!tc_globals.numa_topology().numa_aware()) { return MemoryTag::kNormal; } - return NumaNormalTag(cl / kNumBaseClasses); + return NumaNormalTag(size_class / kNumBaseClasses); } -// Like a constructor and hence we disable thread safety analysis. -void CentralFreeList::Init(size_t cl) ABSL_NO_THREAD_SAFETY_ANALYSIS { - size_class_ = cl; - object_size_ = Static::sizemap().class_to_size(cl); - pages_per_span_ = Length(Static::sizemap().class_to_pages(cl)); - objects_per_span_ = - pages_per_span_.in_bytes() / (object_size_ ? object_size_ : 1); +static AccessDensityPrediction AccessDensity(int objects_per_span) { + // Use number of objects per span as a proxy for estimating access density of + // the span. If number of objects per span is higher than + // kFewObjectsAllocMaxLimit threshold, we assume that the span would be + // long-lived. + return objects_per_span > kFewObjectsAllocMaxLimit + ? 
AccessDensityPrediction::kDense + : AccessDensityPrediction::kSparse; } -static Span* MapObjectToSpan(void* object) { - const PageId p = PageIdContaining(object); - Span* span = Static::pagemap().GetExistingDescriptor(p); - return span; +size_t StaticForwarder::class_to_size(int size_class) { + return tc_globals.sizemap().class_to_size(size_class); } -Span* CentralFreeList::ReleaseToSpans(void* object, Span* span, - size_t object_size) { - if (ABSL_PREDICT_FALSE(span->FreelistEmpty(object_size))) { - nonempty_.prepend(span); - } - - if (ABSL_PREDICT_TRUE(span->FreelistPush(object, object_size))) { - return nullptr; - } - span->RemoveFromList(); // from nonempty_ - return span; +Length StaticForwarder::class_to_pages(int size_class) { + return Length(tc_globals.sizemap().class_to_pages(size_class)); } -void CentralFreeList::InsertRange(absl::Span batch) { - CHECK_CONDITION(!batch.empty() && batch.size() <= kMaxObjectsToMove); - Span* spans[kMaxObjectsToMove]; - // Safe to store free spans into freed up space in span array. - Span** free_spans = spans; - int free_count = 0; +ABSL_ATTRIBUTE_NOINLINE +static void ReportMismatchedSizeClass(void* object, int page_size_class, + int object_size_class) { + auto [object_min_size, object_max_size] = + tc_globals.sizemap().class_to_size_range(object_size_class); + auto [page_min_size, page_max_size] = + tc_globals.sizemap().class_to_size_range(page_size_class); + + TC_LOG("*** GWP-ASan (https://google.github.io/tcmalloc/gwp-asan.html) has detected a memory error ***"); + TC_LOG( + "Mismatched-size-class " + "(https://github.com/google/tcmalloc/tree/master/docs/mismatched-sized-delete.md) " + "discovered for pointer %p: this pointer was recently freed " + "with a size argument in the range [%v, %v], but the " + "associated span of allocated memory is for allocations with sizes " + "[%v, %v]. This is not a bug in tcmalloc, but rather is indicative " + "of an application bug such as buffer overrun/underrun, use-after-free " + "or double-free.", + object, object_min_size, object_max_size, page_min_size, page_max_size); + TC_LOG( + "NOTE: The blamed stack trace that is about to crash is not likely the " + "root cause of the issue. We are detecting the invalid deletion at a " + "later point in time and different code location."); + RecordCrash("GWP-ASan", "mismatched-size-class"); + + tc_globals.mismatched_delete_state().Record(object_min_size, object_max_size, + page_min_size, page_max_size, + std::nullopt, std::nullopt); + abort(); +} +void StaticForwarder::MapObjectsToSpans(absl::Span batch, Span** spans, + int expected_size_class) { // Prefetch Span objects to reduce cache misses. for (int i = 0; i < batch.size(); ++i) { - Span* span = MapObjectToSpan(batch[i]); - ASSERT(span != nullptr); + const PageId p = PageIdContaining(batch[i]); + auto [span, page_size_class] = + tc_globals.pagemap().GetExistingDescriptorAndSizeClass(p); + TC_ASSERT_NE(span, nullptr); + if (ABSL_PREDICT_FALSE(page_size_class != expected_size_class)) { + ReportMismatchedSizeClass(span, page_size_class, expected_size_class); + } span->Prefetch(); spans[i] = span; } +} - // First, release all individual objects into spans under our mutex - // and collect spans that become completely free. - { - // Use local copy of variable to ensure that it is not reloaded. 
- size_t object_size = object_size_; - absl::base_internal::SpinLockHolder h(&lock_); - for (int i = 0; i < batch.size(); ++i) { - Span* span = ReleaseToSpans(batch[i], spans[i], object_size); - if (ABSL_PREDICT_FALSE(span)) { - free_spans[free_count] = span; - free_count++; - } - } - - RecordMultiSpansDeallocated(free_count); - UpdateObjectCounts(batch.size()); +Span* StaticForwarder::AllocateSpan(int size_class, size_t objects_per_span, + Length pages_per_span) { + const MemoryTag tag = MemoryTagFromSizeClass(size_class); + const AccessDensityPrediction density = AccessDensity(objects_per_span); + + SpanAllocInfo span_alloc_info = {.objects_per_span = objects_per_span, + .density = density}; + TC_ASSERT(density == AccessDensityPrediction::kSparse || + (density == AccessDensityPrediction::kDense && + pages_per_span == Length(1))); + Span* span = + tc_globals.page_allocator().New(pages_per_span, span_alloc_info, tag); + if (ABSL_PREDICT_FALSE(span == nullptr)) { + return nullptr; } + TC_ASSERT_EQ(tag, GetMemoryTag(span->start_address())); + TC_ASSERT_EQ(span->num_pages(), pages_per_span); - // Then, release all free spans into page heap under its mutex. - if (ABSL_PREDICT_FALSE(free_count)) { - // Unregister size class doesn't require holding any locks. - for (int i = 0; i < free_count; ++i) { - Span* const free_span = free_spans[i]; - ASSERT(IsNormalMemory(free_span->start_address()) - ); - Static::pagemap().UnregisterSizeClass(free_span); - - // Before taking pageheap_lock, prefetch the PageTrackers these spans are - // on. - // - // Small-but-slow does not use the HugePageAwareAllocator (by default), so - // do not prefetch on this config. -#ifndef TCMALLOC_SMALL_BUT_SLOW - const PageId p = free_span->first_page(); - - // In huge_page_filler.h, we static_assert that PageTracker's key elements - // for deallocation are within the first two cachelines. - void* pt = Static::pagemap().GetHugepage(p); - // Prefetch for writing, as we will issue stores to the PageTracker - // instance. - __builtin_prefetch(pt, 1, 3); - __builtin_prefetch( - reinterpret_cast(reinterpret_cast(pt) + - ABSL_CACHELINE_SIZE), - 1, 3); -#endif // TCMALLOC_SMALL_BUT_SLOW - } - - const MemoryTag tag = MemoryTagFromSizeClass(size_class_); - absl::base_internal::SpinLockHolder h(&pageheap_lock); - for (int i = 0; i < free_count; ++i) { - Span* const free_span = free_spans[i]; - ASSERT(tag == GetMemoryTag(free_span->start_address())); - Static::page_allocator().Delete(free_span, tag); - } - } + tc_globals.pagemap().RegisterSizeClass(span, size_class); + return span; } -int CentralFreeList::RemoveRange(void** batch, int N) { - ASSUME(N > 0); - // Use local copy of variable to ensure that it is not reloaded. 
- size_t object_size = object_size_; - int result = 0; - absl::base_internal::SpinLockHolder h(&lock_); - if (ABSL_PREDICT_FALSE(nonempty_.empty())) { - result = Populate(batch, N); - } else { - do { - Span* span = nonempty_.first(); - int here = - span->FreelistPopBatch(batch + result, N - result, object_size); - ASSERT(here > 0); - if (span->FreelistEmpty(object_size)) { - span->RemoveFromList(); // from nonempty_ - } - result += here; - } while (result < N && !nonempty_.empty()); +#ifdef TCMALLOC_INTERNAL_LEGACY_LOCKING +static void ReturnSpansToPageHeap(MemoryTag tag, absl::Span free_spans, + size_t objects_per_span) + ABSL_LOCKS_EXCLUDED(pageheap_lock) { + PageHeapSpinLockHolder l; + for (Span* const free_span : free_spans) { + TC_ASSERT_EQ(tag, GetMemoryTag(free_span->start_address())); + tc_globals.page_allocator().Delete(free_span, tag); } - UpdateObjectCounts(-result); - return result; } - -// Fetch memory from the system and add to the central cache freelist. -int CentralFreeList::Populate(void** batch, - int N) ABSL_NO_THREAD_SAFETY_ANALYSIS { - // Release central list lock while operating on pageheap - // Note, this could result in multiple calls to populate each allocating - // a new span and the pushing those partially full spans onto nonempty. - lock_.Unlock(); - - const MemoryTag tag = MemoryTagFromSizeClass(size_class_); - Span* span = Static::page_allocator().New(pages_per_span_, tag); - if (ABSL_PREDICT_FALSE(span == nullptr)) { - Log(kLog, __FILE__, __LINE__, "tcmalloc: allocation failed", - pages_per_span_.in_bytes()); - lock_.Lock(); - return 0; - } - ASSERT(tag == GetMemoryTag(span->start_address())); - ASSERT(span->num_pages() == pages_per_span_); - - Static::pagemap().RegisterSizeClass(span, size_class_); - size_t objects_per_span = objects_per_span_; - int result = span->BuildFreelist(object_size_, objects_per_span, batch, N); - ASSERT(result > 0); - // This is a cheaper check than using FreelistEmpty(). - bool span_empty = result == objects_per_span; - - lock_.Lock(); - if (!span_empty) { - nonempty_.prepend(span); +#endif // TCMALLOC_INTERNAL_LEGACY_LOCKING + +static void ReturnAllocsToPageHeap( + MemoryTag tag, + absl::Span free_allocs) + ABSL_LOCKS_EXCLUDED(pageheap_lock) { + PageHeapSpinLockHolder l; + for (const auto& alloc : free_allocs) { + tc_globals.page_allocator().Delete(alloc, tag); } - RecordSpanAllocated(); - return result; } -size_t CentralFreeList::OverheadBytes() const { - if (ABSL_PREDICT_FALSE(object_size_ == 0)) { - return 0; +void StaticForwarder::DeallocateSpans(size_t objects_per_span, + absl::Span free_spans) { + TC_ASSERT_NE(free_spans.size(), 0); + const MemoryTag tag = GetMemoryTag(free_spans[0]->start_address()); + // Unregister size class doesn't require holding any locks. + for (Span* const free_span : free_spans) { + TC_ASSERT_EQ(GetMemoryTag(free_span->start_address()), tag); + TC_ASSERT_NE(GetMemoryTag(free_span->start_address()), MemoryTag::kSampled); + tc_globals.pagemap().UnregisterSizeClass(free_span); + + // Before taking pageheap_lock, prefetch the PageTrackers these spans are + // on. + const PageId p = free_span->first_page(); + + // In huge_page_filler.h, we static_assert that PageTracker's key elements + // for deallocation are within the first two cachelines. + void* pt = tc_globals.pagemap().GetHugepage(p); + // Prefetch for writing, as we will issue stores to the PageTracker + // instance. 
+ PrefetchW(pt); + PrefetchW(reinterpret_cast(reinterpret_cast(pt) + + ABSL_CACHELINE_SIZE)); } - const size_t overhead_per_span = pages_per_span_.in_bytes() % object_size_; - return num_spans() * overhead_per_span; -} -SpanStats CentralFreeList::GetSpanStats() const { - SpanStats stats; - if (ABSL_PREDICT_FALSE(objects_per_span_ == 0)) { - return stats; +#ifdef TCMALLOC_INTERNAL_LEGACY_LOCKING + ReturnSpansToPageHeap(tag, free_spans, objects_per_span); +#else + PageAllocatorInterface::AllocationState allocs[kMaxObjectsToMove]; + for (int i = 0, n = free_spans.size(); i < n; ++i) { + Span* s = free_spans[i]; + TC_ASSERT_EQ(tag, GetMemoryTag(s->start_address())); + allocs[i].r = Range(s->first_page(), s->num_pages()); + allocs[i].donated = s->donated(); + Span::Delete(s); } - stats.num_spans_requested = static_cast(num_spans_requested_.value()); - stats.num_spans_returned = static_cast(num_spans_returned_.value()); - stats.obj_capacity = stats.num_live_spans() * objects_per_span_; - return stats; + ReturnAllocsToPageHeap(tag, absl::MakeSpan(allocs, free_spans.size())); +#endif } +} // namespace central_freelist_internal } // namespace tcmalloc_internal } // namespace tcmalloc GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/central_freelist.h b/contrib/libs/tcmalloc/tcmalloc/central_freelist.h index 266f184d6b37..0cd456f8a2f9 100644 --- a/contrib/libs/tcmalloc/tcmalloc/central_freelist.h +++ b/contrib/libs/tcmalloc/tcmalloc/central_freelist.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,14 +18,27 @@ #include +#include #include +#include #include "absl/base/attributes.h" #include "absl/base/const_init.h" +#include "absl/base/internal/cycleclock.h" #include "absl/base/internal/spinlock.h" -#include "absl/base/macros.h" +#include "absl/base/optimization.h" #include "absl/base/thread_annotations.h" +#include "absl/numeric/bits.h" +#include "absl/types/span.h" +#include "tcmalloc/common.h" +#include "tcmalloc/hinted_tracker_lists.h" #include "tcmalloc/internal/atomic_stats_counter.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/optimization.h" +#include "tcmalloc/pages.h" +#include "tcmalloc/parameters.h" +#include "tcmalloc/selsan/selsan.h" #include "tcmalloc/span.h" #include "tcmalloc/span_stats.h" @@ -32,21 +46,66 @@ GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { +namespace central_freelist_internal { + +// StaticForwarder provides access to the PageMap and page heap. +// +// This is a class, rather than namespaced globals, so that it can be mocked for +// testing. 
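// A hypothetical test double (not part of this change) would provide the same
// entry points and be plugged in through the template parameter, e.g.:
//
//   struct FakeStaticForwarder {
//     static size_t class_to_size(int) { return 8; }
//     static Length class_to_pages(int) { return Length(1); }
//     // ...remaining StaticForwarder entry points stubbed out similarly...
//   };
//   using TestCentralFreeList =
//       central_freelist_internal::CentralFreeList<FakeStaticForwarder>;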
+class StaticForwarder { + public: + static uint32_t max_span_cache_size() { + return Parameters::max_span_cache_size(); + } + static uint32_t max_span_cache_array_size() { + return Parameters::max_span_cache_array_size(); + } + static uint64_t clock_now() { return absl::base_internal::CycleClock::Now(); } + static double clock_frequency() { + return absl::base_internal::CycleClock::Frequency(); + } + + static size_t class_to_size(int size_class); + static Length class_to_pages(int size_class); + static void MapObjectsToSpans(absl::Span batch, Span** spans, + int expected_size_class); + [[nodiscard]] static Span* AllocateSpan(int size_class, + size_t objects_per_span, + Length pages_per_span) + ABSL_LOCKS_EXCLUDED(pageheap_lock); + static void DeallocateSpans(size_t objects_per_span, + absl::Span free_spans) + ABSL_LOCKS_EXCLUDED(pageheap_lock); +}; + +// Specifies number of nonempty_ lists that keep track of non-empty spans. +static constexpr size_t kNumLists = 8; + +// Specifies the threshold for number of objects per span. The threshold is +// used to consider a span sparsely- vs. densely-accessed. +static constexpr size_t kFewObjectsAllocMaxLimit = 16; + // Data kept per size-class in central cache. +template class CentralFreeList { public: + using Forwarder = ForwarderT; + constexpr CentralFreeList() : lock_(absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY), size_class_(0), object_size_(0), objects_per_span_(0), + first_nonempty_index_(0), pages_per_span_(0), - nonempty_() {} + nonempty_(), + use_all_buckets_for_few_object_spans_(false), + lifetime_bucket_bounds_() {} CentralFreeList(const CentralFreeList&) = delete; CentralFreeList& operator=(const CentralFreeList&) = delete; - void Init(size_t cl) ABSL_LOCKS_EXCLUDED(lock_); + void Init(size_t size_class) ABSL_LOCKS_EXCLUDED(lock_); // These methods all do internal locking. @@ -56,7 +115,7 @@ class CentralFreeList { // Fill a prefix of batch[0..N-1] with up to N elements removed from central // freelist. Return the number of elements removed. - ABSL_MUST_USE_RESULT int RemoveRange(void** batch, int N) + [[nodiscard]] int RemoveRange(absl::Span batch) ABSL_LOCKS_EXCLUDED(lock_); // Returns the number of free objects in cache. @@ -68,6 +127,9 @@ class CentralFreeList { // page full of 5-byte objects would have 2 bytes memory overhead). size_t OverheadBytes() const; + // Returns number of live spans currently in the nonempty_[n] list. + // REQUIRES: n >= 0 && n < kNumLists. + size_t NumSpansInList(int n) ABSL_LOCKS_EXCLUDED(lock_); SpanStats GetSpanStats() const; void AcquireInternalLocks() { @@ -78,17 +140,69 @@ class CentralFreeList { lock_.Unlock(); } + // Reports span utilization and lifetime histogram stats. + void PrintSpanUtilStats(Printer& out); + void PrintSpanLifetimeStats(Printer& out); + void PrintSpanUtilStatsInPbtxt(PbtxtRegion& region); + void PrintSpanLifetimeStatsInPbtxt(PbtxtRegion& region); + + // Get number of spans in the histogram bucket. We record spans in the + // histogram indexed by absl::bit_width(allocated). So, instead of using the + // absolute number of allocated objects, it uses absl::bit_width(allocated), + // passed as , to index and return the number of spans in the + // histogram. + size_t NumSpansWith(uint16_t bitwidth) const; + + Forwarder& forwarder() { return forwarder_; } + private: // Release an object to spans. // Returns object's span if it become completely free. 
- Span* ReleaseToSpans(void* object, Span* span, size_t object_size) + Span* ReleaseToSpans(void* object, Span* span, size_t object_size, + uint32_t size_reciprocal, uint32_t max_span_cache_size) ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_); // Populate cache by fetching from the page heap. // May temporarily release lock_. // Fill a prefix of batch[0..N-1] with up to N elements removed from central // freelist. Returns the number of elements removed. - int Populate(void** batch, int N) ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_); + int Populate(absl::Span batch) ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_); + + // Allocate a span from the forwarder. + Span* AllocateSpan(); + + // Deallocate spans to the forwarder. + void DeallocateSpans(absl::Span spans); + + // Parses nonempty_ lists and returns span from the list with the lowest + // possible index. + // Returns the span if one exists in the nonempty_ lists. Else, returns + // nullptr. + Span* FirstNonEmptySpan() ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_); + + // Returns first index to the nonempty_ lists that may record spans. + uint8_t GetFirstNonEmptyIndex() const; + + // Returns index into nonempty_ based on the number of allocated objects for + // the span. Depending on the number of objects per span, either the absolute + // number of allocated objects or the absl::bit_width(allocated), passed as + // bitwidth, is used to to calculate the list index. + uint8_t IndexFor(uint16_t allocated, uint8_t bitwidth); + + // Records span utilization in objects_to_span_ map. Instead of using the + // absolute number of allocated objects, it uses absl::bit_width(allocated), + // passed as , to index this map. + // + // If increase is set to true, includes the span by incrementing the count + // in the map. Otherwise, removes the span by decrementing the count in + // the map. + void RecordSpanUtil(uint8_t bitwidth, bool increase) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { + ASSUME(bitwidth > 0); + // Updates to objects_to_span_ are guarded by lock_, so writes may be + // performed using LossyAdd. + objects_to_spans_[bitwidth - 1].LossyAdd(increase ? 1 : -1); + } // This lock protects all the mutable data members. absl::base_internal::SpinLock lock_; @@ -96,6 +210,13 @@ class CentralFreeList { size_t size_class_; // My size class (immutable after Init()) size_t object_size_; size_t objects_per_span_; + // Size reciprocal is used to replace division with multiplication when + // computing object indices in the Span bitmap. + uint32_t size_reciprocal_ = 0; + // Hint used for parsing through the nonempty_ lists. This prevents us from + // parsing the lists with an index starting zero, if the lowest possible index + // is higher than that. + size_t first_nonempty_index_; Length pages_per_span_; size_t num_spans() const { @@ -120,6 +241,18 @@ class CentralFreeList { counter_.LossyAdd(num); } + static constexpr size_t kLifetimeBuckets = 8; + using LifetimeHistogram = size_t[kLifetimeBuckets]; + + int LifetimeBucketNum(absl::Duration duration) { + int64_t duration_ms = absl::ToInt64Milliseconds(duration); + auto it = std::upper_bound(lifetime_bucket_bounds_, + lifetime_bucket_bounds_ + kLifetimeBuckets, + duration_ms); + TC_CHECK_NE(it, lifetime_bucket_bounds_); + return it - lifetime_bucket_bounds_ - 1; + } + // The followings are kept as a StatsCounter so that they can read without // acquiring a lock. 
Updates to these variables are guarded by lock_ // so writes are performed using LossyAdd for speed, the lock still @@ -131,10 +264,508 @@ class CentralFreeList { StatsCounter num_spans_requested_; StatsCounter num_spans_returned_; - // Dummy header for non-empty spans - SpanList nonempty_ ABSL_GUARDED_BY(lock_); + // Records histogram of span utilization. + // + // Each bucket in the histogram records number of live spans with + // corresponding number of allocated objects. Instead of using the absolute + // value of number of allocated objects, we use absl::bit_width(allocated) to + // index this map. A bucket in the histogram corresponds to power-of-two + // number of objects. That is, bucket N tracks number of spans with allocated + // objects < 2^(N+1). For instance, objects_to_spans_ map tracks number of + // spans with allocated objects in the range [a,b), indexed as: [1,2) in + // objects_to_spans_[0], [2,4) in objects_to_spans_[1], [4, 8) in + // objects_to_spans_[2] and so on. We can query the objects_to_spans_ map + // using NumSpansWith(bitwidth) to obtain the number of spans associated + // with the corresponding bucket in the histogram. + // + // As the actual value of objects_per_span_ is not known at compile time, we + // use maximum value that it can be to initialize this hashmap, and + // kSpanUtilBucketCapacity determines this value. We also check during Init + // that absl::bit_width(objects_per_span_) is indeed less than or equal to + // kSpanUtilBucketCapacity. + // + // We disable collection of histogram stats for TCMalloc small-but-slow due to + // performance issues. See b/227362263. + static constexpr size_t kSpanUtilBucketCapacity = 16; + StatsCounter objects_to_spans_[kSpanUtilBucketCapacity]; + + // Non-empty lists that distinguish spans based on the number of objects + // allocated from them. As we prioritize spans, spans may be added to any of + // the kNumLists nonempty_ lists based on their allocated objects. If span + // prioritization is disabled, we add spans to the nonempty_[kNumlists-1] + // list, leaving other lists unused. + HintedTrackerLists nonempty_ ABSL_GUARDED_BY(lock_); + bool use_all_buckets_for_few_object_spans_; + + size_t lifetime_bucket_bounds_[kLifetimeBuckets]; + + ABSL_ATTRIBUTE_NO_UNIQUE_ADDRESS Forwarder forwarder_; }; +// Like a constructor and hence we disable thread safety analysis. +template +inline void CentralFreeList::Init(size_t size_class) + ABSL_NO_THREAD_SAFETY_ANALYSIS { + size_class_ = size_class; + object_size_ = forwarder_.class_to_size(size_class); + if (object_size_ == 0) { + return; + } + if (selsan::IsEnabled()) { + object_size_ = selsan::RoundUpObjectSize(object_size_); + } + pages_per_span_ = forwarder_.class_to_pages(size_class); + objects_per_span_ = + pages_per_span_.in_bytes() / (object_size_ ? object_size_ : 1); + size_reciprocal_ = Span::CalcReciprocal(object_size_); + use_all_buckets_for_few_object_spans_ = objects_per_span_ <= 2 * kNumLists; + + // Records nonempty_ list index associated with the span with + // objects_per_span_ number of allocated objects. Refer to the comment in + // IndexFor(...) below for a detailed description. + first_nonempty_index_ = + use_all_buckets_for_few_object_spans_ + ? (kNumLists + 1 >= objects_per_span_ + ? 
kNumLists + 1 - objects_per_span_ + : 0) + : kNumLists - + std::min(absl::bit_width(objects_per_span_), kNumLists); + + TC_ASSERT(absl::bit_width(objects_per_span_) <= kSpanUtilBucketCapacity); + + lifetime_bucket_bounds_[0] = 0; + lifetime_bucket_bounds_[1] = 1; + for (int i = 2; i < kLifetimeBuckets; ++i) { + lifetime_bucket_bounds_[i] = lifetime_bucket_bounds_[i - 1] * 10; + } +} + +template +inline Span* CentralFreeList::ReleaseToSpans( + void* object, Span* span, size_t object_size, uint32_t size_reciprocal, + uint32_t max_span_cache_size) { + if (ABSL_PREDICT_FALSE(span->FreelistEmpty(object_size))) { + const uint8_t index = GetFirstNonEmptyIndex(); + nonempty_.Add(span, index); + span->set_nonempty_index(index); + } + + const uint8_t prev_index = span->nonempty_index(); + const uint16_t prev_allocated = span->Allocated(); + const uint8_t prev_bitwidth = absl::bit_width(prev_allocated); + if (ABSL_PREDICT_FALSE(!span->FreelistPush( + object, object_size, size_reciprocal, max_span_cache_size))) { + // Update the histogram as the span is full and will be removed from the + // nonempty_ list. + RecordSpanUtil(prev_bitwidth, /*increase=*/false); + nonempty_.Remove(span, prev_index); + return span; + } + // As the objects are being added to the span, its utilization might change. + // We remove the stale utilization from the histogram and add the new + // utilization to the histogram after we release objects to the span. + uint16_t cur_allocated = prev_allocated - 1; + TC_ASSERT_EQ(cur_allocated, span->Allocated()); + const uint8_t cur_bitwidth = absl::bit_width(cur_allocated); + if (cur_bitwidth != prev_bitwidth) { + RecordSpanUtil(prev_bitwidth, /*increase=*/false); + RecordSpanUtil(cur_bitwidth, /*increase=*/true); + } + // If span allocation changes so that it moved to a different nonempty_ list, + // we remove it from the previous list and add it to the desired list indexed + // by cur_index. + const uint8_t cur_index = IndexFor(cur_allocated, cur_bitwidth); + if (cur_index != prev_index) { + nonempty_.Remove(span, prev_index); + nonempty_.Add(span, cur_index); + span->set_nonempty_index(cur_index); + } + return nullptr; +} + +template +inline Span* CentralFreeList::FirstNonEmptySpan() { + // Scan nonempty_ lists in the range [first_nonempty_index_, kNumLists) and + // return the span from a non-empty list if one exists. If all the lists are + // empty, return nullptr. + return nonempty_.PeekLeast(GetFirstNonEmptyIndex()); +} + +template +inline uint8_t CentralFreeList::GetFirstNonEmptyIndex() const { + return first_nonempty_index_; +} + +template +inline uint8_t CentralFreeList::IndexFor(uint16_t allocated, + uint8_t bitwidth) { + // We would like to index into the nonempty_ list based on the number of + // allocated objects from the span. Given a span with fewer allocated objects + // (i.e. when it is more likely to be freed), we would like to map it to a + // higher index in the nonempty_ list. + // + // The number of objects per span is less than or equal to 2 * kNumlists. + // We index such spans by just the number of allocated objects. When the + // allocated objects are in the range [1, 8], then we map the spans to buckets + // 7, 6, ... 0 respectively. When the allocated objects are more than + // kNumlists, then we map the span to bucket 0. 
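Both indexing schemes described in this comment come down to a couple of lines of integer arithmetic; the function body continues right below. As a standalone sketch under the definitions above (kNumLists = 8; std::bit_width standing in for absl::bit_width; DirectIndex and BitwidthIndex are illustrative names), with the example mappings from the comment used as checks:

    #include <algorithm>
    #include <bit>
    #include <cassert>
    #include <cstddef>

    constexpr size_t kNumLists = 8;

    // Few-object spans (objects_per_span <= 2 * kNumLists): index directly by
    // the number of allocated objects; anything above kNumLists lands in list 0.
    size_t DirectIndex(size_t allocated) {
      return allocated <= kNumLists ? kNumLists - allocated : 0;
    }

    // Larger spans: index by bit width, so each list covers a power-of-two range
    // of allocated counts; everything at or past 2^(kNumLists-1) lands in list 0.
    size_t BitwidthIndex(size_t allocated) {
      const size_t offset = std::min<size_t>(std::bit_width(allocated), kNumLists);
      return kNumLists - offset;
    }

    int main() {
      // Allocated counts 1..8 map to lists 7..0; more than kNumLists maps to 0.
      assert(DirectIndex(1) == 7);
      assert(DirectIndex(8) == 0);
      assert(DirectIndex(9) == 0);
      // For a 1024-object span: [1,2)->7, [2,4)->6, ..., [64,128)->1, [128,1024)->0.
      assert(BitwidthIndex(1) == 7);
      assert(BitwidthIndex(3) == 6);
      assert(BitwidthIndex(64) == 1);
      assert(BitwidthIndex(1000) == 0);
    }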
+ ASSUME(allocated > 0); + if (use_all_buckets_for_few_object_spans_) { + if (allocated <= kNumLists) { + return kNumLists - allocated; + } + return 0; + } + // Depending on the number of kNumLists and the number of objects per span, we + // may have to clamp multiple buckets in index 0. It should be ok to do that + // because it is less beneficial to differentiate between spans that have 128 + // vs 256 allocated objects, compared to those that have 16 vs 32 allocated + // objects. + // + // Consider objects_per_span = 1024 and kNumLists = 8. The following examples + // show spans with allocated objects in the range [a, b) indexed to the + // nonempty_[idx] list using a notation [a, b) -> idx. + // [1, 2) -> 7, [2, 4) -> 6, [4, 8) -> 5, [8, 16) -> 4, [16, 32) -> 3, [32, + // 64) -> 2, [64, 128) -> 1, [128, 1024) -> 0. + ASSUME(bitwidth > 0); + const uint8_t offset = std::min(bitwidth, kNumLists); + const uint8_t index = kNumLists - offset; + ASSUME(index < kNumLists); + return index; +} + +template +inline size_t CentralFreeList::NumSpansInList(int n) { + ASSUME(n >= 0); + ASSUME(n < kNumLists); + AllocationGuardSpinLockHolder h(&lock_); + return nonempty_.SizeOfList(n); +} + +template +inline void CentralFreeList::InsertRange(absl::Span batch) { + TC_CHECK(!batch.empty()); + TC_CHECK_LE(batch.size(), kMaxObjectsToMove); + + if (selsan::IsEnabled()) { + for (auto& ptr : batch) { + ptr = selsan::ResetTag(ptr, object_size_); + } + } + + Span* spans[kMaxObjectsToMove]; + // First, map objects to spans and prefetch spans outside of our mutex + // (to reduce critical section size and cache misses). + forwarder_.MapObjectsToSpans(batch, spans, size_class_); + + if (objects_per_span_ == 1) { + // If there is only 1 object per span, skip CentralFreeList entirely. + DeallocateSpans({spans, batch.size()}); + return; + } + + // Safe to store free spans into freed up space in span array. + const uint32_t max_span_cache_size = forwarder_.max_span_cache_size(); + Span** free_spans = spans; + int free_count = 0; + + // Then, release all individual objects into spans under our mutex + // and collect spans that become completely free. + { + // Use local copy of variables to ensure that they are not reloaded. + size_t object_size = object_size_; + uint32_t size_reciprocal = size_reciprocal_; + absl::base_internal::SpinLockHolder h(&lock_); + for (int i = 0; i < batch.size(); ++i) { + Span* span = ReleaseToSpans(batch[i], spans[i], object_size, + size_reciprocal, max_span_cache_size); + if (ABSL_PREDICT_FALSE(span)) { + free_spans[free_count] = span; + free_count++; + } + } + + RecordMultiSpansDeallocated(free_count); + UpdateObjectCounts(batch.size()); + } + + // Then, release all free spans into page heap under its mutex. 
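Before that hand-off happens below, each ReleaseToSpans call above has already kept the span-utilization histogram in sync. Its bucketing is simply bit_width(allocated) - 1, so bucket k counts spans with allocated objects in [2^k, 2^(k+1)). A standalone sketch of that mapping (UtilBucket is an illustrative name; std::bit_width stands in for absl::bit_width):

    #include <bit>
    #include <cassert>
    #include <cstdint>

    // Histogram bucket for a span with `allocated` live objects.
    int UtilBucket(uint16_t allocated) {
      assert(allocated > 0);
      return std::bit_width(allocated) - 1;
    }

    int main() {
      assert(UtilBucket(1) == 0);    // [1, 2)
      assert(UtilBucket(3) == 1);    // [2, 4)
      assert(UtilBucket(4) == 2);    // [4, 8)
      assert(UtilBucket(255) == 7);  // [128, 256)
      assert(UtilBucket(256) == 8);  // [256, 512)
    }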
+ if (ABSL_PREDICT_FALSE(free_count)) { + DeallocateSpans(absl::MakeSpan(free_spans, free_count)); + } +} + +template +void CentralFreeList::DeallocateSpans(absl::Span spans) { + if (ABSL_PREDICT_TRUE(!selsan::IsEnabled())) { + return forwarder_.DeallocateSpans(objects_per_span_, spans); + } + Span* selsan_spans[kMaxObjectsToMove]; + size_t selsan_count = 0; + size_t normal_count = 0; + for (Span* span : spans) { + if (IsSelSanMemory(span->start_address())) { + selsan_spans[selsan_count++] = span; + } else { + spans[normal_count++] = span; + } + } + + if (normal_count) { + forwarder_.DeallocateSpans(objects_per_span_, {spans.data(), normal_count}); + } + if (selsan_count) { + forwarder_.DeallocateSpans(objects_per_span_, {selsan_spans, selsan_count}); + } +} + +template +inline int CentralFreeList::RemoveRange(absl::Span batch) { + TC_ASSERT(!batch.empty()); + + if (objects_per_span_ == 1) { + // If there is only 1 object per span, skip CentralFreeList entirely. + Span* span = AllocateSpan(); + if (ABSL_PREDICT_FALSE(span == nullptr)) { + return 0; + } + batch[0] = span->start_address(); + return 1; + } + + // Use local copy of variable to ensure that it is not reloaded. + size_t object_size = object_size_; + int result = 0; + absl::base_internal::SpinLockHolder h(&lock_); + + do { + Span* span = FirstNonEmptySpan(); + if (ABSL_PREDICT_FALSE(!span)) { + result += Populate(batch.subspan(result)); + break; + } + + const uint16_t prev_allocated = span->Allocated(); + const uint8_t prev_bitwidth = absl::bit_width(prev_allocated); + const uint8_t prev_index = span->nonempty_index(); + int here = span->FreelistPopBatch(batch.subspan(result), object_size); + TC_ASSERT_GT(here, 0); + // As the objects are being popped from the span, its utilization might + // change. So, we remove the stale utilization from the histogram here and + // add it again once we pop the objects. + const uint16_t cur_allocated = prev_allocated + here; + TC_ASSERT_EQ(cur_allocated, span->Allocated()); + const uint8_t cur_bitwidth = absl::bit_width(cur_allocated); + if (cur_bitwidth != prev_bitwidth) { + RecordSpanUtil(prev_bitwidth, /*increase=*/false); + RecordSpanUtil(cur_bitwidth, /*increase=*/true); + } + if (span->FreelistEmpty(object_size)) { + nonempty_.Remove(span, prev_index); + } else { + // If span allocation changes so that it must be moved to a different + // nonempty_ list, we remove it from the previous list and add it to the + // desired list indexed by cur_index. + const uint8_t cur_index = IndexFor(cur_allocated, cur_bitwidth); + if (cur_index != prev_index) { + nonempty_.Remove(span, prev_index); + nonempty_.Add(span, cur_index); + span->set_nonempty_index(cur_index); + } + } + result += here; + } while (result < batch.size()); + UpdateObjectCounts(-result); + return result; +} + +// Fetch memory from the system and add to the central cache freelist. +template +inline int CentralFreeList::Populate(absl::Span batch) + ABSL_NO_THREAD_SAFETY_ANALYSIS { + // Release central list lock while operating on pageheap + // Note, this could result in multiple calls to populate each allocating + // a new span and the pushing those partially full spans onto nonempty. 
+ lock_.Unlock(); + + Span* span = AllocateSpan(); + if (ABSL_PREDICT_FALSE(span == nullptr)) { + return 0; + } + + const uint64_t alloc_time = forwarder_.clock_now(); + int result = + span->BuildFreelist(object_size_, objects_per_span_, batch, + forwarder_.max_span_cache_size(), alloc_time); + TC_ASSERT_GT(result, 0); + // This is a cheaper check than using FreelistEmpty(). + bool span_empty = result == objects_per_span_; + + lock_.Lock(); + + // Update the histogram once we populate the span. + const uint16_t allocated = result; + TC_ASSERT_EQ(allocated, span->Allocated()); + const uint8_t bitwidth = absl::bit_width(allocated); + RecordSpanUtil(bitwidth, /*increase=*/true); + if (!span_empty) { + const uint8_t index = IndexFor(allocated, bitwidth); + nonempty_.Add(span, index); + span->set_nonempty_index(index); + } + RecordSpanAllocated(); + return result; +} + +template +Span* CentralFreeList::AllocateSpan() { + Span* span = + forwarder_.AllocateSpan(size_class_, objects_per_span_, pages_per_span_); + if (ABSL_PREDICT_FALSE(span == nullptr)) { + TC_LOG("tcmalloc: allocation failed %v", pages_per_span_); + } + return span; +} + +template +inline size_t CentralFreeList::OverheadBytes() const { + if (ABSL_PREDICT_FALSE(object_size_ == 0)) { + return 0; + } + const size_t overhead_per_span = pages_per_span_.in_bytes() % object_size_; + return num_spans() * overhead_per_span; +} + +template +inline SpanStats CentralFreeList::GetSpanStats() const { + SpanStats stats; + if (ABSL_PREDICT_FALSE(objects_per_span_ == 0)) { + return stats; + } + stats.num_spans_requested = static_cast(num_spans_requested_.value()); + stats.num_spans_returned = static_cast(num_spans_returned_.value()); + stats.obj_capacity = stats.num_live_spans() * objects_per_span_; + return stats; +} + +template +inline size_t CentralFreeList::NumSpansWith( + uint16_t bitwidth) const { + TC_ASSERT_GT(bitwidth, 0); + const int bucket = bitwidth - 1; + return objects_to_spans_[bucket].value(); +} + +template +inline void CentralFreeList::PrintSpanUtilStats(Printer& out) { + out.printf("class %3d [ %8zu bytes ] : ", size_class_, object_size_); + for (size_t i = 1; i <= kSpanUtilBucketCapacity; ++i) { + out.printf("%6zu < %zu", NumSpansWith(i), 1 << i); + if (i < kSpanUtilBucketCapacity) { + out.printf(","); + } + } + out.printf("\n"); + out.printf("class %3d [ %8zu bytes ] : ", size_class_, object_size_); + for (size_t i = 0; i < kNumLists; ++i) { + out.printf("%6zu: %zu", i, NumSpansInList(i)); + if (i < kNumLists - 1) { + out.printf(","); + } + } + out.printf("\n"); +} + +template +inline void CentralFreeList::PrintSpanLifetimeStats(Printer& out) { + // We do not log allocation time when bitmap is used for spans. 
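The lifetime printers here and in the pbtxt variant below bucket a span's age with LifetimeBucketNum against the bounds set up in Init: 0, 1, then successive powers of ten, in milliseconds. A standalone sketch of that lookup under those bounds (kBoundsMs and LifetimeBucket are illustrative names):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    constexpr int kLifetimeBuckets = 8;
    // Bucket lower bounds in milliseconds: 0, 1, 10, 100, ..., 1'000'000.
    constexpr int64_t kBoundsMs[kLifetimeBuckets] = {0,    1,     10,     100,
                                                     1000, 10000, 100000, 1000000};

    // Index of the last bound <= duration_ms: the same upper_bound-minus-one
    // lookup performed by LifetimeBucketNum.
    int LifetimeBucket(int64_t duration_ms) {
      const int64_t* it =
          std::upper_bound(kBoundsMs, kBoundsMs + kLifetimeBuckets, duration_ms);
      assert(it != kBoundsMs);  // duration_ms is never negative, so bound 0 always qualifies
      return static_cast<int>(it - kBoundsMs) - 1;
    }

    int main() {
      assert(LifetimeBucket(0) == 0);          // the "0 ms <" row
      assert(LifetimeBucket(999) == 3);        // 100 <= 999 < 1000
      assert(LifetimeBucket(1000) == 4);       // the "1000 ms <" row seen in the tests
      assert(LifetimeBucket(123456789) == 7);  // past 1,000,000 ms stays in the last bucket
    }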
+ if (Span::UseBitmapForSize(object_size_)) return; + + uint64_t now = forwarder_.clock_now(); + double frequency = forwarder_.clock_frequency(); + LifetimeHistogram lifetime_histo{}; + + { + AllocationGuardSpinLockHolder h(&lock_); + nonempty_.Iter( + [&](const Span& s) GOOGLE_MALLOC_SECTION { + const double elapsed = std::max( + now - s.AllocTime(size_class_, forwarder_.max_span_cache_size()), + 0); + const absl::Duration lifetime = + absl::Milliseconds(elapsed * 1000 / frequency); + ++lifetime_histo[LifetimeBucketNum(lifetime)]; + }, + 0); + } + + out.printf("class %3d [ %8zu bytes ] : ", size_class_, object_size_); + for (size_t i = 0; i < kLifetimeBuckets; ++i) { + out.printf("%3zu ms < %6zu", lifetime_bucket_bounds_[i], lifetime_histo[i]); + if (i < kLifetimeBuckets - 1) { + out.printf(","); + } + } + out.printf("\n"); +} + +template +inline void CentralFreeList::PrintSpanUtilStatsInPbtxt( + PbtxtRegion& region) { + for (size_t i = 1; i <= kSpanUtilBucketCapacity; ++i) { + PbtxtRegion histogram = region.CreateSubRegion("span_util_histogram"); + histogram.PrintI64("lower_bound", 1 << (i - 1)); + histogram.PrintI64("upper_bound", 1 << i); + histogram.PrintI64("value", NumSpansWith(i)); + } + + for (size_t i = 0; i < kNumLists; ++i) { + PbtxtRegion occupancy = + region.CreateSubRegion("prioritization_list_occupancy"); + occupancy.PrintI64("list_index", i); + occupancy.PrintI64("value", NumSpansInList(i)); + } +} + +template +inline void CentralFreeList::PrintSpanLifetimeStatsInPbtxt( + PbtxtRegion& region) { + // We do not log allocation time when bitmap is used for spans. + if (Span::UseBitmapForSize(object_size_)) return; + + uint64_t now = forwarder_.clock_now(); + double frequency = forwarder_.clock_frequency(); + LifetimeHistogram lifetime_histo{}; + + { + AllocationGuardSpinLockHolder h(&lock_); + nonempty_.Iter( + [&](const Span& s) GOOGLE_MALLOC_SECTION { + const double elapsed = std::max( + now - s.AllocTime(size_class_, forwarder_.max_span_cache_size()), + 0); + const absl::Duration lifetime = + absl::Milliseconds(elapsed * 1000 / frequency); + ++lifetime_histo[LifetimeBucketNum(lifetime)]; + }, + 0); + } + + for (size_t i = 0; i < kLifetimeBuckets; ++i) { + PbtxtRegion histogram = region.CreateSubRegion("span_lifetime_histogram"); + histogram.PrintI64("lower_bound", lifetime_bucket_bounds_[i]); + histogram.PrintI64("upper_bound", (i == kLifetimeBuckets - 1 + ? lifetime_bucket_bounds_[i] + : lifetime_bucket_bounds_[i + 1])); + histogram.PrintI64("value", lifetime_histo[i]); + } +} + +} // namespace central_freelist_internal + +using CentralFreeList = central_freelist_internal::CentralFreeList< + central_freelist_internal::StaticForwarder>; + } // namespace tcmalloc_internal } // namespace tcmalloc GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/central_freelist_benchmark.cc b/contrib/libs/tcmalloc/tcmalloc/central_freelist_benchmark.cc index a80d5807539e..56de549ebda1 100644 --- a/contrib/libs/tcmalloc/tcmalloc/central_freelist_benchmark.cc +++ b/contrib/libs/tcmalloc/tcmalloc/central_freelist_benchmark.cc @@ -19,9 +19,9 @@ #include "absl/algorithm/container.h" #include "absl/random/random.h" +#include "absl/types/span.h" #include "benchmark/benchmark.h" #include "tcmalloc/central_freelist.h" -#include "tcmalloc/common.h" #include "tcmalloc/static_vars.h" #include "tcmalloc/tcmalloc_policy.h" @@ -34,25 +34,26 @@ namespace { // to minimize the time it takes to free them. 
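One mechanical change running through the benchmark bodies below: the timing loops now drive the benchmark with state.KeepRunningBatch(num_batches) instead of iterating the state once per pass, since each pass performs roughly num_batches batched RemoveRange calls and should be accounted as that many iterations. A minimal illustration of the pattern (BM_BatchedWork is a placeholder, not one of these benchmarks; KeepRunningBatch and DoNotOptimize are standard Google Benchmark APIs):

    #include "benchmark/benchmark.h"

    // Each pass of the while loop performs kBatch units of work, so
    // KeepRunningBatch(kBatch) accounts kBatch iterations per pass and the
    // reported time-per-iteration stays per unit of work.
    static void BM_BatchedWork(benchmark::State& state) {
      constexpr int kBatch = 64;
      while (state.KeepRunningBatch(kBatch)) {
        for (int i = 0; i < kBatch; ++i) {
          benchmark::DoNotOptimize(i * i);  // stand-in for one batched operation
        }
      }
    }
    BENCHMARK(BM_BatchedWork);
    BENCHMARK_MAIN();

This matches the num_batches value each benchmark below computes from num_objects / batch_size.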
void BM_Populate(benchmark::State& state) { size_t object_size = state.range(0); - size_t cl = Static::sizemap().SizeClass(CppPolicy(), object_size); - int batch_size = Static::sizemap().num_objects_to_move(cl); + size_t size_class = tc_globals.sizemap().SizeClass(CppPolicy(), object_size); + int batch_size = tc_globals.sizemap().num_objects_to_move(size_class); int num_objects = 64 * 1024 * 1024 / object_size; + const int num_batches = num_objects / batch_size; CentralFreeList cfl; // Initialize the span to contain the appropriate size of object. - cfl.Init(cl); + cfl.Init(size_class); // Allocate an array large enough to hold 64 MiB of objects. std::vector buffer(num_objects); int64_t items_processed = 0; absl::BitGen rnd; - for (auto s : state) { + while (state.KeepRunningBatch(num_batches)) { int index = 0; // The cost of fetching objects will include the cost of fetching and // populating the span. while (index < num_objects) { int count = std::min(batch_size, num_objects - index); - int got = cfl.RemoveRange(&buffer[index], count); + int got = cfl.RemoveRange(absl::MakeSpan(buffer).subspan(index, count)); index += got; } @@ -83,23 +84,24 @@ BENCHMARK(BM_Populate) // them is usually done spread over many active spans. void BM_MixAndReturn(benchmark::State& state) { size_t object_size = state.range(0); - size_t cl = Static::sizemap().SizeClass(CppPolicy(), object_size); - int batch_size = Static::sizemap().num_objects_to_move(cl); + size_t size_class = tc_globals.sizemap().SizeClass(CppPolicy(), object_size); + int batch_size = tc_globals.sizemap().num_objects_to_move(size_class); int num_objects = 64 * 1024 * 1024 / object_size; + const int num_batches = num_objects / batch_size; CentralFreeList cfl; // Initialize the span to contain the appropriate size of object. - cfl.Init(cl); + cfl.Init(size_class); // Allocate an array large enough to hold 64 MiB of objects. std::vector buffer(num_objects); int64_t items_processed = 0; absl::BitGen rnd; - for (auto s : state) { + while (state.KeepRunningBatch(num_batches)) { int index = 0; while (index < num_objects) { int count = std::min(batch_size, num_objects - index); - int got = cfl.RemoveRange(&buffer[index], count); + int got = cfl.RemoveRange(absl::MakeSpan(buffer).subspan(index, count)); index += got; } @@ -131,19 +133,21 @@ BENCHMARK(BM_MixAndReturn) // code, and avoids timing the pageheap code. void BM_SpanReuse(benchmark::State& state) { size_t object_size = state.range(0); - size_t cl = Static::sizemap().SizeClass(CppPolicy(), object_size); - int batch_size = Static::sizemap().num_objects_to_move(cl); + size_t size_class = tc_globals.sizemap().SizeClass(CppPolicy(), object_size); + int batch_size = tc_globals.sizemap().num_objects_to_move(size_class); int num_objects = 64 * 1024 * 1024 / object_size; + const int num_batches = num_objects / batch_size; CentralFreeList cfl; // Initialize the span to contain the appropriate size of object. 
- cfl.Init(cl); + cfl.Init(size_class); // Array used to hold onto half of the objects std::vector held_objects(2 * num_objects); // Request twice the objects we need for (int index = 0; index < 2 * num_objects;) { int count = std::min(batch_size, 2 * num_objects - index); - int got = cfl.RemoveRange(&held_objects[index], count); + int got = + cfl.RemoveRange(absl::MakeSpan(held_objects).subspan(index, count)); index += got; } @@ -158,11 +162,11 @@ void BM_SpanReuse(benchmark::State& state) { int64_t items_processed = 0; absl::BitGen rnd; - for (auto s : state) { + while (state.KeepRunningBatch(num_batches)) { int index = 0; while (index < num_objects) { int count = std::min(batch_size, num_objects - index); - int got = cfl.RemoveRange(&buffer[index], count); + int got = cfl.RemoveRange(absl::MakeSpan(buffer).subspan(index, count)); index += got; } diff --git a/contrib/libs/tcmalloc/tcmalloc/central_freelist_fuzz.cc b/contrib/libs/tcmalloc/tcmalloc/central_freelist_fuzz.cc new file mode 100644 index 000000000000..6c2a42aac3b3 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/central_freelist_fuzz.cc @@ -0,0 +1,161 @@ +// Copyright 2020 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include + +#include "fuzztest/fuzztest.h" +#include "absl/log/check.h" +#include "absl/types/span.h" +#include "tcmalloc/central_freelist.h" +#include "tcmalloc/common.h" +#include "tcmalloc/mock_static_forwarder.h" +#include "tcmalloc/sizemap.h" +#include "tcmalloc/span_stats.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc::tcmalloc_internal { +namespace { + +using CentralFreeList = central_freelist_internal::CentralFreeList< + tcmalloc_internal::MockStaticForwarder>; +using CentralFreelistEnv = FakeCentralFreeListEnvironment; + +void FuzzCFL(const std::string& s) { + const char* data = s.data(); + size_t size = s.size(); + + if (size < 11 || size > 100000) { + // size < 11 for bare minimum fuzz test for a single operation. + // Avoid overly large inputs as we perform some shuffling and checking. + return; + } + // object_size can be at most kMaxSize. The current maximum value of kMaxSize + // is 2^18. So we use the first 24 bits to set object_size. + // + // TODO(b/271282540): Convert these to strongly typed fuzztest parameters. + const size_t object_size = data[0] | (data[1] << 8) | (data[2] << 16); + const size_t num_pages = data[3]; + const size_t num_objects_to_move = data[4]; + // data[5] & 0x1: reserved + const bool use_large_spans = data[5] & 0x2; + data += 6; + size -= 6; + if (!SizeMap::IsValidSizeClass(object_size, num_pages, num_objects_to_move)) { + return; + } + CentralFreelistEnv env(object_size, num_pages, num_objects_to_move, + use_large_spans); + std::vector objects; + + for (int i = 0; i + 5 < size; i += 5) { + // data[N] : choose the operation. + const uint8_t op = data[i]; + // We only use data[N+1] right now. data[N+4:N+2] are currently reserved. 
+ // TODO(271282540): Add support for multiple size classes for fuzzing. + uint32_t value; + memcpy(&value, &data[i + 1], sizeof(value)); + + switch (op & 0x7) { + case 0: { + // Allocate objects. + // value[7:0] : number of objects to allocate. + const uint8_t num_objects = value & 0x00FF; + void* batch[kMaxObjectsToMove]; + const size_t n = num_objects % kMaxObjectsToMove + 1; + int allocated = + env.central_freelist().RemoveRange(absl::MakeSpan(batch, n)); + objects.insert(objects.end(), batch, batch + allocated); + break; + } + case 1: { + // Deallocate objects if number of previously allocated objects is + // non-empty. value[7:0] : number of objects to deallocate. + if (objects.empty()) break; + + const uint8_t num_objects = value & 0x00FF; + const size_t n = std::min(num_objects % kMaxObjectsToMove + 1, + objects.size()); + env.central_freelist().InsertRange({&objects[objects.size() - n], n}); + objects.resize(objects.size() - n); + break; + } + case 2: { + // Shuffle allocated objects such that we don't return them in the + // same order we allocated them. + const int seed = value & 0x00FF; + std::mt19937 rng(seed); + // Limit number of elements to shuffle so that we don't spend a lot of + // time in shuffling a large number of objects. + constexpr int kMaxToShuffle = 10 * kMaxObjectsToMove; + if (objects.size() <= kMaxToShuffle) { + std::shuffle(objects.begin(), objects.end(), rng); + } else { + std::shuffle(objects.end() - kMaxToShuffle, objects.end(), rng); + } + break; + } + case 3: { + // Check stats. + tcmalloc_internal::SpanStats stats = + env.central_freelist().GetSpanStats(); + // Spans with objects_per_span = 1 skip most of the logic in the + // central freelist including stats updates. So skip the check for + // objects_per_span = 1. + if (env.objects_per_span() != 1) { + CHECK_EQ(env.central_freelist().length() + objects.size(), + stats.obj_capacity); + if (objects.empty()) { + CHECK_EQ(stats.num_live_spans(), 0); + } else { + CHECK_GT(stats.num_live_spans(), 0); + } + } + break; + } + case 4: { + std::string s; + s.resize(1 << 20); + Printer p(&s[0], s.size()); + env.central_freelist().PrintSpanUtilStats(p); + env.central_freelist().PrintSpanLifetimeStats(p); + + PbtxtRegion region(p, kTop); + env.central_freelist().PrintSpanUtilStatsInPbtxt(region); + env.central_freelist().PrintSpanLifetimeStatsInPbtxt(region); + break; + } + } + } + + // Clean up. 
+ const size_t allocated = objects.size(); + size_t returned = 0; + while (returned < allocated) { + const size_t to_return = std::min(allocated - returned, kMaxObjectsToMove); + env.central_freelist().InsertRange({&objects[returned], to_return}); + returned += to_return; + } +} + +FUZZ_TEST(CentralFreeListTest, FuzzCFL) + ; + +} // namespace +} // namespace tcmalloc::tcmalloc_internal +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/central_freelist_test.cc b/contrib/libs/tcmalloc/tcmalloc/central_freelist_test.cc index de5960120d72..1cec1ec27a00 100644 --- a/contrib/libs/tcmalloc/tcmalloc/central_freelist_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/central_freelist_test.cc @@ -14,75 +14,810 @@ #include "tcmalloc/central_freelist.h" +#include +#include +#include + #include +#include +#include +#include +#include +#include +#include +#include "benchmark/benchmark.h" #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "absl/algorithm/container.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/thread_annotations.h" +#include "absl/container/fixed_array.h" +#include "absl/container/flat_hash_map.h" +#include "absl/memory/memory.h" +#include "absl/numeric/bits.h" #include "absl/random/random.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "absl/types/span.h" #include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/mock_static_forwarder.h" +#include "tcmalloc/pagemap.h" +#include "tcmalloc/pages.h" +#include "tcmalloc/size_class_info.h" +#include "tcmalloc/sizemap.h" +#include "tcmalloc/span.h" +#include "tcmalloc/span_stats.h" #include "tcmalloc/static_vars.h" +#include "tcmalloc/stats.h" +#include "tcmalloc/testing/thread_manager.h" namespace tcmalloc { namespace tcmalloc_internal { -namespace { -// TODO(b/162552708) Mock out the page heap to interact with CFL instead -class CFLTest : public testing::TestWithParam { +namespace central_freelist_internal { + +class StaticForwarderTest : public testing::TestWithParam { protected: - size_t cl_; + size_t size_class_; + size_t object_size_; + Length pages_per_span_; size_t batch_size_; size_t objects_per_span_; - CentralFreeList cfl_; + uint32_t size_reciprocal_; private: void SetUp() override { - cl_ = GetParam(); - size_t object_size = Static::sizemap().class_to_size(cl_); - if (object_size == 0) { + size_class_ = GetParam(); + if (IsExpandedSizeClass(size_class_)) { +#if ABSL_HAVE_THREAD_SANITIZER + GTEST_SKIP() << "Skipping test under sanitizers that conflict with " + "address placement"; +#endif + + if (!ColdFeatureActive()) { + // If !ColdFeatureActive(), we will use the normal page heap, which will + // keep us from seeing memory get the expected tags. 
+ GTEST_SKIP() + << "Skipping expanded size classes without cold experiment"; + } + } + object_size_ = tc_globals.sizemap().class_to_size(size_class_); + if (object_size_ == 0) { GTEST_SKIP() << "Skipping empty size class."; } - auto pages_per_span = Length(Static::sizemap().class_to_pages(cl_)); - batch_size_ = Static::sizemap().num_objects_to_move(cl_); - objects_per_span_ = pages_per_span.in_bytes() / object_size; - cfl_.Init(cl_); + pages_per_span_ = Length(tc_globals.sizemap().class_to_pages(size_class_)); + batch_size_ = tc_globals.sizemap().num_objects_to_move(size_class_); + objects_per_span_ = pages_per_span_.in_bytes() / object_size_; + size_reciprocal_ = Span::CalcReciprocal(object_size_); + } +}; + +TEST_P(StaticForwarderTest, Simple) { + Span* span = StaticForwarder::AllocateSpan(size_class_, objects_per_span_, + pages_per_span_); + ASSERT_NE(span, nullptr); + + absl::FixedArray batch(objects_per_span_); + const uint32_t max_span_cache_size = StaticForwarder::max_span_cache_size(); + const uint64_t alloc_time = StaticForwarder::clock_now(); + size_t allocated = span->BuildFreelist(object_size_, objects_per_span_, + absl::MakeSpan(batch), + max_span_cache_size, alloc_time); + ASSERT_EQ(allocated, objects_per_span_); + + EXPECT_EQ(size_class_, tc_globals.pagemap().sizeclass(span->first_page())); + EXPECT_EQ(size_class_, tc_globals.pagemap().sizeclass(span->last_page())); + + // span_test.cc provides test coverage for Span, but we need to obtain several + // objects to confirm we can map back to the Span pointer from the PageMap. + for (void* ptr : batch) { + Span* got; + StaticForwarder::MapObjectsToSpans({&ptr, 1}, &got, size_class_); + EXPECT_EQ(span, got); + } + + for (void* ptr : batch) { + EXPECT_EQ(span->FreelistPush(ptr, object_size_, size_reciprocal_, + max_span_cache_size), + ptr != batch.back()); + } + + StaticForwarder::DeallocateSpans(objects_per_span_, absl::MakeSpan(&span, 1)); +} + +class StaticForwarderEnvironment { + struct SpanData { + Span* span; + void* batch[kMaxObjectsToMove]; + }; + + public: + StaticForwarderEnvironment(int size_class, size_t object_size, + size_t objects_per_span, Length pages_per_span, + int batch_size) + : size_class_(size_class), + object_size_(object_size), + objects_per_span_(objects_per_span), + pages_per_span_(pages_per_span), + batch_size_(batch_size) {} + + ~StaticForwarderEnvironment() { Drain(); } + + void RandomlyPoke() { + absl::BitGen rng; + double coin = absl::Uniform(rng, 0.0, 1.0); + + if (coin < 0.5) { + Grow(); + } else if (coin < 0.9) { + // Deallocate Spans. We may deallocate more than 1 span, so we bias + // towards allocating Spans more often than we deallocate. + Shrink(); + } else { + Shuffle(rng); + } + } + + void Drain() { + std::vector> spans; + + { + absl::MutexLock l(&mu_); + if (data_.empty()) { + return; + } + + spans = std::move(data_); + data_.clear(); + } + + // Check mappings. + std::vector free_spans; + for (const auto& data : spans) { + EXPECT_EQ(size_class_, + tc_globals.pagemap().sizeclass(data->span->first_page())); + EXPECT_EQ(size_class_, + tc_globals.pagemap().sizeclass(data->span->last_page())); + // Confirm we can map at least one object back. 
+ Span* got; + StaticForwarder::MapObjectsToSpans({&data->batch[0], 1}, &got, + size_class_); + EXPECT_EQ(data->span, got); + + free_spans.push_back(data->span); + } + + StaticForwarder::DeallocateSpans(objects_per_span_, + absl::MakeSpan(free_spans)); + } + + void Grow() { + // Allocate a Span + Span* span = StaticForwarder::AllocateSpan(size_class_, objects_per_span_, + pages_per_span_); + ASSERT_NE(span, nullptr); + + auto d = std::make_unique(); + d->span = span; + + size_t allocated = span->BuildFreelist( + object_size_, objects_per_span_, absl::MakeSpan(d->batch, batch_size_), + StaticForwarder::max_span_cache_size(), StaticForwarder::clock_now()); + EXPECT_LE(allocated, objects_per_span_); + + EXPECT_EQ(size_class_, tc_globals.pagemap().sizeclass(span->first_page())); + EXPECT_EQ(size_class_, tc_globals.pagemap().sizeclass(span->last_page())); + // Confirm we can map at least one object back. + Span* got; + StaticForwarder::MapObjectsToSpans({&d->batch[0], 1}, &got, size_class_); + EXPECT_EQ(span, got); + + absl::MutexLock l(&mu_); + spans_allocated_++; + data_.push_back(std::move(d)); + } + + void Shrink() { + absl::BitGen rng; + std::vector> spans; + + { + absl::MutexLock l(&mu_); + if (data_.empty()) { + return; + } + + size_t count = absl::LogUniform(rng, 1, data_.size()); + spans.reserve(count); + + for (int i = 0; i < count; i++) { + spans.push_back(std::move(data_.back())); + data_.pop_back(); + } + } + + // Check mappings. + std::vector free_spans; + for (auto& data : spans) { + EXPECT_EQ(size_class_, + tc_globals.pagemap().sizeclass(data->span->first_page())); + EXPECT_EQ(size_class_, + tc_globals.pagemap().sizeclass(data->span->last_page())); + // Confirm we can map at least one object back. + Span* got; + StaticForwarder::MapObjectsToSpans({&data->batch[0], 1}, &got, + size_class_); + EXPECT_EQ(data->span, got); + + free_spans.push_back(data->span); + } + + StaticForwarder::DeallocateSpans(objects_per_span_, + absl::MakeSpan(free_spans)); + } + + void Shuffle(absl::BitGen& rng) { + // Shuffle the shared vector. + absl::MutexLock l(&mu_); + absl::c_shuffle(data_, rng); + } + + int64_t BytesAllocated() { + absl::MutexLock l(&mu_); + return pages_per_span_.in_bytes() * spans_allocated_; } - void TearDown() override { EXPECT_EQ(cfl_.length(), 0); } + private: + int size_class_; + size_t object_size_; + size_t objects_per_span_; + Length pages_per_span_; + int batch_size_; + + absl::Mutex mu_; + int64_t spans_allocated_ ABSL_GUARDED_BY(mu_) = 0; + std::vector> data_ ABSL_GUARDED_BY(mu_); }; -TEST_P(CFLTest, SingleBatch) { +static BackingStats PageHeapStats() { + PageHeapSpinLockHolder l; + return tc_globals.page_allocator().stats(); +} + +TEST_P(StaticForwarderTest, Fuzz) { +#if ABSL_HAVE_THREAD_SANITIZER + // TODO(b/193887621): Enable this test under TSan after addressing benign + // true positives. + GTEST_SKIP() << "Skipping test under Thread Sanitizer."; +#endif // ABSL_HAVE_THREAD_SANITIZER + + const auto page_heap_before = PageHeapStats(); + + StaticForwarderEnvironment env(size_class_, object_size_, objects_per_span_, + pages_per_span_, batch_size_); + ThreadManager threads; + threads.Start(10, [&](int) { env.RandomlyPoke(); }); + + absl::SleepFor(absl::Seconds(0.2)); + + threads.Stop(); + + const auto page_heap_after = PageHeapStats(); + // Confirm we did not leak Spans by ensuring the page heap did not grow nearly + // 1:1 by the total number of Spans we ever allocated. 
+ // + // Since we expect to allocate a significant number of spans, we apply a + // factor of 1/2 (which is unlikely to be flaky) to avoid false negatives + // if/when a background thread triggers a deallocation. + const int64_t bytes_allocated = env.BytesAllocated(); + EXPECT_GT(bytes_allocated, 0); + EXPECT_LE(static_cast(page_heap_after.system_bytes) - + static_cast(page_heap_before.system_bytes), + bytes_allocated / 2); +} + +INSTANTIATE_TEST_SUITE_P(All, StaticForwarderTest, + testing::Range(size_t(1), kNumClasses)); + +} // namespace central_freelist_internal + +namespace { + +using central_freelist_internal::kNumLists; +using TypeParam = FakeCentralFreeListEnvironment< + central_freelist_internal::CentralFreeList>; +using CentralFreeListTest = + ::testing::TestWithParam>; + +TEST_P(CentralFreeListTest, IsolatedSmoke) { + TypeParam e(std::get<0>(GetParam()).size, std::get<0>(GetParam()).pages, + std::get<0>(GetParam()).num_to_move, std::get<1>(GetParam())); + EXPECT_CALL(e.forwarder(), AllocateSpan).Times(1); + + absl::FixedArray batch(e.batch_size()); + int allocated = e.central_freelist().RemoveRange( + absl::MakeSpan(&batch[0], e.batch_size())); + ASSERT_GT(allocated, 0); + EXPECT_LE(allocated, e.batch_size()); + + // We should observe span's utilization captured in the histogram. The number + // of spans in rest of the buckets should be zero. + const int bitwidth = absl::bit_width(static_cast(allocated)); + for (int i = 1; i <= absl::bit_width(e.objects_per_span()); ++i) { + // Skip the check for objects_per_span = 1 since such spans skip most of the + // central freelist's logic. + if (i == bitwidth && e.objects_per_span() != 1) { + EXPECT_EQ(e.central_freelist().NumSpansWith(i), 1); + } else { + EXPECT_EQ(e.central_freelist().NumSpansWith(i), 0); + } + } + + EXPECT_CALL(e.forwarder(), MapObjectsToSpans).Times(1); + EXPECT_CALL(e.forwarder(), DeallocateSpans).Times(1); + + // Skip the check for objects_per_span = 1 since such spans skip most of the + // central freelist's logic. + SpanStats stats = e.central_freelist().GetSpanStats(); + if (e.objects_per_span() != 1) { + EXPECT_EQ(stats.num_spans_requested, 1); + EXPECT_EQ(stats.num_spans_returned, 0); + EXPECT_EQ(stats.obj_capacity, e.objects_per_span()); + } + + e.central_freelist().InsertRange(absl::MakeSpan(&batch[0], allocated)); + // Skip the check for objects_per_span = 1 since such spans skip most of the + // central freelist's logic. + if (e.objects_per_span() != 1) { + SpanStats stats = e.central_freelist().GetSpanStats(); + EXPECT_EQ(stats.num_spans_requested, 1); + EXPECT_EQ(stats.num_spans_returned, 1); + EXPECT_EQ(stats.obj_capacity, 0); + } + + // Span captured in the histogram with the earlier utilization should have + // been removed. + for (int i = 1; i <= absl::bit_width(e.objects_per_span()); ++i) { + EXPECT_EQ(e.central_freelist().NumSpansWith(i), 0); + } +} + +TEST_P(CentralFreeListTest, SpanUtilizationHistogram) { + TypeParam e(std::get<0>(GetParam()).size, std::get<0>(GetParam()).pages, + std::get<0>(GetParam()).num_to_move, std::get<1>(GetParam())); + constexpr size_t kNumSpans = 10; + + // Request kNumSpans spans. 
void* batch[kMaxObjectsToMove]; - uint64_t got = cfl_.RemoveRange(batch, batch_size_); - ASSERT_GT(got, 0); - cfl_.InsertRange({batch, got}); - SpanStats stats = cfl_.GetSpanStats(); - EXPECT_EQ(stats.num_spans_requested, 1); - EXPECT_EQ(stats.num_spans_returned, 1); - EXPECT_EQ(stats.obj_capacity, 0); + const int num_objects_to_fetch = kNumSpans * e.objects_per_span(); + int total_fetched = 0; + // Tracks object and corresponding span from which it was allocated. + std::vector> object_to_span; + // Tracks number of objects allocated per span. + absl::flat_hash_map allocated_per_span; + int span_idx = 0; + + while (total_fetched < num_objects_to_fetch) { + size_t n = num_objects_to_fetch - total_fetched; + int got = e.central_freelist().RemoveRange( + absl::MakeSpan(batch, std::min(n, e.batch_size()))); + total_fetched += got; + + // Increment span_idx if current objects have been fetched from the new + // span. + if (total_fetched > (span_idx + 1) * e.objects_per_span()) { + ++span_idx; + } + // Record fetched object and the associated span. + for (int i = 0; i < got; ++i) { + Span* s = e.forwarder().MapObjectToSpan(batch[i]); + object_to_span.emplace_back(batch[i], s); + allocated_per_span[s] += 1; + } + TC_ASSERT_LT(span_idx, kNumSpans); + } + + // Make sure that we have fetched exactly from kNumSpans spans. + EXPECT_EQ(span_idx + 1, kNumSpans); + + // We should have kNumSpans spans in the histogram with number of allocated + // objects equal to e.objects_per_span() (i.e. in the last bucket). + // Rest of the buckets should be empty. + const int expected_bitwidth = absl::bit_width(e.objects_per_span()); + // Skip the check when objects_per_span = 1 as those spans skip most of the + // central freelist's logic. + if (e.objects_per_span() != 1) { + EXPECT_EQ(e.central_freelist().NumSpansWith(expected_bitwidth), kNumSpans); + } + for (int i = 1; i < expected_bitwidth; ++i) { + EXPECT_EQ(e.central_freelist().NumSpansWith(i), 0); + } + + // Shuffle. + absl::BitGen rng; + std::shuffle(object_to_span.begin(), object_to_span.end(), rng); + + // Return objects, a fraction at a time, each time checking that histogram is + // correct. + int total_returned = 0; + const int last_bucket = absl::bit_width(e.objects_per_span()) - 1; + while (total_returned < num_objects_to_fetch) { + uint64_t size_to_pop = + std::min(object_to_span.size() - total_returned, e.batch_size()); + + for (int i = 0; i < size_to_pop; ++i) { + const auto [ptr, span] = object_to_span[i + total_returned]; + batch[i] = ptr; + --allocated_per_span[span]; + } + total_returned += size_to_pop; + e.central_freelist().InsertRange({batch, size_to_pop}); + + // Calculate expected histogram. + std::vector expected(absl::bit_width(e.objects_per_span()), 0); + for (const auto& span_and_count : allocated_per_span) { + // If span has non-zero allocated objects, include it in the histogram. + if (span_and_count.second > 0) { + const size_t bucket = absl::bit_width(span_and_count.second) - 1; + TC_ASSERT_LE(bucket, last_bucket); + ++expected[bucket]; + } + } + + // Fetch number of spans logged in the histogram and compare it with the + // expected histogram that we calculated using the tracked allocated + // objects per span. + for (int i = 1; i <= last_bucket; ++i) { + EXPECT_EQ(e.central_freelist().NumSpansWith(i), expected[i - 1]); + } + } + + // Since no span is live here, histogram must be empty. 
+ for (int i = 1; i <= last_bucket; ++i) { + EXPECT_EQ(e.central_freelist().NumSpansWith(i), 0); + } +} + +// Confirms that a call to RemoveRange returns at most kObjectsPerSpan objects +// in cases when there are no non-empty spans in the central freelist. This +// makes sure that we populate, and subsequently allocate from a single span. +// This avoids memory regression due to multiple Populate calls observed in +// b/225880278. +TEST_P(CentralFreeListTest, SinglePopulate) { + // Make sure that we allocate up to kObjectsPerSpan objects in both the span + // prioritization states. + TypeParam e(std::get<0>(GetParam()).size, std::get<0>(GetParam()).pages, + std::get<0>(GetParam()).num_to_move, std::get<1>(GetParam())); + // Try to fetch sufficiently large number of objects at startup. + const int num_objects_to_fetch = 10 * e.objects_per_span(); + std::vector objects(num_objects_to_fetch, nullptr); + const size_t got = e.central_freelist().RemoveRange( + absl::MakeSpan(objects.data(), num_objects_to_fetch)); + // Confirm we allocated at most kObjectsPerSpan number of objects. + EXPECT_GT(got, 0); + EXPECT_LE(got, e.objects_per_span()); + size_t returned = 0; + while (returned < got) { + const size_t to_return = std::min(got - returned, e.batch_size()); + e.central_freelist().InsertRange({&objects[returned], to_return}); + returned += to_return; + } +} + +// Tests whether the index generated by the input indexing function matches the +// index of the span on which allocations and deallocation operations are +// carried out. The test first allocates objects and deallocates them. After +// each operation, the actual index is matched against the expected one. +template +void TestIndexing(TypeParam& e, IndexingFunc f) { + TC_ASSERT_GT(kNumLists, 0); + const int num_objects_to_fetch = e.objects_per_span(); + std::vector objects(num_objects_to_fetch); + size_t fetched = 0; + int expected_idx = kNumLists - 1; + + // Fetch one object at a time from a span and confirm that the span is moved + // through the nonempty_ lists as we allocate more objects from it. + while (fetched < num_objects_to_fetch) { + // Try to fetch one object from the span. + int got = + e.central_freelist().RemoveRange(absl::MakeSpan(&objects[fetched], 1)); + fetched += got; + TC_ASSERT(fetched); + if (fetched % num_objects_to_fetch == 0) { + // Span should have been removed from nonempty_ lists because we have + // allocated all the objects from it. + EXPECT_EQ(e.central_freelist().NumSpansInList(expected_idx), 0); + } else { + expected_idx = f(fetched); + TC_ASSERT_GE(expected_idx, 0); + TC_ASSERT_LT(expected_idx, kNumLists); + // Check that the span exists in the corresponding nonempty_ list. + EXPECT_EQ(e.central_freelist().NumSpansInList(expected_idx), 1); + } + } + + // Similar to our previous test, we now make sure that the span is moved + // through the nonempty_ lists when we deallocate objects back to it. + size_t remaining = fetched; + while (--remaining > 0) { + // Return objects back to the span one at a time. + e.central_freelist().InsertRange({&objects[remaining], 1}); + TC_ASSERT(remaining); + // When allocated objects are more than the threshold, the span is indexed + // to nonempty_ list 0. + expected_idx = f(remaining); + EXPECT_LT(expected_idx, kNumLists); + EXPECT_EQ(e.central_freelist().NumSpansInList(expected_idx), 1); + } + + // When the last object is returned, we release the span to the page heap. So, + // nonempty_[0] should also be empty. 
+ e.central_freelist().InsertRange({&objects[remaining], 1}); + EXPECT_EQ(e.central_freelist().NumSpansInList(0), 0); +} + +TEST_P(CentralFreeListTest, BitwidthIndexedNonEmptyLists) { + TypeParam e(std::get<0>(GetParam()).size, std::get<0>(GetParam()).pages, + std::get<0>(GetParam()).num_to_move, std::get<1>(GetParam())); + if (e.objects_per_span() <= 2 * kNumLists) { + GTEST_SKIP() + << "Skipping test as one hot encoding used for few object spans."; + } + auto bitwidth_indexing = [](size_t allocated) { + size_t bitwidth = absl::bit_width(allocated); + return kNumLists - std::min(bitwidth, kNumLists); + }; + TestIndexing(e, bitwidth_indexing); +} + +TEST_P(CentralFreeListTest, DirectIndexedEncodedNonEmptyLists) { + TypeParam e(std::get<0>(GetParam()).size, std::get<0>(GetParam()).pages, + std::get<0>(GetParam()).num_to_move, std::get<1>(GetParam())); + if (e.objects_per_span() > 2 * kNumLists) { + GTEST_SKIP() << "Skipping test as one hot encoding not required."; + } + auto direct_indexing = [](int allocated) { + if (allocated <= kNumLists) return kNumLists - allocated; + return 0UL; + }; + TestIndexing(e, direct_indexing); +} + +// Checks if we are indexing a span in the nonempty_ lists as expected. We also +// check if the spans are correctly being prioritized. That is, we create a +// scenario where we have two live spans, and one span has more allocated +// objects than the other span. On subsequent allocations, we confirm that the +// objects are allocated from the span with a higher number of allocated objects +// as enforced by our prioritization scheme. +TEST_P(CentralFreeListTest, SpanPriority) { + TypeParam e(std::get<0>(GetParam()).size, std::get<0>(GetParam()).pages, + std::get<0>(GetParam()).num_to_move, std::get<1>(GetParam())); + + // If the number of objects per span is less than 2, we do not use more than + // one nonempty_ lists. So, we can not prioritize the spans based on how many + // objects were allocated from them. + const int objects_per_span = e.objects_per_span(); + if (objects_per_span < 3 || kNumLists < 2) return; + + constexpr int kNumSpans = 2; + // Track objects allocated per span. + absl::FixedArray> objects(kNumSpans); + void* batch[kMaxObjectsToMove]; + + const size_t to_fetch = objects_per_span; + // Allocate all objects from kNumSpans. + for (int span = 0; span < kNumSpans; ++span) { + size_t fetched = 0; + while (fetched < to_fetch) { + const size_t n = to_fetch - fetched; + int got = e.central_freelist().RemoveRange( + absl::MakeSpan(batch, std::min(n, e.batch_size()))); + for (int i = 0; i < got; ++i) { + objects[span].push_back(batch[i]); + } + fetched += got; + } + } + + // Perform deallocations so that each span contains only two objects. + size_t to_release = to_fetch - 2; + for (int span = 0; span < kNumSpans; ++span) { + size_t released = 0; + while (released < to_release) { + uint64_t n = std::min(to_release - released, e.batch_size()); + for (int i = 0; i < n; ++i) { + batch[i] = objects[span][i + released]; + } + released += n; + e.central_freelist().InsertRange({batch, n}); + } + objects[span].erase(objects[span].begin(), + objects[span].begin() + released); + } + + // Make sure we have kNumSpans in the expected second-last nonempty_ list. + EXPECT_EQ(e.central_freelist().NumSpansInList(kNumLists - 2), kNumSpans); + + // Release an additional object from all but one spans so that they are + // deprioritized for subsequent allocations. 
+ to_release = 1; + for (int span = 1; span < kNumSpans; ++span) { + size_t released = 0; + while (released < to_release) { + uint64_t n = std::min(to_release - released, e.batch_size()); + for (int i = 0; i < n; ++i) { + batch[i] = objects[span][i + released]; + } + released += n; + e.central_freelist().InsertRange({batch, n}); + } + objects[span].erase(objects[span].begin(), + objects[span].begin() + released); + } + + // Make sure we have kNumSpans-1 spans in the last nonempty_ list and just one + // span in the second-last list. + EXPECT_EQ(e.central_freelist().NumSpansInList(kNumLists - 1), kNumSpans - 1); + EXPECT_EQ(e.central_freelist().NumSpansInList(kNumLists - 2), 1); + + // Allocate one object to ensure that it is being allocated from the span with + // the highest number of allocated objects. + int got = e.central_freelist().RemoveRange(absl::MakeSpan(batch, 1)); + EXPECT_EQ(got, 1); + // Number of spans in the last nonempty_ list should be unchanged (i.e. + // kNumSpans-1). + EXPECT_EQ(e.central_freelist().NumSpansInList(kNumLists - 1), kNumSpans - 1); + if (e.objects_per_span() == 3) { + // Since we allocated another object from the span that had two objects + // allocated from it, so the span would no longer be there in the span list. + for (int i = kNumLists - 2; i >= 0; --i) { + EXPECT_EQ(e.central_freelist().NumSpansInList(i), 0); + } + } else if (e.objects_per_span() <= 2 * kNumLists) { + // We should have only one span in the third-last nonempty_ list; this is + // the span from which we should have allocated the last object. + EXPECT_EQ(e.central_freelist().NumSpansInList(kNumLists - 3), 1); + } else { + // We should have only one span in the second-last nonempty_ list; this is + // the span from which we should have allocated the last object. + EXPECT_EQ(e.central_freelist().NumSpansInList(kNumLists - 2), 1); + } + // Return previously allocated object. + e.central_freelist().InsertRange({batch, 1}); + + // Return rest of the objects. + for (int span = 0; span < kNumSpans; ++span) { + for (int i = 0; i < objects[span].size(); ++i) { + e.central_freelist().InsertRange({&objects[span][i], 1}); + } + } } -TEST_P(CFLTest, MultipleSpans) { +TEST_P(CentralFreeListTest, SpanLifetime) { + TypeParam e(std::get<0>(GetParam()).size, std::get<0>(GetParam()).pages, + std::get<0>(GetParam()).num_to_move, std::get<1>(GetParam())); + + const uint32_t max_span_cache_size = e.forwarder().max_span_cache_size(); + if (max_span_cache_size != Span::kLargeCacheSize) { + GTEST_SKIP() << "Skipping test when cache size is small. We do not " + "record lifetime telemetry."; + } + const size_t object_size = + e.central_freelist().forwarder().class_to_size(TypeParam::kSizeClass); + if (Span::UseBitmapForSize(object_size)) { + GTEST_SKIP() << "Bitmap is used for size class. We do not " + "record lifetime telemetry."; + } std::vector all_objects; - const size_t num_spans = 10; + // Request kNumSpans spans. 
+ void* batch[kMaxObjectsToMove]; + ASSERT_GT(e.objects_per_span(), 0); + int got = e.central_freelist().RemoveRange(absl::MakeSpan(batch, 1)); + ASSERT_EQ(got, 1); + + e.forwarder().AdvanceClock(absl::Seconds(1)); + + { + std::string buffer(1024 * 1024, '\0'); + Printer printer(&*buffer.begin(), buffer.size()); + e.central_freelist().PrintSpanLifetimeStats(printer); + buffer.resize(strlen(buffer.c_str())); + + EXPECT_THAT( + buffer, + testing::HasSubstr( + R"(0 ms < 0, 1 ms < 0, 10 ms < 0,100 ms < 0,1000 ms < 1,10000 ms < 0,100000 ms < 0,1000000 ms < 0)")); + } + + e.forwarder().AdvanceClock(absl::Seconds(10)); + { + std::string buffer(1024 * 1024, '\0'); + Printer printer(&*buffer.begin(), buffer.size()); + e.central_freelist().PrintSpanLifetimeStats(printer); + buffer.resize(strlen(buffer.c_str())); + + EXPECT_THAT( + buffer, + testing::HasSubstr( + R"(0 ms < 0, 1 ms < 0, 10 ms < 0,100 ms < 0,1000 ms < 0,10000 ms < 1,100000 ms < 0,1000000 ms < 0)")); + } + + e.forwarder().AdvanceClock(absl::Seconds(100)); + { + std::string buffer(1024 * 1024, '\0'); + Printer printer(&*buffer.begin(), buffer.size()); + e.central_freelist().PrintSpanLifetimeStats(printer); + buffer.resize(strlen(buffer.c_str())); + + EXPECT_THAT( + buffer, + testing::HasSubstr( + R"(0 ms < 0, 1 ms < 0, 10 ms < 0,100 ms < 0,1000 ms < 0,10000 ms < 0,100000 ms < 1,1000000 ms < 0)")); + } + + e.forwarder().AdvanceClock(absl::Seconds(1000)); + { + std::string buffer(1024 * 1024, '\0'); + Printer printer(&*buffer.begin(), buffer.size()); + e.central_freelist().PrintSpanLifetimeStats(printer); + buffer.resize(strlen(buffer.c_str())); - // Request num_spans spans + EXPECT_THAT( + buffer, + testing::HasSubstr( + R"(0 ms < 0, 1 ms < 0, 10 ms < 0,100 ms < 0,1000 ms < 0,10000 ms < 0,100000 ms < 0,1000000 ms < 1)")); + } + + e.central_freelist().InsertRange({batch, 1}); +} + +TEST_P(CentralFreeListTest, MultipleSpans) { + TypeParam e(std::get<0>(GetParam()).size, std::get<0>(GetParam()).pages, + std::get<0>(GetParam()).num_to_move, std::get<1>(GetParam())); + std::vector all_objects; + constexpr size_t kNumSpans = 10; + + // Request kNumSpans spans. void* batch[kMaxObjectsToMove]; - const int num_objects_to_fetch = num_spans * objects_per_span_; + ASSERT_GT(e.objects_per_span(), 0); + const int num_objects_to_fetch = kNumSpans * e.objects_per_span(); int total_fetched = 0; while (total_fetched < num_objects_to_fetch) { size_t n = num_objects_to_fetch - total_fetched; - int got = cfl_.RemoveRange(batch, std::min(n, batch_size_)); + int got = e.central_freelist().RemoveRange( + absl::MakeSpan(batch, std::min(n, e.batch_size()))); for (int i = 0; i < got; ++i) { all_objects.push_back(batch[i]); } total_fetched += got; } - SpanStats stats = cfl_.GetSpanStats(); - EXPECT_EQ(stats.num_spans_requested, num_spans); - EXPECT_EQ(stats.num_spans_returned, 0); + // We should have kNumSpans spans in the histogram with number of + // allocated objects equal to e.objects_per_span() (i.e. in the last + // bucket). Rest of the buckets should be empty. + const int expected_bitwidth = absl::bit_width(e.objects_per_span()); + // Skip the check for objects_per_span = 1 since such spans skip most of the + // central freelist's logic. 
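// The lifetime expectations above are consistent with decade buckets measured
// in milliseconds ("1 ms <", "10 ms <", ..., "1000000 ms <"); the histogram
// implementation itself is not shown in this hunk, so the bucketing function
// below is only a stand-in written under that assumption.
#include <chrono>

// Returns 0 for the "0 ms <" bucket, 1 for "1 ms <", ..., 7 for "1000000 ms <".
constexpr int LifetimeBucketSketch(std::chrono::milliseconds lifetime) {
  int bucket = 0;
  for (long long bound = 1; bound <= 1'000'000 && lifetime.count() >= bound;
       bound *= 10) {
    ++bucket;
  }
  return bucket;
}

// A span alive for 1 second lands in the "1000 ms <" bucket and one alive for
// 10 seconds in the "10000 ms <" bucket, matching the expectations above.
static_assert(LifetimeBucketSketch(std::chrono::milliseconds(1'000)) == 4);
static_assert(LifetimeBucketSketch(std::chrono::milliseconds(10'000)) == 5);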
+ if (e.objects_per_span() != 1) { + EXPECT_EQ(e.central_freelist().NumSpansWith(expected_bitwidth), kNumSpans); + } + for (int i = 1; i < expected_bitwidth; ++i) { + EXPECT_EQ(e.central_freelist().NumSpansWith(i), 0); + } + + // Skip the check for objects_per_span = 1 since such spans skip most of the + // central freelist's logic. + if (e.objects_per_span() != 1) { + SpanStats stats = e.central_freelist().GetSpanStats(); + EXPECT_EQ(stats.num_spans_requested, kNumSpans); + EXPECT_EQ(stats.num_spans_returned, 0); + } EXPECT_EQ(all_objects.size(), num_objects_to_fetch); @@ -92,30 +827,108 @@ TEST_P(CFLTest, MultipleSpans) { // Return all int total_returned = 0; - bool checked_half = false; while (total_returned < num_objects_to_fetch) { uint64_t size_to_pop = - std::min(all_objects.size() - total_returned, batch_size_); + std::min(all_objects.size() - total_returned, e.batch_size()); for (int i = 0; i < size_to_pop; ++i) { batch[i] = all_objects[i + total_returned]; } total_returned += size_to_pop; - cfl_.InsertRange({batch, size_to_pop}); + e.central_freelist().InsertRange({batch, size_to_pop}); // sanity check - if (!checked_half && total_returned >= (num_objects_to_fetch / 2)) { - stats = cfl_.GetSpanStats(); + if (e.objects_per_span() != 1 && total_returned < num_objects_to_fetch) { + SpanStats stats = e.central_freelist().GetSpanStats(); EXPECT_GT(stats.num_spans_requested, stats.num_spans_returned); EXPECT_NE(stats.obj_capacity, 0); - checked_half = true; + // Total spans recorded in the histogram must be equal to the number of + // live spans. + size_t spans_in_histogram = 0; + for (int i = 1; i <= absl::bit_width(e.objects_per_span()); ++i) { + spans_in_histogram += e.central_freelist().NumSpansWith(i); + } + EXPECT_EQ(spans_in_histogram, stats.num_live_spans()); } } - stats = cfl_.GetSpanStats(); + SpanStats stats = e.central_freelist().GetSpanStats(); EXPECT_EQ(stats.num_spans_requested, stats.num_spans_returned); + // Since no span is live, histogram must be empty. 
+ for (int i = 1; i <= absl::bit_width(e.objects_per_span()); ++i) { + EXPECT_EQ(e.central_freelist().NumSpansWith(i), 0); + } EXPECT_EQ(stats.obj_capacity, 0); } -INSTANTIATE_TEST_SUITE_P(All, CFLTest, testing::Range(size_t(1), kNumClasses)); +TEST_P(CentralFreeListTest, PassSpanDensityToPageheap) { + TypeParam e(std::get<0>(GetParam()).size, std::get<0>(GetParam()).pages, + std::get<0>(GetParam()).num_to_move, std::get<1>(GetParam())); + ASSERT_GE(e.objects_per_span(), 1); + auto test_function = [&](size_t num_objects, + AccessDensityPrediction density) { + std::vector objects(e.objects_per_span()); + EXPECT_CALL(e.forwarder(), AllocateSpan(testing::_, testing::_, testing::_)) + .Times(1); + const size_t to_fetch = std::min(e.objects_per_span(), e.batch_size()); + const size_t fetched = + e.central_freelist().RemoveRange(absl::MakeSpan(&objects[0], to_fetch)); + size_t returned = 0; + while (returned < fetched) { + EXPECT_CALL(e.forwarder(), DeallocateSpans(testing::_, testing::_)) + .Times(1); + const size_t to_return = std::min(fetched - returned, e.batch_size()); + e.central_freelist().InsertRange({&objects[returned], to_return}); + returned += to_return; + } + }; + test_function(1, AccessDensityPrediction::kDense); + test_function(e.objects_per_span(), AccessDensityPrediction::kDense); +} + +TEST_P(CentralFreeListTest, SpanFragmentation) { + // This test is primarily exercising Span itself to model how tcmalloc.cc uses + // it, but this gives us a self-contained (and sanitizable) implementation of + // the CentralFreeList. + TypeParam e(std::get<0>(GetParam()).size, std::get<0>(GetParam()).pages, + std::get<0>(GetParam()).num_to_move, std::get<1>(GetParam())); + // Allocate one object from the CFL to allocate a span. + void* initial; + int got = e.central_freelist().RemoveRange(absl::MakeSpan(&initial, 1)); + ASSERT_EQ(got, 1); + + Span* const span = e.central_freelist().forwarder().MapObjectToSpan(initial); + const size_t object_size = + e.central_freelist().forwarder().class_to_size(TypeParam::kSizeClass); + + ThreadManager fragmentation; + fragmentation.Start(1, [&](int) { + if (e.objects_per_span() != 1) { + benchmark::DoNotOptimize(span->Fragmentation(object_size)); + } + }); + + ThreadManager cfl; + cfl.Start(1, [&](int) { + void* next; + int got = e.central_freelist().RemoveRange(absl::MakeSpan(&next, 1)); + e.central_freelist().InsertRange(absl::MakeSpan(&next, got)); + }); + + absl::SleepFor(absl::Seconds(0.1)); + + fragmentation.Stop(); + cfl.Stop(); + + e.central_freelist().InsertRange(absl::MakeSpan(&initial, 1)); +} + +INSTANTIATE_TEST_SUITE_P( + CentralFreeList, CentralFreeListTest, + testing::Combine( + // We skip the first size class since it is set to 0. 
+ testing::ValuesIn(kSizeClasses.classes.begin() + 1, + kSizeClasses.classes.end()), + /*use_large_spans=*/testing::Values(false, true))); + } // namespace } // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/common.cc b/contrib/libs/tcmalloc/tcmalloc/common.cc index 38443040cad2..155893724841 100644 --- a/contrib/libs/tcmalloc/tcmalloc/common.cc +++ b/contrib/libs/tcmalloc/tcmalloc/common.cc @@ -14,189 +14,17 @@ #include "tcmalloc/common.h" -#include "tcmalloc/experiment.h" -#include "tcmalloc/internal/environment.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/optimization.h" -#include "tcmalloc/pages.h" -#include "tcmalloc/runtime_size_classes.h" -#include "tcmalloc/sampler.h" GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { -absl::string_view MemoryTagToLabel(MemoryTag tag) { - switch (tag) { - case MemoryTag::kNormal: - return "NORMAL"; - case MemoryTag::kNormalP1: - return "NORMAL_P1"; - case MemoryTag::kSampled: - return "SAMPLED"; - default: - ASSUME(false); - } -} - -// Load sizes classes from environment variable if present -// and valid, then returns True. If not found or valid, returns -// False. -bool SizeMap::MaybeRunTimeSizeClasses() { - SizeClassInfo parsed[kNumClasses]; - int num_classes = MaybeSizeClassesFromEnv(kMaxSize, kNumClasses, parsed); - if (!ValidSizeClasses(num_classes, parsed)) { - return false; - } - - if (num_classes != kSizeClassesCount) { - // TODO(b/122839049) - Add tests for num_classes < kSizeClassesCount before - // allowing that case. - Log(kLog, __FILE__, __LINE__, "Can't change the number of size classes", - num_classes, kSizeClassesCount); - return false; - } - - SetSizeClasses(num_classes, parsed); - Log(kLog, __FILE__, __LINE__, "Loaded valid Runtime Size classes"); - return true; -} - -void SizeMap::SetSizeClasses(int num_classes, const SizeClassInfo* parsed) { - class_to_size_[0] = 0; - class_to_pages_[0] = 0; - num_objects_to_move_[0] = 0; - - for (int c = 1; c < num_classes; c++) { - class_to_size_[c] = parsed[c].size; - class_to_pages_[c] = parsed[c].pages; - num_objects_to_move_[c] = parsed[c].num_to_move; - } - - // Fill any unspecified size classes with 0. - for (int x = num_classes; x < kNumBaseClasses; x++) { - class_to_size_[x] = 0; - class_to_pages_[x] = 0; - num_objects_to_move_[x] = 0; - } - - // Copy selected size classes into the upper registers. - for (int i = 1; i < (kNumClasses / kNumBaseClasses); i++) { - std::copy(&class_to_size_[0], &class_to_size_[kNumBaseClasses], - &class_to_size_[kNumBaseClasses * i]); - std::copy(&class_to_pages_[0], &class_to_pages_[kNumBaseClasses], - &class_to_pages_[kNumBaseClasses * i]); - std::copy(&num_objects_to_move_[0], &num_objects_to_move_[kNumBaseClasses], - &num_objects_to_move_[kNumBaseClasses * i]); - } -} - -// Return true if all size classes meet the requirements for alignment -// ordering and min and max values. -bool SizeMap::ValidSizeClasses(int num_classes, const SizeClassInfo* parsed) { - if (num_classes <= 0) { - return false; - } - if (kHasExpandedClasses && num_classes > kNumBaseClasses) { - num_classes = kNumBaseClasses; - } - - for (int c = 1; c < num_classes; c++) { - size_t class_size = parsed[c].size; - size_t pages = parsed[c].pages; - size_t num_objects_to_move = parsed[c].num_to_move; - // Each size class must be larger than the previous size class. 
- if (class_size <= parsed[c - 1].size) { - Log(kLog, __FILE__, __LINE__, "Non-increasing size class", c, - parsed[c - 1].size, class_size); - return false; - } - if (class_size > kMaxSize) { - Log(kLog, __FILE__, __LINE__, "size class too big", c, class_size, - kMaxSize); - return false; - } - // Check required alignment - size_t alignment = 128; - if (class_size <= kMultiPageSize) { - alignment = kAlignment; - } else if (class_size <= SizeMap::kMaxSmallSize) { - alignment = kMultiPageAlignment; - } - if ((class_size & (alignment - 1)) != 0) { - Log(kLog, __FILE__, __LINE__, "Not aligned properly", c, class_size, - alignment); - return false; - } - if (class_size <= kMultiPageSize && pages != 1) { - Log(kLog, __FILE__, __LINE__, "Multiple pages not allowed", class_size, - pages, kMultiPageSize); - return false; - } - if (pages >= 256) { - Log(kLog, __FILE__, __LINE__, "pages limited to 255", pages); - return false; - } - if (num_objects_to_move > kMaxObjectsToMove) { - Log(kLog, __FILE__, __LINE__, "num objects to move too large", - num_objects_to_move, kMaxObjectsToMove); - return false; - } - } - // Last size class must be able to hold kMaxSize. - if (parsed[num_classes - 1].size < kMaxSize) { - Log(kLog, __FILE__, __LINE__, "last class doesn't cover kMaxSize", - num_classes - 1, parsed[num_classes - 1].size, kMaxSize); - return false; - } - return true; -} - -int ABSL_ATTRIBUTE_WEAK default_want_legacy_spans(); - -// Initialize the mapping arrays -void SizeMap::Init() { - // Do some sanity checking on add_amount[]/shift_amount[]/class_array[] - if (ClassIndex(0) != 0) { - Crash(kCrash, __FILE__, __LINE__, "Invalid class index for size 0", - ClassIndex(0)); - } - if (ClassIndex(kMaxSize) >= sizeof(class_array_)) { - Crash(kCrash, __FILE__, __LINE__, "Invalid class index for kMaxSize", - ClassIndex(kMaxSize)); - } - - static_assert(kAlignment <= 16, "kAlignment is too large"); - - if (IsExperimentActive(Experiment::TEST_ONLY_TCMALLOC_POW2_SIZECLASS)) { - SetSizeClasses(kExperimentalPow2SizeClassesCount, - kExperimentalPow2SizeClasses); - } else if (IsExperimentActive( - Experiment::TEST_ONLY_TCMALLOC_POW2_BELOW64_SIZECLASS)) { - SetSizeClasses(kExperimentalPow2Below64SizeClassesCount, - kExperimentalPow2Below64SizeClasses); - } else { - if (default_want_legacy_spans != nullptr && - default_want_legacy_spans() > 0 - ) { - SetSizeClasses(kLegacySizeClassesCount, kLegacySizeClasses); - } else { - SetSizeClasses(kSizeClassesCount, kSizeClasses); - } - } - MaybeRunTimeSizeClasses(); - - int next_size = 0; - for (int c = 1; c < kNumClasses; c++) { - const int max_size_in_class = class_to_size_[c]; - - for (int s = next_size; s <= max_size_in_class; s += kAlignment) { - class_array_[ClassIndex(s)] = c; - } - next_size = max_size_in_class + kAlignment; - if (next_size > kMaxSize) { - break; - } - } +// This only provides correct answer for TCMalloc-allocated memory, +// and may give a false positive for non-allocated block. 
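// Why only "possibly": the memory tag is recovered purely from a few high bits
// of the address (compare the kTagShift / kTagMask definitions that this patch
// removes from common.h below), so any pointer that happens to fall inside the
// cold address range reports cold, whether or not TCMalloc allocated it. A
// minimal sketch of that check; the shift, mask and tag value below are
// assumptions for illustration, not the library's actual constants.
#include <cstdint>

inline constexpr uintptr_t kTagShiftSketch = 42;                                // assumed
inline constexpr uintptr_t kTagMaskSketch = uintptr_t{0x7} << kTagShiftSketch;  // assumed
inline constexpr uintptr_t kColdTagSketch = 0x4;                                // assumed

inline bool PossiblyColdSketch(const void* ptr) {
  const uintptr_t tag =
      (reinterpret_cast<uintptr_t>(ptr) & kTagMaskSketch) >> kTagShiftSketch;
  return tag == kColdTagSketch;
}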
+extern "C" bool TCMalloc_Internal_PossiblyCold(const void* ptr) { + return GetMemoryTag(ptr) == MemoryTag::kCold; } } // namespace tcmalloc_internal diff --git a/contrib/libs/tcmalloc/tcmalloc/common.h b/contrib/libs/tcmalloc/tcmalloc/common.h index d44811c72662..c9de12a5bb89 100644 --- a/contrib/libs/tcmalloc/tcmalloc/common.h +++ b/contrib/libs/tcmalloc/tcmalloc/common.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -20,27 +21,28 @@ #include #include +#include +#include #include +#include #include -#include "absl/base/attributes.h" -#include "absl/base/dynamic_annotations.h" #include "absl/base/internal/spinlock.h" -#include "absl/base/macros.h" #include "absl/base/optimization.h" #include "absl/numeric/bits.h" #include "absl/strings/string_view.h" -#include "absl/types/span.h" -#include "tcmalloc/experiment.h" #include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/memory_tag.h" #include "tcmalloc/internal/optimization.h" -#include "tcmalloc/size_class_info.h" +#include "tcmalloc/malloc_extension.h" GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { +static_assert(sizeof(void*) == 8); + //------------------------------------------------------------------- // Configuration //------------------------------------------------------------------- @@ -52,12 +54,12 @@ namespace tcmalloc_internal { // The default configuration strives for good performance while trying to // minimize fragmentation. It uses a smaller page size to reduce // fragmentation, but allocates per-thread and per-cpu capacities similar to -// TCMALLOC_LARGE_PAGES / TCMALLOC_256K_PAGES. +// TCMALLOC_INTERNAL_32K_PAGES / TCMALLOC_INTERNAL_256K_PAGES. // -// TCMALLOC_LARGE_PAGES: -// Larger page sizes increase the bookkeeping granularity used by TCMalloc for -// its allocations. This can reduce PageMap size and traffic to the -// innermost cache (the page heap), but can increase memory footprints. As +// TCMALLOC_INTERNAL_32K_PAGES: +// Larger page sizes (32KB) increase the bookkeeping granularity used by +// TCMalloc for its allocations. This can reduce PageMap size and traffic to +// the innermost cache (the page heap), but can increase memory footprints. As // TCMalloc will not reuse a page for a different allocation size until the // entire page is deallocated, this can be a source of increased memory // fragmentation. @@ -68,11 +70,11 @@ namespace tcmalloc_internal { // (https://isocpp.org/files/papers/n3778.html), this optimization is less // significant. // -// TCMALLOC_256K_PAGES +// TCMALLOC_INTERNAL_256K_PAGES // This configuration uses an even larger page size (256KB) as the unit of // accounting granularity. // -// TCMALLOC_SMALL_BUT_SLOW: +// TCMALLOC_INTERNAL_SMALL_BUT_SLOW: // Used for situations where minimizing the memory footprint is the most // desirable attribute, even at the cost of performance. // @@ -88,17 +90,18 @@ namespace tcmalloc_internal { // kStealAmount - The number of bytes one ThreadCache will steal from another // when the first ThreadCache is forced to Scavenge(), delaying the next // call to Scavenge for this thread. +// kDefaultProfileSamplingInterval - Bytes between sampled allocations. // Older configurations had their own customized macros. Convert them into // a page-shift parameter that is checked below. 
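// A quick cross-check of the page sizes implied by the shifts chosen below
// (kPageSize is defined as 1 << kPageShift later in this header):
//   TCMALLOC_INTERNAL_SMALL_BUT_SLOW -> shift 12 ->   4 KiB pages
//   default                          -> shift 13 ->   8 KiB pages
//   TCMALLOC_INTERNAL_32K_PAGES      -> shift 15 ->  32 KiB pages
//   TCMALLOC_INTERNAL_256K_PAGES     -> shift 18 -> 256 KiB pages
static_assert((1u << 12) == 4 * 1024 && (1u << 13) == 8 * 1024 &&
              (1u << 15) == 32 * 1024 && (1u << 18) == 256 * 1024);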
#ifndef TCMALLOC_PAGE_SHIFT -#ifdef TCMALLOC_SMALL_BUT_SLOW +#ifdef TCMALLOC_INTERNAL_SMALL_BUT_SLOW #define TCMALLOC_PAGE_SHIFT 12 #define TCMALLOC_USE_PAGEMAP3 -#elif defined(TCMALLOC_256K_PAGES) +#elif defined(TCMALLOC_INTERNAL_256K_PAGES) #define TCMALLOC_PAGE_SHIFT 18 -#elif defined(TCMALLOC_LARGE_PAGES) +#elif defined(TCMALLOC_INTERNAL_32K_PAGES) #define TCMALLOC_PAGE_SHIFT 15 #else #define TCMALLOC_PAGE_SHIFT 13 @@ -107,6 +110,14 @@ namespace tcmalloc_internal { #error "TCMALLOC_PAGE_SHIFT is an internal macro!" #endif +#if defined(TCMALLOC_INTERNAL_SMALL_BUT_SLOW) + \ + defined(TCMALLOC_INTERNAL_8K_PAGES) + \ + defined(TCMALLOC_INTERNAL_256K_PAGES) + \ + defined(TCMALLOC_INTERNAL_32K_PAGES) > \ + 1 +#error "At most 1 variant configuration must be used." +#endif + #if TCMALLOC_PAGE_SHIFT == 12 inline constexpr size_t kPageShift = 12; inline constexpr size_t kNumBaseClasses = 46; @@ -114,11 +125,10 @@ inline constexpr bool kHasExpandedClasses = false; inline constexpr size_t kMaxSize = 8 << 10; inline constexpr size_t kMinThreadCacheSize = 4 * 1024; inline constexpr size_t kMaxThreadCacheSize = 64 * 1024; -inline constexpr size_t kMaxCpuCacheSize = 20 * 1024; +inline constexpr size_t kMaxCpuCacheSize = 10 * 1024; inline constexpr size_t kDefaultOverallThreadCacheSize = kMaxThreadCacheSize; inline constexpr size_t kStealAmount = kMinThreadCacheSize; -inline constexpr size_t kDefaultProfileSamplingRate = 1 << 19; -inline constexpr size_t kMinPages = 2; +inline constexpr size_t kDefaultProfileSamplingInterval = 1 << 19; #elif TCMALLOC_PAGE_SHIFT == 15 inline constexpr size_t kPageShift = 15; inline constexpr size_t kNumBaseClasses = 78; @@ -126,12 +136,11 @@ inline constexpr bool kHasExpandedClasses = true; inline constexpr size_t kMaxSize = 256 * 1024; inline constexpr size_t kMinThreadCacheSize = kMaxSize * 2; inline constexpr size_t kMaxThreadCacheSize = 4 << 20; -inline constexpr size_t kMaxCpuCacheSize = 3 * 1024 * 1024; +inline constexpr size_t kMaxCpuCacheSize = 1.5 * 1024 * 1024; inline constexpr size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize; inline constexpr size_t kStealAmount = 1 << 16; -inline constexpr size_t kDefaultProfileSamplingRate = 1 << 21; -inline constexpr size_t kMinPages = 8; +inline constexpr size_t kDefaultProfileSamplingInterval = 1 << 21; #elif TCMALLOC_PAGE_SHIFT == 18 inline constexpr size_t kPageShift = 18; inline constexpr size_t kNumBaseClasses = 89; @@ -139,12 +148,11 @@ inline constexpr bool kHasExpandedClasses = true; inline constexpr size_t kMaxSize = 256 * 1024; inline constexpr size_t kMinThreadCacheSize = kMaxSize * 2; inline constexpr size_t kMaxThreadCacheSize = 4 << 20; -inline constexpr size_t kMaxCpuCacheSize = 3 * 1024 * 1024; +inline constexpr size_t kMaxCpuCacheSize = 1.5 * 1024 * 1024; inline constexpr size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize; inline constexpr size_t kStealAmount = 1 << 16; -inline constexpr size_t kDefaultProfileSamplingRate = 1 << 21; -inline constexpr size_t kMinPages = 8; +inline constexpr size_t kDefaultProfileSamplingInterval = 1 << 21; #elif TCMALLOC_PAGE_SHIFT == 13 inline constexpr size_t kPageShift = 13; inline constexpr size_t kNumBaseClasses = 86; @@ -152,22 +160,18 @@ inline constexpr bool kHasExpandedClasses = true; inline constexpr size_t kMaxSize = 256 * 1024; inline constexpr size_t kMinThreadCacheSize = kMaxSize * 2; inline constexpr size_t kMaxThreadCacheSize = 4 << 20; -inline constexpr size_t kMaxCpuCacheSize = 3 * 1024 * 1024; +inline constexpr size_t 
kMaxCpuCacheSize = 1.5 * 1024 * 1024; inline constexpr size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize; inline constexpr size_t kStealAmount = 1 << 16; -inline constexpr size_t kDefaultProfileSamplingRate = 1 << 21; -inline constexpr size_t kMinPages = 8; +inline constexpr size_t kDefaultProfileSamplingInterval = 1 << 21; #else #error "Unsupported TCMALLOC_PAGE_SHIFT value!" #endif -// Sanitizers constrain the memory layout which causes problems with the -// enlarged tags required to represent NUMA partitions. Disable NUMA awareness -// to avoid failing to mmap memory. -#if defined(TCMALLOC_NUMA_AWARE) && !defined(MEMORY_SANITIZER) && \ - !defined(THREAD_SANITIZER) -inline constexpr size_t kNumaPartitions = 2; +// Disable NUMA awareness under Sanitizers to avoid failing to mmap memory. +#if defined(TCMALLOC_INTERNAL_NUMA_AWARE) +inline constexpr size_t kNumaPartitions = kSanitizerAddressSpace ? 1 : 2; #else inline constexpr size_t kNumaPartitions = 1; #endif @@ -199,97 +203,56 @@ inline constexpr size_t kMinObjectsToMove = 2; inline constexpr size_t kMaxObjectsToMove = 128; inline constexpr size_t kPageSize = 1 << kPageShift; -// Verify that the page size used is at least 8x smaller than the maximum -// element size in the thread cache. This guarantees at most 12.5% internal -// fragmentation (1/8). When page size is 256k (kPageShift == 18), the benefit -// of increasing kMaxSize to be multiple of kPageSize is unclear. Object size -// profile data indicates that the number of simultaneously live objects (of -// size >= 256k) tends to be very small. Keeping those objects as 'large' -// objects won't cause too much memory waste, while heap memory reuse can be -// improved. Increasing kMaxSize to be too large has another bad side effect -- -// the thread cache pressure is increased, which will in turn increase traffic -// between central cache and thread cache, leading to performance degradation. -static_assert((kMaxSize / kPageSize) >= kMinPages || kPageShift >= 18, - "Ratio of kMaxSize / kPageSize is too small"); - -inline constexpr size_t kAlignment = 8; + +inline constexpr std::align_val_t kAlignment{8}; // log2 (kAlignment) -inline constexpr size_t kAlignmentShift = absl::bit_width(kAlignment - 1u); +inline constexpr size_t kAlignmentShift = + absl::bit_width(static_cast(kAlignment) - 1u); // The number of times that a deallocation can cause a freelist to // go over its max_length() before shrinking max_length(). inline constexpr int kMaxOverages = 3; // Maximum length we allow a per-thread free-list to have before we -// move objects from it into the corresponding central free-list. We -// want this big to avoid locking the central free-list too often. It +// move objects from it into the corresponding transfer cache. We +// want this big to avoid locking the transfer cache too often. It // should not hurt to make this list somewhat big because the // scavenging code will shrink it down when its contents are not in use. -inline constexpr int kMaxDynamicFreeListLength = 8192; - -enum class MemoryTag : uint8_t { - // Sampled, infrequently allocated - kSampled = 0x0, - // Not sampled, NUMA partition 0 - kNormalP0 = 0x1, - // Not sampled, NUMA partition 1 - kNormalP1 = (kNumaPartitions > 1) ? 
0x2 : 0xff, - // Not sampled - kNormal = kNormalP0, -}; +inline constexpr size_t kMaxDynamicFreeListLength = 8192; -inline constexpr uintptr_t kTagShift = std::min(kAddressBits - 4, 42); -inline constexpr uintptr_t kTagMask = uintptr_t{0x3} << kTagShift; +inline constexpr bool ColdFeatureActive() { return kHasExpandedClasses; } -// Returns true if ptr is tagged. -ABSL_DEPRECATED("Replace with specific tests") -inline bool IsTaggedMemory(const void* ptr) { - return (reinterpret_cast(ptr) & kTagMask) == 0; +inline constexpr bool IsExpandedSizeClass(unsigned size_class) { + return kHasExpandedClasses && (size_class >= kExpandedClassesStart); } -inline bool IsSampledMemory(const void* ptr) { - constexpr uintptr_t kSampledNormalMask = kNumaPartitions > 1 ? 0x3 : 0x1; - - static_assert(static_cast(MemoryTag::kNormalP0) & - kSampledNormalMask); - static_assert(static_cast(MemoryTag::kNormalP1) & - kSampledNormalMask); - - const uintptr_t tag = - (reinterpret_cast(ptr) & kTagMask) >> kTagShift; - return (tag & kSampledNormalMask) == - static_cast(MemoryTag::kSampled); -} - -inline bool IsNormalMemory(const void* ptr) { return !IsSampledMemory(ptr); } - -inline MemoryTag GetMemoryTag(const void* ptr) { - return static_cast((reinterpret_cast(ptr) & kTagMask) >> - kTagShift); -} - -absl::string_view MemoryTagToLabel(MemoryTag tag); - -inline constexpr bool IsExpandedSizeClass(unsigned cl) { - return kHasExpandedClasses && (cl >= kExpandedClassesStart); -} - -#if !defined(TCMALLOC_SMALL_BUT_SLOW) && __SIZEOF_POINTER__ != 4 -// Always allocate at least a huge page -inline constexpr size_t kMinSystemAlloc = kHugePageSize; +#if !defined(TCMALLOC_INTERNAL_SMALL_BUT_SLOW) inline constexpr size_t kMinMmapAlloc = 1 << 30; // mmap() in 1GiB ranges. #else -// Allocate in units of 2MiB. This is the size of a huge page for x86, but -// not for Power. -inline constexpr size_t kMinSystemAlloc = 2 << 20; // mmap() in units of 32MiB. This is a multiple of huge page size for // both x86 (2MiB) and Power (16MiB) inline constexpr size_t kMinMmapAlloc = 32 << 20; #endif -static_assert(kMinMmapAlloc % kMinSystemAlloc == 0, - "Minimum mmap allocation size is not a multiple of" - " minimum system allocation size"); +static_assert( + kMinMmapAlloc % kHugePageSize == 0, + "Minimum mmap allocation size is not a multiple of the huge page size"); + +enum class AllocationAccess { + kHot, + kCold, +}; + +inline AllocationAccess AccessFromPointer(void* ptr) { + if (!kHasExpandedClasses) { + TC_ASSERT_NE(GetMemoryTag(ptr), MemoryTag::kCold); + return AllocationAccess::kHot; + } + + return ABSL_PREDICT_FALSE(GetMemoryTag(ptr) == MemoryTag::kCold) + ? AllocationAccess::kCold + : AllocationAccess::kHot; +} inline MemoryTag NumaNormalTag(size_t numa_partition) { switch (numa_partition) { @@ -316,206 +279,29 @@ inline size_t NumaPartitionFromPointer(void* ptr) { } } -// Size-class information + mapping -class SizeMap { - public: - // All size classes <= 512 in all configs always have 1 page spans. - static constexpr size_t kMultiPageSize = 512; - // Min alignment for all size classes > kMultiPageSize in all configs. 
- static constexpr size_t kMultiPageAlignment = 64; - // log2 (kMultiPageAlignment) - static constexpr size_t kMultiPageAlignmentShift = - absl::bit_width(kMultiPageAlignment - 1u); - - private: - //------------------------------------------------------------------- - // Mapping from size to size_class and vice versa - //------------------------------------------------------------------- - - // Sizes <= 1024 have an alignment >= 8. So for such sizes we have an - // array indexed by ceil(size/8). Sizes > 1024 have an alignment >= 128. - // So for these larger sizes we have an array indexed by ceil(size/128). - // - // We flatten both logical arrays into one physical array and use - // arithmetic to compute an appropriate index. The constants used by - // ClassIndex() were selected to make the flattening work. - // - // Examples: - // Size Expression Index - // ------------------------------------------------------- - // 0 (0 + 7) / 8 0 - // 1 (1 + 7) / 8 1 - // ... - // 1024 (1024 + 7) / 8 128 - // 1025 (1025 + 127 + (120<<7)) / 128 129 - // ... - // 32768 (32768 + 127 + (120<<7)) / 128 376 - static constexpr int kMaxSmallSize = 1024; - static constexpr size_t kClassArraySize = - ((kMaxSize + 127 + (120 << 7)) >> 7) + 1; - - // Batch size is the number of objects to move at once. - typedef unsigned char BatchSize; - - // class_array_ is accessed on every malloc, so is very hot. We make it the - // first member so that it inherits the overall alignment of a SizeMap - // instance. In particular, if we create a SizeMap instance that's cache-line - // aligned, this member is also aligned to the width of a cache line. - CompactSizeClass - class_array_[kClassArraySize * (kHasExpandedClasses ? 2 : 1)] = {0}; - - // Number of objects to move between a per-thread list and a central - // list in one shot. We want this to be not too small so we can - // amortize the lock overhead for accessing the central list. Making - // it too big may temporarily cause unnecessary memory wastage in the - // per-thread free list until the scavenger cleans up the list. - BatchSize num_objects_to_move_[kNumClasses] = {0}; - - // If size is no more than kMaxSize, compute index of the - // class_array[] entry for it, putting the class index in output - // parameter idx and returning true. Otherwise return false. - static inline bool ABSL_ATTRIBUTE_ALWAYS_INLINE - ClassIndexMaybe(size_t s, uint32_t* idx) { - if (ABSL_PREDICT_TRUE(s <= kMaxSmallSize)) { - *idx = (static_cast(s) + 7) >> 3; - return true; - } else if (s <= kMaxSize) { - *idx = (static_cast(s) + 127 + (120 << 7)) >> 7; - return true; - } - return false; - } - - static inline size_t ClassIndex(size_t s) { - uint32_t ret; - CHECK_CONDITION(ClassIndexMaybe(s, &ret)); - return ret; - } - - // Mapping from size class to number of pages to allocate at a time - unsigned char class_to_pages_[kNumClasses] = {0}; - - // Mapping from size class to max size storable in that class - uint32_t class_to_size_[kNumClasses] = {0}; - - // If environment variable defined, use it to override sizes classes. - // Returns true if all classes defined correctly. - bool MaybeRunTimeSizeClasses(); - - protected: - // Set the give size classes to be used by TCMalloc. - void SetSizeClasses(int num_classes, const SizeClassInfo* parsed); - - // Check that the size classes meet all requirements. 
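// Standalone spot checks for the flattened class_array_ index documented in
// the worked examples above; the expressions mirror ClassIndexMaybe.
#include <cstddef>
#include <cstdint>

constexpr uint32_t ClassIndexSketch(size_t s) {
  // Assumes s <= kMaxSize: sizes <= 1024 are bucketed in steps of 8, larger
  // sizes in steps of 128, and the two ranges are flattened into one array.
  return s <= 1024 ? (static_cast<uint32_t>(s) + 7) >> 3
                   : (static_cast<uint32_t>(s) + 127 + (120 << 7)) >> 7;
}

static_assert(ClassIndexSketch(0) == 0);
static_assert(ClassIndexSketch(1) == 1);
static_assert(ClassIndexSketch(1024) == 128);
static_assert(ClassIndexSketch(1025) == 129);
static_assert(ClassIndexSketch(32768) == 376);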
- bool ValidSizeClasses(int num_classes, const SizeClassInfo* parsed); - - // Definition of size class that is set in size_classes.cc - static const SizeClassInfo kSizeClasses[]; - static const int kSizeClassesCount; - - static const SizeClassInfo kExperimentalPow2Below64SizeClasses[]; - static const int kExperimentalPow2Below64SizeClassesCount; - // kExperimentalPowBelow64SizeClassesCount - static const SizeClassInfo kExperimentalPow2SizeClasses[]; - static const int kExperimentalPow2SizeClassesCount; - - // Definition of size class that is set in size_classes.cc - static const SizeClassInfo kLegacySizeClasses[]; - static const int kLegacySizeClassesCount; +// Linker initialized, so this lock can be accessed at any time. +// Note: `CpuCache::ResizeInfo::lock` must be taken before the `pageheap_lock` +// if both are going to be held simultaneously. +extern absl::base_internal::SpinLock pageheap_lock; +class ABSL_SCOPED_LOCKABLE PageHeapSpinLockHolder { public: - // constexpr constructor to guarantee zero-initialization at compile-time. We - // rely on Init() to populate things. - constexpr SizeMap() = default; - - // Initialize the mapping arrays - void Init(); - - // Returns the size class for size `size` respecting the alignment - // requirements of `policy`. - // - // Returns true on success. Returns false if either: - // - the size exceeds the maximum size class size. - // - the align size is greater or equal to the default page size - // - no matching properly aligned size class is available - // - // Requires that policy.align() returns a non-zero power of 2. - // - // When policy.align() = 1 the default alignment of the size table will be - // used. If policy.align() is constexpr 1 (e.g. when using - // DefaultAlignPolicy) then alignment-related code will optimize away. - // - // TODO(b/171978365): Replace the output parameter with returning - // absl::optional. - template - inline bool ABSL_ATTRIBUTE_ALWAYS_INLINE GetSizeClass(Policy policy, - size_t size, - uint32_t* cl) { - const size_t align = policy.align(); - ASSERT(absl::has_single_bit(align)); - - if (ABSL_PREDICT_FALSE(align >= kPageSize)) { - // TODO(b/172060547): Consider changing this to align > kPageSize. - ABSL_ANNOTATE_MEMORY_IS_UNINITIALIZED(cl, sizeof(*cl)); - return false; - } - - uint32_t idx; - if (ABSL_PREDICT_FALSE(!ClassIndexMaybe(size, &idx))) { - ABSL_ANNOTATE_MEMORY_IS_UNINITIALIZED(cl, sizeof(*cl)); - return false; - } - *cl = class_array_[idx] + policy.scaled_numa_partition(); - - // Predict that size aligned allocs most often directly map to a proper - // size class, i.e., multiples of 32, 64, etc, matching our class sizes. - const size_t mask = (align - 1); - do { - if (ABSL_PREDICT_TRUE((class_to_size(*cl) & mask) == 0)) { - return true; - } - } while ((++*cl % kNumBaseClasses) != 0); - - ABSL_ANNOTATE_MEMORY_IS_UNINITIALIZED(cl, sizeof(*cl)); - return false; - } + PageHeapSpinLockHolder() + ABSL_EXCLUSIVE_LOCK_FUNCTION(pageheap_lock) = default; + ~PageHeapSpinLockHolder() ABSL_UNLOCK_FUNCTION() = default; - // Returns size class for given size, or 0 if this instance has not been - // initialized yet. REQUIRES: size <= kMaxSize. - template - inline size_t ABSL_ATTRIBUTE_ALWAYS_INLINE SizeClass(Policy policy, - size_t size) { - ASSERT(size <= kMaxSize); - uint32_t ret = 0; - GetSizeClass(policy, size, &ret); - return ret; - } - - // Get the byte-size for a specified class. REQUIRES: cl <= kNumClasses. 
- inline size_t ABSL_ATTRIBUTE_ALWAYS_INLINE class_to_size(size_t cl) { - ASSERT(cl < kNumClasses); - return class_to_size_[cl]; - } - - // Mapping from size class to number of pages to allocate at a time - inline size_t class_to_pages(size_t cl) { - ASSERT(cl < kNumClasses); - return class_to_pages_[cl]; - } - - // Number of objects to move between a per-thread list and a central - // list in one shot. We want this to be not too small so we can - // amortize the lock overhead for accessing the central list. Making - // it too big may temporarily cause unnecessary memory wastage in the - // per-thread free list until the scavenger cleans up the list. - inline SizeMap::BatchSize num_objects_to_move(size_t cl) { - ASSERT(cl < kNumClasses); - return num_objects_to_move_[cl]; - } + private: + AllocationGuardSpinLockHolder lock_{&pageheap_lock}; }; -// Linker initialized, so this lock can be accessed at any time. -extern absl::base_internal::SpinLock pageheap_lock; +// Evaluates a/b, avoiding division by zero. +inline double safe_div(double a, double b) { + if (b == 0) { + return 0.; + } else { + return a / b; + } +} } // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/copts.bzl b/contrib/libs/tcmalloc/tcmalloc/copts.bzl new file mode 100644 index 000000000000..81cfe8fa3367 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/copts.bzl @@ -0,0 +1,49 @@ +# Copyright 2019 The TCMalloc Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This package provides default compiler warning flags for the OSS release""" + +TCMALLOC_LLVM_FLAGS = [ + # Ensure TCMalloc itself builds without errors, even if its dependencies + # aren't necessarily -Werror clean. + "-Werror", + "-Wno-deprecated-declarations", + "-Wno-deprecated-volatile", + "-Wno-implicit-int-float-conversion", + "-Wno-sign-compare", + "-Wno-uninitialized", + "-Wno-unused-function", + "-Wno-unused-variable", +] + +TCMALLOC_GCC_FLAGS = [ + # Ensure TCMalloc itself builds without errors, even if its dependencies + # aren't necessarily -Werror clean. 
+ "-Werror", + "-Wno-array-bounds", + "-Wno-attribute-alias", + "-Wno-deprecated-declarations", + "-Wno-sign-compare", + "-Wno-stringop-overflow", + "-Wno-uninitialized", + "-Wno-unused-function", + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66425 + "-Wno-unused-result", + "-Wno-unused-variable", +] + +TCMALLOC_DEFAULT_COPTS = select({ + "//tcmalloc:llvm": TCMALLOC_LLVM_FLAGS, + "//conditions:default": TCMALLOC_GCC_FLAGS, +}) diff --git a/contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc b/contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc index 8ae02b38e9ce..15c7e65f3daf 100644 --- a/contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc +++ b/contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc @@ -15,1090 +15,31 @@ #include "tcmalloc/cpu_cache.h" #include -#include -#include -#include +#include +#include -#include "absl/base/dynamic_annotations.h" -#include "absl/base/internal/spinlock.h" -#include "absl/base/internal/sysinfo.h" -#include "absl/base/macros.h" -#include "absl/base/thread_annotations.h" -#include "absl/container/fixed_array.h" -#include "tcmalloc/arena.h" -#include "tcmalloc/common.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/percpu.h" #include "tcmalloc/internal_malloc_extension.h" #include "tcmalloc/parameters.h" #include "tcmalloc/static_vars.h" -#include "tcmalloc/transfer_cache.h" GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { -static cpu_set_t FillActiveCpuMask() { - cpu_set_t allowed_cpus; - if (sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus) != 0) { - CPU_ZERO(&allowed_cpus); - } - -#ifdef PERCPU_USE_RSEQ - const bool real_cpus = !subtle::percpu::UsingFlatVirtualCpus(); -#else - const bool real_cpus = true; -#endif - - if (real_cpus) { - return allowed_cpus; - } - - const int virtual_cpu_count = CPU_COUNT(&allowed_cpus); - CPU_ZERO(&allowed_cpus); - for (int cpu = 0; cpu < virtual_cpu_count; ++cpu) { - CPU_SET(cpu, &allowed_cpus); - } - return allowed_cpus; -} - -// MaxCapacity() determines how we distribute memory in the per-cpu cache -// to the various class sizes. -static size_t MaxCapacity(size_t cl) { - // The number of size classes that are commonly used and thus should be - // allocated more slots in the per-cpu cache. - static constexpr size_t kNumSmall = 10; - - // The memory used for each per-CPU slab is the sum of: - // sizeof(std::atomic) * kNumClasses - // sizeof(void*) * (kSmallObjectDepth + 1) * kNumSmall - // sizeof(void*) * (kLargeObjectDepth + 1) * kNumLarge - // - // Class size 0 has MaxCapacity() == 0, which is the reason for using - // kNumClasses - 1 above instead of kNumClasses. - // - // Each Size class region in the slab is preceded by one padding pointer that - // points to itself, because prefetch instructions of invalid pointers are - // slow. That is accounted for by the +1 for object depths. -#if defined(TCMALLOC_SMALL_BUT_SLOW) - // With SMALL_BUT_SLOW we have 4KiB of per-cpu slab and 46 class sizes we - // allocate: - // == 8 * 46 + 8 * ((16 + 1) * 10 + (6 + 1) * 35) = 4038 bytes of 4096 - static const uint16_t kSmallObjectDepth = 16; - static const uint16_t kLargeObjectDepth = 6; -#else - // We allocate 256KiB per-cpu for pointers to cached per-cpu memory. 
- // Each 256KiB is a subtle::percpu::TcmallocSlab::Slabs - // Max(kNumClasses) is 89, so the maximum footprint per CPU is: - // 89 * 8 + 8 * ((2048 + 1) * 10 + (152 + 1) * 78 + 88) = 254 KiB - static const uint16_t kSmallObjectDepth = 2048; - static const uint16_t kLargeObjectDepth = 152; -#endif - if (cl == 0 || cl >= kNumClasses) return 0; - - if (Static::sharded_transfer_cache().should_use(cl)) { - return 0; - } - - if (Static::sizemap().class_to_size(cl) == 0) { - return 0; - } - - if (!IsExpandedSizeClass(cl) && (cl % kNumBaseClasses) <= kNumSmall) { - // Small object sizes are very heavily used and need very deep caches for - // good performance (well over 90% of malloc calls are for cl <= 10.) - return kSmallObjectDepth; - } - - if (IsExpandedSizeClass(cl)) { - return 0; - } - - return kLargeObjectDepth; -} - -static void *SlabAlloc(size_t size) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { - return Static::arena().Alloc(size); -} - -void CPUCache::Activate(ActivationMode mode) { - ASSERT(Static::IsInited()); - int num_cpus = absl::base_internal::NumCPUs(); - - size_t per_cpu_shift = kPerCpuShift; - const auto &topology = Static::numa_topology(); - if (topology.numa_aware()) { - per_cpu_shift += absl::bit_ceil(topology.active_partitions() - 1); - } - - const size_t kBytesAvailable = (1 << per_cpu_shift); - size_t bytes_required = sizeof(std::atomic) * kNumClasses; - - // Deal with size classes that correspond only to NUMA partitions that are in - // use. If NUMA awareness is disabled then we may have a smaller shift than - // would suffice for all of the unused size classes. - for (int cl = 0; - cl < Static::numa_topology().active_partitions() * kNumBaseClasses; - ++cl) { - const uint16_t mc = MaxCapacity(cl); - max_capacity_[cl] = mc; - bytes_required += sizeof(void *) * mc; - } - - // Deal with expanded size classes. - for (int cl = kExpandedClassesStart; cl < kNumClasses; ++cl) { - const uint16_t mc = MaxCapacity(cl); - max_capacity_[cl] = mc; - bytes_required += sizeof(void *) * mc; - } - - // As we may make certain size classes no-ops by selecting "0" at runtime, - // using a compile-time calculation overestimates the worst-case memory usage. - if (ABSL_PREDICT_FALSE(bytes_required > kBytesAvailable)) { - Crash(kCrash, __FILE__, __LINE__, "per-CPU memory exceeded, have ", - kBytesAvailable, " need ", bytes_required); - } - - absl::base_internal::SpinLockHolder h(&pageheap_lock); - - resize_ = reinterpret_cast( - Static::arena().Alloc(sizeof(ResizeInfo) * num_cpus)); - lazy_slabs_ = Parameters::lazy_per_cpu_caches(); - - auto max_cache_size = Parameters::max_per_cpu_cache_size(); - - for (int cpu = 0; cpu < num_cpus; ++cpu) { - for (int cl = 1; cl < kNumClasses; ++cl) { - resize_[cpu].per_class[cl].Init(); - } - resize_[cpu].available.store(max_cache_size, std::memory_order_relaxed); - resize_[cpu].capacity.store(max_cache_size, std::memory_order_relaxed); - resize_[cpu].last_steal.store(1, std::memory_order_relaxed); - } - - freelist_.Init(SlabAlloc, MaxCapacityHelper, lazy_slabs_, per_cpu_shift); - if (mode == ActivationMode::FastPathOn) { - Static::ActivateCPUCache(); - } -} - -// Fetch more items from the central cache, refill our local cache, -// and try to grow it if necessary. -// -// This is complicated by the fact that we can only tweak the cache on -// our current CPU and we might get migrated whenever (in fact, we -// might already have been migrated since failing to get memory...) 
-// -// So make sure only to make changes to one CPU's cache; at all times, -// it must be safe to find ourselves migrated (at which point we atomically -// return memory to the correct CPU.) -void *CPUCache::Refill(int cpu, size_t cl) { - const size_t batch_length = Static::sizemap().num_objects_to_move(cl); - - // UpdateCapacity can evict objects from other size classes as it tries to - // increase capacity of this size class. The objects are returned in - // to_return, we insert them into transfer cache at the end of function - // (to increase possibility that we stay on the current CPU as we are - // refilling the list). - ObjectsToReturn to_return; - const size_t target = - UpdateCapacity(cpu, cl, batch_length, false, &to_return); - - // Refill target objects in batch_length batches. - size_t total = 0; - size_t got; - size_t i; - void *result = nullptr; - void *batch[kMaxObjectsToMove]; - do { - const size_t want = std::min(batch_length, target - total); - got = Static::transfer_cache().RemoveRange(cl, batch, want); - if (got == 0) { - break; - } - total += got; - i = got; - if (result == nullptr) { - i--; - result = batch[i]; - } - if (i) { - i -= freelist_.PushBatch(cl, batch, i); - if (i != 0) { - static_assert(ABSL_ARRAYSIZE(batch) >= kMaxObjectsToMove, - "not enough space in batch"); - Static::transfer_cache().InsertRange(cl, absl::Span(batch, i)); - } - } - } while (got == batch_length && i == 0 && total < target && - cpu == freelist_.GetCurrentVirtualCpuUnsafe()); - - for (int i = to_return.count; i < kMaxToReturn; ++i) { - Static::transfer_cache().InsertRange( - to_return.cl[i], absl::Span(&(to_return.obj[i]), 1)); - } - - return result; -} - -size_t CPUCache::UpdateCapacity(int cpu, size_t cl, size_t batch_length, - bool overflow, ObjectsToReturn *to_return) { - // Freelist size balancing strategy: - // - We grow a size class only on overflow/underflow. - // - We shrink size classes in Steal as it scans all size classes. - // - If overflows/underflows happen on a size class, we want to grow its - // capacity to at least 2 * batch_length. It enables usage of the - // transfer cache and leaves the list half-full after we insert/remove - // a batch from the transfer cache. - // - We increase capacity beyond 2 * batch_length only when an overflow is - // followed by an underflow. That's the only case when we could benefit - // from larger capacity -- the overflow and the underflow would collapse. - // - // Note: we can't understand when we have a perfectly-sized list, because for - // a perfectly-sized list we don't hit any slow paths which looks the same as - // inactive list. Eventually we will shrink a perfectly-sized list a bit and - // then it will grow back. This won't happen very frequently for the most - // important small sizes, because we will need several ticks before we shrink - // it again. Also we will shrink it by 1, but grow by a batch. So we should - // have lots of time until we need to grow it again. - - const size_t max_capacity = max_capacity_[cl]; - size_t capacity = freelist_.Capacity(cpu, cl); - // We assert that the return value, target, is non-zero, so starting from an - // initial capacity of zero means we may be populating this core for the - // first time. 
- absl::base_internal::LowLevelCallOnce( - &resize_[cpu].initialized, - [](CPUCache *cache, int cpu) { - if (cache->lazy_slabs_) { - absl::base_internal::SpinLockHolder h(&cache->resize_[cpu].lock); - cache->freelist_.InitCPU(cpu, MaxCapacityHelper); - } - - // While we could unconditionally store, a lazy slab population - // implementation will require evaluating a branch. - cache->resize_[cpu].populated.store(true, std::memory_order_relaxed); - }, - this, cpu); - const bool grow_by_one = capacity < 2 * batch_length; - uint32_t successive = 0; - bool grow_by_batch = - resize_[cpu].per_class[cl].Update(overflow, grow_by_one, &successive); - if ((grow_by_one || grow_by_batch) && capacity != max_capacity) { - size_t increase = 1; - if (grow_by_batch) { - increase = std::min(batch_length, max_capacity - capacity); - } else if (!overflow && capacity < batch_length) { - // On underflow we want to grow to at least batch size, because that's - // what we want to request from transfer cache. - increase = batch_length - capacity; - } - Grow(cpu, cl, increase, to_return); - capacity = freelist_.Capacity(cpu, cl); - } - // Calculate number of objects to return/request from transfer cache. - // Generally we prefer to transfer a single batch, because transfer cache - // handles it efficiently. Except for 2 special cases: - size_t target = batch_length; - // "capacity + 1" because on overflow we already have one object from caller, - // so we can return a whole batch even if capacity is one less. Similarly, - // on underflow we need to return one object to caller, so we can request - // a whole batch even if capacity is one less. - if ((capacity + 1) < batch_length) { - // If we don't have a full batch, return/request just half. We are missing - // transfer cache anyway, and cost of insertion into central freelist is - // ~O(number of objects). - target = std::max(1, (capacity + 1) / 2); - } else if (successive > 0 && capacity >= 3 * batch_length) { - // If the freelist is large and we are hitting series of overflows or - // underflows, return/request several batches at once. On the first overflow - // we return 1 batch, on the second -- 2, on the third -- 4 and so on up to - // half of the batches we have. We do this to save on the cost of hitting - // malloc/free slow path, reduce instruction cache pollution, avoid cache - // misses when accessing transfer/central caches, etc. - size_t num_batches = - std::min(1 << std::min(successive, 10), - ((capacity / batch_length) + 1) / 2); - target = num_batches * batch_length; - } - ASSERT(target != 0); - return target; -} - -void CPUCache::Grow(int cpu, size_t cl, size_t desired_increase, - ObjectsToReturn *to_return) { - const size_t size = Static::sizemap().class_to_size(cl); - const size_t desired_bytes = desired_increase * size; - size_t acquired_bytes; - - // First, there might be unreserved slack. Take what we can. - size_t before, after; - do { - before = resize_[cpu].available.load(std::memory_order_relaxed); - acquired_bytes = std::min(before, desired_bytes); - after = before - acquired_bytes; - } while (!resize_[cpu].available.compare_exchange_strong( - before, after, std::memory_order_relaxed, std::memory_order_relaxed)); - - if (acquired_bytes < desired_bytes) { - acquired_bytes += Steal(cpu, cl, desired_bytes - acquired_bytes, to_return); - } - - // We have all the memory we could reserve. Time to actually do the growth. - - // We might have gotten more than we wanted (stealing from larger sizeclasses) - // so don't grow _too_ much. 
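// The refill/return target computed above, restated as a standalone function
// with a few sample values (the 2048-object capacity and 32-object batch size
// are arbitrary example inputs, not tuned constants).
#include <algorithm>
#include <cstddef>
#include <cstdint>

constexpr size_t RefillTargetSketch(size_t capacity, size_t batch_length,
                                    uint32_t successive) {
  size_t target = batch_length;
  if (capacity + 1 < batch_length) {
    // Not enough room for a full batch: move about half of what fits.
    target = std::max<size_t>(1, (capacity + 1) / 2);
  } else if (successive > 0 && capacity >= 3 * batch_length) {
    // A run of consecutive misses: move 1, 2, 4, ... batches at once, capped
    // at half of the capacity measured in batches.
    const size_t num_batches =
        std::min<size_t>(size_t{1} << std::min<uint32_t>(successive, 10),
                         (capacity / batch_length + 1) / 2);
    target = num_batches * batch_length;
  }
  return target;
}

static_assert(RefillTargetSketch(2048, 32, 0) == 32);     // first miss: one batch
static_assert(RefillTargetSketch(2048, 32, 2) == 128);    // third miss in a row: four batches
static_assert(RefillTargetSketch(2048, 32, 10) == 1024);  // capped at half the capacity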
- size_t actual_increase = acquired_bytes / size; - actual_increase = std::min(actual_increase, desired_increase); - // Remember, Grow may not give us all we ask for. - size_t increase = freelist_.Grow(cpu, cl, actual_increase, max_capacity_[cl]); - size_t increased_bytes = increase * size; - if (increased_bytes < acquired_bytes) { - // return whatever we didn't use to the slack. - size_t unused = acquired_bytes - increased_bytes; - resize_[cpu].available.fetch_add(unused, std::memory_order_relaxed); - } -} - -void CPUCache::TryReclaimingCaches() { - const int num_cpus = absl::base_internal::NumCPUs(); - - for (int cpu = 0; cpu < num_cpus; ++cpu) { - // Nothing to reclaim if the cpu is not populated. - if (!HasPopulated(cpu)) { - continue; - } - - uint64_t used_bytes = UsedBytes(cpu); - uint64_t prev_used_bytes = - resize_[cpu].reclaim_used_bytes.load(std::memory_order_relaxed); - - // Get reclaim miss and used bytes stats that were captured at the end of - // the previous interval. - const CpuCacheMissStats miss_stats = GetReclaimCacheMissStats(cpu); - uint64_t misses = - uint64_t{miss_stats.underflows} + uint64_t{miss_stats.overflows}; - - // Reclaim the cache if the number of used bytes and total number of misses - // stayed constant since the last interval. - if (used_bytes != 0 && used_bytes == prev_used_bytes && misses == 0) { - Reclaim(cpu); - } - - // Takes a snapshot of used bytes in the cache at the end of this interval - // so that we can calculate if cache usage changed in the next interval. - // - // Reclaim occurs on a single thread. So, the relaxed store to used_bytes - // is safe. - resize_[cpu].reclaim_used_bytes.store(used_bytes, - std::memory_order_relaxed); - } -} - -void CPUCache::ShuffleCpuCaches() { - // Knobs that we can potentially tune depending on the workloads. - constexpr double kBytesToStealPercent = 5.0; - constexpr int kMaxNumStealCpus = 5; - - const int num_cpus = absl::base_internal::NumCPUs(); - absl::FixedArray> misses(num_cpus); - - // Record the cumulative misses for the caches so that we can select the - // caches with the highest misses as the candidates to steal the cache for. - int max_populated_cpu = -1; - int num_populated_cpus = 0; - for (int cpu = 0; cpu < num_cpus; ++cpu) { - if (!HasPopulated(cpu)) { - continue; - } - const CpuCacheMissStats miss_stats = GetIntervalCacheMissStats(cpu); - misses[num_populated_cpus] = { - cpu, uint64_t{miss_stats.underflows} + uint64_t{miss_stats.overflows}}; - max_populated_cpu = cpu; - ++num_populated_cpus; - } - if (max_populated_cpu == -1) { - return; - } - - // Sorts misses to identify cpus with highest misses. - // - // TODO(vgogte): We can potentially sort the entire misses array and use that - // in StealFromOtherCache to determine cpus to steal from. That is, [0, - // num_dest_cpus) may be the destination cpus and [num_dest_cpus, num_cpus) - // may be cpus we may steal from. We can iterate through the array in a - // descending order to steal from them. The upside of this mechanism is that - // we would be able to do a more fair stealing, starting with cpus with lowest - // misses. The downside of this mechanism is that we would have to sort the - // entire misses array. This might be compute intensive on servers with high - // number of cpus (eg. Rome, Milan). We need to investigate the compute - // required to implement this. 
- const int num_dest_cpus = std::min(num_populated_cpus, kMaxNumStealCpus); - std::partial_sort(misses.begin(), misses.begin() + num_dest_cpus, - misses.end(), - [](std::pair a, std::pair b) { - if (a.second == b.second) { - return a.first < b.first; - } - return a.second > b.second; - }); - - // Try to steal kBytesToStealPercent percentage of max_per_cpu_cache_size for - // each destination cpu cache. - size_t to_steal = - kBytesToStealPercent / 100.0 * Parameters::max_per_cpu_cache_size(); - for (int i = 0; i < num_dest_cpus; ++i) { - StealFromOtherCache(misses[i].first, max_populated_cpu, to_steal); - } - - // Takes a snapshot of underflows and overflows at the end of this interval - // so that we can calculate the misses that occurred in the next interval. - for (int cpu = 0; cpu < num_cpus; ++cpu) { - size_t underflows = - resize_[cpu].total_underflows.load(std::memory_order_relaxed); - size_t overflows = - resize_[cpu].total_overflows.load(std::memory_order_relaxed); - - // Shuffle occurs on a single thread. So, the relaxed stores to - // prev_underflow and pre_overflow counters are safe. - resize_[cpu].shuffle_underflows.store(underflows, - std::memory_order_relaxed); - resize_[cpu].shuffle_overflows.store(overflows, std::memory_order_relaxed); - } -} - -static void ShrinkHandler(void *arg, size_t cl, void **batch, size_t count) { - const size_t batch_length = Static::sizemap().num_objects_to_move(cl); - for (size_t i = 0; i < count; i += batch_length) { - size_t n = std::min(batch_length, count - i); - Static::transfer_cache().InsertRange(cl, absl::Span(batch + i, n)); - } -} - -void CPUCache::StealFromOtherCache(int cpu, int max_populated_cpu, - size_t bytes) { - constexpr double kCacheMissThreshold = 0.80; - - const CpuCacheMissStats dest_misses = GetIntervalCacheMissStats(cpu); - - // If both underflows and overflows are 0, we should not need to steal. - if (dest_misses.underflows == 0 && dest_misses.overflows == 0) return; - - size_t acquired = 0; - - // We use last_cpu_cache_steal_ as a hint to start our search for cpu ids to - // steal from so that we can iterate through the cpus in a nice round-robin - // fashion. - int src_cpu = std::min(last_cpu_cache_steal_.load(std::memory_order_relaxed), - max_populated_cpu); - - // We iterate through max_populate_cpus number of cpus to steal from. - // max_populate_cpus records the max cpu id that has been populated. Note - // that, any intermediate changes since the max_populated_cpus was measured - // may have populated higher cpu ids, but we do not include those in the - // search. The approximation prevents us from doing another pass through the - // cpus to just find the latest populated cpu id. - // - // We break from the loop once we iterate through all the cpus once, or if the - // total number of acquired bytes is higher than or equal to the desired bytes - // we want to steal. - for (int cpu_offset = 1; cpu_offset <= max_populated_cpu && acquired < bytes; - ++cpu_offset) { - if (--src_cpu < 0) { - src_cpu = max_populated_cpu; - } - ASSERT(0 <= src_cpu); - ASSERT(src_cpu <= max_populated_cpu); - - // We do not steal from the same CPU. Maybe we can explore combining this - // with stealing from the same CPU later. - if (src_cpu == cpu) continue; - - // We do not steal from the cache that hasn't been populated yet. - if (!HasPopulated(src_cpu)) continue; - - // We do not steal from cache that has capacity less than our lower - // capacity threshold. 
- if (Capacity(src_cpu) < - kCacheCapacityThreshold * Parameters::max_per_cpu_cache_size()) - continue; - - const CpuCacheMissStats src_misses = GetIntervalCacheMissStats(src_cpu); - - // If underflows and overflows from the source cpu are higher, we do not - // steal from that cache. We consider the cache as a candidate to steal from - // only when its misses are lower than 0.8x that of the dest cache. - if (src_misses.underflows > kCacheMissThreshold * dest_misses.underflows || - src_misses.overflows > kCacheMissThreshold * dest_misses.overflows) - continue; - - size_t start_cl = - resize_[src_cpu].last_steal.load(std::memory_order_relaxed); - - ASSERT(start_cl < kNumClasses); - ASSERT(0 < start_cl); - size_t source_cl = start_cl; - for (size_t offset = 1; offset < kNumClasses; ++offset) { - source_cl = start_cl + offset; - if (source_cl >= kNumClasses) { - source_cl -= kNumClasses - 1; - } - ASSERT(0 < source_cl); - ASSERT(source_cl < kNumClasses); - - const size_t capacity = freelist_.Capacity(src_cpu, source_cl); - if (capacity == 0) { - // Nothing to steal. - continue; - } - const size_t length = freelist_.Length(src_cpu, source_cl); - - // TODO(vgogte): Currently, scoring is similar to stealing from the - // same cpu in CpuCache::Steal(). Revisit this later to tune the - // knobs. - const size_t batch_length = - Static::sizemap().num_objects_to_move(source_cl); - size_t size = Static::sizemap().class_to_size(source_cl); - - // Clock-like algorithm to prioritize size classes for shrinking. - // - // Each size class has quiescent ticks counter which is incremented as we - // pass it, the counter is reset to 0 in UpdateCapacity on grow. - // If the counter value is 0, then we've just tried to grow the size - // class, so it makes little sense to shrink it back. The higher counter - // value the longer ago we grew the list and the more probable it is that - // the full capacity is unused. - // - // Then, we calculate "shrinking score", the higher the score the less we - // we want to shrink this size class. The score is considerably skewed - // towards larger size classes: smaller classes are usually used more - // actively and we also benefit less from shrinking smaller classes (steal - // less capacity). Then, we also avoid shrinking full freelists as we will - // need to evict an object and then go to the central freelist to return - // it. Then, we also avoid shrinking freelists that are just above batch - // size, because shrinking them will disable transfer cache. - // - // Finally, we shrink if the ticks counter is >= the score. - uint32_t qticks = resize_[src_cpu].per_class[source_cl].Tick(); - uint32_t score = 0; - // Note: the following numbers are based solely on intuition, common sense - // and benchmarking results. - if (size <= 144) { - score = 2 + (length >= capacity) + - (length >= batch_length && length < 2 * batch_length); - } else if (size <= 1024) { - score = 1 + (length >= capacity) + - (length >= batch_length && length < 2 * batch_length); - } else if (size <= (64 << 10)) { - score = (length >= capacity); - } - if (score > qticks) { - continue; - } - - // Finally, try to shrink (can fail if we were migrated). - // We always shrink by 1 object. The idea is that inactive lists will be - // shrunk to zero eventually anyway (or they just would not grow in the - // first place), but for active lists it does not make sense to - // aggressively shuffle capacity all the time. 
- // - // If the list is full, ShrinkOtherCache first tries to pop enough items - // to make space and then shrinks the capacity. - // TODO(vgogte): Maybe we can steal more from a single list to avoid - // frequent locking overhead. - { - absl::base_internal::SpinLockHolder h(&resize_[src_cpu].lock); - if (freelist_.ShrinkOtherCache(src_cpu, source_cl, 1, nullptr, - ShrinkHandler) == 1) { - acquired += size; - resize_[src_cpu].capacity.fetch_sub(size, std::memory_order_relaxed); - } - } - - if (acquired >= bytes) { - break; - } - } - resize_[cpu].last_steal.store(source_cl, std::memory_order_relaxed); - } - // Record the last cpu id we stole from, which would provide a hint to the - // next time we iterate through the cpus for stealing. - last_cpu_cache_steal_.store(src_cpu, std::memory_order_relaxed); - - // Increment the capacity of the destination cpu cache by the amount of bytes - // acquired from source caches. - if (acquired) { - size_t before = resize_[cpu].available.load(std::memory_order_relaxed); - size_t bytes_with_stolen; - do { - bytes_with_stolen = before + acquired; - } while (!resize_[cpu].available.compare_exchange_weak( - before, bytes_with_stolen, std::memory_order_relaxed, - std::memory_order_relaxed)); - resize_[cpu].capacity.fetch_add(acquired, std::memory_order_relaxed); - } -} - -// There are rather a lot of policy knobs we could tweak here. -size_t CPUCache::Steal(int cpu, size_t dest_cl, size_t bytes, - ObjectsToReturn *to_return) { - // Steal from other sizeclasses. Try to go in a nice circle. - // Complicated by sizeclasses actually being 1-indexed. - size_t acquired = 0; - size_t start = resize_[cpu].last_steal.load(std::memory_order_relaxed); - ASSERT(start < kNumClasses); - ASSERT(0 < start); - size_t source_cl = start; - for (size_t offset = 1; offset < kNumClasses; ++offset) { - source_cl = start + offset; - if (source_cl >= kNumClasses) { - source_cl -= kNumClasses - 1; - } - ASSERT(0 < source_cl); - ASSERT(source_cl < kNumClasses); - // Decide if we want to steal source_cl. - if (source_cl == dest_cl) { - // First, no sense in picking your own pocket. - continue; - } - const size_t capacity = freelist_.Capacity(cpu, source_cl); - if (capacity == 0) { - // Nothing to steal. - continue; - } - const size_t length = freelist_.Length(cpu, source_cl); - const size_t batch_length = - Static::sizemap().num_objects_to_move(source_cl); - size_t size = Static::sizemap().class_to_size(source_cl); - - // Clock-like algorithm to prioritize size classes for shrinking. - // - // Each size class has quiescent ticks counter which is incremented as we - // pass it, the counter is reset to 0 in UpdateCapacity on grow. - // If the counter value is 0, then we've just tried to grow the size class, - // so it makes little sense to shrink it back. The higher counter value - // the longer ago we grew the list and the more probable it is that - // the full capacity is unused. - // - // Then, we calculate "shrinking score", the higher the score the less we - // we want to shrink this size class. The score is considerably skewed - // towards larger size classes: smaller classes are usually used more - // actively and we also benefit less from shrinking smaller classes (steal - // less capacity). Then, we also avoid shrinking full freelists as we will - // need to evict an object and then go to the central freelist to return it. - // Then, we also avoid shrinking freelists that are just above batch size, - // because shrinking them will disable transfer cache. 
- // - // Finally, we shrink if the ticks counter is >= the score. - uint32_t qticks = resize_[cpu].per_class[source_cl].Tick(); - uint32_t score = 0; - // Note: the following numbers are based solely on intuition, common sense - // and benchmarking results. - if (size <= 144) { - score = 2 + (length >= capacity) + - (length >= batch_length && length < 2 * batch_length); - } else if (size <= 1024) { - score = 1 + (length >= capacity) + - (length >= batch_length && length < 2 * batch_length); - } else if (size <= (64 << 10)) { - score = (length >= capacity); - } - if (score > qticks) { - continue; - } - - if (length >= capacity) { - // The list is full, need to evict an object to shrink it. - if (to_return == nullptr) { - continue; - } - if (to_return->count == 0) { - // Can't steal any more because the to_return set is full. - break; - } - void *obj = freelist_.Pop(source_cl, NoopUnderflow); - if (obj) { - --to_return->count; - to_return->cl[to_return->count] = source_cl; - to_return->obj[to_return->count] = obj; - } - } - - // Finally, try to shrink (can fail if we were migrated). - // We always shrink by 1 object. The idea is that inactive lists will be - // shrunk to zero eventually anyway (or they just would not grow in the - // first place), but for active lists it does not make sense to aggressively - // shuffle capacity all the time. - if (freelist_.Shrink(cpu, source_cl, 1) == 1) { - acquired += size; - } - - if (cpu != freelist_.GetCurrentVirtualCpuUnsafe() || acquired >= bytes) { - // can't steal any more or don't need to - break; - } - } - // update the hint - resize_[cpu].last_steal.store(source_cl, std::memory_order_relaxed); - return acquired; -} - -int CPUCache::Overflow(void *ptr, size_t cl, int cpu) { - const size_t batch_length = Static::sizemap().num_objects_to_move(cl); - const size_t target = UpdateCapacity(cpu, cl, batch_length, true, nullptr); - // Return target objects in batch_length batches. 
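// Illustrative sketch (not part of the patch): the "return N objects in
// batch_length-sized chunks" pattern that ShrinkHandler above and the loop
// below both follow. The insert callback stands in for
// transfer_cache().InsertRange() and is an assumption of this example.
#include <algorithm>
#include <cstddef>
#include <functional>

inline void InsertInBatches(void** objs, size_t count, size_t batch_length,
                            const std::function<void(void**, size_t)>& insert) {
  for (size_t i = 0; i < count; i += batch_length) {
    const size_t n = std::min(batch_length, count - i);
    insert(objs + i, n);  // One transfer-cache insertion per chunk.
  }
}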
- size_t total = 0; - size_t count = 1; - void *batch[kMaxObjectsToMove]; - batch[0] = ptr; - do { - size_t want = std::min(batch_length, target - total); - if (count < want) { - count += freelist_.PopBatch(cl, batch + count, want - count); - } - if (!count) break; - - total += count; - static_assert(ABSL_ARRAYSIZE(batch) >= kMaxObjectsToMove, - "not enough space in batch"); - Static::transfer_cache().InsertRange(cl, absl::Span(batch, count)); - if (count != batch_length) break; - count = 0; - } while (total < target && cpu == freelist_.GetCurrentVirtualCpuUnsafe()); - tracking::Report(kFreeTruncations, cl, 1); - return 1; -} - -uint64_t CPUCache::Allocated(int target_cpu) const { - ASSERT(target_cpu >= 0); - if (!HasPopulated(target_cpu)) { - return 0; - } - - uint64_t total = 0; - for (int cl = 1; cl < kNumClasses; cl++) { - int size = Static::sizemap().class_to_size(cl); - total += size * freelist_.Capacity(target_cpu, cl); - } - return total; -} - -uint64_t CPUCache::UsedBytes(int target_cpu) const { - ASSERT(target_cpu >= 0); - if (!HasPopulated(target_cpu)) { - return 0; - } - - uint64_t total = 0; - for (int cl = 1; cl < kNumClasses; cl++) { - int size = Static::sizemap().class_to_size(cl); - total += size * freelist_.Length(target_cpu, cl); - } - return total; -} - -bool CPUCache::HasPopulated(int target_cpu) const { - ASSERT(target_cpu >= 0); - return resize_[target_cpu].populated.load(std::memory_order_relaxed); -} - -PerCPUMetadataState CPUCache::MetadataMemoryUsage() const { - return freelist_.MetadataMemoryUsage(); -} - -uint64_t CPUCache::TotalUsedBytes() const { - uint64_t total = 0; - for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus; - ++cpu) { - total += UsedBytes(cpu); - } - return total; -} - -uint64_t CPUCache::TotalObjectsOfClass(size_t cl) const { - ASSERT(cl < kNumClasses); - uint64_t total_objects = 0; - if (cl > 0) { - for (int cpu = 0, n = absl::base_internal::NumCPUs(); cpu < n; cpu++) { - if (!HasPopulated(cpu)) { - continue; - } - total_objects += freelist_.Length(cpu, cl); - } - } - return total_objects; -} - -uint64_t CPUCache::Unallocated(int cpu) const { - return resize_[cpu].available.load(std::memory_order_relaxed); -} - -uint64_t CPUCache::Capacity(int cpu) const { - return resize_[cpu].capacity.load(std::memory_order_relaxed); -} - -uint64_t CPUCache::CacheLimit() const { - return Parameters::max_per_cpu_cache_size(); -} - -struct DrainContext { - std::atomic *available; - uint64_t bytes; -}; - -static void DrainHandler(void *arg, size_t cl, void **batch, size_t count, - size_t cap) { - DrainContext *ctx = static_cast(arg); - const size_t size = Static::sizemap().class_to_size(cl); - const size_t batch_length = Static::sizemap().num_objects_to_move(cl); - ctx->bytes += count * size; - // Drain resets capacity to 0, so return the allocated capacity to that - // CPU's slack. - ctx->available->fetch_add(cap * size, std::memory_order_relaxed); - for (size_t i = 0; i < count; i += batch_length) { - size_t n = std::min(batch_length, count - i); - Static::transfer_cache().InsertRange(cl, absl::Span(batch + i, n)); - } -} - -uint64_t CPUCache::Reclaim(int cpu) { - absl::base_internal::SpinLockHolder h(&resize_[cpu].lock); - - // If we haven't populated this core, freelist_.Drain() will touch the memory - // (for writing) as part of its locking process. Avoid faulting new pages as - // part of a release process. 
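// Illustrative sketch (not part of the patch): the per-CPU byte accounting
// used by Allocated() and UsedBytes() above, written against hypothetical
// callbacks instead of the real size map and freelist.
#include <cstddef>
#include <cstdint>
#include <functional>

inline uint64_t SumBytesOverClasses(
    size_t num_classes,
    const std::function<size_t(size_t)>& size_of_class,
    const std::function<size_t(size_t)>& objects_of_class) {
  uint64_t total = 0;
  // Class 0 is skipped, matching the loops above which start at cl = 1.
  for (size_t cl = 1; cl < num_classes; ++cl) {
    total += uint64_t{size_of_class(cl)} * objects_of_class(cl);
  }
  return total;
}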
- if (!resize_[cpu].populated.load(std::memory_order_relaxed)) { - return 0; - } - - DrainContext ctx{&resize_[cpu].available, 0}; - freelist_.Drain(cpu, &ctx, DrainHandler); - - // Record that the reclaim occurred for this CPU. - resize_[cpu].num_reclaims.store( - resize_[cpu].num_reclaims.load(std::memory_order_relaxed) + 1, - std::memory_order_relaxed); - return ctx.bytes; -} - -uint64_t CPUCache::GetNumReclaims(int cpu) const { - return resize_[cpu].num_reclaims.load(std::memory_order_relaxed); -} - -void CPUCache::RecordCacheMissStat(const int cpu, const bool is_malloc) { - CPUCache &cpu_cache = Static::cpu_cache(); - if (is_malloc) { - cpu_cache.resize_[cpu].total_underflows.fetch_add( - 1, std::memory_order_relaxed); - } else { - cpu_cache.resize_[cpu].total_overflows.fetch_add(1, - std::memory_order_relaxed); - } -} - -CPUCache::CpuCacheMissStats CPUCache::GetReclaimCacheMissStats(int cpu) const { - CpuCacheMissStats stats; - size_t total_underflows = - resize_[cpu].total_underflows.load(std::memory_order_relaxed); - size_t prev_reclaim_underflows = - resize_[cpu].reclaim_underflows.load(std::memory_order_relaxed); - // Takes a snapshot of underflows at the end of this interval so that we can - // calculate the misses that occurred in the next interval. - // - // Reclaim occurs on a single thread. So, a relaxed store to the reclaim - // underflow stat is safe. - resize_[cpu].reclaim_underflows.store(total_underflows, - std::memory_order_relaxed); - - // In case of a size_t overflow, we wrap around to 0. - stats.underflows = total_underflows > prev_reclaim_underflows - ? total_underflows - prev_reclaim_underflows - : 0; - - size_t total_overflows = - resize_[cpu].total_overflows.load(std::memory_order_relaxed); - size_t prev_reclaim_overflows = - resize_[cpu].reclaim_overflows.load(std::memory_order_relaxed); - // Takes a snapshot of overflows at the end of this interval so that we can - // calculate the misses that occurred in the next interval. - // - // Reclaim occurs on a single thread. So, a relaxed store to the reclaim - // overflow stat is safe. - resize_[cpu].reclaim_overflows.store(total_overflows, - std::memory_order_relaxed); - - // In case of a size_t overflow, we wrap around to 0. - stats.overflows = total_overflows > prev_reclaim_overflows - ? total_overflows - prev_reclaim_overflows - : 0; - - return stats; -} - -CPUCache::CpuCacheMissStats CPUCache::GetIntervalCacheMissStats(int cpu) const { - CpuCacheMissStats stats; - size_t total_underflows = - resize_[cpu].total_underflows.load(std::memory_order_relaxed); - size_t shuffle_underflows = - resize_[cpu].shuffle_underflows.load(std::memory_order_relaxed); - // In case of a size_t overflow, we wrap around to 0. - stats.underflows = total_underflows > shuffle_underflows - ? total_underflows - shuffle_underflows - : 0; - - size_t total_overflows = - resize_[cpu].total_overflows.load(std::memory_order_relaxed); - size_t shuffle_overflows = - resize_[cpu].shuffle_overflows.load(std::memory_order_relaxed); - // In case of a size_t overflow, we wrap around to 0. - stats.overflows = total_overflows > shuffle_overflows - ? 
total_overflows - shuffle_overflows - : 0; - - return stats; -} - -CPUCache::CpuCacheMissStats CPUCache::GetTotalCacheMissStats(int cpu) const { - CpuCacheMissStats stats; - stats.underflows = - resize_[cpu].total_underflows.load(std::memory_order_relaxed); - stats.overflows = - resize_[cpu].total_overflows.load(std::memory_order_relaxed); - return stats; -} - -void CPUCache::Print(Printer *out) const { - out->printf("------------------------------------------------\n"); - out->printf("Bytes in per-CPU caches (per cpu limit: %" PRIu64 " bytes)\n", - Static::cpu_cache().CacheLimit()); - out->printf("------------------------------------------------\n"); - - const cpu_set_t allowed_cpus = FillActiveCpuMask(); - - for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus; - ++cpu) { - static constexpr double MiB = 1048576.0; - - uint64_t rbytes = UsedBytes(cpu); - bool populated = HasPopulated(cpu); - uint64_t unallocated = Unallocated(cpu); - out->printf("cpu %3d: %12" PRIu64 - " bytes (%7.1f MiB) with" - "%12" PRIu64 " bytes unallocated %s%s\n", - cpu, rbytes, rbytes / MiB, unallocated, - CPU_ISSET(cpu, &allowed_cpus) ? " active" : "", - populated ? " populated" : ""); - } - - out->printf("------------------------------------------------\n"); - out->printf("Number of per-CPU cache underflows, overflows and reclaims\n"); - out->printf("------------------------------------------------\n"); - for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus; - ++cpu) { - CpuCacheMissStats miss_stats = GetTotalCacheMissStats(cpu); - uint64_t reclaims = GetNumReclaims(cpu); - out->printf( - "cpu %3d:" - "%12" PRIu64 - " underflows," - "%12" PRIu64 - " overflows," - "%12" PRIu64 " reclaims\n", - cpu, miss_stats.underflows, miss_stats.overflows, reclaims); - } -} - -void CPUCache::PrintInPbtxt(PbtxtRegion *region) const { - const cpu_set_t allowed_cpus = FillActiveCpuMask(); - - for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus; - ++cpu) { - PbtxtRegion entry = region->CreateSubRegion("cpu_cache"); - uint64_t rbytes = UsedBytes(cpu); - bool populated = HasPopulated(cpu); - uint64_t unallocated = Unallocated(cpu); - CpuCacheMissStats miss_stats = GetTotalCacheMissStats(cpu); - uint64_t reclaims = GetNumReclaims(cpu); - entry.PrintI64("cpu", uint64_t(cpu)); - entry.PrintI64("used", rbytes); - entry.PrintI64("unused", unallocated); - entry.PrintBool("active", CPU_ISSET(cpu, &allowed_cpus)); - entry.PrintBool("populated", populated); - entry.PrintI64("underflows", miss_stats.underflows); - entry.PrintI64("overflows", miss_stats.overflows); - entry.PrintI64("reclaims", reclaims); - } -} - -void CPUCache::AcquireInternalLocks() { - for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus; - ++cpu) { - resize_[cpu].lock.Lock(); - } -} - -void CPUCache::ReleaseInternalLocks() { - for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus; - ++cpu) { - resize_[cpu].lock.Unlock(); - } -} - -void CPUCache::PerClassResizeInfo::Init() { - state_.store(0, std::memory_order_relaxed); -} - -bool CPUCache::PerClassResizeInfo::Update(bool overflow, bool grow, - uint32_t *successive) { - int32_t raw = state_.load(std::memory_order_relaxed); - State state; - memcpy(&state, &raw, sizeof(state)); - const bool overflow_then_underflow = !overflow && state.overflow; - grow |= overflow_then_underflow; - // Reset quiescent ticks for Steal clock algorithm if we are going to grow. 
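// Illustrative sketch (not part of the patch): the lock-free read-modify-write
// of the packed per-class state used by Update()/Tick() below. The exact
// bitfield layout here is a simplified assumption; the real State is defined
// in cpu_cache.h.
#include <atomic>
#include <cstdint>
#include <cstring>

struct PackedState {  // must stay the size of the atomic word
  uint32_t overflow : 1;
  uint32_t quiescent_ticks : 15;
  uint32_t successive : 16;
};
static_assert(sizeof(PackedState) == sizeof(int32_t), "size mismatch");

inline uint32_t TickSketch(std::atomic<int32_t>& state_word) {
  int32_t raw = state_word.load(std::memory_order_relaxed);
  PackedState state;
  std::memcpy(&state, &raw, sizeof(state));  // unpack
  state.quiescent_ticks++;
  std::memcpy(&raw, &state, sizeof(raw));    // repack
  state_word.store(raw, std::memory_order_relaxed);
  return state.quiescent_ticks - 1;          // value before the increment
}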
- State new_state; - new_state.overflow = overflow; - new_state.quiescent_ticks = grow ? 0 : state.quiescent_ticks; - new_state.successive = overflow == state.overflow ? state.successive + 1 : 0; - memcpy(&raw, &new_state, sizeof(raw)); - state_.store(raw, std::memory_order_relaxed); - *successive = new_state.successive; - return overflow_then_underflow; -} - -uint32_t CPUCache::PerClassResizeInfo::Tick() { - int32_t raw = state_.load(std::memory_order_relaxed); - State state; - memcpy(&state, &raw, sizeof(state)); - state.quiescent_ticks++; - memcpy(&raw, &state, sizeof(raw)); - state_.store(raw, std::memory_order_relaxed); - return state.quiescent_ticks - 1; -} - -#ifdef ABSL_HAVE_THREAD_SANITIZER -extern "C" int RunningOnValgrind(); -#endif - -static void ActivatePerCPUCaches() { - if (tcmalloc::tcmalloc_internal::Static::CPUCacheActive()) { +static void ActivatePerCpuCaches() { + if (tcmalloc::tcmalloc_internal::tc_globals.CpuCacheActive()) { // Already active. return; } -#ifdef ABSL_HAVE_THREAD_SANITIZER - // RunningOnValgrind is a proxy for "is something intercepting malloc." - // - // If Valgrind, et. al., are in use, TCMalloc isn't in use and we shouldn't - // activate our per-CPU caches. - if (RunningOnValgrind()) { - return; - } -#endif if (Parameters::per_cpu_caches() && subtle::percpu::IsFast()) { - Static::InitIfNecessary(); - Static::cpu_cache().Activate(CPUCache::ActivationMode::FastPathOn); + tc_globals.InitIfNecessary(); + tc_globals.cpu_cache().Activate(); + tc_globals.ActivateCpuCache(); // no need for this thread cache anymore, I guess. ThreadCache::BecomeIdle(); // If there's a problem with this code, let's notice it right away: @@ -1109,7 +50,7 @@ static void ActivatePerCPUCaches() { class PerCPUInitializer { public: PerCPUInitializer() { - ActivatePerCPUCaches(); + ActivatePerCpuCaches(); } }; static PerCPUInitializer module_enter_exit; @@ -1119,16 +60,11 @@ static PerCPUInitializer module_enter_exit; GOOGLE_MALLOC_SECTION_END extern "C" void TCMalloc_Internal_ForceCpuCacheActivation() { - tcmalloc::tcmalloc_internal::ActivatePerCPUCaches(); + tcmalloc::tcmalloc_internal::ActivatePerCpuCaches(); } extern "C" bool MallocExtension_Internal_GetPerCpuCachesActive() { - return tcmalloc::tcmalloc_internal::Static::CPUCacheActive(); -} - -extern "C" void MallocExtension_Internal_DeactivatePerCpuCaches() { - tcmalloc::tcmalloc_internal::Parameters::set_per_cpu_caches(false); - tcmalloc::tcmalloc_internal::Static::DeactivateCPUCache(); + return tcmalloc::tcmalloc_internal::tc_globals.CpuCacheActive(); } extern "C" int32_t MallocExtension_Internal_GetMaxPerCpuCacheSize() { diff --git a/contrib/libs/tcmalloc/tcmalloc/cpu_cache.h b/contrib/libs/tcmalloc/tcmalloc/cpu_cache.h index dab7d1891018..26732291cf7e 100644 --- a/contrib/libs/tcmalloc/tcmalloc/cpu_cache.h +++ b/contrib/libs/tcmalloc/tcmalloc/cpu_cache.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,52 +18,324 @@ #include #include +#include +#include #include +#include +#include +#include +#include +#include "absl/algorithm/container.h" #include "absl/base/attributes.h" #include "absl/base/call_once.h" +#include "absl/base/dynamic_annotations.h" +#include "absl/base/internal/cycleclock.h" #include "absl/base/internal/spinlock.h" #include "absl/base/optimization.h" +#include "absl/base/thread_annotations.h" +#include "absl/container/fixed_array.h" +#include "absl/functional/function_ref.h" +#include 
"absl/time/time.h" +#include "absl/types/span.h" #include "tcmalloc/common.h" +#include "tcmalloc/experiment.h" +#include "tcmalloc/experiment_config.h" +#include "tcmalloc/internal/allocation_guard.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/cpu_utils.h" +#include "tcmalloc/internal/environment.h" #include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/numa.h" +#include "tcmalloc/internal/optimization.h" #include "tcmalloc/internal/percpu.h" #include "tcmalloc/internal/percpu_tcmalloc.h" +#include "tcmalloc/internal/sysinfo.h" +#include "tcmalloc/internal/util.h" +#include "tcmalloc/pages.h" +#include "tcmalloc/parameters.h" #include "tcmalloc/static_vars.h" #include "tcmalloc/thread_cache.h" -#include "tcmalloc/tracking.h" +#include "tcmalloc/transfer_cache.h" GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { +class CpuCachePeer; -class CPUCache { +namespace cpu_cache_internal { +template +struct DrainHandler; + +// Determine number of bits we should use for allocating per-cpu cache. +// The amount of per-cpu cache is 2 ^ per-cpu-shift. +// When dynamic slab size is enabled, we start with kInitialPerCpuShift and +// grow as needed up to kMaxPerCpuShift. When dynamic slab size is disabled, +// we always use kMaxPerCpuShift. +#if defined(TCMALLOC_INTERNAL_SMALL_BUT_SLOW) +constexpr inline uint8_t kInitialBasePerCpuShift = 12; +constexpr inline uint8_t kMaxBasePerCpuShift = 12; +#else +constexpr inline uint8_t kInitialBasePerCpuShift = 14; +constexpr inline uint8_t kMaxBasePerCpuShift = 18; +#endif +constexpr inline uint8_t kNumPossiblePerCpuShifts = + kMaxBasePerCpuShift - kInitialBasePerCpuShift + 1; + +constexpr inline uint8_t kResizeSlabCopies = 2; +constexpr inline uint8_t kTotalPossibleSlabs = + kNumPossiblePerCpuShifts * kResizeSlabCopies; +// StaticForwarder provides access to the SizeMap and transfer caches. +// +// This is a class, rather than namespaced globals, so that it can be mocked for +// testing. +class StaticForwarder { + public: + [[nodiscard]] static void* Alloc(size_t size, std::align_val_t alignment) + ABSL_LOCKS_EXCLUDED(pageheap_lock) { + TC_ASSERT(tc_globals.IsInited()); + // TODO(b/373944374): Arena is thread-safe, but we take the pageheap_lock to + // present a consistent view of memory usage. + PageHeapSpinLockHolder l; + return tc_globals.arena().Alloc(size, alignment); + } + [[nodiscard]] static void* AllocReportedImpending(size_t size, + std::align_val_t alignment) + ABSL_LOCKS_EXCLUDED(pageheap_lock) { + TC_ASSERT(tc_globals.IsInited()); + // TODO(b/373944374): Arena is thread-safe, but we take the pageheap_lock to + // present a consistent view of memory usage. + PageHeapSpinLockHolder l; + // Negate previous update to allocated that accounted for this allocation. + tc_globals.arena().UpdateAllocatedAndNonresident( + -static_cast(size), 0); + return tc_globals.arena().Alloc(size, alignment); + } + + static void Dealloc(void* ptr, size_t size, std::align_val_t alignment) { + TC_ASSERT(false); + } + + static void ArenaUpdateAllocatedAndNonresident(int64_t allocated, + int64_t nonresident) + ABSL_LOCKS_EXCLUDED(pageheap_lock) { + TC_ASSERT(tc_globals.IsInited()); + // TODO(b/373944374): Arena is thread-safe, but we take the pageheap_lock to + // present a consistent view of memory usage. 
+ PageHeapSpinLockHolder l; + if (allocated > 0) { + tc_globals.page_allocator().ShrinkToUsageLimit(Length(allocated)); + } + tc_globals.arena().UpdateAllocatedAndNonresident(allocated, nonresident); + } + + static bool per_cpu_caches_dynamic_slab_enabled() { + return Parameters::per_cpu_caches_dynamic_slab_enabled(); + } + + static double per_cpu_caches_dynamic_slab_grow_threshold() { + return Parameters::per_cpu_caches_dynamic_slab_grow_threshold(); + } + + static double per_cpu_caches_dynamic_slab_shrink_threshold() { + return Parameters::per_cpu_caches_dynamic_slab_shrink_threshold(); + } + + static bool reuse_size_classes() { + return tc_globals.size_class_configuration() == + SizeClassConfiguration::kReuse; + } + + static size_t class_to_size(int size_class) { + return tc_globals.sizemap().class_to_size(size_class); + } + + static absl::Span cold_size_classes() { + return tc_globals.sizemap().ColdSizeClasses(); + } + + static size_t num_objects_to_move(int size_class) { + return tc_globals.sizemap().num_objects_to_move(size_class); + } + + static const NumaTopology& numa_topology() { + return tc_globals.numa_topology(); + } + + static ShardedTransferCacheManager& sharded_transfer_cache() { + return tc_globals.sharded_transfer_cache(); + } + + static TransferCacheManager& transfer_cache() { + return tc_globals.transfer_cache(); + } + + static bool UseGenericShardedCache() { + return tc_globals.sharded_transfer_cache().UseGenericCache(); + } + + static bool UseShardedCacheForLargeClassesOnly() { + return tc_globals.sharded_transfer_cache().UseCacheForLargeClassesOnly(); + } + + static bool HaveHooks() { return tc_globals.HaveHooks(); } +}; + +template +uint8_t NumaShift(const NumaTopology& topology) { + return topology.numa_aware() + ? absl::bit_ceil(topology.active_partitions() - 1) + : 0; +} + +// Translates from a shift value to the offset of that shift in arrays of +// possible shift values. +inline uint8_t ShiftOffset(uint8_t shift, uint8_t initial_shift) { + TC_ASSERT_GE(shift, initial_shift); + return shift - initial_shift; +} + +// Tracks the range of allowed slab shifts. +struct SlabShiftBounds { + uint8_t initial_shift; + uint8_t max_shift; +}; + +struct GetShiftMaxCapacity { + size_t operator()(size_t size_class) const { + TC_ASSERT_GE(shift_bounds.max_shift, shift); + const uint8_t relative_shift = shift_bounds.max_shift - shift; + if (relative_shift == 0) + return max_capacities[size_class].load(std::memory_order_relaxed); + int mc = max_capacities[size_class].load(std::memory_order_relaxed) >> + relative_shift; + // We decrement by 3 because of (1) cost of per-size-class header, (2) cost + // of per-size-class padding pointer, (3) there are a lot of empty size + // classes that have headers and whose max capacities can't be decremented. + // TODO(b/272085443): try using size_class_to_header_idx array to allow for + // not having headers for empty size classes. + // TODO(b/219565872): try not doing prefetching for large size classes to + // allow for not having padding pointers for large size classes. 
+ mc = std::max(mc - 3, 0); + return mc; + } + + const std::atomic* max_capacities; + uint8_t shift; + SlabShiftBounds shift_bounds; +}; + +template +class CpuCache { public: - constexpr CPUCache() = default; + struct CpuCacheMissStats { + size_t underflows = 0; + size_t overflows = 0; + + CpuCacheMissStats& operator+=(const CpuCacheMissStats rhs) { + underflows += rhs.underflows; + overflows += rhs.overflows; + return *this; + } + }; + + enum class DynamicSlabResize { + kNoop = 0, + kShrink, + kGrow, + }; + + enum class PerClassMissType { + // Tracks total number of capacity misses. + kCapacityTotal = 0, + // Tracks number of misses recorded as of the end of the last per-class + // resize interval. + kCapacityResize, + // Tracks total number of misses due to insufficient max_capacity. + kMaxCapacityTotal, + // Tracks number of misses recorded as of the end of the last per-class + // max capacity resize interval. + kMaxCapacityResize, + kNumTypes, + }; + + // We track the number of overflows/underflows for each of these cases. + enum class MissCount { + // Tracks total number of misses. + kTotal = 0, + // Tracks number of misses recorded as of the end of the last shuffle + // interval. + kShuffle, + // Tracks number of misses recorded as of the end of the last resize + // interval. + kReclaim, + // Tracks number of misses recorded as of the end of the last slab resize + // interval. + kSlabResize, + kNumCounts, + }; + + struct SizeClassCapacityStats { + size_t min_capacity = 0; + double avg_capacity = 0; + size_t max_capacity = 0; + size_t max_capacity_misses = 0; + absl::Duration min_last_underflow = absl::InfiniteDuration(); + absl::Duration max_last_underflow; + absl::Duration min_last_overflow = absl::InfiniteDuration(); + absl::Duration max_last_overflow; + int min_last_underflow_cpu_id = -1; + int max_last_underflow_cpu_id = -1; + int min_last_overflow_cpu_id = -1; + int max_last_overflow_cpu_id = -1; + }; - enum class ActivationMode { - FastPathOn, - FastPathOffTestOnly, + struct DynamicSlabInfo { + std::atomic grow_count[kNumPossiblePerCpuShifts]; + std::atomic shrink_count[kNumPossiblePerCpuShifts]; + std::atomic madvise_failed_bytes; }; + // Sets the lower limit on the capacity that can be stolen from the cpu cache. + static constexpr double kCacheCapacityThreshold = 0.20; + + constexpr CpuCache() = default; + // tcmalloc explicitly initializes its global state (to be safe for // use in global constructors) so our constructor must be trivial; // do all initialization here instead. - void Activate(ActivationMode mode); - - // Allocate an object of the given size class. When allocation fails - // (from this cache and after running Refill), OOMHandler(size) is - // called and its return value is returned from - // Allocate. OOMHandler is used to parameterize out-of-memory - // handling (raising exception, returning nullptr, calling - // new_handler or anything else). "Passing" OOMHandler in this way - // allows Allocate to be used in tail-call position in fast-path, - // making Allocate use jump (tail-call) to slow path code. - template - void* Allocate(size_t cl); + void Activate(); + + // For testing + void Deactivate(); + + // Allocate an object of the given size class. + // Returns nullptr when allocation fails. + [[nodiscard]] void* Allocate(size_t size_class); + // Separate allocation fast/slow paths. + // The fast path succeeds iff the thread has already cached the slab pointer + // (done by AllocateSlow) and there is an available object in the slab. 
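// Illustrative sketch (not part of the patch): how the fast/slow split above
// composes, mirroring the Allocate()/Deallocate() definitions further down in
// this header (the real code uses a must-tail return into the slow path).
// CacheLike is a stand-in template parameter assumed for this example.
#include <cstddef>

template <typename CacheLike>
void* AllocateSketch(CacheLike& cache, size_t size_class) {
  // Fast path: only succeeds if the slab pointer is already cached for this
  // thread and the per-cpu freelist has an object available.
  if (void* ret = cache.AllocateFast(size_class)) {
    return ret;
  }
  // Slow path: caches the slab pointer, refills from the backing cache, etc.
  return cache.AllocateSlow(size_class);
}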
+ [[nodiscard]] void* AllocateFast(size_t size_class); + [[nodiscard]] void* AllocateSlow(size_t size_class); + // A slightly faster version of AllocateSlow that may be called only + // when it's known that no hooks are installed. + [[nodiscard]] void* AllocateSlowNoHooks(size_t size_class); // Free an object of the given class. - void Deallocate(void* ptr, size_t cl); + void Deallocate(void* ptr, size_t size_class); + // Separate deallocation fast/slow paths. + // The fast path succeeds iff the thread has already cached the slab pointer + // (done by DeallocateSlow) and there is free space in the slab. + bool DeallocateFast(void* ptr, size_t size_class); + void DeallocateSlow(void* ptr, size_t size_class); + // A slightly faster version of DeallocateSlow that may be called only + // when it's known that no hooks are installed. + void DeallocateSlowNoHooks(void* ptr, size_t size_class); + + // Force all Allocate/DeallocateFast to fail in the current thread + // if malloc hooks are installed. + void MaybeForceSlowPath(); // Give the number of bytes in 's cache uint64_t UsedBytes(int cpu) const; @@ -79,7 +352,7 @@ class CPUCache { uint64_t TotalUsedBytes() const; // Give the number of objects of a given class in all cpu caches. - uint64_t TotalObjectsOfClass(size_t cl) const; + uint64_t TotalObjectsOfClass(size_t size_class) const; // Give the number of bytes unallocated to any sizeclass in 's cache. uint64_t Unallocated(int cpu) const; @@ -92,6 +365,7 @@ class CPUCache { // Give the per-cpu limit of cache size. uint64_t CacheLimit() const; + void SetCacheLimit(uint64_t v); // Shuffles per-cpu caches using the number of underflows and overflows that // occurred in the prior interval. It selects the top per-cpu caches @@ -103,29 +377,50 @@ class CPUCache { // ShuffleCpuCaches. void ShuffleCpuCaches(); - // Sets the lower limit on the capacity that can be stolen from the cpu cache. - static constexpr double kCacheCapacityThreshold = 0.20; - - // Tries to steal for the destination . It iterates through the - // the set of populated cpu caches and steals the bytes from them. A cpu is - // considered a good candidate to steal from if: - // (1) the cache is populated - // (2) the numbers of underflows and overflows are both less than 0.8x those - // of the destination per-cpu cache - // (3) source cpu is not the same as the destination cpu - // (4) capacity of the source cpu/cl is non-zero - // - // For a given source cpu, we iterate through the size classes to steal from - // them. Currently, we use a similar clock-like algorithm from Steal() to - // identify the cl to steal from. - void StealFromOtherCache(int cpu, int max_populated_cpu, size_t bytes); - // Tries to reclaim inactive per-CPU caches. It iterates through the set of // populated cpu caches and reclaims the caches that: // (1) had same number of used bytes since the last interval, // (2) had no change in the number of misses since the last interval. void TryReclaimingCaches(); + // Resize size classes for up to kNumCpuCachesToResize cpu caches per + // interval. + static constexpr int kNumCpuCachesToResize = 10; + // Resizes size classes within up to kNumCpuCachesToResize per-cpu caches per + // iteration in a round-robin fashion. Per cpu cache, it iterates through the + // size classes and attempts to grow up to kMaxSizeClassesToResize number of + // classes by stealing capacity from rest of them. Per iteration, it resizes + // size classes for up to kNumCpuCachesToResize number of per-cpu caches. 
+ void ResizeSizeClasses(); + + // Gets the max capacity for the size class using the current per-cpu shift. + uint16_t GetMaxCapacity(int size_class, uint8_t shift) const; + + // Gets the current capacity for the in a cache. + size_t GetCapacityOfSizeClass(int cpu, int size_class) const; + + // Computes maximum capacities that we want to update the size classes to. It + // fetches number of capacity misses obvserved for the size classes, and + // computes increases to the maximum capacities for the size classes with the + // highest misses. It computes maximum capacities for kNumBaseClasses number + // of size classes, starting with . It records the resized + // size classes and capacities in starting from index + // . + // Returns total number of valid size classes recorded in + // array. + int GetUpdatedMaxCapacities(int start_size_class, + PerSizeClassMaxCapacity* max_capacity, + int valid_entries); + + // Resizes maximum capacities for the size classes. First, it computes + // candidates to resize using GetUpdatedMaxCapacities(...), and then updates + // maximum capacities for size classes for all per-cpu caches. Resizing is a + // global operation. It stops all per-cpu caches, drains them, updates maximum + // capacities and begin, current and end indices for the slabs and then + // restarts the per-cpu caches. Because it's a global operation that involves + // stopping all per-cpu caches, this mechanism should be used sparingly. + void ResizeSizeClassMaxCapacities(); + // Empty out the cache on ; move all objects to the central // cache. (If other threads run concurrently on that cpu, we can't // guarantee it will be fully empty on return, but if the cpu is @@ -133,41 +428,88 @@ class CPUCache { // of bytes we sent back. This function is thread safe. uint64_t Reclaim(int cpu); + // Reports number of times the size classes were resized for . + uint64_t GetNumResizes(int cpu) const; + + // Reports total number of times size classes were resized. + uint64_t GetNumResizes() const; + // Reports number of times the has been reclaimed. uint64_t GetNumReclaims(int cpu) const; - // Determine number of bits we should use for allocating per-cpu cache - // The amount of per-cpu cache is 2 ^ kPerCpuShift -#if defined(TCMALLOC_SMALL_BUT_SLOW) - static const size_t kPerCpuShift = 12; -#else - static constexpr size_t kPerCpuShift = 18; -#endif + // Reports total number of times any CPU has been reclaimed. + uint64_t GetNumReclaims() const; - struct CpuCacheMissStats { - size_t underflows; - size_t overflows; - }; + // When dynamic slab size is enabled, checks if there is a need to resize + // the slab based on miss-counts and resizes if so. + void ResizeSlabIfNeeded(); // Reports total cache underflows and overflows for . CpuCacheMissStats GetTotalCacheMissStats(int cpu) const; - // Reports the cache underflows and overflows for that were recorded at - // the end of the previous interval. It also records current underflows and - // overflows in the reclaim underflow and overflow stats. - CpuCacheMissStats GetReclaimCacheMissStats(int cpu) const; + // Reports total cache underflows and overflows for all CPUs. + CpuCacheMissStats GetTotalCacheMissStats() const; - // Reports cache underflows and overflows for this interval. - CpuCacheMissStats GetIntervalCacheMissStats(int cpu) const; + // Reports the cache underflows and overflows for that were recorded + // during the previous interval for . 
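// Illustrative sketch (not part of the patch): the snapshot/delta scheme the
// interval miss stats above rely on. `total` only grows; `snapshot` records
// its value at the end of the previous interval; the interval count is the
// difference, clamped to 0 in case the counters ever wrap.
#include <atomic>
#include <cstddef>

inline size_t IntervalDelta(const std::atomic<size_t>& total,
                            std::atomic<size_t>& snapshot,
                            bool update_snapshot) {
  const size_t t = total.load(std::memory_order_relaxed);
  const size_t s = snapshot.load(std::memory_order_relaxed);
  if (update_snapshot) {
    // Interval rollover happens on a single background thread, so a relaxed
    // store is sufficient (matching the comments in the .cc file).
    snapshot.store(t, std::memory_order_relaxed);
  }
  return t > s ? t - s : 0;
}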
+ CpuCacheMissStats GetIntervalCacheMissStats(int cpu, + MissCount miss_count) const; + + // Records current underflows and overflows in the underflow and + // overflow stats. + void UpdateIntervalCacheMissStats(int cpu, MissCount miss_count); + + // Reports the cache underflows and overflows for that were recorded + // during the previous interval for . Records current underflows + // and overflows in the underflow and overflow stats. + CpuCacheMissStats GetAndUpdateIntervalCacheMissStats(int cpu, + MissCount miss_count); + + // Scans through populated per-CPU caches, and reports minimum, average and + // maximum capacity for size class . + SizeClassCapacityStats GetSizeClassCapacityStats(size_t size_class) const; + + // Reports the number of misses encountered by a that were + // recorded during the previous interval between and + // kinds of misses. + size_t GetIntervalSizeClassMisses(int cpu, size_t size_class, + PerClassMissType total_type, + PerClassMissType interval_type); + + // Reports if we should use a wider 512KiB slab. + bool UseWiderSlabs() const; + + // Reports allowed slab shift initial and maximum bounds. + SlabShiftBounds GetPerCpuSlabShiftBounds() const; + + size_t GetDynamicSlabFailedBytes() const; // Report statistics - void Print(Printer* out) const; - void PrintInPbtxt(PbtxtRegion* region) const; + void Print(Printer& out) const; + void PrintInPbtxt(PbtxtRegion& region) const; void AcquireInternalLocks(); void ReleaseInternalLocks(); + const Forwarder& forwarder() const { return forwarder_; } + + Forwarder& forwarder() { return forwarder_; } + private: + friend struct DrainHandler; + friend class ::tcmalloc::tcmalloc_internal::CpuCachePeer; + + using Freelist = subtle::percpu::TcmallocSlab; + + struct PerClassMissCounts { + std::atomic + misses[static_cast(PerClassMissType::kNumTypes)]; + + std::atomic& operator[](PerClassMissType type) { + return misses[static_cast(type)]; + } + }; + // Per-size-class freelist resizing info. class PerClassResizeInfo { public: @@ -180,6 +522,28 @@ class CPUCache { bool Update(bool overflow, bool grow, uint32_t* successive); uint32_t Tick(); + // Records a miss for a provided . A miss occurs when size class + // attempts to grow it's capacity on underflow/overflow, but we are already + // at the maximum configured per-cpu cache capacity limit. + void RecordMiss(PerClassMissType type); + + // Reports total number of misses recorded for this size class. + size_t GetTotalMisses(PerClassMissType type); + + size_t GetAndUpdateIntervalMisses(PerClassMissType total_type, + PerClassMissType interval_type); + + // Reports the number of misses encountered by this size class that + // were recorded during the previous interval between misses + // and . + size_t GetIntervalMisses(PerClassMissType total_type, + PerClassMissType interval_type); + + // Copies total misses of type encountered by the size class to + // the type . + void UpdateIntervalMisses(PerClassMissType total_type, + PerClassMissType interval_type); + private: std::atomic state_; // state_ layout: @@ -191,173 +555,2209 @@ class CPUCache { // number of successive overflows/underflows uint32_t successive : 16; }; + PerClassMissCounts misses_; static_assert(sizeof(State) == sizeof(std::atomic), "size mismatch"); }; - subtle::percpu::TcmallocSlab freelist_; + // Helper type so we don't need to sprinkle `static_cast`s everywhere. 
+ struct MissCounts { + std::atomic misses[static_cast(MissCount::kNumCounts)]; - struct ResizeInfoUnpadded { + std::atomic& operator[](MissCount miss_count) { + return misses[static_cast(miss_count)]; + } + }; + + struct ABSL_CACHELINE_ALIGNED ResizeInfo { // cache space on this CPU we're not using. Modify atomically; // we don't want to lose space. std::atomic available; - // this is just a hint - std::atomic last_steal; - // Track whether we have initialized this CPU. - absl::once_flag initialized; + // Size class to steal from for the clock-wise algorithm. + size_t next_steal = 1; // Track whether we have ever populated this CPU. std::atomic populated; - // For cross-cpu operations. - absl::base_internal::SpinLock lock; + // For cross-cpu operations. We can't allocate while holding one of these so + // please use AllocationGuardSpinLockHolder to hold it. + absl::base_internal::SpinLock lock ABSL_ACQUIRED_BEFORE(pageheap_lock){ + absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY}; PerClassResizeInfo per_class[kNumClasses]; - // tracks number of underflows on allocate. - std::atomic total_underflows; - // tracks number of overflows on deallocate. - std::atomic total_overflows; - // tracks number of underflows recorded as of the end of the last shuffle - // interval. - std::atomic shuffle_underflows; - // tracks number of overflows recorded as of the end of the last shuffle - // interval. - std::atomic shuffle_overflows; + std::atomic num_size_class_resizes; + // Tracks number of underflows on allocate. + MissCounts underflows; + // Tracks number of overflows on deallocate. + MissCounts overflows; + std::atomic last_miss_cycles[2][kNumClasses]; // total cache space available on this CPU. This tracks the total // allocated and unallocated bytes on this CPU cache. std::atomic capacity; - // Number of underflows as of the end of the last resize interval. - std::atomic reclaim_underflows; - // Number of overflows as of the end of the last resize interval. - std::atomic reclaim_overflows; // Used bytes in the cache as of the end of the last resize interval. std::atomic reclaim_used_bytes; // Tracks number of times this CPU has been reclaimed. std::atomic num_reclaims; + // Tracks last time this CPU was reclaimed. If last underflow/overflow data + // appears before this point in time, we ignore the CPU. + std::atomic last_reclaim; + }; + + // Determines how we distribute memory in the per-cpu cache to the various + // class sizes. + size_t MaxCapacity(size_t size_class) const; + + // Updates maximum capacity for the to . + void UpdateMaxCapacity(int size_class, uint16_t cap); + + GetShiftMaxCapacity GetMaxCapacityFunctor(uint8_t shift) const; + + // Fetches objects from backing transfer cache. + [[nodiscard]] int FetchFromBackingCache(size_t size_class, + absl::Span batch); + + // Releases free batch of objects to the backing transfer cache. + void ReleaseToBackingCache(size_t size_class, absl::Span batch); + + [[nodiscard]] void* Refill(int cpu, size_t size_class); + std::pair CacheCpuSlab(); + void Populate(int cpu); + + // Returns true if we bypass cpu cache for a . We may bypass + // per-cpu cache when we enable certain configurations of sharded transfer + // cache. + bool BypassCpuCache(size_t size_class) const; + + // Returns true if we use sharded transfer cache as a backing cache for + // per-cpu caches. If a sharded transfer cache is used, we fetch/release + // from/to a sharded transfer cache. Else, we use a legacy transfer cache. 
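// Illustrative sketch (not part of the patch): the backing-cache dispatch the
// comment above describes -- a per-cpu cache refills from and flushes to
// either the sharded transfer cache or the legacy transfer cache. The callback
// types and names are assumptions of this example; the real code routes
// through FetchFromBackingCache()/ReleaseToBackingCache().
#include <cstddef>
#include <functional>

struct BackingCacheSketch {
  std::function<int(size_t, void**, int)> fetch_sharded;
  std::function<int(size_t, void**, int)> fetch_legacy;

  int Fetch(bool use_sharded_for_class, size_t size_class, void** batch,
            int n) const {
    return use_sharded_for_class ? fetch_sharded(size_class, batch, n)
                                 : fetch_legacy(size_class, batch, n);
  }
};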
+ bool UseBackingShardedTransferCache(size_t size_class) const; + + // Called on freelist on to record overflow/underflow + // Returns number of objects to return/request from transfer cache. + size_t UpdateCapacity(int cpu, size_t size_class, bool overflow); + + // Tries to grow freelist on the current by up to + // objects if there is available capacity. + void Grow(int cpu, size_t size_class, size_t desired_increase); + + // Depending on the number of misses that cpu caches encountered in the + // previous resize interval, returns if slabs should be grown, shrunk or + // remain the same. + DynamicSlabResize ShouldResizeSlab(); + + // Determine if the is a good candidate to be shrunk. We use + // clock-like algorithm to prioritize size classes for shrinking. + bool IsGoodCandidateForShrinking(int cpu, size_t size_class); + + struct SizeClassMissStat { + size_t size_class; + size_t misses; }; - struct ResizeInfo : ResizeInfoUnpadded { - char pad[ABSL_CACHELINE_SIZE - - sizeof(ResizeInfoUnpadded) % ABSL_CACHELINE_SIZE]; + struct CpuMissStat { + int cpu; + size_t misses; }; + + // Tries to steal for on from other size classes on + // that CPU. Returns acquired bytes. + size_t StealCapacityForSizeClassWithinCpu( + int cpu, absl::Span dest_size_classes, size_t bytes); + + // Records a cache underflow or overflow on , increments underflow or + // overflow by 1. + // determines whether the associated count corresponds to an + // underflow or overflow. + void RecordCacheMissStat(int cpu, bool is_alloc); + + // Tries to steal for the destination . It iterates through the + // the set of populated cpu caches and steals the bytes from them. A cpu is + // considered a good candidate to steal from if: + // (1) the cache is populated + // (2) the numbers of underflows and overflows are both less than 0.8x those + // of the destination per-cpu cache + // (3) source cpu is not the same as the destination cpu + // (4) capacity of the source cpu/size_class is non-zero + // + // For a given source cpu, we iterate through the size classes to steal from + // them. Currently, we use a clock-like algorithm to identify the size_class + // to steal from. + void StealFromOtherCache(int cpu, int max_populated_cpu, + absl::Span skip_cpus, size_t bytes); + + // Try to steal one object from cpu/size_class. Return bytes stolen. + size_t ShrinkOtherCache(int cpu, size_t size_class); + + // Resizes capacities of up to kMaxSizeClassesToResize size classes for a + // single . + void ResizeCpuSizeClasses(int cpu); + + // is the offset of the shift in slabs_by_shift_. Note that we + // can't calculate this from `shift` directly due to numa shift. + // Returns the allocated slabs and the number of reused bytes. + [[nodiscard]] std::pair AllocOrReuseSlabs( + absl::FunctionRef alloc, + subtle::percpu::Shift shift, int num_cpus, uint8_t shift_offset, + uint8_t resize_offset); + + // madvise-away slab memory, pointed to by of size . + void MadviseAwaySlabs(void* slab_addr, size_t slab_size); + + Freelist freelist_; + // Tracking data for each CPU's cache resizing efforts. ResizeInfo* resize_ = nullptr; - // Track whether we are lazily initializing slabs. We cannot use the latest - // value in Parameters, as it can change after initialization. - bool lazy_slabs_ = false; + // Tracks initial and maximum slab shift bounds. + SlabShiftBounds shift_bounds_{}; + // The maximum capacity of each size class within the slab. 
- uint16_t max_capacity_[kNumClasses] = {0}; + std::atomic max_capacity_[kNumClasses] = {0}; // Provides a hint to StealFromOtherCache() so that we can steal from the // caches in a round-robin fashion. - std::atomic last_cpu_cache_steal_ = 0; - - // Return a set of objects to be returned to the Transfer Cache. - static constexpr int kMaxToReturn = 16; - struct ObjectsToReturn { - // The number of slots available for storing objects. - int count = kMaxToReturn; - // The size class of the returned object. kNumClasses is the - // largest value that needs to be stored in cl. - CompactSizeClass cl[kMaxToReturn]; - void* obj[kMaxToReturn]; - }; + int next_cpu_cache_steal_ = 0; + + // Provides a hint to ResizeSizeClasses() that records the last CPU for which + // we resized size classes. We use this to resize size classes for CPUs in a + // round-robin fashion. + std::atomic last_cpu_size_class_resize_ = 0; + + // Records the slab copy currently in use. We maintain kResizeSlabCopies + // sets of kNumPossiblePerCpuShifts slabs. While resizing maximum size class + // capacity, we choose a new slab from one of the copies. resize_slab_offset_ + // is an index into the copy currently in use. + std::atomic resize_slab_offset_ = 0; - static size_t MaxCapacityHelper(size_t cl) { - CPUCache& cpu_cache = Static::cpu_cache(); - // Heuristic that the CPUCache has been activated. - ASSERT(cpu_cache.resize_ != nullptr); - return cpu_cache.max_capacity_[cl]; + // Per-core cache limit in bytes. + std::atomic max_per_cpu_cache_size_{kMaxCpuCacheSize}; + + ABSL_ATTRIBUTE_NO_UNIQUE_ADDRESS Forwarder forwarder_; + + DynamicSlabInfo dynamic_slab_info_{}; + + // Pointers to allocations for slabs of each shift value for use in + // ResizeSlabs. This memory is allocated on the arena, and it is nonresident + // while not in use. + void* slabs_by_shift_[kTotalPossibleSlabs] = {nullptr}; +}; + +template +void* CpuCache::Allocate(size_t size_class) { + void* ret = AllocateFast(size_class); + if (ABSL_PREDICT_TRUE(ret != nullptr)) { + return ret; } + TCMALLOC_MUSTTAIL return AllocateSlow(size_class); +} - void* Refill(int cpu, size_t cl); +template +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* CpuCache::AllocateFast( + size_t size_class) { + TC_ASSERT_GT(size_class, 0); + return freelist_.Pop(size_class); +} - // This is called after finding a full freelist when attempting to push - // on the freelist for sizeclass . The last arg should indicate which - // CPU's list was full. Returns 1. - int Overflow(void* ptr, size_t cl, int cpu); +template +void CpuCache::Deallocate(void* ptr, size_t size_class) { + if (ABSL_PREDICT_FALSE(!DeallocateFast(ptr, size_class))) { + TCMALLOC_MUSTTAIL return DeallocateSlow(ptr, size_class); + } +} - // Called on freelist overflow/underflow on to balance cache - // capacity between size classes. Returns number of objects to return/request - // from transfer cache. will contain objects that need to be - // freed. - size_t UpdateCapacity(int cpu, size_t cl, size_t batch_length, bool overflow, - ObjectsToReturn* to_return); +template +inline ABSL_ATTRIBUTE_ALWAYS_INLINE bool CpuCache::DeallocateFast( + void* ptr, size_t size_class) { + TC_ASSERT_GT(size_class, 0); + return freelist_.Push(size_class, ptr); +} - // Tries to obtain up to bytes of freelist space on - // for from other . will contain objects that need to be - // freed. 
- void Grow(int cpu, size_t cl, size_t desired_increase, - ObjectsToReturn* to_return); +template +void CpuCache::MaybeForceSlowPath() { + if (ABSL_PREDICT_FALSE(forwarder_.HaveHooks())) { + freelist_.UncacheCpuSlab(); + } +} - // Tries to steal for on from other size classes on that - // CPU. Returns acquired bytes. will contain objects that need to - // be freed. - size_t Steal(int cpu, size_t cl, size_t bytes, ObjectsToReturn* to_return); +static CpuSet FillActiveCpuMask() { + CpuSet allowed_cpus; + if (!allowed_cpus.GetAffinity(0)) { + allowed_cpus.Zero(); + } - // Records a cache underflow or overflow on , increments underflow or - // overflow by 1. - // determines whether the associated count corresponds to an - // underflow or overflow. - void RecordCacheMissStat(const int cpu, const bool is_malloc); +#ifdef PERCPU_USE_RSEQ + const bool real_cpus = !subtle::percpu::UsingVirtualCpus(); +#else + const bool real_cpus = true; +#endif - static void* NoopUnderflow(int cpu, size_t cl) { return nullptr; } - static int NoopOverflow(int cpu, size_t cl, void* item) { return -1; } -}; + if (real_cpus) { + return allowed_cpus; + } -template -inline void* ABSL_ATTRIBUTE_ALWAYS_INLINE CPUCache::Allocate(size_t cl) { - ASSERT(cl > 0); - - tracking::Report(kMallocHit, cl, 1); - struct Helper { - static void* ABSL_ATTRIBUTE_NOINLINE Underflow(int cpu, size_t cl) { - // we've optimistically reported hit in Allocate, lets undo it and - // report miss instead. - tracking::Report(kMallocHit, cl, -1); - void* ret = nullptr; - if (Static::sharded_transfer_cache().should_use(cl)) { - ret = Static::sharded_transfer_cache().Pop(cl); - } else { - tracking::Report(kMallocMiss, cl, 1); - CPUCache& cache = Static::cpu_cache(); - cache.RecordCacheMissStat(cpu, true); - ret = cache.Refill(cpu, cl); - } - if (ABSL_PREDICT_FALSE(ret == nullptr)) { - size_t size = Static::sizemap().class_to_size(cl); - return OOMHandler(size); - } - return ret; + const int virtual_cpu_count = allowed_cpus.Count(); + allowed_cpus.Zero(); + for (int cpu = 0; cpu < virtual_cpu_count; ++cpu) { + allowed_cpus.Set(cpu); + } + return allowed_cpus; +} + +template +inline size_t CpuCache::MaxCapacity(size_t size_class) const { + // The number of size classes that are commonly used and thus should be + // allocated more slots in the per-cpu cache. + static constexpr size_t kNumSmall = 10; + + // When we use wider slabs, we also want to double the maximum capacities for + // size classes to use that slab. + const size_t kWiderSlabMultiplier = UseWiderSlabs() ? 2 : 1; + + // The memory used for each per-CPU slab is the sum of: + // sizeof(std::atomic) * kNumClasses + // sizeof(void*) * (kSmallObjectDepth + 1) * kNumSmall + // sizeof(void*) * (kLargeObjectDepth + 1) * kNumLarge + // + // Class size 0 has MaxCapacity() == 0, which is the reason for using + // kNumClasses - 1 above instead of kNumClasses. + // + // Each Size class region in the slab is preceded by one padding pointer that + // points to itself, because prefetch instructions of invalid pointers are + // slow. That is accounted for by the +1 for object depths. +#if defined(TCMALLOC_INTERNAL_SMALL_BUT_SLOW) + // With SMALL_BUT_SLOW we have 4KiB of per-cpu slab and 46 class sizes we + // allocate: + // == 8 * 46 + 8 * ((16 + 1) * 10 + (6 + 1) * 35) = 4038 bytes of 4096 + static const uint16_t kSmallObjectDepth = 16; + static const uint16_t kLargeObjectDepth = 6; +#else + // We allocate 256KiB per-cpu for pointers to cached per-cpu memory. 
+ // Max(kNumClasses) is 89, so the maximum footprint per CPU for a 256KiB + // slab is: + // 89 * 8 + 8 * ((2048 + 1) * 10 + (152 + 1) * 78) = 254 KiB + // For 512KiB slab, with a multiplier of 2, maximum footprint is: + // 89 * 8 + 8 * ((4096 + 1) * 10 + (304 + 1) * 78) = 506 KiB + const uint16_t kSmallObjectDepth = 2048 * kWiderSlabMultiplier; + const uint16_t kLargeObjectDepth = 152 * kWiderSlabMultiplier; +#endif + if (size_class == 0 || size_class >= kNumClasses) { + return 0; + } + + if (BypassCpuCache(size_class)) { + return 0; + } + + if (forwarder_.class_to_size(size_class) == 0) { + return 0; + } + + if (!IsExpandedSizeClass(size_class) && + (size_class % kNumBaseClasses) <= kNumSmall) { + // Small object sizes are very heavily used and need very deep caches for + // good performance (well over 90% of malloc calls are for size_class + // <= 10.) + return kSmallObjectDepth; + } + + if (ColdFeatureActive()) { + // We reduce the number of cached objects for some sizes to fit into the + // slab. + // + // We use fewer number of size classes when using reuse size classes. So, + // we may use larger capacity for some sizes. + const uint16_t kLargeUninterestingObjectDepth = + forwarder_.reuse_size_classes() ? 246 * kWiderSlabMultiplier + : 133 * kWiderSlabMultiplier; + const uint16_t kLargeInterestingObjectDepth = + forwarder_.reuse_size_classes() ? 46 * kWiderSlabMultiplier + : 28 * kWiderSlabMultiplier; + + absl::Span cold = forwarder_.cold_size_classes(); + if (absl::c_binary_search(cold, size_class)) { + return kLargeInterestingObjectDepth; + } else if (!IsExpandedSizeClass(size_class)) { + return kLargeUninterestingObjectDepth; + } else { + return 0; } - }; - return freelist_.Pop(cl, &Helper::Underflow); -} - -inline void ABSL_ATTRIBUTE_ALWAYS_INLINE CPUCache::Deallocate(void* ptr, - size_t cl) { - ASSERT(cl > 0); - tracking::Report(kFreeHit, cl, 1); // Be optimistic; correct later if needed. - - struct Helper { - static int ABSL_ATTRIBUTE_NOINLINE Overflow(int cpu, size_t cl, void* ptr) { - // When we reach here we've already optimistically bumped FreeHits. - // Fix that. - tracking::Report(kFreeHit, cl, -1); - if (Static::sharded_transfer_cache().should_use(cl)) { - Static::sharded_transfer_cache().Push(cl, ptr); - return 1; - } - tracking::Report(kFreeMiss, cl, 1); - CPUCache& cache = Static::cpu_cache(); - cache.RecordCacheMissStat(cpu, false); - return cache.Overflow(ptr, cl, cpu); + } + + if (IsExpandedSizeClass(size_class)) { + return 0; + } + + return kLargeObjectDepth; +} + +// Returns estimated bytes required and the bytes available. +inline std::pair EstimateSlabBytes( + GetShiftMaxCapacity get_shift_capacity) { + size_t bytes_required = sizeof(std::atomic) * kNumClasses; + + for (int size_class = 0; size_class < kNumClasses; ++size_class) { + // Each non-empty size class region in the slab is preceded by one padding + // pointer that points to itself. (We do this because prefetches of invalid + // pointers are slow.) 
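// A worked check of the 256KiB footprint bound quoted above (shift == 18,
// kNumClasses == 89, 10 small classes at depth 2048, 78 remaining classes at
// depth 152; the 8-byte term is the per-class slab header and the +1 per
// class is the self-pointing padding pointer):
//   89 * 8 + 8 * ((2048 + 1) * 10 + (152 + 1) * 78) = 260104 bytes ~ 254 KiB,
// which leaves roughly 2 KiB of slack in the 262144-byte slab.
static_assert(89 * 8 + 8 * ((2048 + 1) * 10 + (152 + 1) * 78) <= (1 << 18),
              "worst-case per-CPU footprint fits a 256KiB slab");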
+ size_t num_pointers = get_shift_capacity(size_class); + if (num_pointers > 0) ++num_pointers; + bytes_required += sizeof(void*) * num_pointers; + } + + const size_t bytes_available = 1 << get_shift_capacity.shift; + return {bytes_required, bytes_available}; +} + +template +inline uint16_t CpuCache::GetMaxCapacity(int size_class, + uint8_t shift) const { + return GetMaxCapacityFunctor(shift)(size_class); +} + +template +inline size_t CpuCache::GetCapacityOfSizeClass( + int cpu, int size_class) const { + return freelist_.Capacity(cpu, size_class); +} + +template +inline GetShiftMaxCapacity CpuCache::GetMaxCapacityFunctor( + uint8_t shift) const { + return {max_capacity_, shift, shift_bounds_}; +} + +template +inline void CpuCache::UpdateMaxCapacity(int size_class, + uint16_t cap) { + max_capacity_[size_class].store(cap, std::memory_order_relaxed); +} + +template +inline bool CpuCache::UseWiderSlabs() const { + // We use wider 512KiB slab only when NUMA partitioning is not enabled. NUMA + // increases shift by 1 by itself, so we can not increase it further. + return !forwarder_.numa_topology().numa_aware(); +} + +template +inline SlabShiftBounds CpuCache::GetPerCpuSlabShiftBounds() const { + return shift_bounds_; +} + +template +inline size_t CpuCache::GetDynamicSlabFailedBytes() const { + return dynamic_slab_info_.madvise_failed_bytes.load( + std::memory_order_relaxed); +} + +template +inline void CpuCache::Activate() { + int num_cpus = NumCPUs(); + + shift_bounds_.initial_shift = kInitialBasePerCpuShift; + shift_bounds_.max_shift = kMaxBasePerCpuShift; + uint8_t per_cpu_shift = forwarder_.per_cpu_caches_dynamic_slab_enabled() + ? kInitialBasePerCpuShift + : kMaxBasePerCpuShift; + + const auto& topology = forwarder_.numa_topology(); + const uint8_t numa_shift = NumaShift(topology); + const uint8_t wider_slab_shift = UseWiderSlabs() ? 1 : 0; + + shift_bounds_.initial_shift += numa_shift + wider_slab_shift; + shift_bounds_.max_shift += numa_shift + wider_slab_shift; + per_cpu_shift += numa_shift + wider_slab_shift; + + TC_CHECK_LE(shift_bounds_.initial_shift, shift_bounds_.max_shift); + TC_CHECK_GE(per_cpu_shift, shift_bounds_.initial_shift); + TC_CHECK_LE(per_cpu_shift, shift_bounds_.max_shift); + TC_CHECK_EQ(shift_bounds_.max_shift - shift_bounds_.initial_shift + 1, + kNumPossiblePerCpuShifts); + + // Deal with size classes that correspond only to NUMA partitions that are in + // use. If NUMA awareness is disabled then we may have a smaller shift than + // would suffice for all of the unused size classes. + for (int size_class = 0; + size_class < topology.active_partitions() * kNumBaseClasses; + ++size_class) { + max_capacity_[size_class].store(MaxCapacity(size_class), + std::memory_order_relaxed); + } + + // Deal with expanded size classes. + for (int size_class = kExpandedClassesStart; size_class < kNumClasses; + ++size_class) { + max_capacity_[size_class].store(MaxCapacity(size_class), + std::memory_order_relaxed); + } + + // Verify that all the possible shifts will have valid max capacities. + for (uint8_t shift = shift_bounds_.initial_shift; + shift <= shift_bounds_.max_shift; ++shift) { + const auto [bytes_required, bytes_available] = + EstimateSlabBytes({max_capacity_, shift, shift_bounds_}); + // We may make certain size classes no-ops by selecting "0" at runtime, so + // using a compile-time calculation overestimates worst-case memory usage. 
+ if (ABSL_PREDICT_FALSE(bytes_required > bytes_available)) { + TC_BUG("per-CPU memory exceeded, have %v, need %v", bytes_available, + bytes_required); } - }; - freelist_.Push(cl, ptr, Helper::Overflow); + } + + resize_ = reinterpret_cast(forwarder_.Alloc( + sizeof(ResizeInfo) * num_cpus, std::align_val_t{alignof(ResizeInfo)})); + + auto max_cache_size = CacheLimit(); + + for (int cpu = 0; cpu < num_cpus; ++cpu) { + new (&resize_[cpu]) ResizeInfo(); + + for (int size_class = 1; size_class < kNumClasses; ++size_class) { + resize_[cpu].per_class[size_class].Init(); + } + resize_[cpu].available.store(max_cache_size, std::memory_order_relaxed); + resize_[cpu].capacity.store(max_cache_size, std::memory_order_relaxed); + } + + void* slabs = + AllocOrReuseSlabs(&forwarder_.Alloc, + subtle::percpu::ToShiftType(per_cpu_shift), num_cpus, + ShiftOffset(per_cpu_shift, shift_bounds_.initial_shift), + /*resize_offset=*/0) + .first; + freelist_.Init( + &forwarder_.Alloc, slabs, + GetShiftMaxCapacity{max_capacity_, per_cpu_shift, shift_bounds_}, + subtle::percpu::ToShiftType(per_cpu_shift)); +} + +template +inline void CpuCache::Deactivate() { + int num_cpus = NumCPUs(); + for (int i = 0; i < num_cpus; i++) { + Reclaim(i); + } + + freelist_.Destroy(&forwarder_.Dealloc); + static_assert(std::is_trivially_destructible::value, + "ResizeInfo is expected to be trivially destructible"); + forwarder_.Dealloc(resize_, sizeof(*resize_) * num_cpus, + std::align_val_t{alignof(decltype(*resize_))}); +} + +template +inline int CpuCache::FetchFromBackingCache(size_t size_class, + absl::Span batch) { + if (UseBackingShardedTransferCache(size_class)) { + return forwarder_.sharded_transfer_cache().RemoveRange(size_class, batch); + } + return forwarder_.transfer_cache().RemoveRange(size_class, batch); } -inline bool UsePerCpuCache() { +template +inline void CpuCache::ReleaseToBackingCache( + size_t size_class, absl::Span batch) { + if (UseBackingShardedTransferCache(size_class)) { + forwarder_.sharded_transfer_cache().InsertRange(size_class, batch); + return; + } + + forwarder_.transfer_cache().InsertRange(size_class, batch); +} + +template +void* CpuCache::AllocateSlow(size_t size_class) { + void* ret = AllocateSlowNoHooks(size_class); + MaybeForceSlowPath(); + return ret; +} + +template +void* CpuCache::AllocateSlowNoHooks(size_t size_class) { + if (BypassCpuCache(size_class)) { + return forwarder_.sharded_transfer_cache().Pop(size_class); + } + auto [cpu, cached] = CacheCpuSlab(); + if (ABSL_PREDICT_FALSE(cached)) { + if (ABSL_PREDICT_FALSE(cpu < 0)) { + // The cpu is stopped. + void* ptr = nullptr; + int r = FetchFromBackingCache(size_class, absl::MakeSpan(&ptr, 1)); +#ifndef NDEBUG + TC_ASSERT(r == 1 || ptr == nullptr); +#else + (void)r; +#endif + return ptr; + } + if (void* ret = AllocateFast(size_class)) { + return ret; + } + } + RecordCacheMissStat(cpu, true); + return Refill(cpu, size_class); +} + +// Fetch more items from the central cache, refill our local cache, +// and try to grow it if necessary. +// +// This is complicated by the fact that we can only tweak the cache on +// our current CPU and we might get migrated whenever (in fact, we +// might already have been migrated since failing to get memory...) +// +// So make sure only to make changes to one CPU's cache; at all times, +// it must be safe to find ourselves migrated (at which point we atomically +// return memory to the correct CPU.) 
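// Worked example of the batched refill implemented below, under the assumed
// values target == 48 and kMaxObjectsToMove == 32: the first iteration asks
// the backing cache for min(32, 48) = 32 objects, hands one back to the
// caller as `result` and pushes the remaining 31 onto the per-CPU list; only
// if that batch was full and fully absorbed does a second iteration request
// the remaining min(32, 16) = 16. Objects the per-CPU list cannot absorb
// (e.g. after a migration changed the visible capacity) are handed back via
// ReleaseToBackingCache().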
+template +inline void* CpuCache::Refill(int cpu, size_t size_class) { + const size_t target = UpdateCapacity(cpu, size_class, false); + + // Refill target objects in batch_length batches. + size_t total = 0; + size_t got; + size_t i; + void* result = nullptr; + void* batch[kMaxObjectsToMove]; + + do { + const size_t want = std::min(kMaxObjectsToMove, target - total); + got = FetchFromBackingCache(size_class, absl::MakeSpan(batch, want)); + if (got == 0) { + break; + } + total += got; + i = got; + if (result == nullptr) { + i--; + result = batch[i]; + } + if (i) { + i -= freelist_.PushBatch(size_class, batch, i); + if (i != 0) { + ReleaseToBackingCache(size_class, {batch, i}); + } + } + } while (got == kMaxObjectsToMove && i == 0 && total < target); + return result; +} + +template +inline bool CpuCache::BypassCpuCache(size_t size_class) const { + // We bypass per-cpu cache when sharded transfer cache is enabled for large + // size classes (i.e. when we use the traditional configuration of the sharded + // transfer cache). + return forwarder_.sharded_transfer_cache().should_use(size_class) && + forwarder_.UseShardedCacheForLargeClassesOnly(); +} + +template +inline bool CpuCache::UseBackingShardedTransferCache( + size_t size_class) const { + // Make sure that the thread is registered with rseq. + TC_ASSERT(subtle::percpu::IsFastNoInit()); + // We enable sharded cache as a backing cache for all size classes when + // generic configuration is enabled. + return forwarder_.sharded_transfer_cache().should_use(size_class) && + forwarder_.UseGenericShardedCache(); +} + +// Calculate number of objects to return/request from transfer cache. +inline size_t TargetOverflowRefillCount(size_t capacity, size_t batch_length, + size_t successive) { + // If the freelist is large and we are hitting a series of overflows or + // underflows, return/request several batches at once. On the first overflow + // we return 1 batch, on the second -- 2, on the third -- 4 and so on up to + // half of the batches we have. We do this to save on the cost of hitting + // malloc/free slow path, reduce instruction cache pollution, avoid cache + // misses when accessing transfer/central caches, etc. + const size_t max = (1 << std::min(successive, 10)) * batch_length; + // Aim at returning/refilling roughly half of objects. + // Round up odd sizes, e.g. if the capacity is 3, we want to refill 2 objects. + // Also always add 1 to the result to account for the additional object + // we need to return to the caller on refill, or return on overflow. + size_t target = std::min((capacity + 1) / 2 + 1, max); + if (capacity == 1 && successive < 3) { + // If the capacity is 1, it's generally impossible to avoid bad behavior. + // Consider refills (but the same stands for overflows): if we fetch an + // additional object and put it into the cache, and the caller is doing + // malloc/free in a loop, then we both fetched an unnecessary object and + // we will immediately hit an overflow on the free. On the other hand + // if we don't fetch an additional object, and the caller is allocating + // in a loop, then we also hit underflow again on the next malloc. + // Currently we fetch/return an additional objects only if we are hitting + // successive underflows/overflows. + // But note that this behavior is also easy to compromise: if the caller is + // allocating 3 objects and then freeing 3 objects in a loop, then we always + // do the wrong thing. 
+ target = 1; + } + TC_ASSERT_LE(target, capacity + 1); + TC_ASSERT_NE(target, 0); + return target; +} + +template +inline size_t CpuCache::UpdateCapacity(int cpu, size_t size_class, + bool overflow) { + // Freelist size balancing strategy: + // - We grow a size class only on overflow/underflow. + // - We shrink size classes in Steal as it scans all size classes. + // - If overflows/underflows happen on a size class, we want to grow its + // capacity to at least 2 * batch_length. It enables usage of the + // transfer cache and leaves the list half-full after we insert/remove + // a batch from the transfer cache. + // - We increase capacity beyond 2 * batch_length only when an overflow is + // followed by an underflow. That's the only case when we could benefit + // from larger capacity -- the overflow and the underflow would collapse. + // + // Note: we can't understand when we have a perfectly-sized list, because for + // a perfectly-sized list we don't hit any slow paths which looks the same as + // inactive list. Eventually we will shrink a perfectly-sized list a bit and + // then it will grow back. This won't happen very frequently for the most + // important small sizes, because we will need several ticks before we shrink + // it again. Also we will shrink it by 1, but grow by a batch. So we should + // have lots of time until we need to grow it again. + + // We assert that the return value, target, is non-zero, so starting from an + // initial capacity of zero means we may be populating this core for the + // first time. + size_t batch_length = forwarder_.num_objects_to_move(size_class); + const size_t max_capacity = GetMaxCapacity(size_class, freelist_.GetShift()); + size_t capacity = freelist_.Capacity(cpu, size_class); + const bool grow_by_one = capacity < 2 * batch_length; + uint32_t successive = 0; + ResizeInfo& resize = resize_[cpu]; + const int64_t now = absl::base_internal::CycleClock::Now(); + // TODO(ckennelly): Use a strongly typed enum. + resize.last_miss_cycles[overflow][size_class].store( + now, std::memory_order_relaxed); + bool grow_by_batch = + resize.per_class[size_class].Update(overflow, grow_by_one, &successive); + if ((grow_by_one || grow_by_batch) && capacity != max_capacity) { + size_t increase = 1; + if (grow_by_batch) { + increase = std::min(batch_length, max_capacity - capacity); + } else if (!overflow && capacity < batch_length) { + // On underflow we want to grow to at least batch size, because that's + // what we want to request from transfer cache. + increase = batch_length - capacity; + } + Grow(cpu, size_class, increase); + capacity = freelist_.Capacity(cpu, size_class); + } + // We hit the maximum capacity limit when the size class capacity is equal to + // its maximum allowed capacity. Record a miss due to that so that we can + // potentially grow the max capacity for this size class later. 
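// Worked examples of TargetOverflowRefillCount() above, assuming
// batch_length == 32:
//   capacity == 1024, successive == 0: max = 32,   target = min(513, 32) = 32
//   capacity == 1024, successive == 2: max = 128,  target = 128
//   capacity == 1024, successive == 5: max = 1024, target = 513 (~half + 1)
//   capacity == 3,    successive == 0: target = 3  (refill 2 + 1 for caller)
//   capacity == 1,    successive == 0: target forced to 1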
+ if (capacity == max_capacity) { + resize_[cpu].per_class[size_class].RecordMiss( + PerClassMissType::kMaxCapacityTotal); + } + return TargetOverflowRefillCount(capacity, batch_length, successive); +} + +template +std::pair CpuCache::CacheCpuSlab() { + auto [cpu, cached] = freelist_.CacheCpuSlab(); + if (ABSL_PREDICT_FALSE(cached) && ABSL_PREDICT_TRUE(cpu >= 0) && + ABSL_PREDICT_FALSE( + !resize_[cpu].populated.load(std::memory_order_acquire))) { + Populate(cpu); + } + return {cpu, cached}; +} + +template +ABSL_ATTRIBUTE_NOINLINE void CpuCache::Populate(int cpu) { + AllocationGuardSpinLockHolder h(&resize_[cpu].lock); + if (resize_[cpu].populated.load(std::memory_order_relaxed)) { + return; + } + freelist_.InitCpu(cpu, GetMaxCapacityFunctor(freelist_.GetShift())); + resize_[cpu].populated.store(true, std::memory_order_release); +} + +inline size_t subtract_at_least(std::atomic* a, size_t min, + size_t max) { + size_t cmp = a->load(std::memory_order_relaxed); + for (;;) { + if (cmp < min) { + return 0; + } + size_t got = std::min(cmp, max); + if (a->compare_exchange_weak(cmp, cmp - got, std::memory_order_relaxed)) { + return got; + } + } +} + +template +inline void CpuCache::Grow(int cpu, size_t size_class, + size_t desired_increase) { + const size_t size = forwarder_.class_to_size(size_class); + const size_t desired_bytes = desired_increase * size; + size_t acquired_bytes = + subtract_at_least(&resize_[cpu].available, size, desired_bytes); + if (acquired_bytes < desired_bytes) { + resize_[cpu].per_class[size_class].RecordMiss( + PerClassMissType::kCapacityTotal); + } + if (acquired_bytes == 0) { + return; + } + size_t actual_increase = acquired_bytes / size; + TC_ASSERT_GT(actual_increase, 0); + TC_ASSERT_LE(actual_increase, desired_increase); + // Remember, Grow may not give us all we ask for. + size_t increase = freelist_.Grow( + cpu, size_class, actual_increase, + [&](uint8_t shift) { return GetMaxCapacity(size_class, shift); }); + if (size_t unused = acquired_bytes - increase * size) { + // return whatever we didn't use to the slack. + resize_[cpu].available.fetch_add(unused, std::memory_order_relaxed); + } +} + +template +inline void CpuCache::TryReclaimingCaches() { + const int num_cpus = NumCPUs(); + + for (int cpu = 0; cpu < num_cpus; ++cpu) { + // Nothing to reclaim if the cpu is not populated. + if (!HasPopulated(cpu)) { + continue; + } + + uint64_t used_bytes = UsedBytes(cpu); + uint64_t prev_used_bytes = + resize_[cpu].reclaim_used_bytes.load(std::memory_order_relaxed); + + // Get reclaim miss and used bytes stats that were captured at the end of + // the previous interval. + const CpuCacheMissStats miss_stats = + GetAndUpdateIntervalCacheMissStats(cpu, MissCount::kReclaim); + uint64_t misses = + uint64_t{miss_stats.underflows} + uint64_t{miss_stats.overflows}; + + // Reclaim the cache if the number of used bytes and total number of misses + // stayed constant since the last interval. + if (used_bytes != 0 && used_bytes == prev_used_bytes && misses == 0) { + Reclaim(cpu); + } + + // Takes a snapshot of used bytes in the cache at the end of this interval + // so that we can calculate if cache usage changed in the next interval. + // + // Reclaim occurs on a single thread. So, the relaxed store to used_bytes + // is safe. 
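// Worked example of Grow() above, under assumed values: object size 256
// bytes, desired_increase == 32 slots, and resize_[cpu].available == 4096
// bytes. subtract_at_least(&available, 256, 8192) can take only 4096 bytes
// (16 objects' worth), so a capacity miss is recorded; if freelist_.Grow()
// then manages to add just 10 slots before hitting the per-shift maximum,
// the unused 4096 - 10 * 256 = 1536 bytes are added back to the per-CPU
// slack.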
+ resize_[cpu].reclaim_used_bytes.store(used_bytes, + std::memory_order_relaxed); + } +} + +template +int CpuCache::GetUpdatedMaxCapacities( + int start_size_class, PerSizeClassMaxCapacity* max_capacity, + int valid_entries) { + TC_ASSERT_LT(valid_entries, kNumClasses); + // Collect miss stats incurred during the current resize interval for all the + // size classes. + const int num_cpus = NumCPUs(); + absl::FixedArray total_misses(kNumBaseClasses, 0); + int index = 0; + for (int cpu = 0; cpu < num_cpus; ++cpu) { + index = 0; + if (!HasPopulated(cpu)) continue; + for (size_t size_class = start_size_class; + size_class < start_size_class + kNumBaseClasses; ++size_class) { + total_misses[index] += + resize_[cpu].per_class[size_class].GetAndUpdateIntervalMisses( + PerClassMissType::kMaxCapacityTotal, + PerClassMissType::kMaxCapacityResize); + + ++index; + } + } + + absl::FixedArray miss_stats(kNumBaseClasses); + index = 0; + for (size_t size_class = start_size_class; + size_class < start_size_class + kNumBaseClasses; ++size_class) { + miss_stats[index] = SizeClassMissStat{.size_class = size_class, + .misses = total_misses[index]}; + ++index; + } + + // Sort the collected stats to record size classes with largest number of + // misses in the last interval. + std::sort(miss_stats.begin(), miss_stats.end(), + [](SizeClassMissStat a, SizeClassMissStat b) { + // In case of a conflict, prefer growing smaller size classes. + if (a.misses == b.misses) { + return a.size_class < b.size_class; + } + return a.misses > b.misses; + }); + + // Computing number of size classes to resize is a light-weight operation, but + // resizing size classes involves stopping all per-cpu caches, and hence is a + // heavy-weight operation. So, we try to be aggressive in the number of size + // classes we would like to resize when we can, but perform resizing operation + // sparingly. + constexpr int kMaxCapacitiesToGrow = kNumBaseClasses / 2; + + int grown = 0; + int max_capacity_index = valid_entries; + + // We try to grow size class max capacities by batch_size times the growth + // factor. The growth factor starts with 5 times the batch size for the size + // class that suffers the highest misses, and then gradually shrinks to 1 for + // the size class with fifth-highest misses and onwards. There is nothing + // interesting about this factor; it may be tuned in the future to increase or + // decrease the aggresiveness of the growth. + int growth_factor = 5; + // Indices in miss_stats corresponding to the size classes we aim to grow + // and shrink. + int shrink_index = kNumBaseClasses - 1; + for (int grow_index = 0; grow_index < kNumBaseClasses; ++grow_index) { + // If a size class with largest misses is zero, break. Other size classes + // should also have suffered zero misses as well. + if (miss_stats[grow_index].misses == 0) break; + + // We grow a size class by its batch_size, while trying to shrink max + // capacities of other size classes by the same amount. We shrink each + // size classes' max capacity by its batch size too. + const size_t size_class_to_grow = miss_stats[grow_index].size_class; + const int to_grow = forwarder_.num_objects_to_move(size_class_to_grow); + + // max_capacity_index keeps track of number of entries in max_capacity + // that are valid. If we do not find enough size classes to shrink, we + // give up and return early. So, we only `commit` index in max_capacity + // once we find enough size classes to shrink max capacities equal to the + // target. 
next_capacity_index records a temporary index in max_capacity. + int next_capacity_index = max_capacity_index; + int target = to_grow * growth_factor; + int shrunk = 0; + + // Loop until we found enough capacity from other size classes, or if we run + // out of size classes to shrink. + while (shrink_index > grow_index && target > 0) { + size_t size_class_to_shrink = miss_stats[shrink_index].size_class; + int batch_size = forwarder_.num_objects_to_move(size_class_to_shrink); + size_t cap = + max_capacity_[size_class_to_shrink].load(std::memory_order_relaxed); + --shrink_index; + + // We retain at least batch_size amount of max capacity for a size + // class. + if (cap <= batch_size) continue; + + int to_shrink = std::min(target, batch_size); + // Do not shrink such that max capacity falls below batch_size. + to_shrink = std::min(to_shrink, cap - batch_size); + if (to_shrink == 0) continue; + + max_capacity[next_capacity_index] = PerSizeClassMaxCapacity{ + .size_class = size_class_to_shrink, .max_capacity = cap - to_shrink}; + ++next_capacity_index; + target -= to_shrink; + shrunk += to_shrink; + } + + // We didn't find any size classes that may be shrunk. Break. + if (shrunk == 0) break; + + // Update maximum capacity for the size class we intend to grow by the + // amount we shrunk from other size classes. + size_t cap = + max_capacity_[size_class_to_grow].load(std::memory_order_relaxed); + max_capacity[next_capacity_index] = PerSizeClassMaxCapacity{ + .size_class = size_class_to_grow, .max_capacity = cap + shrunk}; + ++next_capacity_index; + max_capacity_index = next_capacity_index; + + ++grown; + growth_factor = std::max(growth_factor - 1, 1); + // We have enough candidates to grow. Break. + if (grown == kMaxCapacitiesToGrow) break; + } + + return max_capacity_index; +} + +template +void CpuCache::MadviseAwaySlabs(void* slab_addr, size_t slab_size) { + // It is important that we do not MADV_REMOVE the memory, since file-backed + // pages may SIGSEGV/SIGBUS if another thread sees the previous slab after + // this point and reads it. + // + // TODO(b/214241843): we should be able to remove MADV_NOHUGEPAGE once the + // kernel enables huge zero pages. + // Note: we use bitwise OR to avoid short-circuiting. + ErrnoRestorer errno_restorer; + bool madvise_failed = false; + do { + madvise_failed = madvise(slab_addr, slab_size, MADV_NOHUGEPAGE) | + madvise(slab_addr, slab_size, MADV_DONTNEED); + } while (madvise_failed && errno == EAGAIN); + + int ret = 0; + if (madvise_failed) { + // Try to unlock if madvise fails the first time. + do { + ret = munlock(slab_addr, slab_size); + } while (ret == -1 && errno == EAGAIN); + + do { + madvise_failed = madvise(slab_addr, slab_size, MADV_NOHUGEPAGE) | + madvise(slab_addr, slab_size, MADV_DONTNEED); + } while (madvise_failed && errno == EAGAIN); + } + + if (ret != 0 || madvise_failed) { + dynamic_slab_info_.madvise_failed_bytes.fetch_add( + slab_size, std::memory_order_relaxed); + } +} + +template +void CpuCache::ResizeSizeClassMaxCapacities() + ABSL_NO_THREAD_SAFETY_ANALYSIS { + const int num_cpus = NumCPUs(); + const auto& topology = forwarder_.numa_topology(); + + PerSizeClassMaxCapacity new_max_capacities[kNumClasses]; + size_t start_size_class = 0; + int to_update = 0; + + // Obtain candidates to resize for size classes within each NUMA domain. We do + // not resize across NUMA domains. 
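// Hypothetical walk-through of GetUpdatedMaxCapacities() above for one
// partition: if the most-missed size class has batch size 32, its target is
// 5 * 32 = 160 capacity slots. Donors are taken from the least-missed end of
// the sorted list; each gives up at most its own batch size and is never
// shrunk below one batch. With enough donor headroom, the hot class's max
// capacity grows by 160, and the next most-missed class repeats the process
// with growth factor 4, then 3, ... down to 1, for at most
// kNumBaseClasses / 2 grown classes per call. If no donor capacity at all
// can be found for a class, nothing is committed for it and the scan stops.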
+ for (int i = 0; i < topology.active_partitions(); ++i) { + to_update = GetUpdatedMaxCapacities(start_size_class, new_max_capacities, + to_update); + start_size_class += kNumBaseClasses; + } + + // Obtain candidates to resize within expanded size classes. + if (kHasExpandedClasses) { + to_update = GetUpdatedMaxCapacities(start_size_class, new_max_capacities, + to_update); + } + + // Nothing to update. + if (to_update == 0) return; + + uint8_t per_cpu_shift = freelist_.GetShift(); + const auto shift = subtle::percpu::ToShiftType(per_cpu_shift); + const int64_t new_slabs_size = + subtle::percpu::GetSlabsAllocSize(shift, num_cpus); + // Account for impending allocation/reusing of new slab so that we can avoid + // going over memory limit. + forwarder_.ArenaUpdateAllocatedAndNonresident(new_slabs_size, 0); + + int64_t reused_bytes; + ResizeSlabsInfo info; + for (int cpu = 0; cpu < num_cpus; ++cpu) resize_[cpu].lock.Lock(); + uint8_t new_resize_slab_offset = + resize_slab_offset_.load(std::memory_order_relaxed) + 1; + if (new_resize_slab_offset >= kResizeSlabCopies) { + new_resize_slab_offset = 0; + } + resize_slab_offset_.store(new_resize_slab_offset, std::memory_order_relaxed); + + { + // We can't allocate while holding the per-cpu spinlocks. + AllocationGuard enforce_no_alloc; + void* new_slabs; + std::tie(new_slabs, reused_bytes) = AllocOrReuseSlabs( + [&](size_t size, std::align_val_t align) { + return forwarder_.AllocReportedImpending(size, align); + }, + shift, num_cpus, + ShiftOffset(per_cpu_shift, shift_bounds_.initial_shift), + new_resize_slab_offset); + + info = freelist_.UpdateMaxCapacities( + new_slabs, + GetShiftMaxCapacity{max_capacity_, per_cpu_shift, shift_bounds_}, + [this](int size_class, uint16_t cap) { + UpdateMaxCapacity(size_class, cap); + }, + [this](int cpu) { return HasPopulated(cpu); }, + DrainHandler{*this, nullptr}, new_max_capacities, to_update); + } + for (int cpu = 0; cpu < num_cpus; ++cpu) resize_[cpu].lock.Unlock(); + + MadviseAwaySlabs(info.old_slabs, info.old_slabs_size); + const int64_t old_slabs_size = info.old_slabs_size; + forwarder_.ArenaUpdateAllocatedAndNonresident(-old_slabs_size, + old_slabs_size - reused_bytes); +} + +template +inline void CpuCache::ResizeSizeClasses() { + const int num_cpus = NumCPUs(); + // Start resizing from where we left off the last time, and resize size class + // capacities for up to kNumCpuCachesToResize per-cpu caches. + int cpu = last_cpu_size_class_resize_.load(std::memory_order_relaxed); + int num_cpus_resized = 0; + + // Record the cumulative misses for the caches so that we can select the + // size classes with the highest misses as the candidates to resize. + for (int cpu_offset = 0; cpu_offset < num_cpus; ++cpu_offset) { + if (++cpu >= num_cpus) { + cpu = 0; + } + TC_ASSERT_GE(cpu, 0); + TC_ASSERT_LT(cpu, num_cpus); + + // Nothing to resize if the cache is not populated. + if (!HasPopulated(cpu)) { + continue; + } + + ResizeCpuSizeClasses(cpu); + + // Record full stats in previous full stat counters so that we can collect + // stats per interval. + for (size_t size_class = 1; size_class < kNumClasses; ++size_class) { + resize_[cpu].per_class[size_class].UpdateIntervalMisses( + PerClassMissType::kCapacityTotal, PerClassMissType::kCapacityResize); + } + + if (++num_cpus_resized >= kNumCpuCachesToResize) break; + } + // Record the cpu hint for which the size classes were resized so that we + // can start from the subsequent cpu in the next interval. 
+ last_cpu_size_class_resize_.store(cpu, std::memory_order_relaxed); +} + +template +void CpuCache::ResizeCpuSizeClasses(int cpu) { + if (resize_[cpu].available.load(std::memory_order_relaxed) >= + kMaxCpuCacheSize) { + // We still have enough available capacity, so all size classes can just + // grow as they see fit. + return; + } + + absl::FixedArray miss_stats(kNumClasses - 1); + for (size_t size_class = 1; size_class < kNumClasses; ++size_class) { + miss_stats[size_class - 1] = SizeClassMissStat{ + .size_class = size_class, + .misses = resize_[cpu].per_class[size_class].GetIntervalMisses( + PerClassMissType::kCapacityTotal, + PerClassMissType::kCapacityResize)}; + } + + // Sort the collected stats to record size classes with largest number of + // misses in the last interval. + std::sort(miss_stats.begin(), miss_stats.end(), + [](SizeClassMissStat a, SizeClassMissStat b) { + // In case of a conflict, prefer growing smaller size classes. + if (a.misses == b.misses) { + return a.size_class < b.size_class; + } + return a.misses > b.misses; + }); + + size_t available = + resize_[cpu].available.exchange(0, std::memory_order_relaxed); + size_t num_resizes = 0; + { + AllocationGuardSpinLockHolder h(&resize_[cpu].lock); + subtle::percpu::ScopedSlabCpuStop cpu_stop(freelist_, cpu); + const auto max_capacity = GetMaxCapacityFunctor(freelist_.GetShift()); + size_t size_classes_to_resize = 5; + TC_ASSERT_LT(size_classes_to_resize, kNumClasses); + for (size_t i = 0; i < size_classes_to_resize; ++i) { + // If a size class with largest misses is zero, break. Other size classes + // should also have suffered zero misses as well. + if (miss_stats[i].misses == 0) break; + const size_t size_class_to_grow = miss_stats[i].size_class; + + // If we are already at a maximum capacity, nothing to grow. + const ssize_t can_grow = max_capacity(size_class_to_grow) - + freelist_.Capacity(cpu, size_class_to_grow); + // can_grow can be negative only if slabs were resized, + // but since we hold resize_[cpu].lock it must not happen. + TC_ASSERT_GE(can_grow, 0); + if (can_grow <= 0) { + // If one of the highest miss classes is already at the max capacity, + // we need to try to grow more classes. Otherwise, if first 5 are at + // max capacity, resizing will stop working. + if (size_classes_to_resize < kNumClasses) { + size_classes_to_resize++; + } + continue; + } + + num_resizes++; + + size_t size = forwarder_.class_to_size(size_class_to_grow); + // Get total bytes to steal from other size classes. We would like to grow + // the capacity of the size class by a batch size. + const size_t need_bytes = + std::min(can_grow, + forwarder_.num_objects_to_move(size_class_to_grow)) * + size; + const ssize_t to_steal_bytes = need_bytes - available; + if (to_steal_bytes > 0) { + available += StealCapacityForSizeClassWithinCpu( + cpu, {miss_stats.begin(), size_classes_to_resize}, to_steal_bytes); + } + size_t capacity_acquired = std::min(can_grow, available / size); + if (capacity_acquired != 0) { + size_t got = freelist_.GrowOtherCache( + cpu, size_class_to_grow, capacity_acquired, [&](uint8_t shift) { + return GetMaxCapacity(size_class_to_grow, shift); + }); + available -= got * size; + } + } + } + resize_[cpu].available.fetch_add(available, std::memory_order_relaxed); + resize_[cpu].num_size_class_resizes.fetch_add(num_resizes, + std::memory_order_relaxed); +} + +template +inline void CpuCache::ShuffleCpuCaches() { + // Knobs that we can potentially tune depending on the workloads. 
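// Sketch of the shuffle performed below, with a hypothetical 3MiB per-CPU
// limit (the real value comes from CacheLimit()): each of the up-to-5
// highest-miss destination CPUs tries to steal 5% of the limit, roughly
// 150KiB, from other caches. A cache is considered as a donor only if it is
// populated, holds at least kCacheCapacityThreshold of the limit, and saw
// fewer than 0.8x the destination's underflows and overflows during the last
// shuffle interval.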
+ constexpr double kBytesToStealPercent = 5.0; + constexpr int kMaxNumStealCpus = 5; + + const int num_cpus = NumCPUs(); + absl::FixedArray misses(num_cpus); + + // Record the cumulative misses for the caches so that we can select the + // caches with the highest misses as the candidates to steal the cache for. + int max_populated_cpu = -1; + int num_populated_cpus = 0; + for (int cpu = 0; cpu < num_cpus; ++cpu) { + if (!HasPopulated(cpu)) { + continue; + } + const CpuCacheMissStats miss_stats = + GetIntervalCacheMissStats(cpu, MissCount::kShuffle); + misses[num_populated_cpus] = {cpu, + miss_stats.underflows + miss_stats.overflows}; + max_populated_cpu = cpu; + ++num_populated_cpus; + } + if (max_populated_cpu == -1) { + return; + } + + // Sorts misses to identify cpus with highest misses. + // + // TODO(vgogte): We can potentially sort the entire misses array and use that + // in StealFromOtherCache to determine cpus to steal from. That is, [0, + // num_dest_cpus) may be the destination cpus and [num_dest_cpus, num_cpus) + // may be cpus we may steal from. We can iterate through the array in a + // descending order to steal from them. The upside of this mechanism is that + // we would be able to do a more fair stealing, starting with cpus with lowest + // misses. The downside of this mechanism is that we would have to sort the + // entire misses array. This might be compute intensive on servers with high + // number of cpus (eg. Rome, Milan). We need to investigate the compute + // required to implement this. + const int num_dest_cpus = std::min(num_populated_cpus, kMaxNumStealCpus); + std::partial_sort(misses.begin(), misses.begin() + num_dest_cpus, + misses.begin() + num_populated_cpus, + [](CpuMissStat a, CpuMissStat b) { + if (a.misses == b.misses) { + return a.cpu < b.cpu; + } + return a.misses > b.misses; + }); + + // Try to steal kBytesToStealPercent percentage of max_per_cpu_cache_size for + // each destination cpu cache. + size_t to_steal = kBytesToStealPercent / 100.0 * CacheLimit(); + for (int i = 0; i < num_dest_cpus; ++i) { + if (misses[i].misses == 0) { + break; + } + absl::Span skip = {misses.begin(), static_cast(i + 1)}; + StealFromOtherCache(misses[i].cpu, max_populated_cpu, skip, to_steal); + } + + // Takes a snapshot of underflows and overflows at the end of this interval + // so that we can calculate the misses that occurred in the next interval. + for (int cpu = 0; cpu < num_cpus; ++cpu) { + UpdateIntervalCacheMissStats(cpu, MissCount::kShuffle); + } +} + +template +inline void CpuCache::StealFromOtherCache( + int cpu, int max_populated_cpu, absl::Span skip_cpus, + size_t bytes) { + constexpr double kCacheMissThreshold = 0.80; + + const CpuCacheMissStats dest_misses = + GetIntervalCacheMissStats(cpu, MissCount::kShuffle); + + if (resize_[cpu].available.load(std::memory_order_relaxed) >= kMaxSize) { + // We still have enough available capacity, so all size classes can just + // grow as they see fit. + return; + } + + size_t acquired = 0; + + // We use next_cpu_cache_steal_ as a hint to start our search for cpu ids to + // steal from so that we can iterate through the cpus in a nice round-robin + // fashion. + int src_cpu = next_cpu_cache_steal_; + + // We iterate through max_populate_cpus number of cpus to steal from. + // max_populate_cpus records the max cpu id that has been populated. Note + // that, any intermediate changes since the max_populated_cpus was measured + // may have populated higher cpu ids, but we do not include those in the + // search. 
The approximation prevents us from doing another pass through the + // cpus to just find the latest populated cpu id. + // + // We break from the loop once we iterate through all the cpus once, or if the + // total number of acquired bytes is higher than or equal to the desired bytes + // we want to steal. + for (int i = 0; i <= max_populated_cpu && acquired < bytes; ++i, ++src_cpu) { + if (src_cpu > max_populated_cpu) { + src_cpu = 0; + } + TC_ASSERT_LE(0, src_cpu); + TC_ASSERT_LE(src_cpu, max_populated_cpu); + + // We do not steal from the CPUs we want to grow. Maybe we can explore + // combining this with stealing from the same CPU later. + bool skip = false; + for (auto dest : skip_cpus) { + if (src_cpu == dest.cpu) { + skip = true; + break; + } + } + if (skip) continue; + + // We do not steal from the cache that hasn't been populated yet. + if (!HasPopulated(src_cpu)) continue; + + // We do not steal from cache that has capacity less than our lower + // capacity threshold. + if (Capacity(src_cpu) < kCacheCapacityThreshold * CacheLimit()) continue; + + const CpuCacheMissStats src_misses = + GetIntervalCacheMissStats(src_cpu, MissCount::kShuffle); + + // If underflows and overflows from the source cpu are higher, we do not + // steal from that cache. We consider the cache as a candidate to steal from + // only when its misses are lower than 0.8x that of the dest cache. + if (src_misses.underflows > kCacheMissThreshold * dest_misses.underflows || + src_misses.overflows > kCacheMissThreshold * dest_misses.overflows) + continue; + + // Try to steal available capacity from the target cpu, if any. + // This is cheaper than remote slab operations. + size_t stolen = + subtract_at_least(&resize_[src_cpu].available, 0, bytes - acquired); + if (stolen != 0) { + resize_[src_cpu].capacity.fetch_sub(stolen, std::memory_order_relaxed); + acquired += stolen; + if (acquired >= bytes) { + continue; + } + } + + AllocationGuardSpinLockHolder h(&resize_[src_cpu].lock); + subtle::percpu::ScopedSlabCpuStop cpu_stop(freelist_, src_cpu); + size_t source_size_class = resize_[src_cpu].next_steal; + for (size_t i = 1; i < kNumClasses; ++i, ++source_size_class) { + if (source_size_class >= kNumClasses) { + source_size_class = 1; + } + if (size_t stolen = ShrinkOtherCache(src_cpu, source_size_class)) { + resize_[src_cpu].capacity.fetch_sub(stolen, std::memory_order_relaxed); + acquired += stolen; + if (acquired >= bytes) { + break; + } + } + } + resize_[src_cpu].next_steal = source_size_class; + } + // Record the last cpu id we stole from, which would provide a hint to the + // next time we iterate through the cpus for stealing. + next_cpu_cache_steal_ = src_cpu; + + // Increment the capacity of the destination cpu cache by the amount of bytes + // acquired from source caches. + if (acquired) { + resize_[cpu].available.fetch_add(acquired, std::memory_order_relaxed); + resize_[cpu].capacity.fetch_add(acquired, std::memory_order_relaxed); + } +} + +template +size_t CpuCache::ShrinkOtherCache(int cpu, size_t size_class) { + TC_ASSERT(cpu >= 0 && cpu < NumCPUs(), "cpu=%d", cpu); + TC_ASSERT(size_class >= 1 && size_class < kNumClasses); + TC_ASSERT(resize_[cpu].lock.IsHeld()); + const size_t capacity = freelist_.Capacity(cpu, size_class); + if (capacity == 0) { + return 0; // Nothing to steal. 
+ } + + const size_t length = freelist_.Length(cpu, size_class); + const size_t batch_length = forwarder_.num_objects_to_move(size_class); + size_t size = forwarder_.class_to_size(size_class); + + // Clock-like algorithm to prioritize size classes for shrinking. + // + // Each size class has quiescent ticks counter which is incremented as we + // pass it, the counter is reset to 0 in UpdateCapacity on grow. + // If the counter value is 0, then we've just tried to grow the size class, + // so it makes little sense to shrink it back. The higher counter value + // the longer ago we grew the list and the more probable it is that + // the full capacity is unused. + // + // Then, we calculate "shrinking score", the higher the score the less we + // we want to shrink this size class. The score is considerably skewed + // towards larger size classes: smaller classes are usually used more + // actively and we also benefit less from shrinking smaller classes (steal + // less capacity). Then, we also avoid shrinking full freelists as we will + // need to evict an object and then go to the central freelist to return it. + // Then, we also avoid shrinking freelists that are just above batch size, + // because shrinking them will disable transfer cache. + // + // Finally, we shrink if the ticks counter is >= the score. + uint32_t score = 0; + // Note: the following numbers are based solely on intuition, common sense + // and benchmarking results. + if (size <= 144) { + score = 2 + (length >= capacity) + + (length >= batch_length && length < 2 * batch_length); + } else if (size <= 1024) { + score = 1 + (length >= capacity) + + (length >= batch_length && length < 2 * batch_length); + } else if (size <= (64 << 10)) { + score = (length >= capacity); + } + if (resize_[cpu].per_class[size_class].Tick() < score) { + return 0; + } + + // Finally, try to shrink. + if (!freelist_.ShrinkOtherCache( + cpu, size_class, /*len=*/1, + [this](size_t size_class, void** batch, size_t count) { + TC_ASSERT_EQ(count, 1); + ReleaseToBackingCache(size_class, {batch, count}); + })) { + return 0; + } + return size; +} + +template +inline size_t CpuCache::StealCapacityForSizeClassWithinCpu( + int cpu, absl::Span dest_size_classes, size_t bytes) { + // Steal from other sizeclasses. Try to go in a nice circle. + // Complicated by sizeclasses actually being 1-indexed. + size_t acquired = 0; + size_t source_size_class = resize_[cpu].next_steal; + for (size_t i = 1; i < kNumClasses; ++i, ++source_size_class) { + if (source_size_class >= kNumClasses) { + source_size_class = 1; + } + // Decide if we want to steal source_size_class. + // Don't shrink classes we want to grow. 
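// Worked examples of the shrinking score computed in ShrinkOtherCache()
// above, assuming batch_length == 32:
//   size == 64,   capacity == 100, length == 20: score = 2 + 0 + 0 = 2, so
//     the class is shrunk only if it has not grown for at least 2 ticks;
//   size == 64,   capacity == 48,  length == 48: score = 2 + 1 + 1 = 4, a
//     full, near-batch-sized small-object list is shrunk very reluctantly;
//   size == 8192, capacity == 10,  length == 4:  score = 0, a non-full
//     large-object list may be shrunk at the first opportunity.
// Each successful shrink removes a single slot and returns `size` bytes of
// capacity to the caller.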
+ bool skip = false; + for (auto dest : dest_size_classes) { + if (source_size_class == dest.size_class && dest.misses != 0) { + skip = true; + break; + } + } + if (skip) { + continue; + } + acquired += ShrinkOtherCache(cpu, source_size_class); + if (acquired >= bytes) { + // can't steal any more or don't need to + break; + } + } + // update the hint + resize_[cpu].next_steal = source_size_class; + return acquired; +} + +template +void CpuCache::DeallocateSlow(void* ptr, size_t size_class) { + DeallocateSlowNoHooks(ptr, size_class); + MaybeForceSlowPath(); +} + +template +void CpuCache::DeallocateSlowNoHooks(void* ptr, size_t size_class) { + if (BypassCpuCache(size_class)) { + return forwarder_.sharded_transfer_cache().Push(size_class, ptr); + } + auto [cpu, cached] = CacheCpuSlab(); + if (ABSL_PREDICT_FALSE(cached)) { + if (ABSL_PREDICT_FALSE(cpu < 0)) { + // The cpu is stopped. + return ReleaseToBackingCache(size_class, {&ptr, 1}); + } + if (DeallocateFast(ptr, size_class)) { + return; + } + } + RecordCacheMissStat(cpu, false); + const size_t target = UpdateCapacity(cpu, size_class, true); + size_t total = 0; + size_t count = 1; + void* batch[kMaxObjectsToMove]; + batch[0] = ptr; + do { + size_t want = std::min(kMaxObjectsToMove, target - total); + if (count < want) { + count += freelist_.PopBatch(size_class, batch + count, want - count); + } + if (!count) break; + + total += count; + ReleaseToBackingCache(size_class, absl::Span(batch, count)); + if (count != kMaxObjectsToMove) break; + count = 0; + } while (total < target); +} + +template +inline uint64_t CpuCache::Allocated(int target_cpu) const { + TC_ASSERT_GE(target_cpu, 0); + if (!HasPopulated(target_cpu)) { + return 0; + } + + uint64_t total = 0; + for (int size_class = 1; size_class < kNumClasses; size_class++) { + int size = forwarder_.class_to_size(size_class); + total += size * freelist_.Capacity(target_cpu, size_class); + } + return total; +} + +template +inline uint64_t CpuCache::UsedBytes(int target_cpu) const { + TC_ASSERT_GE(target_cpu, 0); + if (!HasPopulated(target_cpu)) { + return 0; + } + + uint64_t total = 0; + for (int size_class = 1; size_class < kNumClasses; size_class++) { + int size = forwarder_.class_to_size(size_class); + total += size * freelist_.Length(target_cpu, size_class); + } + return total; +} + +template +inline bool CpuCache::HasPopulated(int target_cpu) const { + TC_ASSERT_GE(target_cpu, 0); + return resize_[target_cpu].populated.load(std::memory_order_relaxed); +} + +template +inline PerCPUMetadataState CpuCache::MetadataMemoryUsage() const { + return freelist_.MetadataMemoryUsage(); +} + +template +inline uint64_t CpuCache::TotalUsedBytes() const { + uint64_t total = 0; + for (int cpu = 0, num_cpus = NumCPUs(); cpu < num_cpus; ++cpu) { + total += UsedBytes(cpu); + } + return total; +} + +template +inline uint64_t CpuCache::TotalObjectsOfClass( + size_t size_class) const { + TC_ASSERT_LT(size_class, kNumClasses); + uint64_t total_objects = 0; + if (size_class > 0) { + for (int cpu = 0, n = NumCPUs(); cpu < n; cpu++) { + if (!HasPopulated(cpu)) { + continue; + } + total_objects += freelist_.Length(cpu, size_class); + } + } + return total_objects; +} + +template +inline uint64_t CpuCache::Unallocated(int cpu) const { + return resize_[cpu].available.load(std::memory_order_relaxed); +} + +template +inline uint64_t CpuCache::Capacity(int cpu) const { + return resize_[cpu].capacity.load(std::memory_order_relaxed); +} + +template +inline uint64_t CpuCache::CacheLimit() const { + return 
max_per_cpu_cache_size_.load(std::memory_order_relaxed); +} + +template +inline void CpuCache::SetCacheLimit(uint64_t v) { + // TODO(b/179516472): Drain cores as required. + max_per_cpu_cache_size_.store(v, std::memory_order_relaxed); +} + +template +struct DrainHandler { + void operator()(int cpu, size_t size_class, void** batch, size_t count, + size_t cap) const { + const size_t size = cache.forwarder_.class_to_size(size_class); + const size_t batch_length = + cache.forwarder_.num_objects_to_move(size_class); + if (bytes != nullptr) *bytes += count * size; + // Drain resets capacity to 0, so return the allocated capacity to that + // CPU's slack. + cache.resize_[cpu].available.fetch_add(cap * size, + std::memory_order_relaxed); + for (size_t i = 0; i < count; i += batch_length) { + size_t n = std::min(batch_length, count - i); + cache.ReleaseToBackingCache(size_class, absl::Span(batch + i, n)); + } + } + + CpuCache& cache; + uint64_t* bytes; +}; + +template +inline uint64_t CpuCache::Reclaim(int cpu) { + AllocationGuardSpinLockHolder h(&resize_[cpu].lock); + + // If we haven't populated this core, freelist_.Drain() will touch the memory + // (for writing) as part of its locking process. Avoid faulting new pages as + // part of a release process. + if (!HasPopulated(cpu)) { + return 0; + } + + uint64_t bytes = 0; + freelist_.Drain(cpu, DrainHandler{*this, &bytes}); + + // Record that the reclaim occurred for this CPU. + resize_[cpu].num_reclaims.store( + resize_[cpu].num_reclaims.load(std::memory_order_relaxed) + 1, + std::memory_order_relaxed); + resize_[cpu].last_reclaim.store(absl::base_internal::CycleClock::Now(), + std::memory_order_relaxed); + + return bytes; +} +template +inline uint64_t CpuCache::GetNumResizes(int cpu) const { + return resize_[cpu].num_size_class_resizes.load(std::memory_order_relaxed); +} + +template +inline uint64_t CpuCache::GetNumResizes() const { + uint64_t resizes = 0; + const int num_cpus = NumCPUs(); + for (int cpu = 0; cpu < num_cpus; ++cpu) + resizes += + resize_[cpu].num_size_class_resizes.load(std::memory_order_relaxed); + return resizes; +} + +template +inline uint64_t CpuCache::GetNumReclaims(int cpu) const { + return resize_[cpu].num_reclaims.load(std::memory_order_relaxed); +} + +template +inline uint64_t CpuCache::GetNumReclaims() const { + uint64_t reclaims = 0; + const int num_cpus = NumCPUs(); + for (int cpu = 0; cpu < num_cpus; ++cpu) + reclaims += resize_[cpu].num_reclaims.load(std::memory_order_relaxed); + return reclaims; +} + +template +inline std::pair CpuCache::AllocOrReuseSlabs( + absl::FunctionRef alloc, + subtle::percpu::Shift shift, int num_cpus, uint8_t shift_offset, + uint8_t resize_offset) { + TC_ASSERT_LT(resize_offset, kResizeSlabCopies); + TC_ASSERT_LT(shift_offset, kNumPossiblePerCpuShifts); + int slab_offset = kNumPossiblePerCpuShifts * resize_offset + shift_offset; + TC_ASSERT_LT(slab_offset, kTotalPossibleSlabs); + void*& reused_slabs = slabs_by_shift_[slab_offset]; + const size_t size = GetSlabsAllocSize(shift, num_cpus); + const bool can_reuse = reused_slabs != nullptr; + if (can_reuse) { + // Enable huge pages for reused slabs. + // TODO(b/214241843): we should be able to remove this once the kernel + // enables huge zero pages. + ErrnoRestorer errno_restorer; + madvise(reused_slabs, size, MADV_HUGEPAGE); + } else { + reused_slabs = alloc(size, subtle::percpu::kPhysicalPageAlign); + // MSan does not see writes in assembly. + ANNOTATE_MEMORY_IS_INITIALIZED(reused_slabs, size); + } + return {reused_slabs, can_reuse ? 
size : 0}; +} + +template +inline typename CpuCache::DynamicSlabResize +CpuCache::ShouldResizeSlab() { + const int num_cpus = NumCPUs(); + CpuCacheMissStats total_misses{}; + DynamicSlabResize resize = DynamicSlabResize::kNoop; + const bool wider_slabs_enabled = UseWiderSlabs(); + for (int cpu = 0; cpu < num_cpus; ++cpu) { + CpuCacheMissStats misses = + GetAndUpdateIntervalCacheMissStats(cpu, MissCount::kSlabResize); + total_misses += misses; + + // If overflows to underflows ratio exceeds the threshold, grow the slab. + // Increase counts by 1 during comparison so that we can still compare the + // ratio to the threshold when underflows is zero. + if (misses.overflows + 1 > + (misses.underflows + 1) * + forwarder_.per_cpu_caches_dynamic_slab_grow_threshold()) { + resize = DynamicSlabResize::kGrow; + } + } + + // When wider slabs featuee is enabled, we try to grow slabs when the + // condition for at least one cpu cache is met. Else, we use total misses to + // figure out whether to grow the slab, shrink it, or do nothing. + if (wider_slabs_enabled && resize == DynamicSlabResize::kGrow) { + return resize; + } + + // As a simple heuristic, we decide to grow if the total number of overflows + // is large compared to total number of underflows during the growth period. + // If the slab size was infinite, we would expect 0 overflows. If the slab + // size was 0, we would expect approximately equal numbers of underflows and + // overflows. + if (total_misses.overflows + 1 > + (total_misses.underflows + 1) * + forwarder_.per_cpu_caches_dynamic_slab_grow_threshold()) { + return DynamicSlabResize::kGrow; + } else if (total_misses.overflows < + total_misses.underflows * + forwarder_.per_cpu_caches_dynamic_slab_shrink_threshold()) { + return DynamicSlabResize::kShrink; + } + + return DynamicSlabResize::kNoop; +} + +template +void CpuCache::ResizeSlabIfNeeded() ABSL_NO_THREAD_SAFETY_ANALYSIS { + uint8_t per_cpu_shift = freelist_.GetShift(); + + const int num_cpus = NumCPUs(); + const DynamicSlabResize resize = ShouldResizeSlab(); + + if (resize == DynamicSlabResize::kGrow) { + if (per_cpu_shift == shift_bounds_.max_shift) return; + ++per_cpu_shift; + dynamic_slab_info_ + .grow_count[ShiftOffset(per_cpu_shift, shift_bounds_.initial_shift)] + .fetch_add(1, std::memory_order_relaxed); + } else if (resize == DynamicSlabResize::kShrink) { + if (per_cpu_shift == shift_bounds_.initial_shift) return; + --per_cpu_shift; + dynamic_slab_info_ + .shrink_count[ShiftOffset(per_cpu_shift, shift_bounds_.initial_shift)] + .fetch_add(1, std::memory_order_relaxed); + } else { + return; + } + + const auto new_shift = subtle::percpu::ToShiftType(per_cpu_shift); + const int64_t new_slabs_size = + subtle::percpu::GetSlabsAllocSize(new_shift, num_cpus); + // Account for impending allocation/reusing of new slab so that we can avoid + // going over memory limit. + forwarder_.ArenaUpdateAllocatedAndNonresident(new_slabs_size, 0); + + for (int cpu = 0; cpu < num_cpus; ++cpu) resize_[cpu].lock.Lock(); + ResizeSlabsInfo info; + const uint8_t resize_offset = + resize_slab_offset_.load(std::memory_order_relaxed); + int64_t reused_bytes; + { + // We can't allocate while holding the per-cpu spinlocks. 
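// Worked example of the grow/shrink decision in ShouldResizeSlab() above,
// with hypothetical thresholds grow == 0.9 and shrink == 0.4 (the real values
// come from the forwarder): with 1000 underflows and 950 overflows in the
// interval, 951 > 1001 * 0.9, so the slab grows, since persistent overflows
// mean per-CPU capacity is the bottleneck. With 1000 underflows and 300
// overflows, 300 < 1000 * 0.4, so the slab shrinks. Anything in between is a
// no-op.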
+ AllocationGuard enforce_no_alloc; + + void* new_slabs; + std::tie(new_slabs, reused_bytes) = AllocOrReuseSlabs( + [&](size_t size, std::align_val_t align) { + return forwarder_.AllocReportedImpending(size, align); + }, + new_shift, num_cpus, + ShiftOffset(per_cpu_shift, shift_bounds_.initial_shift), resize_offset); + info = freelist_.ResizeSlabs( + new_shift, new_slabs, + GetShiftMaxCapacity{max_capacity_, per_cpu_shift, shift_bounds_}, + [this](int cpu) { return HasPopulated(cpu); }, + DrainHandler{*this, nullptr}); + } + for (int cpu = 0; cpu < num_cpus; ++cpu) resize_[cpu].lock.Unlock(); + + MadviseAwaySlabs(info.old_slabs, info.old_slabs_size); + const int64_t old_slabs_size = info.old_slabs_size; + forwarder_.ArenaUpdateAllocatedAndNonresident(-old_slabs_size, + old_slabs_size - reused_bytes); +} + +template +inline void CpuCache::RecordCacheMissStat(const int cpu, + const bool is_alloc) { + MissCounts& misses = + is_alloc ? resize_[cpu].underflows : resize_[cpu].overflows; + auto& c = misses[MissCount::kTotal]; + c.store(c.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); +} + +template +inline typename CpuCache::CpuCacheMissStats +CpuCache::GetTotalCacheMissStats(int cpu) const { + CpuCacheMissStats stats; + stats.underflows = resize_[cpu].underflows[MissCount::kTotal].load( + std::memory_order_relaxed); + stats.overflows = + resize_[cpu].overflows[MissCount::kTotal].load(std::memory_order_relaxed); + return stats; +} + +template +inline typename CpuCache::CpuCacheMissStats +CpuCache::GetTotalCacheMissStats() const { + CpuCacheMissStats stats; + const int num_cpus = NumCPUs(); + for (int cpu = 0; cpu < num_cpus; ++cpu) stats += GetTotalCacheMissStats(cpu); + return stats; +} + +template +inline typename CpuCache::CpuCacheMissStats +CpuCache::GetIntervalCacheMissStats(int cpu, + MissCount miss_count) const { + TC_ASSERT_NE(miss_count, MissCount::kTotal); + TC_ASSERT_LT(miss_count, MissCount::kNumCounts); + const auto get_safe_miss_diff = [miss_count](MissCounts& misses) { + const size_t total_misses = + misses[MissCount::kTotal].load(std::memory_order_relaxed); + const size_t interval_misses = + misses[miss_count].load(std::memory_order_relaxed); + // In case of a size_t overflow, we wrap around to 0. + return total_misses > interval_misses ? total_misses - interval_misses : 0; + }; + return {get_safe_miss_diff(resize_[cpu].underflows), + get_safe_miss_diff(resize_[cpu].overflows)}; +} + +template +void CpuCache::UpdateIntervalCacheMissStats(int cpu, + MissCount miss_count) { + CpuCacheMissStats total_stats = GetTotalCacheMissStats(cpu); + // Takes a snapshot of misses at the end of this interval so that we can + // calculate the misses that occurred in the next interval. + // + // Interval updates occur on a single thread so relaxed stores to interval + // miss stats are safe. + resize_[cpu].underflows[miss_count].store(total_stats.underflows, + std::memory_order_relaxed); + resize_[cpu].overflows[miss_count].store(total_stats.overflows, + std::memory_order_relaxed); +} + +template +inline typename CpuCache::CpuCacheMissStats +CpuCache::GetAndUpdateIntervalCacheMissStats(int cpu, + MissCount miss_count) { + // Note: it's possible for cache misses to occur between these two calls, but + // there's likely to be few of them so we don't handle them specially. 
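// Worked example of the interval accounting above: if a CPU has recorded
// 5000 total underflows and the MissCount::kShuffle snapshot holds 4200,
// GetIntervalCacheMissStats() reports 800 underflows for the current
// interval, and GetAndUpdateIntervalCacheMissStats() additionally copies
// 5000 into the snapshot so that the next interval starts from zero. If the
// total counter ever wraps and falls below the snapshot, the difference is
// reported as 0 rather than as a huge value.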
+ CpuCacheMissStats interval_stats = GetIntervalCacheMissStats(cpu, miss_count); + UpdateIntervalCacheMissStats(cpu, miss_count); + return interval_stats; +} + +template +size_t CpuCache::GetIntervalSizeClassMisses( + int cpu, size_t size_class, PerClassMissType total_type, + PerClassMissType interval_type) { + return resize_[cpu].per_class[size_class].GetIntervalMisses(total_type, + interval_type); +} + +template +inline typename CpuCache::SizeClassCapacityStats +CpuCache::GetSizeClassCapacityStats(size_t size_class) const { + SizeClassCapacityStats stats; + int num_populated = 0; + // We use a local variable here, instead of directly updating min_capacity in + // SizeClassCapacityStats struct to make sure we do not end up with SIZE_MAX + // in stats.min_capacity when num_populated is equal to zero. + size_t min_capacity = SIZE_MAX; + const double now = absl::base_internal::CycleClock::Now(); + const double frequency = absl::base_internal::CycleClock::Frequency(); + + // Scan through all per-CPU caches and calculate minimum, average and maximum + // capacities for the size class across all the populated caches. + for (int cpu = 0, num_cpus = NumCPUs(); cpu < num_cpus; ++cpu) { + // We do not include stats for non-populated cpus in our average. + if (!HasPopulated(cpu)) { + continue; + } + + ++num_populated; + + const auto last_reclaim = + resize_[cpu].last_reclaim.load(std::memory_order_relaxed); + + const auto last_underflow_cycles = + resize_[cpu].last_miss_cycles[0][size_class].load( + std::memory_order_relaxed); + const auto last_overflow_cycles = + resize_[cpu].last_miss_cycles[1][size_class].load( + std::memory_order_relaxed); + + size_t cap = freelist_.Capacity(cpu, size_class); + stats.max_capacity = std::max(stats.max_capacity, cap); + min_capacity = std::min(min_capacity, cap); + stats.avg_capacity += cap; + + if (last_reclaim >= last_underflow_cycles || + last_reclaim >= last_overflow_cycles) { + // Don't consider the underflow/overflow time on this CPU if we have + // recently reclaimed. + continue; + } + + if (cap == 0) { + // Or if the capacity is empty. We may simply not be allocating this size + // class. 
+ continue; + } + + const absl::Duration last_underflow = + absl::Seconds((now - last_underflow_cycles) / frequency); + const absl::Duration last_overflow = + absl::Seconds((now - last_overflow_cycles) / frequency); + + if (last_overflow < stats.min_last_overflow) { + stats.min_last_overflow = last_overflow; + stats.min_last_overflow_cpu_id = cpu; + } + if (last_overflow > stats.max_last_overflow) { + stats.max_last_overflow = last_overflow; + stats.max_last_overflow_cpu_id = cpu; + } + if (last_underflow < stats.min_last_underflow) { + stats.min_last_underflow = last_underflow; + stats.min_last_underflow_cpu_id = cpu; + } + if (last_underflow > stats.max_last_underflow) { + stats.max_last_underflow = last_underflow; + stats.max_last_underflow_cpu_id = cpu; + } + stats.max_capacity_misses += + resize_[cpu].per_class[size_class].GetIntervalMisses( + PerClassMissType::kMaxCapacityTotal, + PerClassMissType::kMaxCapacityResize); + } + if (num_populated > 0) { + stats.avg_capacity /= num_populated; + stats.min_capacity = min_capacity; + } + return stats; +} + +template +inline void CpuCache::Print(Printer& out) const { + out.printf("------------------------------------------------\n"); + out.printf("Bytes in per-CPU caches (per cpu limit: %u bytes)\n", + CacheLimit()); + out.printf("------------------------------------------------\n"); + + const CpuSet allowed_cpus = FillActiveCpuMask(); + const int num_cpus = NumCPUs(); + + for (int cpu = 0; cpu < num_cpus; ++cpu) { + static constexpr double MiB = 1048576.0; + + uint64_t rbytes = UsedBytes(cpu); + bool populated = HasPopulated(cpu); + uint64_t unallocated = Unallocated(cpu); + out.printf( + "cpu %3d: %12u" + " bytes (%7.1f MiB) with" + "%12u bytes unallocated %s%s\n", + cpu, rbytes, rbytes / MiB, unallocated, + allowed_cpus.IsSet(cpu) ? " active" : "", + populated ? 
" populated" : ""); + } + + out.printf("------------------------------------------------\n"); + out.printf("Size class capacity statistics in per-cpu caches\n"); + out.printf("------------------------------------------------\n"); + + for (int size_class = 1; size_class < kNumClasses; ++size_class) { + SizeClassCapacityStats stats = GetSizeClassCapacityStats(size_class); + out.printf( + "class %3d [ %8zu bytes ] : " + "%6zu (minimum), %7.1f (average), %6zu (maximum), %6zu maximum " + "allowed capacity, " + "maximum capacity misses %8zu, " + "(underflow: [%d us CPU %d, %d us CPU %d]; " + "overflow [%d us CPU %d, %d us CPU %d]\n", + size_class, forwarder_.class_to_size(size_class), stats.min_capacity, + stats.avg_capacity, stats.max_capacity, + GetMaxCapacity(size_class, freelist_.GetShift()), + stats.max_capacity_misses, + absl::ToInt64Microseconds(stats.min_last_underflow), + stats.min_last_underflow_cpu_id, + absl::ToInt64Microseconds(stats.max_last_underflow), + stats.max_last_underflow_cpu_id, + absl::ToInt64Microseconds(stats.min_last_overflow), + stats.min_last_overflow_cpu_id, + absl::ToInt64Microseconds(stats.max_last_overflow), + stats.max_last_overflow_cpu_id); + } + + out.printf("------------------------------------------------\n"); + out.printf("Number of per-CPU cache underflows, overflows, and reclaims\n"); + out.printf("------------------------------------------------\n"); + const auto print_miss_stats = [&out](CpuCacheMissStats miss_stats, + uint64_t reclaims, uint64_t resizes) { + out.printf( + "%12u underflows," + "%12u overflows, overflows / underflows: %5.2f, " + "%12u reclaims," + "%12u resizes\n", + miss_stats.underflows, miss_stats.overflows, + safe_div(miss_stats.overflows, miss_stats.underflows), reclaims, + resizes); + }; + out.printf("Total :"); + print_miss_stats(GetTotalCacheMissStats(), GetNumReclaims(), GetNumResizes()); + for (int cpu = 0; cpu < num_cpus; ++cpu) { + out.printf("cpu %3d:", cpu); + print_miss_stats(GetTotalCacheMissStats(cpu), GetNumReclaims(cpu), + GetNumResizes(cpu)); + } + + out.printf("------------------------------------------------\n"); + out.printf("Per-CPU cache slab resizing info:\n"); + out.printf("------------------------------------------------\n"); + uint8_t current_shift = freelist_.GetShift(); + out.printf("Current shift: %3d (slab size: %4d KiB)\n", current_shift, + (1 << current_shift) / 1024); + for (int shift = 0; shift < kNumPossiblePerCpuShifts; ++shift) { + out.printf("shift %3d:", shift + shift_bounds_.initial_shift); + out.printf( + "%12u growths, %12u shrinkages\n", + dynamic_slab_info_.grow_count[shift].load(std::memory_order_relaxed), + dynamic_slab_info_.shrink_count[shift].load(std::memory_order_relaxed)); + } + out.printf( + "%12u bytes for which MADVISE_DONTNEED failed\n", + dynamic_slab_info_.madvise_failed_bytes.load(std::memory_order_relaxed)); +} + +template +inline void CpuCache::PrintInPbtxt(PbtxtRegion& region) const { + const CpuSet allowed_cpus = FillActiveCpuMask(); + + for (int cpu = 0, num_cpus = NumCPUs(); cpu < num_cpus; ++cpu) { + PbtxtRegion entry = region.CreateSubRegion("cpu_cache"); + uint64_t rbytes = UsedBytes(cpu); + bool populated = HasPopulated(cpu); + uint64_t unallocated = Unallocated(cpu); + CpuCacheMissStats miss_stats = GetTotalCacheMissStats(cpu); + uint64_t reclaims = GetNumReclaims(cpu); + uint64_t resizes = GetNumResizes(cpu); + entry.PrintI64("cpu", cpu); + entry.PrintI64("used", rbytes); + entry.PrintI64("unused", unallocated); + entry.PrintBool("active", allowed_cpus.IsSet(cpu)); 
+ entry.PrintBool("populated", populated); + entry.PrintI64("underflows", miss_stats.underflows); + entry.PrintI64("overflows", miss_stats.overflows); + entry.PrintI64("reclaims", reclaims); + entry.PrintI64("size_class_resizes", resizes); + } + + // Record size class capacity statistics. + for (int size_class = 1; size_class < kNumClasses; ++size_class) { + SizeClassCapacityStats stats = GetSizeClassCapacityStats(size_class); + PbtxtRegion entry = region.CreateSubRegion("size_class_capacity"); + entry.PrintI64("sizeclass", forwarder_.class_to_size(size_class)); + entry.PrintI64("min_capacity", stats.min_capacity); + entry.PrintDouble("avg_capacity", stats.avg_capacity); + entry.PrintI64("max_capacity", stats.max_capacity); + entry.PrintI64("max_allowed_capacity", + GetMaxCapacity(size_class, freelist_.GetShift())); + + entry.PrintI64("min_last_underflow_ns", + absl::ToInt64Nanoseconds(stats.min_last_underflow)); + entry.PrintI64("max_last_underflow_ns", + absl::ToInt64Nanoseconds(stats.max_last_underflow)); + entry.PrintI64("min_last_overflow_ns", + absl::ToInt64Nanoseconds(stats.min_last_overflow)); + entry.PrintI64("max_last_overflow_ns", + absl::ToInt64Nanoseconds(stats.max_last_overflow)); + entry.PrintI64("max_capacity_misses", stats.max_capacity_misses); + } + + // Record dynamic slab statistics. + region.PrintI64("dynamic_per_cpu_slab_size", 1 << freelist_.GetShift()); + for (int shift = 0; shift < kNumPossiblePerCpuShifts; ++shift) { + PbtxtRegion entry = region.CreateSubRegion("dynamic_slab"); + entry.PrintI64("shift", shift + shift_bounds_.initial_shift); + entry.PrintI64("grow_count", dynamic_slab_info_.grow_count[shift].load( + std::memory_order_relaxed)); + entry.PrintI64("shrink_count", dynamic_slab_info_.shrink_count[shift].load( + std::memory_order_relaxed)); + } + region.PrintI64( + "dynamic_slab_madvise_failed_bytes", + dynamic_slab_info_.madvise_failed_bytes.load(std::memory_order_relaxed)); +} + +template +inline void CpuCache::AcquireInternalLocks() { + int ncpus = absl::base_internal::NumCPUs(); + for (int cpu = 0; cpu < ncpus; ++cpu) { + resize_[cpu].lock.Lock(); + } +} + +template +inline void CpuCache::ReleaseInternalLocks() { + int ncpus = absl::base_internal::NumCPUs(); + for (int cpu = 0; cpu < ncpus; ++cpu) { + resize_[cpu].lock.Unlock(); + } +} + +template +inline void CpuCache::PerClassResizeInfo::Init() { + state_.store(0, std::memory_order_relaxed); +} + +template +inline bool CpuCache::PerClassResizeInfo::Update( + bool overflow, bool grow, uint32_t* successive) { + int32_t raw = state_.load(std::memory_order_relaxed); + State state; + memcpy(&state, &raw, sizeof(state)); + const bool overflow_then_underflow = !overflow && state.overflow; + grow |= overflow_then_underflow; + // Reset quiescent ticks for Steal clock algorithm if we are going to grow. + State new_state; + new_state.overflow = overflow; + new_state.quiescent_ticks = grow ? 0 : state.quiescent_ticks; + new_state.successive = overflow == state.overflow ? 
state.successive + 1 : 0; + memcpy(&raw, &new_state, sizeof(raw)); + state_.store(raw, std::memory_order_relaxed); + *successive = new_state.successive; + return overflow_then_underflow; +} + +template +inline uint32_t CpuCache::PerClassResizeInfo::Tick() { + int32_t raw = state_.load(std::memory_order_relaxed); + State state; + memcpy(&state, &raw, sizeof(state)); + state.quiescent_ticks++; + memcpy(&raw, &state, sizeof(raw)); + state_.store(raw, std::memory_order_relaxed); + return state.quiescent_ticks - 1; +} + +template +inline void CpuCache::PerClassResizeInfo::RecordMiss( + PerClassMissType type) { + auto& c = misses_[type]; + c.store(c.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); +} + +template +inline size_t CpuCache::PerClassResizeInfo::GetTotalMisses( + PerClassMissType type) { + return misses_[type].load(std::memory_order_relaxed); +} + +template +inline size_t +CpuCache::PerClassResizeInfo::GetAndUpdateIntervalMisses( + PerClassMissType total_type, PerClassMissType interval_type) { + TC_ASSERT_LT(total_type, PerClassMissType::kNumTypes); + TC_ASSERT_LT(interval_type, PerClassMissType::kNumTypes); + + const size_t total_misses = + misses_[total_type].load(std::memory_order_relaxed); + const size_t interval_misses = + misses_[interval_type].load(std::memory_order_relaxed); + misses_[interval_type].store(total_misses, std::memory_order_relaxed); + // In case of a size_t overflow, we wrap around to 0. + return total_misses > interval_misses ? total_misses - interval_misses : 0; +} + +template +inline size_t CpuCache::PerClassResizeInfo::GetIntervalMisses( + PerClassMissType total_type, PerClassMissType interval_type) { + TC_ASSERT_LT(total_type, PerClassMissType::kNumTypes); + TC_ASSERT_LT(interval_type, PerClassMissType::kNumTypes); + + const size_t total_misses = + misses_[total_type].load(std::memory_order_relaxed); + const size_t interval_misses = + misses_[interval_type].load(std::memory_order_relaxed); + // In case of a size_t overflow, we wrap around to 0. + return total_misses > interval_misses ? total_misses - interval_misses : 0; +} + +template +void CpuCache::PerClassResizeInfo::UpdateIntervalMisses( + PerClassMissType total_type, PerClassMissType interval_type) { + const size_t total_misses = GetTotalMisses(total_type); + // Takes a snapshot of misses at the end of this interval so that we can + // calculate the misses that occurred in the next interval. + // + // Interval updates occur on a single thread so relaxed stores to interval + // miss stats are safe. + misses_[interval_type].store(total_misses, std::memory_order_relaxed); +} + +} // namespace cpu_cache_internal + +// Static forward declares CpuCache to avoid a cycle in headers. Make +// "CpuCache" be non-templated to avoid breaking that forward declaration. +class CpuCache final + : public cpu_cache_internal::CpuCache { +}; + +template +inline bool UsePerCpuCache(State& state) { // We expect a fast path of per-CPU caches being active and the thread being // registered with rseq. - if (ABSL_PREDICT_FALSE(!Static::CPUCacheActive())) { + if (ABSL_PREDICT_FALSE(!state.CpuCacheActive())) { return false; } @@ -374,8 +2774,8 @@ inline bool UsePerCpuCache() { // into tcmalloc. // // If the per-CPU cache for a thread is not initialized, we push ourselves - // onto the slow path (if !defined(TCMALLOC_DEPRECATED_PERTHREAD)) until this - // occurs. See fast_alloc's use of TryRecordAllocationFast. + // onto the slow path until this occurs. See fast_alloc's use of + // TryRecordAllocationFast. 
if (ABSL_PREDICT_TRUE(subtle::percpu::IsFast())) { ThreadCache::BecomeIdle(); return true; diff --git a/contrib/libs/tcmalloc/tcmalloc/cpu_cache_activate_test.cc b/contrib/libs/tcmalloc/tcmalloc/cpu_cache_activate_test.cc new file mode 100644 index 000000000000..960e79428618 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/cpu_cache_activate_test.cc @@ -0,0 +1,87 @@ +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include +#include // NOLINT(build/c++11) + +#include "benchmark/benchmark.h" +#include "gtest/gtest.h" +#include "absl/base/internal/sysinfo.h" +#include "absl/random/random.h" +#include "absl/synchronization/notification.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "tcmalloc/cpu_cache.h" +#include "tcmalloc/internal/percpu.h" +#include "tcmalloc/internal/sysinfo.h" +#include "tcmalloc/internal_malloc_extension.h" +#include "tcmalloc/static_vars.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +// This test mutates global state, including triggering the activation of the +// per-CPU caches. It should not be run along side other tests in the same +// process that may rely on an isolated global instance. +TEST(CpuCacheActivateTest, GlobalInstance) { + if (!subtle::percpu::IsFast()) { + return; + } + + CpuCache& cache = tc_globals.cpu_cache(); + + absl::Notification done; + + std::thread t([&]() { + const int num_cpus = NumCPUs(); + absl::BitGen rng; + + while (!done.HasBeenNotified()) { + const double coin = absl::Uniform(rng, 0., 1.); + const bool ready = tc_globals.CpuCacheActive(); + + if (ready && coin < 0.25) { + const int cpu = absl::Uniform(rng, 0, num_cpus); + benchmark::DoNotOptimize(cache.UsedBytes(cpu)); + } else if (ready && coin < 0.5) { + const int cpu = absl::Uniform(rng, 0, num_cpus); + benchmark::DoNotOptimize(cache.Capacity(cpu)); + } else if (ready && coin < 0.75) { + benchmark::DoNotOptimize(cache.TotalUsedBytes()); + } else { + benchmark::DoNotOptimize(cache.CacheLimit()); + } + } + }); + + // Trigger initialization of the CpuCache, confirming it was not initialized + // at the start of the test and is afterwards. 
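+  // The reader thread above keeps polling cache statistics while activation
+  // happens, so the inactive-to-active transition is exercised concurrently.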
+ EXPECT_FALSE(tc_globals.CpuCacheActive()); + ASSERT_NE(&TCMalloc_Internal_ForceCpuCacheActivation, nullptr); + Parameters::set_per_cpu_caches(true); + TCMalloc_Internal_ForceCpuCacheActivation(); + EXPECT_TRUE(tc_globals.CpuCacheActive()); + + absl::SleepFor(absl::Seconds(0.2)); + + done.Notify(); + t.join(); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/cpu_cache_test.cc b/contrib/libs/tcmalloc/tcmalloc/cpu_cache_test.cc index fd4282b9c3c1..68eb45b67f91 100644 --- a/contrib/libs/tcmalloc/tcmalloc/cpu_cache_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/cpu_cache_test.cc @@ -14,121 +14,494 @@ #include "tcmalloc/cpu_cache.h" +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include #include // NOLINT(build/c++11) +#include +#include #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "absl/base/optimization.h" +#include "absl/random/bit_gen_ref.h" #include "absl/random/random.h" -#include "absl/random/seed_sequences.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/time.h" +#include "absl/types/span.h" #include "tcmalloc/common.h" +#include "tcmalloc/internal/affinity.h" +#include "tcmalloc/internal/logging.h" #include "tcmalloc/internal/optimization.h" -#include "tcmalloc/internal/util.h" +#include "tcmalloc/internal/percpu.h" +#include "tcmalloc/internal/percpu_tcmalloc.h" +#include "tcmalloc/internal/sysinfo.h" +#include "tcmalloc/mock_transfer_cache.h" #include "tcmalloc/parameters.h" +#include "tcmalloc/size_class_info.h" +#include "tcmalloc/sizemap.h" #include "tcmalloc/static_vars.h" +#include "tcmalloc/tcmalloc_policy.h" #include "tcmalloc/testing/testutil.h" +#include "tcmalloc/testing/thread_manager.h" +#include "tcmalloc/transfer_cache.h" namespace tcmalloc { namespace tcmalloc_internal { +namespace subtle::percpu { +class TcmallocTest { + public: + static int VirtualCpuSynchronize() { return VirtualCpu::Synchronize(); } +}; +} // namespace subtle::percpu + +class CpuCachePeer { + public: + template + static uint8_t GetSlabShift(const CpuCache& cpu_cache) { + return cpu_cache.freelist_.GetShift(); + } + + template + static void IncrementCacheMisses(CpuCache& cpu_cache) { + cpu_cache.RecordCacheMissStat(/*cpu=*/0, /*is_alloc=*/true); + cpu_cache.RecordCacheMissStat(/*cpu=*/0, /*is_alloc=*/false); + } + + // Validate that we're using >90% of the available slab bytes. 
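+  // For every shift in the allowed range this compares the bytes required by
+  // the per-size-class maximum capacities against the bytes a slab of that
+  // shift provides.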
+ template + static void ValidateSlabBytes(const CpuCache& cpu_cache) { + cpu_cache_internal::SlabShiftBounds bounds = + cpu_cache.GetPerCpuSlabShiftBounds(); + for (uint8_t shift = bounds.initial_shift; + shift <= bounds.max_shift && + shift > cpu_cache_internal::kInitialBasePerCpuShift; + ++shift) { + const auto [bytes_required, bytes_available] = + EstimateSlabBytes(cpu_cache.GetMaxCapacityFunctor(shift)); + EXPECT_GT(bytes_required * 10, bytes_available * 9) + << bytes_required << " " << bytes_available << " " << kNumaPartitions + << " " << kNumBaseClasses << " " << kNumClasses; + EXPECT_LE(bytes_required, bytes_available); + } + } + + template + static size_t ResizeInfoSize() { + return sizeof(typename CpuCache::ResizeInfo); + } +}; + namespace { -constexpr size_t kStressSlabs = 4; -void* OOMHandler(size_t) { return nullptr; } +enum class DynamicSlab { kGrow, kShrink, kNoop }; -TEST(CpuCacheTest, Metadata) { - if (!subtle::percpu::IsFast()) { - return; +class TestStaticForwarder { + public: + TestStaticForwarder() : sharded_manager_(&owner_, &cpu_layout_) { + numa_topology_.Init(); + } + + void InitializeShardedManager(int num_shards) { + cpu_layout_.Init(num_shards); + sharded_manager_.Init(); } - const int num_cpus = absl::base_internal::NumCPUs(); + static void* Alloc(size_t size, std::align_val_t alignment) { + return mmap(nullptr, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + } - CPUCache& cache = Static::cpu_cache(); - // Since this test allocates memory, avoid activating the real fast path to - // minimize allocations against the per-CPU cache. - cache.Activate(CPUCache::ActivationMode::FastPathOffTestOnly); + void* AllocReportedImpending(size_t size, std::align_val_t alignment) { + arena_reported_impending_bytes_ -= static_cast(size); + return mmap(nullptr, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + } - PerCPUMetadataState r = cache.MetadataMemoryUsage(); - EXPECT_EQ(r.virtual_size, num_cpus << CPUCache::kPerCpuShift); - if (Parameters::lazy_per_cpu_caches()) { - EXPECT_EQ(r.resident_size, 0); - } else { - EXPECT_EQ(r.resident_size, r.virtual_size); + static void Dealloc(void* ptr, size_t size, std::align_val_t /*alignment*/) { + munmap(ptr, size); } - auto count_cores = [&]() { - int populated_cores = 0; - for (int i = 0; i < num_cpus; i++) { - if (cache.HasPopulated(i)) { - populated_cores++; - } + void ArenaUpdateAllocatedAndNonresident(int64_t allocated, + int64_t nonresident) { + if (allocated > 0) { + EXPECT_EQ(arena_reported_impending_bytes_, 0); + ++shrink_to_usage_limit_calls_; } - return populated_cores; - }; - EXPECT_EQ(0, count_cores()); + if (nonresident == 0) { + arena_reported_impending_bytes_ += allocated; + } else { + arena_reported_impending_bytes_ = 0; + } + arena_reported_nonresident_bytes_ += nonresident; + } - int allowed_cpu_id; - const size_t kSizeClass = 3; - const size_t num_to_move = Static::sizemap().num_objects_to_move(kSizeClass); - const size_t virtual_cpu_id_offset = subtle::percpu::UsingFlatVirtualCpus() - ? offsetof(kernel_rseq, vcpu_id) - : offsetof(kernel_rseq, cpu_id); - void* ptr; - { - // Restrict this thread to a single core while allocating and processing the - // slow path. - // - // TODO(b/151313823): Without this restriction, we may access--for reading - // only--other slabs if we end up being migrated. These may cause huge - // pages to be faulted for those cores, leading to test flakiness. 
- tcmalloc_internal::ScopedAffinityMask mask( - tcmalloc_internal::AllowedCpus()[0]); - allowed_cpu_id = - subtle::percpu::GetCurrentVirtualCpuUnsafe(virtual_cpu_id_offset); + bool per_cpu_caches_dynamic_slab_enabled() { return dynamic_slab_enabled_; } - ptr = cache.Allocate(kSizeClass); + double per_cpu_caches_dynamic_slab_grow_threshold() { + if (dynamic_slab_grow_threshold_ >= 0) return dynamic_slab_grow_threshold_; + return dynamic_slab_ == DynamicSlab::kGrow + ? -1.0 + : std::numeric_limits::max(); + } - if (mask.Tampered() || - allowed_cpu_id != - subtle::percpu::GetCurrentVirtualCpuUnsafe(virtual_cpu_id_offset)) { - return; + double per_cpu_caches_dynamic_slab_shrink_threshold() { + if (dynamic_slab_shrink_threshold_ >= 0) + return dynamic_slab_shrink_threshold_; + return dynamic_slab_ == DynamicSlab::kShrink + ? std::numeric_limits::max() + : -1.0; + } + + bool reuse_size_classes() const { return true; } + + size_t class_to_size(int size_class) const { + if (size_map_.has_value()) { + return size_map_->class_to_size(size_class); + } else { + return transfer_cache_.class_to_size(size_class); + } + } + + absl::Span cold_size_classes() const { + if (size_map_.has_value()) { + return size_map_->ColdSizeClasses(); + } else { + return {}; + } + } + + size_t num_objects_to_move(int size_class) const { + if (size_map_.has_value()) { + return size_map_->num_objects_to_move(size_class); + } else { + return transfer_cache_.num_objects_to_move(size_class); } } - EXPECT_NE(ptr, nullptr); - EXPECT_EQ(1, count_cores()); - r = cache.MetadataMemoryUsage(); - EXPECT_EQ(r.virtual_size, num_cpus << CPUCache::kPerCpuShift); - if (Parameters::lazy_per_cpu_caches()) { + const NumaTopology& numa_topology() const { + return numa_topology_; + } + + using ShardedManager = + ShardedTransferCacheManagerBase; + + ShardedManager& sharded_transfer_cache() { return sharded_manager_; } + + const ShardedManager& sharded_transfer_cache() const { + return sharded_manager_; + } + + TwoSizeClassManager& + transfer_cache() { + return transfer_cache_; + } + + bool UseGenericShardedCache() const { return owner_.UseGenericCache(); } + void SetGenericShardedCache(bool value) { owner_.SetGenericCache(value); } + bool UseShardedCacheForLargeClassesOnly() const { + return owner_.EnableCacheForLargeClassesOnly(); + } + void SetShardedCacheForLargeClassesOnly(bool value) { + owner_.SetCacheForLargeClassesOnly(value); + } + + bool HaveHooks() const { + // TODO(b/242550501): Test other states. 
+ return false; + } + + size_t arena_reported_nonresident_bytes_ = 0; + int64_t arena_reported_impending_bytes_ = 0; + size_t shrink_to_usage_limit_calls_ = 0; + bool dynamic_slab_enabled_ = false; + double dynamic_slab_grow_threshold_ = -1; + double dynamic_slab_shrink_threshold_ = -1; + DynamicSlab dynamic_slab_ = DynamicSlab::kNoop; + std::optional size_map_; + + private: + NumaTopology numa_topology_; + FakeShardedTransferCacheManager owner_; + FakeCpuLayout cpu_layout_; + ShardedManager sharded_manager_; + TwoSizeClassManager + transfer_cache_; +}; + +using CpuCache = cpu_cache_internal::CpuCache; +using MissCount = CpuCache::MissCount; +using PerClassMissType = CpuCache::PerClassMissType; + +TEST(CpuCacheTest, MinimumShardsForGenericCache) { + if (!subtle::percpu::IsFast()) { + return; + } + CpuCache cache; + cache.Activate(); + + using ShardedManager = TestStaticForwarder::ShardedManager; + TestStaticForwarder& forwarder = cache.forwarder(); + forwarder.SetShardedCacheForLargeClassesOnly(false); + forwarder.SetGenericShardedCache(true); + + ShardedManager& sharded_transfer_cache = forwarder.sharded_transfer_cache(); + constexpr int kNumShards = ShardedManager::kMinShardsAllowed - 1; + TC_ASSERT_GT(kNumShards, 0); + forwarder.InitializeShardedManager(kNumShards); + + constexpr int kCpuId = 0; + ScopedFakeCpuId fake_cpu_id(kCpuId); + EXPECT_FALSE(sharded_transfer_cache.shard_initialized(0)); + EXPECT_EQ(sharded_transfer_cache.NumActiveShards(), 0); + EXPECT_EQ(forwarder.transfer_cache().tc_length(kSizeClass), 0); + + constexpr size_t kSizeClass = 1; + const size_t num_to_move = cache.forwarder().num_objects_to_move(kSizeClass); + + // Allocate an object. As we are using less than kMinShardsAllowed number of + // shards, we should bypass sharded transfer cache entirely. + void* ptr = cache.Allocate(kSizeClass); + for (int size_class = 1; size_class < kNumClasses; ++size_class) { + EXPECT_FALSE(sharded_transfer_cache.should_use(size_class)); + EXPECT_EQ(sharded_transfer_cache.GetStats(size_class).capacity, 0); + EXPECT_EQ(sharded_transfer_cache.GetStats(size_class).max_capacity, 0); + } + // No requests are sent to sharded transfer cache. So, it should stay + // uninitialized. + EXPECT_EQ(sharded_transfer_cache.tc_length(kCpuId, kSizeClass), 0); + EXPECT_FALSE(sharded_transfer_cache.shard_initialized(0)); + EXPECT_EQ(sharded_transfer_cache.NumActiveShards(), 0); + EXPECT_EQ(forwarder.transfer_cache().tc_length(kSizeClass), 0); + + cache.Deallocate(ptr, kSizeClass); + cache.Reclaim(0); + EXPECT_EQ(sharded_transfer_cache.tc_length(kCpuId, kSizeClass), 0); + EXPECT_FALSE(sharded_transfer_cache.shard_initialized(0)); + EXPECT_EQ(sharded_transfer_cache.NumActiveShards(), 0); + // We should deallocate directly to the LIFO transfer cache. 
+ EXPECT_EQ(forwarder.transfer_cache().tc_length(kSizeClass), + num_to_move / 2 + 1); +} + +TEST(CpuCacheTest, UsesShardedAsBackingCache) { + if (!subtle::percpu::IsFast()) { + return; + } + CpuCache cache; + cache.Activate(); + + using ShardedManager = TestStaticForwarder::ShardedManager; + TestStaticForwarder& forwarder = cache.forwarder(); + forwarder.SetShardedCacheForLargeClassesOnly(false); + forwarder.SetGenericShardedCache(true); + + ShardedManager& sharded_transfer_cache = forwarder.sharded_transfer_cache(); + constexpr int kNumShards = ShardedManager::kMinShardsAllowed; + TC_ASSERT_GT(kNumShards, 0); + forwarder.InitializeShardedManager(kNumShards); + + ScopedFakeCpuId fake_cpu_id(0); + EXPECT_FALSE(sharded_transfer_cache.shard_initialized(0)); + EXPECT_EQ(sharded_transfer_cache.NumActiveShards(), 0); + + constexpr size_t kSizeClass = 1; + TransferCacheStats sharded_stats = + sharded_transfer_cache.GetStats(kSizeClass); + EXPECT_EQ(sharded_stats.remove_hits, 0); + EXPECT_EQ(sharded_stats.remove_misses, 0); + EXPECT_EQ(sharded_stats.insert_hits, 0); + EXPECT_EQ(sharded_stats.insert_misses, 0); + + // Allocate an object and make sure that we allocate from the sharded transfer + // cache and that the sharded cache has been initialized. + void* ptr = cache.Allocate(kSizeClass); + sharded_stats = sharded_transfer_cache.GetStats(kSizeClass); + EXPECT_EQ(sharded_stats.remove_hits, 0); + EXPECT_EQ(sharded_stats.remove_misses, 1); + EXPECT_EQ(sharded_stats.insert_hits, 0); + EXPECT_EQ(sharded_stats.insert_misses, 0); + EXPECT_TRUE(sharded_transfer_cache.shard_initialized(0)); + EXPECT_EQ(sharded_transfer_cache.NumActiveShards(), 1); + + // Free objects to confirm that they are indeed released back to the sharded + // transfer cache. + cache.Deallocate(ptr, kSizeClass); + cache.Reclaim(0); + sharded_stats = sharded_transfer_cache.GetStats(kSizeClass); + EXPECT_EQ(sharded_stats.insert_hits, 1); + EXPECT_EQ(sharded_stats.insert_misses, 0); + + // Ensure that we never use legacy transfer cache by checking that hits and + // misses are zero. 
+ TransferCacheStats tc_stats = forwarder.transfer_cache().GetStats(kSizeClass); + EXPECT_EQ(tc_stats.remove_hits, 0); + EXPECT_EQ(tc_stats.remove_misses, 0); + EXPECT_EQ(tc_stats.insert_hits, 0); + EXPECT_EQ(tc_stats.insert_misses, 0); + forwarder.SetGenericShardedCache(false); + cache.Deactivate(); +} + +TEST(CpuCacheTest, ResizeInfoNoFalseSharing) { + const size_t resize_info_size = CpuCachePeer::ResizeInfoSize(); + EXPECT_EQ(resize_info_size % ABSL_CACHELINE_SIZE, 0) << resize_info_size; +} + +TEST(CpuCacheTest, Metadata) { + if (!subtle::percpu::IsFast()) { + return; + } + + const int num_cpus = NumCPUs(); + + const int kAttempts = 3; + for (int attempt = 1; attempt <= kAttempts; attempt++) { + SCOPED_TRACE(absl::StrCat("attempt=", attempt)); + + CpuCache cache; + cache.Activate(); + + cpu_cache_internal::SlabShiftBounds shift_bounds = + cache.GetPerCpuSlabShiftBounds(); + + PerCPUMetadataState r = cache.MetadataMemoryUsage(); + size_t slabs_size = subtle::percpu::GetSlabsAllocSize( + subtle::percpu::ToShiftType(shift_bounds.max_shift), num_cpus); + size_t resize_size = num_cpus * sizeof(bool); + size_t begins_size = kNumClasses * sizeof(std::atomic); + EXPECT_EQ(r.virtual_size, slabs_size + resize_size + begins_size); + EXPECT_EQ(r.resident_size, 0); + + auto count_cores = [&]() { + int populated_cores = 0; + for (int i = 0; i < num_cpus; i++) { + if (cache.HasPopulated(i)) { + populated_cores++; + } + } + return populated_cores; + }; + + EXPECT_EQ(0, count_cores()); + + int allowed_cpu_id; + const size_t kSizeClass = 2; + const size_t num_to_move = + cache.forwarder().num_objects_to_move(kSizeClass); + + TransferCacheStats tc_stats = + cache.forwarder().transfer_cache().GetStats(kSizeClass); + EXPECT_EQ(tc_stats.remove_hits, 0); + EXPECT_EQ(tc_stats.remove_misses, 0); + EXPECT_EQ(tc_stats.remove_object_misses, 0); + EXPECT_EQ(tc_stats.insert_hits, 0); + EXPECT_EQ(tc_stats.insert_misses, 0); + EXPECT_EQ(tc_stats.insert_object_misses, 0); + + void* ptr; + { + // Restrict this thread to a single core while allocating and processing + // the slow path. + // + // TODO(b/151313823): Without this restriction, we may access--for + // reading only--other slabs if we end up being migrated. These may cause + // huge pages to be faulted for those cores, leading to test flakiness. + tcmalloc_internal::ScopedAffinityMask mask( + tcmalloc_internal::AllowedCpus()[0]); + allowed_cpu_id = subtle::percpu::TcmallocTest::VirtualCpuSynchronize(); + + ptr = cache.Allocate(kSizeClass); + + if (mask.Tampered() || + allowed_cpu_id != + subtle::percpu::TcmallocTest::VirtualCpuSynchronize()) { + return; + } + } + EXPECT_NE(ptr, nullptr); + EXPECT_EQ(1, count_cores()); + + // We don't care if the transfer cache hit or missed, but the CPU cache + // should have done the operation. + tc_stats = cache.forwarder().transfer_cache().GetStats(kSizeClass); + if ((tc_stats.remove_object_misses != num_to_move || + tc_stats.insert_hits + tc_stats.insert_misses != 0) && + attempt < kAttempts) { + // The operation didn't occur as expected, likely because we were + // preempted but returned to the same core (otherwise Tampered would have + // fired). + // + // The MSB of tcmalloc_slabs should be cleared to indicate we were + // preempted. As of December 2024, Refill and its callees do not invoke + // CacheCpuSlab. This check can spuriously pass if we're preempted + // between the end of Allocate and now, rather than within Allocate, but + // it ensures we do not silently break. 
+#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ + EXPECT_EQ(subtle::percpu::tcmalloc_slabs & TCMALLOC_CACHED_SLABS_MASK, 0); +#endif // TCMALLOC_INTERNAL_PERCPU_USE_RSEQ + + cache.Deallocate(ptr, kSizeClass); + cache.Deactivate(); + + continue; + } + + EXPECT_EQ(tc_stats.remove_hits + tc_stats.remove_misses, 1); + EXPECT_EQ(tc_stats.remove_object_misses, num_to_move); + EXPECT_EQ(tc_stats.insert_hits, 0); + EXPECT_EQ(tc_stats.insert_misses, 0); + EXPECT_EQ(tc_stats.insert_object_misses, 0); + + r = cache.MetadataMemoryUsage(); + EXPECT_EQ( + r.virtual_size, + resize_size + begins_size + + subtle::percpu::GetSlabsAllocSize( + subtle::percpu::ToShiftType(shift_bounds.max_shift), num_cpus)); + // We expect to fault in a single core, but we may end up faulting an - // entire hugepage worth of memory + // entire hugepage worth of memory when we touch that core and another when + // touching the header. const size_t core_slab_size = r.virtual_size / num_cpus; const size_t upper_bound = - ((core_slab_size + kHugePageSize - 1) & ~(kHugePageSize - 1)); + ((core_slab_size + kHugePageSize - 1) & ~(kHugePageSize - 1)) + + kHugePageSize; // A single core may be less than the full slab (core_slab_size), since we // do not touch every page within the slab. EXPECT_GT(r.resident_size, 0); - EXPECT_LE(r.resident_size, upper_bound) << count_cores(); + EXPECT_LE(r.resident_size, upper_bound) + << count_cores() << " " << core_slab_size << " " << kHugePageSize; // This test is much more sensitive to implementation details of the per-CPU // cache. It may need to be updated from time to time. These numbers were // calculated by MADV_NOHUGEPAGE'ing the memory used for the slab and // measuring the resident size. - // - // TODO(ckennelly): Allow CPUCache::Activate to accept a specific arena - // allocator, so we can MADV_NOHUGEPAGE the backing store in testing for - // more precise measurements. - switch (CPUCache::kPerCpuShift) { - case 12: + switch (shift_bounds.max_shift) { + case 13: EXPECT_GE(r.resident_size, 4096); break; - case 18: - EXPECT_GE(r.resident_size, 110592); + case 19: + EXPECT_GE(r.resident_size, 8192); break; default: ASSUME(false); break; - }; + } // Read stats from the CPU caches. This should not impact resident_size. const size_t max_cpu_cache_size = Parameters::max_per_cpu_cache_size(); @@ -151,12 +524,12 @@ TEST(CpuCacheTest, Metadata) { cache.Capacity(cpu)); } - for (int cl = 0; cl < kNumClasses; ++cl) { - // This is sensitive to the current growth policies of CPUCache. It may + for (int size_class = 1; size_class < kNumClasses; ++size_class) { + // This is sensitive to the current growth policies of CpuCache. It may // require updating from time-to-time. - EXPECT_EQ(cache.TotalObjectsOfClass(cl), - (cl == kSizeClass ? num_to_move - 1 : 0)) - << cl; + EXPECT_EQ(cache.TotalObjectsOfClass(size_class), + (size_class == kSizeClass ? num_to_move - 1 : 0)) + << size_class; } EXPECT_EQ(cache.TotalUsedBytes(), total_used_bytes); @@ -166,17 +539,11 @@ TEST(CpuCacheTest, Metadata) { EXPECT_LE(post_stats.resident_size, upper_bound) << count_cores(); // Confirm stats are unchanged. EXPECT_EQ(r.resident_size, post_stats.resident_size); - } else { - EXPECT_EQ(r.resident_size, r.virtual_size); - } - - // Tear down. - // - // TODO(ckennelly): We're interacting with the real TransferCache. - cache.Deallocate(ptr, kSizeClass); - for (int i = 0; i < num_cpus; i++) { - cache.Reclaim(i); + // Tear down. 
+ cache.Deallocate(ptr, kSizeClass); + cache.Deactivate(); + break; } } @@ -185,30 +552,25 @@ TEST(CpuCacheTest, CacheMissStats) { return; } - const int num_cpus = absl::base_internal::NumCPUs(); + const int num_cpus = NumCPUs(); - CPUCache& cache = Static::cpu_cache(); - // Since this test allocates memory, avoid activating the real fast path to - // minimize allocations against the per-CPU cache. - cache.Activate(CPUCache::ActivationMode::FastPathOffTestOnly); + CpuCache cache; + cache.Activate(); // The number of underflows and overflows must be zero for all the caches. for (int cpu = 0; cpu < num_cpus; ++cpu) { - CPUCache::CpuCacheMissStats total_misses = + CpuCache::CpuCacheMissStats total_misses = cache.GetTotalCacheMissStats(cpu); - CPUCache::CpuCacheMissStats interval_misses = - cache.GetIntervalCacheMissStats(cpu); + CpuCache::CpuCacheMissStats shuffle_misses = + cache.GetIntervalCacheMissStats(cpu, MissCount::kShuffle); EXPECT_EQ(total_misses.underflows, 0); EXPECT_EQ(total_misses.overflows, 0); - EXPECT_EQ(interval_misses.underflows, 0); - EXPECT_EQ(interval_misses.overflows, 0); + EXPECT_EQ(shuffle_misses.underflows, 0); + EXPECT_EQ(shuffle_misses.overflows, 0); } int allowed_cpu_id; - const size_t kSizeClass = 3; - const size_t virtual_cpu_id_offset = subtle::percpu::UsingFlatVirtualCpus() - ? offsetof(kernel_rseq, vcpu_id) - : offsetof(kernel_rseq, cpu_id); + const size_t kSizeClass = 2; void* ptr; { // Restrict this thread to a single core while allocating and processing the @@ -219,73 +581,79 @@ TEST(CpuCacheTest, CacheMissStats) { // pages to be faulted for those cores, leading to test flakiness. tcmalloc_internal::ScopedAffinityMask mask( tcmalloc_internal::AllowedCpus()[0]); - allowed_cpu_id = - subtle::percpu::GetCurrentVirtualCpuUnsafe(virtual_cpu_id_offset); + allowed_cpu_id = subtle::percpu::TcmallocTest::VirtualCpuSynchronize(); - ptr = cache.Allocate(kSizeClass); + ptr = cache.Allocate(kSizeClass); if (mask.Tampered() || allowed_cpu_id != - subtle::percpu::GetCurrentVirtualCpuUnsafe(virtual_cpu_id_offset)) { + subtle::percpu::TcmallocTest::VirtualCpuSynchronize()) { return; } } for (int cpu = 0; cpu < num_cpus; ++cpu) { - CPUCache::CpuCacheMissStats total_misses = + CpuCache::CpuCacheMissStats total_misses = cache.GetTotalCacheMissStats(cpu); - CPUCache::CpuCacheMissStats interval_misses = - cache.GetIntervalCacheMissStats(cpu); + CpuCache::CpuCacheMissStats shuffle_misses = + cache.GetIntervalCacheMissStats(cpu, MissCount::kShuffle); if (cpu == allowed_cpu_id) { EXPECT_EQ(total_misses.underflows, 1); - EXPECT_EQ(interval_misses.underflows, 1); + EXPECT_EQ(shuffle_misses.underflows, 1); } else { EXPECT_EQ(total_misses.underflows, 0); - EXPECT_EQ(interval_misses.underflows, 0); + EXPECT_EQ(shuffle_misses.underflows, 0); } EXPECT_EQ(total_misses.overflows, 0); - EXPECT_EQ(interval_misses.overflows, 0); + EXPECT_EQ(shuffle_misses.overflows, 0); } // Tear down. - // - // TODO(ckennelly): We're interacting with the real TransferCache. cache.Deallocate(ptr, kSizeClass); + cache.Deactivate(); +} - for (int i = 0; i < num_cpus; i++) { - cache.Reclaim(i); +static void ResizeSizeClasses(CpuCache& cache, const std::atomic& stop) { + if (!subtle::percpu::IsFast()) { + return; + } + + // Wake up every 10ms to resize size classes. Let miss stats acummulate over + // those 10ms. 
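+  // The stress tests below run this helper on a dedicated thread alongside the
+  // allocating StressThread workers.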
+ while (!stop.load(std::memory_order_acquire)) { + cache.ResizeSizeClasses(); + absl::SleepFor(absl::Milliseconds(10)); } } -static void ShuffleThread(const std::atomic& stop) { +static void ShuffleThread(CpuCache& cache, const std::atomic& stop) { if (!subtle::percpu::IsFast()) { return; } - CPUCache& cache = Static::cpu_cache(); // Wake up every 10ms to shuffle the caches so that we can allow misses to // accumulate during that interval - while (!stop) { + while (!stop.load(std::memory_order_acquire)) { cache.ShuffleCpuCaches(); absl::SleepFor(absl::Milliseconds(10)); } } -static void StressThread(size_t thread_id, const std::atomic& stop) { +static void StressThread(CpuCache& cache, size_t thread_id, + const std::atomic& stop) { if (!subtle::percpu::IsFast()) { return; } - CPUCache& cache = Static::cpu_cache(); std::vector> blocks; - absl::BitGen rnd; - while (!stop) { + absl::InsecureBitGen rnd; + while (!stop.load(std::memory_order_acquire)) { const int what = absl::Uniform(rnd, 0, 2); if (what) { // Allocate an object for a class - size_t cl = absl::Uniform(rnd, 1, kStressSlabs + 1); - void* ptr = cache.Allocate(cl); - blocks.emplace_back(std::make_pair(cl, ptr)); + size_t size_class = absl::Uniform(rnd, 1, 3); + void* ptr = cache.Allocate(size_class); + blocks.emplace_back(std::make_pair(size_class, ptr)); } else { // Deallocate an object for a class if (!blocks.empty()) { @@ -301,25 +669,247 @@ static void StressThread(size_t thread_id, const std::atomic& stop) { } } +void AllocateThenDeallocate(CpuCache& cache, int cpu, size_t size_class, + int ops) { + std::vector objects; + ScopedFakeCpuId fake_cpu_id(cpu); + for (int i = 0; i < ops; ++i) { + void* ptr = cache.Allocate(size_class); + objects.push_back(ptr); + } + for (auto* ptr : objects) { + cache.Deallocate(ptr, size_class); + } + objects.clear(); +} + +// In this test, we check if we can resize size classes based on the number of +// misses they encounter. First, we exhaust cache capacity by filling up +// larger size class as much as possible. Then, we try to allocate objects for +// the smaller size class. This should result in misses as we do not resize its +// capacity in the foreground when the feature is enabled. We confirm that it +// indeed encounters a capacity miss. We then resize size classes and allocate +// small size class objects again. We should be able to utilize an increased +// capacity for the size class to allocate and deallocate these objects. We also +// confirm that we do not lose the overall cpu cache capacity when we resize +// size class capacities. +TEST(CpuCacheTest, ResizeMaxCapacityTest) { + if (!subtle::percpu::IsFast()) { + return; + } + + CpuCache cache; + // Increase cache capacity so that we can exhaust max capacity for the size + // class before hitting the maximum cache limit. + const size_t max_cpu_cache_size = 128 << 10 << 10; + cache.SetCacheLimit(max_cpu_cache_size); + cache.Activate(); + + // Temporarily fake being on the given CPU. 
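+  // (AllocateThenDeallocate installs a ScopedFakeCpuId per batch; a separate
+  // fake id is installed below around ResizeSizeClassMaxCapacities.)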
+ constexpr int kCpuId = 0; + constexpr int kCpuId1 = 1; + + constexpr int kLargeClass = 2; + constexpr int kGrowthFactor = 5; + const int base_max_capacity = + cache.GetMaxCapacity(kLargeClass, CpuCachePeer::GetSlabShift(cache)); + + const size_t large_class_size = cache.forwarder().class_to_size(kLargeClass); + ASSERT_LT(large_class_size * base_max_capacity, cache.CacheLimit()); + + const size_t batch_size_large = + cache.forwarder().num_objects_to_move(kLargeClass); + + size_t ops = 0; + while (true) { + // We allocate and deallocate additional batch_size number of objects each + // time so that cpu cache suffers successive underflow and overflow, and it + // can grow. + ops += batch_size_large; + AllocateThenDeallocate(cache, kCpuId, kLargeClass, ops); + if (cache.GetCapacityOfSizeClass(kCpuId, kLargeClass) == + base_max_capacity) { + break; + } + } + + size_t interval_misses = cache.GetIntervalSizeClassMisses( + kCpuId, kLargeClass, PerClassMissType::kMaxCapacityTotal, + PerClassMissType::kMaxCapacityResize); + EXPECT_GT(interval_misses, 0); + EXPECT_EQ(cache.GetCapacityOfSizeClass(kCpuId, kLargeClass), + base_max_capacity); + + AllocateThenDeallocate(cache, kCpuId, kLargeClass, ops); + EXPECT_GT(cache.GetIntervalSizeClassMisses( + kCpuId, kLargeClass, PerClassMissType::kMaxCapacityTotal, + PerClassMissType::kMaxCapacityResize), + 0); + + { + ScopedFakeCpuId fake_cpu_id_1(kCpuId1); + cache.ResizeSizeClassMaxCapacities(); + } + + const int resized_max_capacity = + cache.GetMaxCapacity(kLargeClass, CpuCachePeer::GetSlabShift(cache)); + EXPECT_EQ(resized_max_capacity, + base_max_capacity + kGrowthFactor * batch_size_large); + + interval_misses = cache.GetIntervalSizeClassMisses( + kCpuId, kLargeClass, PerClassMissType::kMaxCapacityTotal, + PerClassMissType::kMaxCapacityResize); + EXPECT_EQ(interval_misses, 0); + + ops = 0; + while (true) { + // We allocate and deallocate additional batch_size number of objects each + // time so that cpu cache suffers successive underflow and overflow, and it + // can grow. + ops += batch_size_large; + AllocateThenDeallocate(cache, kCpuId, kLargeClass, ops); + if (cache.GetCapacityOfSizeClass(kCpuId, kLargeClass) == + base_max_capacity) { + break; + } + } + for (int i = 0; i < kGrowthFactor; ++i) { + ops += batch_size_large; + AllocateThenDeallocate(cache, kCpuId, kLargeClass, ops); + } + EXPECT_EQ(cache.GetCapacityOfSizeClass(kCpuId, kLargeClass), + resized_max_capacity); + + // Reclaim caches. + cache.Deactivate(); +} + +static void ResizeMaxCapacities(CpuCache& cache, + const std::atomic& stop) { + if (!subtle::percpu::IsFast()) { + return; + } + + // Wake up every 10ms to resize size classes. Let miss stats acummulate over + // those 10ms. 
+ while (!stop.load(std::memory_order_acquire)) { + cache.ResizeSizeClassMaxCapacities(); + absl::SleepFor(absl::Milliseconds(10)); + } +} + +TEST(CpuCacheTest, StressMaxCapacityResize) { + if (!subtle::percpu::IsFast()) { + return; + } + + CpuCache cache; + cache.Activate(); + + std::vector threads; + std::thread resize_thread; + const int n_threads = NumCPUs(); + std::atomic stop(false); + + size_t old_max_capacity = 0; + size_t new_max_capacity = 0; + for (int size_class = 0; size_class < kNumClasses; ++size_class) { + old_max_capacity += + cache.GetMaxCapacity(size_class, CpuCachePeer::GetSlabShift(cache)); + } + + for (size_t t = 0; t < n_threads; ++t) { + threads.push_back( + std::thread(StressThread, std::ref(cache), t, std::ref(stop))); + } + resize_thread = + std::thread(ResizeMaxCapacities, std::ref(cache), std::ref(stop)); + + absl::SleepFor(absl::Seconds(10)); + stop = true; + for (auto& t : threads) { + t.join(); + } + resize_thread.join(); + + // Check that the total capacity is preserved after the stress test. + size_t capacity = 0; + const int num_cpus = NumCPUs(); + const size_t kTotalCapacity = num_cpus * Parameters::max_per_cpu_cache_size(); + for (int cpu = 0; cpu < num_cpus; ++cpu) { + EXPECT_EQ(cache.Allocated(cpu) + cache.Unallocated(cpu), + cache.Capacity(cpu)); + capacity += cache.Capacity(cpu); + } + for (int size_class = 0; size_class < kNumClasses; ++size_class) { + new_max_capacity += + cache.GetMaxCapacity(size_class, CpuCachePeer::GetSlabShift(cache)); + } + EXPECT_EQ(new_max_capacity, old_max_capacity); + + EXPECT_EQ(capacity, kTotalCapacity); + cache.Deactivate(); +} + +TEST(CpuCacheTest, StressSizeClassResize) { + if (!subtle::percpu::IsFast()) { + return; + } + + CpuCache cache; + cache.Activate(); + + std::vector threads; + std::thread resize_thread; + const int n_threads = NumCPUs(); + std::atomic stop(false); + + for (size_t t = 0; t < n_threads; ++t) { + threads.push_back( + std::thread(StressThread, std::ref(cache), t, std::ref(stop))); + } + resize_thread = + std::thread(ResizeSizeClasses, std::ref(cache), std::ref(stop)); + + absl::SleepFor(absl::Seconds(5)); + stop = true; + for (auto& t : threads) { + t.join(); + } + resize_thread.join(); + + // Check that the total capacity is preserved after the stress test. + size_t capacity = 0; + const int num_cpus = NumCPUs(); + const size_t kTotalCapacity = num_cpus * Parameters::max_per_cpu_cache_size(); + for (int cpu = 0; cpu < num_cpus; ++cpu) { + EXPECT_EQ(cache.Allocated(cpu) + cache.Unallocated(cpu), + cache.Capacity(cpu)); + capacity += cache.Capacity(cpu); + } + EXPECT_EQ(capacity, kTotalCapacity); + + cache.Deactivate(); +} + TEST(CpuCacheTest, StealCpuCache) { if (!subtle::percpu::IsFast()) { return; } - CPUCache& cache = Static::cpu_cache(); - // Since this test allocates memory, avoid activating the real fast path to - // minimize allocations against the per-CPU cache. 
- cache.Activate(CPUCache::ActivationMode::FastPathOffTestOnly); + CpuCache cache; + cache.Activate(); std::vector threads; std::thread shuffle_thread; - const int n_threads = absl::base_internal::NumCPUs(); + const int n_threads = NumCPUs(); std::atomic stop(false); for (size_t t = 0; t < n_threads; ++t) { - threads.push_back(std::thread(StressThread, t, std::ref(stop))); + threads.push_back( + std::thread(StressThread, std::ref(cache), t, std::ref(stop))); } - shuffle_thread = std::thread(ShuffleThread, std::ref(stop)); + shuffle_thread = std::thread(ShuffleThread, std::ref(cache), std::ref(stop)); absl::SleepFor(absl::Seconds(5)); stop = true; @@ -330,7 +920,7 @@ TEST(CpuCacheTest, StealCpuCache) { // Check that the total capacity is preserved after the shuffle. size_t capacity = 0; - const int num_cpus = absl::base_internal::NumCPUs(); + const int num_cpus = NumCPUs(); const size_t kTotalCapacity = num_cpus * Parameters::max_per_cpu_cache_size(); for (int cpu = 0; cpu < num_cpus; ++cpu) { EXPECT_EQ(cache.Allocated(cpu) + cache.Unallocated(cpu), @@ -339,50 +929,225 @@ TEST(CpuCacheTest, StealCpuCache) { } EXPECT_EQ(capacity, kTotalCapacity); - for (int cpu = 0; cpu < num_cpus; ++cpu) { - cache.Reclaim(cpu); + cache.Deactivate(); +} + +// Test that when dynamic slab is enabled, nothing goes horribly wrong and that +// arena non-resident bytes increases as expected. +TEST(CpuCacheTest, DynamicSlab) { + if (!subtle::percpu::IsFast()) { + return; + } + CpuCache cache; + TestStaticForwarder& forwarder = cache.forwarder(); + + size_t prev_reported_nonresident_bytes = + forwarder.arena_reported_nonresident_bytes_; + EXPECT_EQ(forwarder.arena_reported_impending_bytes_, 0); + size_t prev_shrink_to_usage_limit_calls = + forwarder.shrink_to_usage_limit_calls_; + forwarder.dynamic_slab_enabled_ = true; + forwarder.dynamic_slab_ = DynamicSlab::kNoop; + + cache.Activate(); + + std::vector threads; + const int n_threads = NumCPUs(); + std::atomic stop(false); + + for (size_t t = 0; t < n_threads; ++t) { + threads.push_back( + std::thread(StressThread, std::ref(cache), t, std::ref(stop))); + } + + cpu_cache_internal::SlabShiftBounds shift_bounds = + cache.GetPerCpuSlabShiftBounds(); + int shift = shift_bounds.initial_shift; + + const auto repeat_dynamic_slab_ops = [&](DynamicSlab op, int shift_update, + int end_shift) { + const DynamicSlab ops[2] = {DynamicSlab::kNoop, op}; + int iters = end_shift > shift ? end_shift - shift : shift - end_shift; + iters += 2; // Test that we don't resize past end_shift. + for (int i = 0; i < iters; ++i) { + for (DynamicSlab dynamic_slab : ops) { + EXPECT_EQ(shift, CpuCachePeer::GetSlabShift(cache)); + absl::SleepFor(absl::Milliseconds(100)); + forwarder.dynamic_slab_ = dynamic_slab; + // If there were no misses in the current resize interval, then we may + // not resize so we ensure non-zero misses. 
+ CpuCachePeer::IncrementCacheMisses(cache); + cache.ResizeSlabIfNeeded(); + if (dynamic_slab != DynamicSlab::kNoop && shift != end_shift) { + EXPECT_LT(prev_reported_nonresident_bytes, + forwarder.arena_reported_nonresident_bytes_); + EXPECT_EQ(forwarder.shrink_to_usage_limit_calls_, + 1 + prev_shrink_to_usage_limit_calls); + shift += shift_update; + } else { + EXPECT_EQ(prev_reported_nonresident_bytes, + forwarder.arena_reported_nonresident_bytes_); + } + prev_reported_nonresident_bytes = + forwarder.arena_reported_nonresident_bytes_; + + EXPECT_EQ(forwarder.arena_reported_impending_bytes_, 0); + prev_shrink_to_usage_limit_calls = + forwarder.shrink_to_usage_limit_calls_; + } + } + }; + + // First grow the slab to max size, then shrink it to min size. + repeat_dynamic_slab_ops(DynamicSlab::kGrow, /*shift_update=*/1, + shift_bounds.max_shift); + repeat_dynamic_slab_ops(DynamicSlab::kShrink, /*shift_update=*/-1, + shift_bounds.initial_shift); + + stop = true; + for (auto& t : threads) { + t.join(); + } + + cache.Deactivate(); +} + +// In this test, we check if we can resize size classes based on the number of +// misses they encounter. First, we exhaust cache capacity by filling up +// larger size class as much as possible. Then, we try to allocate objects for +// the smaller size class. This should result in misses as we do not resize its +// capacity in the foreground when the feature is enabled. We confirm that it +// indeed encounters a capacity miss. When then resize size classes and allocate +// small size class objects again. We should be able to utilize an increased +// capacity for the size class to allocate and deallocate these objects. We also +// confirm that we do not lose the overall cpu cache capacity when we resize +// size class capacities. +TEST(CpuCacheTest, ResizeSizeClassesTest) { + if (!subtle::percpu::IsFast()) { + return; + } + + CpuCache cache; + // Reduce cache capacity so that it will see need in stealing and rebalancing. + const size_t max_cpu_cache_size = 128 << 10; + cache.SetCacheLimit(max_cpu_cache_size); + cache.Activate(); + + // Temporarily fake being on the given CPU. + constexpr int kCpuId = 0; + constexpr int kCpuId1 = 1; + + constexpr int kSmallClass = 1; + constexpr int kLargeClass = 2; + constexpr int kMaxCapacity = 2048; + + const size_t large_class_size = cache.forwarder().class_to_size(kLargeClass); + ASSERT_GT(large_class_size * kMaxCapacity, max_cpu_cache_size); + + const size_t batch_size_small = + cache.forwarder().num_objects_to_move(kSmallClass); + const size_t batch_size_large = + cache.forwarder().num_objects_to_move(kLargeClass); + + size_t ops = 0; + while (true) { + // We allocate and deallocate additional batch_size number of objects each + // time so that cpu cache suffers successive underflow and overflow, and it + // can grow. 
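+    // Stop once the whole per-CPU cache limit is allocated to kLargeClass (or
+    // once ops would exceed kMaxCapacity).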
+ ops += batch_size_large; + if (ops > kMaxCapacity || cache.Allocated(kCpuId) == max_cpu_cache_size) + break; + + AllocateThenDeallocate(cache, kCpuId, kLargeClass, ops); + } + + EXPECT_EQ(cache.Unallocated(kCpuId), 0); + EXPECT_EQ(cache.Allocated(kCpuId), max_cpu_cache_size); + EXPECT_EQ(cache.TotalObjectsOfClass(kSmallClass), 0); + + size_t interval_misses = cache.GetIntervalSizeClassMisses( + kCpuId, kSmallClass, PerClassMissType::kCapacityTotal, + PerClassMissType::kCapacityResize); + EXPECT_EQ(interval_misses, 0); + + AllocateThenDeallocate(cache, kCpuId, kSmallClass, batch_size_small); + + interval_misses = cache.GetIntervalSizeClassMisses( + kCpuId, kSmallClass, PerClassMissType::kCapacityTotal, + PerClassMissType::kCapacityResize); + EXPECT_EQ(interval_misses, 2 * batch_size_small); + + EXPECT_EQ(cache.Unallocated(kCpuId), 0); + EXPECT_EQ(cache.Allocated(kCpuId), max_cpu_cache_size); + EXPECT_EQ(cache.TotalObjectsOfClass(kSmallClass), 0); + + const int num_resizes = NumCPUs() / CpuCache::kNumCpuCachesToResize; + { + ScopedFakeCpuId fake_cpu_id_1(kCpuId1); + for (int i = 0; i < num_resizes; ++i) { + cache.ResizeSizeClasses(); + } } + + // Since we just resized size classes, we started a new interval. So, miss + // this interval should be zero. + interval_misses = cache.GetIntervalSizeClassMisses( + kCpuId, kSmallClass, PerClassMissType::kCapacityTotal, + PerClassMissType::kCapacityResize); + EXPECT_EQ(interval_misses, 0); + + EXPECT_EQ(cache.Unallocated(kCpuId), 0); + EXPECT_EQ(cache.Allocated(kCpuId), max_cpu_cache_size); + + AllocateThenDeallocate(cache, kCpuId, kSmallClass, batch_size_small); + interval_misses = cache.GetIntervalSizeClassMisses( + kCpuId, kSmallClass, PerClassMissType::kCapacityTotal, + PerClassMissType::kCapacityResize); + // Given all objects are allocated, cpu cache will still try to grow the + // capacity on an underflow and record one miss. + EXPECT_EQ(interval_misses, 1); + + EXPECT_EQ(cache.Unallocated(kCpuId), 0); + EXPECT_EQ(cache.Allocated(kCpuId), max_cpu_cache_size); + EXPECT_EQ(cache.TotalObjectsOfClass(kSmallClass), batch_size_small); + + // Reclaim caches. + cache.Deactivate(); } // Runs a single allocate and deallocate operation to warm up the cache. Once a // few objects are allocated in the cold cache, we can shuffle cpu caches to // steal that capacity from the cold cache to the hot cache. -static void ColdCacheOperations(int cpu_id, size_t size_class) { +static void ColdCacheOperations(CpuCache& cache, int cpu_id, + size_t size_class) { // Temporarily fake being on the given CPU. ScopedFakeCpuId fake_cpu_id(cpu_id); - - CPUCache& cache = Static::cpu_cache(); -#if TCMALLOC_PERCPU_USE_RSEQ - if (subtle::percpu::UsingFlatVirtualCpus()) { - subtle::percpu::__rseq_abi.vcpu_id = cpu_id; - } -#endif - - void* ptr = cache.Allocate(size_class); + void* ptr = cache.Allocate(size_class); cache.Deallocate(ptr, size_class); } // Runs multiple allocate and deallocate operation on the cpu cache to collect // misses. Once we collect enough misses on this cache, we can shuffle cpu // caches to steal capacity from colder caches to the hot cache. -static void HotCacheOperations(int cpu_id) { +static void HotCacheOperations(CpuCache& cache, int cpu_id) { + constexpr size_t kPtrs = 4096; + std::vector ptrs; + ptrs.resize(kPtrs); + // Temporarily fake being on the given CPU. 
ScopedFakeCpuId fake_cpu_id(cpu_id); - CPUCache& cache = Static::cpu_cache(); -#if TCMALLOC_PERCPU_USE_RSEQ - if (subtle::percpu::UsingFlatVirtualCpus()) { - subtle::percpu::__rseq_abi.vcpu_id = cpu_id; - } -#endif - // Allocate and deallocate objects to make sure we have enough misses on the // cache. This will make sure we have sufficient disparity in misses between // the hotter and colder cache, and that we may be able to steal bytes from // the colder cache. - for (size_t cl = 1; cl <= kStressSlabs; ++cl) { - void* ptr = cache.Allocate(cl); - cache.Deallocate(ptr, cl); + for (size_t size_class = 1; size_class <= 2; ++size_class) { + for (auto& ptr : ptrs) { + ptr = cache.Allocate(size_class); + } + for (void* ptr : ptrs) { + cache.Deallocate(ptr, size_class); + } } // We reclaim the cache to reset it so that we record underflows/overflows the @@ -391,39 +1156,251 @@ static void HotCacheOperations(int cpu_id) { cache.Reclaim(cpu_id); } +// Test that we are complying with the threshold when we grow the slab. +// When wider slab is enabled, we check if overflow/underflow ratio is above the +// threshold for individual cpu caches. +TEST(CpuCacheTest, DynamicSlabThreshold) { + if (!subtle::percpu::IsFast()) { + return; + } + + constexpr double kDynamicSlabGrowThreshold = 0.9; + CpuCache cache; + TestStaticForwarder& forwarder = cache.forwarder(); + forwarder.dynamic_slab_enabled_ = true; + forwarder.dynamic_slab_grow_threshold_ = kDynamicSlabGrowThreshold; + SizeMap size_map; + size_map.Init(size_map.CurrentClasses().classes); + forwarder.size_map_ = size_map; + + cache.Activate(); + + constexpr int kCpuId0 = 0; + constexpr int kCpuId1 = 1; + + // Accumulate overflows and underflows for kCpuId0. + HotCacheOperations(cache, kCpuId0); + CpuCache::CpuCacheMissStats interval_misses = + cache.GetIntervalCacheMissStats(kCpuId0, MissCount::kSlabResize); + // Make sure that overflows/underflows ratio is greater than the threshold + // for kCpuId0 cache. + ASSERT_GT(interval_misses.overflows, + interval_misses.underflows * kDynamicSlabGrowThreshold); + + // Perform allocations on kCpuId1 so that we accumulate only underflows. + // Reclaim after each allocation such that we have no objects in the cache + // for the next allocation. + for (int i = 0; i < 1024; ++i) { + ColdCacheOperations(cache, kCpuId1, /*size_class=*/1); + cache.Reclaim(kCpuId1); + } + + // Total overflows/underflows ratio must be less than grow threshold now. + CpuCache::CpuCacheMissStats total_misses = + cache.GetIntervalCacheMissStats(kCpuId0, MissCount::kSlabResize); + total_misses += + cache.GetIntervalCacheMissStats(kCpuId1, MissCount::kSlabResize); + ASSERT_LT(total_misses.overflows, + total_misses.underflows * kDynamicSlabGrowThreshold); + + cpu_cache_internal::SlabShiftBounds shift_bounds = + cache.GetPerCpuSlabShiftBounds(); + const int shift = shift_bounds.initial_shift; + EXPECT_EQ(CpuCachePeer::GetSlabShift(cache), shift); + cache.ResizeSlabIfNeeded(); + + EXPECT_EQ(CpuCachePeer::GetSlabShift(cache), shift + 1); +} + +// Test that when dynamic slab parameters change, things still work. +TEST(CpuCacheTest, DynamicSlabParamsChange) { + if (!subtle::percpu::IsFast()) { + return; + } + int n_threads = NumCPUs(); +#ifdef UNDEFINED_BEHAVIOR_SANITIZER + // Prevent timeout issues by using fewer stress threads with UBSan. 
+ n_threads = std::min(n_threads, 2); +#endif + + SizeMap size_map; + size_map.Init(size_map.CurrentClasses().classes); + for (bool initially_enabled : {false, true}) { + for (DynamicSlab initial_dynamic_slab : + {DynamicSlab::kGrow, DynamicSlab::kShrink, DynamicSlab::kNoop}) { + CpuCache cache; + TestStaticForwarder& forwarder = cache.forwarder(); + forwarder.dynamic_slab_enabled_ = initially_enabled; + forwarder.dynamic_slab_ = initial_dynamic_slab; + forwarder.size_map_ = size_map; + + cache.Activate(); + + std::vector threads; + std::atomic stop(false); + + for (size_t t = 0; t < n_threads; ++t) { + threads.push_back( + std::thread(StressThread, std::ref(cache), t, std::ref(stop))); + } + + for (bool enabled : {false, true}) { + for (DynamicSlab dynamic_slab : + {DynamicSlab::kGrow, DynamicSlab::kShrink, DynamicSlab::kNoop}) { + absl::SleepFor(absl::Milliseconds(100)); + forwarder.dynamic_slab_enabled_ = enabled; + forwarder.dynamic_slab_ = dynamic_slab; + cache.ResizeSlabIfNeeded(); + } + } + stop = true; + for (auto& t : threads) { + t.join(); + } + + cache.Deactivate(); + } + } +} + +// Test that old slabs are madvised-away during max capacity resize even when +// memory is mlocked. +TEST(CpuCacheTest, MaxCapacityResizeFailedBytesMlocked) { + if (!subtle::percpu::IsFast()) { + return; + } + int n_threads = NumCPUs(); +#ifdef UNDEFINED_BEHAVIOR_SANITIZER + // Prevent timeout issues by using fewer stress threads with UBSan. + n_threads = std::min(n_threads, 2); +#endif + + int ret = mlockall(MCL_CURRENT | MCL_FUTURE); + ASSERT_EQ(ret, 0); + + CpuCache cache; + TestStaticForwarder& forwarder = cache.forwarder(); + forwarder.dynamic_slab_enabled_ = true; + cache.Activate(); + + SizeMap size_map; + size_map.Init(size_map.CurrentClasses().classes); + forwarder.size_map_ = size_map; + + std::vector threads; + std::atomic stop(false); + + for (size_t t = 0; t < n_threads; ++t) { + threads.push_back( + std::thread(StressThread, std::ref(cache), t, std::ref(stop))); + } + + for (int i = 0; i < 10; ++i) { + absl::SleepFor(absl::Milliseconds(100)); + cache.ResizeSizeClassMaxCapacities(); + } + stop = true; + for (auto& t : threads) { + t.join(); + } + int failed_bytes = cache.GetDynamicSlabFailedBytes(); + EXPECT_EQ(failed_bytes, 0); + + ret = munlockall(); + ASSERT_EQ(ret, 0); + + cache.Deactivate(); +} + +// Test that old slabs are madvised-away during slab resize even when memory is +// mlocked. +TEST(CpuCacheTest, SlabResizeFailedBytesMlocked) { + if (!subtle::percpu::IsFast()) { + return; + } + int n_threads = NumCPUs(); +#ifdef UNDEFINED_BEHAVIOR_SANITIZER + // Prevent timeout issues by using fewer stress threads with UBSan. 
+ n_threads = std::min(n_threads, 2); +#endif + + int ret = mlockall(MCL_CURRENT | MCL_FUTURE); + ASSERT_EQ(ret, 0); + + CpuCache cache; + TestStaticForwarder& forwarder = cache.forwarder(); + forwarder.dynamic_slab_enabled_ = true; + cache.Activate(); + + SizeMap size_map; + size_map.Init(size_map.CurrentClasses().classes); + forwarder.size_map_ = size_map; + + std::vector threads; + std::atomic stop(false); + + for (size_t t = 0; t < n_threads; ++t) { + threads.push_back( + std::thread(StressThread, std::ref(cache), t, std::ref(stop))); + } + + for (DynamicSlab dynamic_slab : + {DynamicSlab::kGrow, DynamicSlab::kShrink, DynamicSlab::kNoop}) { + absl::SleepFor(absl::Milliseconds(100)); + forwarder.dynamic_slab_ = dynamic_slab; + cache.ResizeSlabIfNeeded(); + } + stop = true; + for (auto& t : threads) { + t.join(); + } + int failed_bytes = cache.GetDynamicSlabFailedBytes(); + EXPECT_EQ(failed_bytes, 0); + + ret = munlockall(); + ASSERT_EQ(ret, 0); + + cache.Deactivate(); +} + +TEST(CpuCacheTest, SlabUsage) { + // Note: we can't do ValidateSlabBytes on the test-cpu-cache because in that + // case, the slab only uses size classes 1 and 2. + CpuCachePeer::ValidateSlabBytes(tc_globals.cpu_cache()); +} + TEST(CpuCacheTest, ColdHotCacheShuffleTest) { if (!subtle::percpu::IsFast()) { return; } - CPUCache& cache = Static::cpu_cache(); - // Since this test allocates memory, avoid activating the real fast path to - // minimize allocations against the per-CPU cache. - cache.Activate(CPUCache::ActivationMode::FastPathOffTestOnly); + CpuCache cache; + // Reduce cache capacity so that it will see need in stealing and rebalancing. + const size_t max_cpu_cache_size = 1 << 10; + cache.SetCacheLimit(max_cpu_cache_size); + cache.Activate(); constexpr int hot_cpu_id = 0; constexpr int cold_cpu_id = 1; - const size_t max_cpu_cache_size = Parameters::max_per_cpu_cache_size(); - // Empirical tests suggest that we should be able to steal all the steal-able // capacity from colder cache in < 100 tries. Keeping enough buffer here to // make sure we steal from colder cache, while at the same time avoid timeouts // if something goes bad. constexpr int kMaxStealTries = 1000; - // We allocate and deallocate a single highest cl object. + // We allocate and deallocate a single highest size_class object. // This makes sure that we have a single large object in the cache that faster // cache can steal. - const size_t size_class = kNumClasses - 1; + const size_t size_class = 2; for (int num_tries = 0; num_tries < kMaxStealTries && cache.Capacity(cold_cpu_id) > - CPUCache::kCacheCapacityThreshold * max_cpu_cache_size; + CpuCache::kCacheCapacityThreshold * max_cpu_cache_size; ++num_tries) { - ColdCacheOperations(cold_cpu_id, size_class); - HotCacheOperations(hot_cpu_id); + ColdCacheOperations(cache, cold_cpu_id, size_class); + HotCacheOperations(cache, hot_cpu_id); cache.ShuffleCpuCaches(); // Check that the capacity is preserved. @@ -439,9 +1416,10 @@ TEST(CpuCacheTest, ColdHotCacheShuffleTest) { // Check that we drained cold cache to the lower capacity limit. // We also keep some tolerance, up to the largest class size, below the lower // capacity threshold that we can drain cold cache to. 
+ EXPECT_LT(cold_cache_capacity, max_cpu_cache_size); EXPECT_GT(cold_cache_capacity, - CPUCache::kCacheCapacityThreshold * max_cpu_cache_size - - Static::sizemap().class_to_size(kNumClasses - 1)); + CpuCache::kCacheCapacityThreshold * max_cpu_cache_size - + cache.forwarder().class_to_size(size_class)); // Check that we have at least stolen some capacity. EXPECT_GT(hot_cache_capacity, max_cpu_cache_size); @@ -450,8 +1428,8 @@ TEST(CpuCacheTest, ColdHotCacheShuffleTest) { // has been reached for the cold cache. A few more shuffles should not // change the capacity of either of the caches. for (int i = 0; i < 100; ++i) { - ColdCacheOperations(cold_cpu_id, size_class); - HotCacheOperations(hot_cpu_id); + ColdCacheOperations(cache, cold_cpu_id, size_class); + HotCacheOperations(cache, hot_cpu_id); cache.ShuffleCpuCaches(); // Check that the capacity is preserved. @@ -462,7 +1440,8 @@ TEST(CpuCacheTest, ColdHotCacheShuffleTest) { } // Check that the capacity of cold and hot caches is same as before. - EXPECT_EQ(cache.Capacity(cold_cpu_id), cold_cache_capacity); + EXPECT_EQ(cache.Capacity(cold_cpu_id), cold_cache_capacity) + << CpuCache::kCacheCapacityThreshold * max_cpu_cache_size; EXPECT_EQ(cache.Capacity(hot_cpu_id), hot_cache_capacity); // Make sure that the total capacity is preserved. @@ -470,10 +1449,7 @@ TEST(CpuCacheTest, ColdHotCacheShuffleTest) { 2 * max_cpu_cache_size); // Reclaim caches. - const int num_cpus = absl::base_internal::NumCPUs(); - for (int cpu = 0; cpu < num_cpus; ++cpu) { - cache.Reclaim(cpu); - } + cache.Deactivate(); } TEST(CpuCacheTest, ReclaimCpuCache) { @@ -481,18 +1457,16 @@ TEST(CpuCacheTest, ReclaimCpuCache) { return; } - CPUCache& cache = Static::cpu_cache(); - // Since this test allocates memory, avoid activating the real fast path to - // minimize allocations against the per-CPU cache. - cache.Activate(CPUCache::ActivationMode::FastPathOffTestOnly); + CpuCache cache; + cache.Activate(); // The number of underflows and overflows must be zero for all the caches. - const int num_cpus = absl::base_internal::NumCPUs(); + const int num_cpus = NumCPUs(); for (int cpu = 0; cpu < num_cpus; ++cpu) { SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu)); // Check that reclaim miss metrics are reset. - CPUCache::CpuCacheMissStats reclaim_misses = - cache.GetReclaimCacheMissStats(cpu); + CpuCache::CpuCacheMissStats reclaim_misses = + cache.GetAndUpdateIntervalCacheMissStats(cpu, MissCount::kReclaim); EXPECT_EQ(reclaim_misses.underflows, 0); EXPECT_EQ(reclaim_misses.overflows, 0); @@ -504,25 +1478,26 @@ TEST(CpuCacheTest, ReclaimCpuCache) { EXPECT_EQ(used_bytes, 0); } - const size_t kSizeClass = 3; + const size_t kSizeClass = 2; // We chose a different size class here so that we can populate different size // class slots and change the number of bytes used by the busy cache later in // our test. - const size_t kBusySizeClass = 4; + const size_t kBusySizeClass = 1; + ASSERT_NE(kSizeClass, kBusySizeClass); // Perform some operations to warm up caches and make sure they are populated. 
for (int cpu = 0; cpu < num_cpus; ++cpu) { SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu)); - ColdCacheOperations(cpu, kSizeClass); + ColdCacheOperations(cache, cpu, kSizeClass); EXPECT_TRUE(cache.HasPopulated(cpu)); } for (int cpu = 0; cpu < num_cpus; ++cpu) { SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu)); - CPUCache::CpuCacheMissStats misses_last_interval = - cache.GetReclaimCacheMissStats(cpu); - CPUCache::CpuCacheMissStats total_misses = + CpuCache::CpuCacheMissStats misses_last_interval = + cache.GetAndUpdateIntervalCacheMissStats(cpu, MissCount::kReclaim); + CpuCache::CpuCacheMissStats total_misses = cache.GetTotalCacheMissStats(cpu); // Misses since the last reclaim (i.e. since we initialized the caches) @@ -543,8 +1518,8 @@ TEST(CpuCacheTest, ReclaimCpuCache) { // As no cache operations were performed since the last reclaim // operation, the reclaim misses captured during the last interval (i.e. // since the last reclaim) should be zero. - CPUCache::CpuCacheMissStats reclaim_misses = - cache.GetReclaimCacheMissStats(cpu); + CpuCache::CpuCacheMissStats reclaim_misses = + cache.GetAndUpdateIntervalCacheMissStats(cpu, MissCount::kReclaim); EXPECT_EQ(reclaim_misses.underflows, 0); EXPECT_EQ(reclaim_misses.overflows, 0); @@ -557,10 +1532,9 @@ TEST(CpuCacheTest, ReclaimCpuCache) { } absl::BitGen rnd; - const int busy_cpu = - absl::Uniform(rnd, 0, absl::base_internal::NumCPUs()); + const int busy_cpu = absl::Uniform(rnd, 0, NumCPUs()); const size_t prev_used = cache.UsedBytes(busy_cpu); - ColdCacheOperations(busy_cpu, kBusySizeClass); + ColdCacheOperations(cache, busy_cpu, kBusySizeClass); EXPECT_GT(cache.UsedBytes(busy_cpu), prev_used); // Try reclaiming caches again. @@ -592,6 +1566,382 @@ TEST(CpuCacheTest, ReclaimCpuCache) { EXPECT_EQ(cache.UsedBytes(cpu), 0); EXPECT_EQ(cache.GetNumReclaims(cpu), 1); } + + cache.Deactivate(); +} + +TEST(CpuCacheTest, SizeClassCapacityTest) { + if (!subtle::percpu::IsFast()) { + return; + } + + CpuCache cache; + cache.Activate(); + + const int num_cpus = NumCPUs(); + constexpr size_t kSizeClass = 2; + const size_t batch_size = cache.forwarder().num_objects_to_move(kSizeClass); + + // Perform some operations to warm up caches and make sure they are populated. + for (int cpu = 0; cpu < num_cpus; ++cpu) { + SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu)); + ColdCacheOperations(cache, cpu, kSizeClass); + EXPECT_TRUE(cache.HasPopulated(cpu)); + } + + for (int size_class = 1; size_class < kNumClasses; ++size_class) { + SCOPED_TRACE(absl::StrFormat("Failed size_class: %d", size_class)); + CpuCache::SizeClassCapacityStats capacity_stats = + cache.GetSizeClassCapacityStats(size_class); + if (size_class == kSizeClass) { + // As all the caches are populated and each cache stores batch_size number + // of kSizeClass objects, all the stats below should be equal to + // batch_size. + EXPECT_EQ(capacity_stats.min_capacity, batch_size); + EXPECT_DOUBLE_EQ(capacity_stats.avg_capacity, batch_size); + EXPECT_EQ(capacity_stats.max_capacity, batch_size); + } else { + // Capacity stats for other size classes should be zero. + EXPECT_EQ(capacity_stats.min_capacity, 0); + EXPECT_DOUBLE_EQ(capacity_stats.avg_capacity, 0); + EXPECT_EQ(capacity_stats.max_capacity, 0); + } + } + + // Next, we reclaim per-cpu caches, one at a time, to drain all the kSizeClass + // objects cached by them. As we progressively reclaim per-cpu caches, the + // capacity for kSizeClass averaged over all CPUs should also drop linearly. 
+ // We reclaim all but one per-cpu caches (we reclaim last per-cpu cache + // outside the loop so that we can check for max_capacity=0 separately). + for (int cpu = 0; cpu < num_cpus - 1; ++cpu) { + SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu)); + cache.Reclaim(cpu); + + CpuCache::SizeClassCapacityStats capacity_stats = + cache.GetSizeClassCapacityStats(kSizeClass); + // Reclaiming even one per-cpu cache should set min_capacity to zero. + EXPECT_EQ(capacity_stats.min_capacity, 0); + + // (cpu+1) number of caches have been reclaimed. So, (num_cpus-cpu-1) number + // of caches are currently populated, with each cache storing batch_size + // number of kSizeClass objects. + double expected_avg = + static_cast(batch_size * (num_cpus - cpu - 1)) / num_cpus; + EXPECT_DOUBLE_EQ(capacity_stats.avg_capacity, expected_avg); + + // At least one per-cpu cache exists that caches batch_size number of + // kSizeClass objects. + EXPECT_EQ(capacity_stats.max_capacity, batch_size); + } + + // We finally reclaim last per-cpu cache. All the reported capacity stats + // should drop to zero as none of the caches hold any objects. + cache.Reclaim(num_cpus - 1); + CpuCache::SizeClassCapacityStats capacity_stats = + cache.GetSizeClassCapacityStats(kSizeClass); + EXPECT_EQ(capacity_stats.min_capacity, 0); + EXPECT_DOUBLE_EQ(capacity_stats.avg_capacity, 0); + EXPECT_EQ(capacity_stats.max_capacity, 0); + + cache.Deactivate(); +} + +class CpuCacheEnvironment { + public: + CpuCacheEnvironment() : num_cpus_(NumCPUs()) {} + ~CpuCacheEnvironment() { cache_.Deactivate(); } + + void Activate() { + cache_.Activate(); + ready_.store(true, std::memory_order_release); + } + + void RandomlyPoke(absl::BitGenRef rng) { + // We run a random operation based on our random number generated. + const int coin = absl::Uniform(rng, 0, 18); + const bool ready = ready_.load(std::memory_order_acquire); + + // Pick a random CPU and size class. We will likely need one or both. + const int cpu = absl::Uniform(rng, 0, num_cpus_); + const int size_class = absl::Uniform(rng, 1, 3); + + if (!ready || coin < 1) { + benchmark::DoNotOptimize(cache_.CacheLimit()); + return; + } + + // Methods beyond this point require the CpuCache to be activated. + + switch (coin) { + case 1: { + // Allocate, Deallocate + void* ptr = cache_.Allocate(size_class); + EXPECT_NE(ptr, nullptr); + // Touch *ptr to allow sanitizers to see an access (and a potential + // race, if synchronization is insufficient). 
+ *static_cast(ptr) = 1; + benchmark::DoNotOptimize(*static_cast(ptr)); + + cache_.Deallocate(ptr, size_class); + break; + } + case 2: + benchmark::DoNotOptimize(cache_.TotalUsedBytes()); + break; + case 3: + benchmark::DoNotOptimize(cache_.UsedBytes(cpu)); + break; + case 4: + benchmark::DoNotOptimize(cache_.Allocated(cpu)); + break; + case 5: + benchmark::DoNotOptimize(cache_.HasPopulated(cpu)); + break; + case 6: { + auto metadata = cache_.MetadataMemoryUsage(); + EXPECT_GE(metadata.virtual_size, metadata.resident_size); + EXPECT_GT(metadata.virtual_size, 0); + break; + } + case 7: + benchmark::DoNotOptimize(cache_.TotalObjectsOfClass(size_class)); + break; + case 8: + benchmark::DoNotOptimize(cache_.Unallocated(cpu)); + break; + case 9: + benchmark::DoNotOptimize(cache_.Capacity(cpu)); + break; + case 10: { + absl::MutexLock lock(&background_mutex_); + cache_.ShuffleCpuCaches(); + break; + } + case 11: { + absl::MutexLock lock(&background_mutex_); + cache_.TryReclaimingCaches(); + break; + } + case 12: { + absl::MutexLock lock(&background_mutex_); + cache_.Reclaim(cpu); + break; + } + case 13: + benchmark::DoNotOptimize(cache_.GetNumReclaims(cpu)); + break; + case 14: { + const auto total_misses = cache_.GetTotalCacheMissStats(cpu); + const auto reclaim_misses = + cache_.GetAndUpdateIntervalCacheMissStats(cpu, MissCount::kReclaim); + const auto shuffle_misses = + cache_.GetIntervalCacheMissStats(cpu, MissCount::kShuffle); + + benchmark::DoNotOptimize(total_misses); + benchmark::DoNotOptimize(reclaim_misses); + benchmark::DoNotOptimize(shuffle_misses); + break; + } + case 15: { + const auto stats = cache_.GetSizeClassCapacityStats(size_class); + EXPECT_GE(stats.max_capacity, stats.avg_capacity); + EXPECT_GE(stats.avg_capacity, stats.min_capacity); + break; + } + case 16: { + std::string out; + out.resize(128 << 10); + ANNOTATE_MEMORY_IS_UNINITIALIZED(out.data(), out.size()); + Printer p(out.data(), out.size()); + PbtxtRegion r(p, kTop); + + cache_.PrintInPbtxt(r); + + benchmark::DoNotOptimize(out.data()); + break; + } + case 17: { + std::string out; + out.resize(128 << 10); + ANNOTATE_MEMORY_IS_UNINITIALIZED(out.data(), out.size()); + Printer p(out.data(), out.size()); + + cache_.Print(p); + + benchmark::DoNotOptimize(out.data()); + break; + } + default: + GTEST_FAIL() << "Unexpected value " << coin; + break; + } + } + + CpuCache& cache() { return cache_; } + + int num_cpus() const { return num_cpus_; } + + private: + const int num_cpus_; + CpuCache cache_; + // Protects operations executed on the background thread in real life. + absl::Mutex background_mutex_; + std::atomic ready_{false}; +}; + +TEST(CpuCacheTest, Fuzz) { + if (!subtle::percpu::IsFast()) { + return; + } + + const int kThreads = 10; + struct ABSL_CACHELINE_ALIGNED ThreadState { + absl::BitGen rng; + }; + std::vector thread_state(kThreads); + + CpuCacheEnvironment env; + ThreadManager threads; + threads.Start(10, [&](int thread_id) { + // Ensure this thread has registered itself with the kernel to use + // restartable sequences. + ASSERT_TRUE(subtle::percpu::IsFast()); + env.RandomlyPoke(thread_state[thread_id].rng); + }); + + absl::SleepFor(absl::Seconds(0.1)); + env.Activate(); + absl::SleepFor(absl::Seconds(0.3)); + + threads.Stop(); + + // Inspect the CpuCache and validate invariants. + + // The number of caches * per-core limit should be equivalent to the bytes + // managed by the cache. 
+ size_t capacity = 0; + size_t allocated = 0; + size_t unallocated = 0; + for (int i = 0, n = env.num_cpus(); i < n; i++) { + capacity += env.cache().Capacity(i); + allocated += env.cache().Allocated(i); + unallocated += env.cache().Unallocated(i); + } + + EXPECT_EQ(allocated + unallocated, capacity); + EXPECT_EQ(env.num_cpus() * env.cache().CacheLimit(), capacity); + + // Log mallocz content for manual inspection. + std::string mallocz; + mallocz.resize(128 << 10); + Printer p(mallocz.data(), mallocz.size()); + env.cache().Print(p); + std::cout << mallocz; +} + +// TODO(b/179516472): Enable this test. +TEST(CpuCacheTest, DISABLED_ChangingSizes) { + if (!subtle::percpu::IsFast()) { + return; + } + + constexpr int kThreads = 10; + struct ABSL_CACHELINE_ALIGNED ThreadState { + absl::BitGen rng; + }; + std::vector thread_state(kThreads); + + CpuCacheEnvironment env; + ThreadManager threads; + const size_t initial_size = env.cache().CacheLimit(); + ASSERT_GT(initial_size, 0); + bool rseq_active_for_size_changing_thread = false; + int index = 0; + size_t last_cache_size = initial_size; + + env.Activate(); + + threads.Start(kThreads, [&](int thread_id) { + // Ensure this thread has registered itself with the kernel to use + // restartable sequences. + if (thread_id > 0) { + ASSERT_TRUE(subtle::percpu::IsFast()); + env.RandomlyPoke(thread_state[thread_id].rng); + return; + } + + // Alternative between having the thread register for rseq and not, to + // ensure that we can call SetCacheLimit with either precondition. + std::optional rseq; + if (rseq_active_for_size_changing_thread) { + ASSERT_TRUE(subtle::percpu::IsFast()); + } else { + rseq.emplace(); + } + rseq_active_for_size_changing_thread = + !rseq_active_for_size_changing_thread; + + // Vary the cache size up and down. Exclude 1. from the list so that we + // will always expect to see a nontrivial change after the threads stop + // work. + constexpr double kConversions[] = {0.25, 0.5, 0.75, 1.25, 1.5}; + size_t new_cache_size = initial_size * kConversions[index]; + index = (index + 1) % 5; + + env.cache().SetCacheLimit(new_cache_size); + last_cache_size = new_cache_size; + }); + + absl::SleepFor(absl::Seconds(0.5)); + + threads.Stop(); + + // Inspect the CpuCache and validate invariants. + + // The number of caches * per-core limit should be equivalent to the bytes + // managed by the cache. + size_t capacity = 0; + size_t allocated = 0; + size_t unallocated = 0; + for (int i = 0, n = env.num_cpus(); i < n; i++) { + capacity += env.cache().Capacity(i); + allocated += env.cache().Allocated(i); + unallocated += env.cache().Unallocated(i); + } + + EXPECT_EQ(allocated + unallocated, capacity); + EXPECT_EQ(env.num_cpus() * last_cache_size, capacity); +} + +TEST(CpuCacheTest, TargetOverflowRefillCount) { + auto F = cpu_cache_internal::TargetOverflowRefillCount; + // Args are: capacity, batch_length, successive. 
+ EXPECT_EQ(F(0, 8, 0), 1); + EXPECT_EQ(F(0, 8, 10), 1); + EXPECT_EQ(F(1, 8, 0), 1); + EXPECT_EQ(F(1, 8, 1), 1); + EXPECT_EQ(F(1, 8, 2), 1); + EXPECT_EQ(F(1, 8, 3), 2); + EXPECT_EQ(F(1, 8, 4), 2); + EXPECT_EQ(F(2, 8, 0), 2); + EXPECT_EQ(F(3, 8, 0), 3); + EXPECT_EQ(F(4, 8, 0), 3); + EXPECT_EQ(F(5, 8, 0), 4); + EXPECT_EQ(F(6, 8, 0), 4); + EXPECT_EQ(F(7, 8, 0), 5); + EXPECT_EQ(F(8, 8, 0), 5); + EXPECT_EQ(F(9, 8, 0), 6); + EXPECT_EQ(F(100, 8, 0), 8); + EXPECT_EQ(F(23, 8, 1), 13); + EXPECT_EQ(F(24, 8, 1), 13); + EXPECT_EQ(F(100, 8, 1), 16); + EXPECT_EQ(F(24, 8, 2), 13); + EXPECT_EQ(F(32, 8, 2), 17); + EXPECT_EQ(F(40, 8, 2), 21); + EXPECT_EQ(F(100, 8, 2), 32); + EXPECT_EQ(F(48, 8, 3), 25); + EXPECT_EQ(F(56, 8, 3), 29); + EXPECT_EQ(F(100, 8, 3), 51); } } // namespace diff --git a/contrib/libs/tcmalloc/tcmalloc/deallocation_profiler.cc b/contrib/libs/tcmalloc/tcmalloc/deallocation_profiler.cc new file mode 100644 index 000000000000..60539c4713f0 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/deallocation_profiler.cc @@ -0,0 +1,760 @@ +// Copyright 2022 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/deallocation_profiler.h" + +#include +#include // for std::lround +#include +#include // for uintptr_t +#include +#include +#include +#include +#include +#include +#include + +#include "absl/base/attributes.h" +#include "absl/base/const_init.h" +#include "absl/base/internal/low_level_alloc.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/internal/sysinfo.h" +#include "absl/base/macros.h" +#include "absl/container/flat_hash_map.h" +#include "absl/debugging/stacktrace.h" // for GetStackTrace +#include "absl/functional/function_ref.h" +#include "absl/hash/hash.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "tcmalloc/internal/allocation_guard.h" +#include "tcmalloc/internal/cache_topology.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/percpu.h" +#include "tcmalloc/internal/sampled_allocation.h" +#include "tcmalloc/internal_malloc_extension.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/static_vars.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace deallocationz { +namespace { +using ::absl::base_internal::SpinLock; +using tcmalloc_internal::AllocationGuardSpinLockHolder; + +// STL adaptor for an arena based allocator which provides the following: +// static void* Alloc::Allocate(size_t size); +// static void Alloc::Free(void* ptr, size_t size); +template +class AllocAdaptor final { + public: + using value_type = T; + + AllocAdaptor() {} + AllocAdaptor(const AllocAdaptor&) {} + + template + using rebind = AllocAdaptor; + + template + explicit AllocAdaptor(const AllocAdaptor&) {} + + T* allocate(size_t n) { + // Check if n is too big to allocate. 
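The assertion just below guards against n * sizeof(T) wrapping around in size_t: if the multiplication overflowed, dividing the product back by sizeof(T) no longer yields n. A standalone sketch of that check (MultiplyFits is an illustrative name, not part of the allocator):

#include <cstddef>
#include <cstdio>

// True when count * elem_size fits in size_t without wrapping.
bool MultiplyFits(std::size_t count, std::size_t elem_size) {
  const std::size_t bytes = count * elem_size;  // may wrap on overflow
  return elem_size == 0 || bytes / elem_size == count;
}

int main() {
  std::printf("%d\n", MultiplyFits(10, 8));                   // 1: fits
  std::printf("%d\n", MultiplyFits(~std::size_t{0} / 4, 8));  // 0: wrapped
}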
+ TC_ASSERT_EQ((n * sizeof(T)) / sizeof(T), n); + return static_cast(Alloc::Allocate(n * sizeof(T))); + } + void deallocate(T* p, size_t n) { Alloc::Free(p, n * sizeof(T)); } +}; + +const int64_t kMaxStackDepth = 64; + +// Stores stack traces and metadata for any allocation or deallocation +// encountered by the profiler. +struct DeallocationSampleRecord { + double weight = 0.0; + size_t requested_size = 0; + size_t requested_alignment = 0; + size_t allocated_size = 0; // size after sizeclass/page rounding + + int depth = 0; // Number of PC values stored in array below + void* stack[kMaxStackDepth]; + + // creation_time is used to capture the life_time of sampled allocations + absl::Time creation_time; + int cpu_id = -1; + int vcpu_id = -1; + int l3_id = -1; + int numa_id = -1; + pid_t thread_id = 0; + + template + friend H AbslHashValue(H h, const DeallocationSampleRecord& c) { + return H::combine(H::combine_contiguous(std::move(h), c.stack, c.depth), + c.depth, c.requested_size, c.requested_alignment, + c.allocated_size); + } + + bool operator==(const DeallocationSampleRecord& other) const { + if (depth != other.depth || requested_size != other.requested_size || + requested_alignment != other.requested_alignment || + allocated_size != other.allocated_size) { + return false; + } + return std::equal(stack, stack + depth, other.stack); + } +}; + +// Tracks whether an object was allocated/deallocated by the same CPU/thread. +struct CpuThreadMatchingStatus { + constexpr CpuThreadMatchingStatus(bool physical_cpu_matched, + bool virtual_cpu_matched, bool l3_matched, + bool numa_matched, bool thread_matched) + : physical_cpu_matched(physical_cpu_matched), + virtual_cpu_matched(virtual_cpu_matched), + l3_matched(l3_matched), + numa_matched(numa_matched), + thread_matched(thread_matched), + value((static_cast(physical_cpu_matched) << 4) | + (static_cast(virtual_cpu_matched) << 3) | + (static_cast(l3_matched) << 2) | + (static_cast(numa_matched) << 1) | + static_cast(thread_matched)) {} + bool physical_cpu_matched; + bool virtual_cpu_matched; + bool l3_matched; + bool numa_matched; + bool thread_matched; + int value; +}; + +struct RpcMatchingStatus { + static constexpr int ComputeValue(uint64_t alloc, uint64_t dealloc) { + if (alloc != 0 && dealloc != 0) { + return static_cast(alloc == dealloc); + } else { + return 2; + } + } + + constexpr RpcMatchingStatus(uint64_t alloc, uint64_t dealloc) + : value(ComputeValue(alloc, dealloc)) {} + + int value; +}; + +int ComputeIndex(CpuThreadMatchingStatus status, RpcMatchingStatus rpc_status) { + return status.value * 3 + rpc_status.value; +} + +int GetL3Id(int cpu_id) { + return cpu_id >= 0 + ? tcmalloc_internal::CacheTopology::Instance().GetL3FromCpuId( + cpu_id) + : -1; +} + +int GetNumaId(int cpu_id) { + return cpu_id >= 0 + ? 
tcmalloc_internal::tc_globals.numa_topology().GetCpuPartition( + cpu_id) + : -1; +} + +constexpr std::pair kAllCases[] = { + // clang-format off + {CpuThreadMatchingStatus(false, false, false, false, false), RpcMatchingStatus(0, 0)}, + {CpuThreadMatchingStatus(false, false, false, false, true), RpcMatchingStatus(0, 0)}, + {CpuThreadMatchingStatus(false, false, false, true, false), RpcMatchingStatus(0, 0)}, + {CpuThreadMatchingStatus(false, false, false, true, true), RpcMatchingStatus(0, 0)}, + + {CpuThreadMatchingStatus(false, false, false, false, false), RpcMatchingStatus(1, 2)}, + {CpuThreadMatchingStatus(false, false, false, false, true), RpcMatchingStatus(1, 2)}, + {CpuThreadMatchingStatus(false, false, false, true, false), RpcMatchingStatus(1, 2)}, + {CpuThreadMatchingStatus(false, false, false, true, true), RpcMatchingStatus(1, 2)}, + + {CpuThreadMatchingStatus(false, false, false, false, false), RpcMatchingStatus(1, 1)}, + {CpuThreadMatchingStatus(false, false, false, false, true), RpcMatchingStatus(1, 1)}, + {CpuThreadMatchingStatus(false, false, false, true, false), RpcMatchingStatus(1, 1)}, + {CpuThreadMatchingStatus(false, false, false, true, true), RpcMatchingStatus(1, 1)}, + + {CpuThreadMatchingStatus(false, false, true, false, false), RpcMatchingStatus(0, 0)}, + {CpuThreadMatchingStatus(false, false, true, false, true), RpcMatchingStatus(0, 0)}, + {CpuThreadMatchingStatus(false, false, true, true, false), RpcMatchingStatus(0, 0)}, + {CpuThreadMatchingStatus(false, false, true, true, true), RpcMatchingStatus(0, 0)}, + + {CpuThreadMatchingStatus(false, false, true, false, false), RpcMatchingStatus(1, 2)}, + {CpuThreadMatchingStatus(false, false, true, false, true), RpcMatchingStatus(1, 2)}, + {CpuThreadMatchingStatus(false, false, true, true, false), RpcMatchingStatus(1, 2)}, + {CpuThreadMatchingStatus(false, false, true, true, true), RpcMatchingStatus(1, 2)}, + + {CpuThreadMatchingStatus(false, false, true, false, false), RpcMatchingStatus(1, 1)}, + {CpuThreadMatchingStatus(false, false, true, false, true), RpcMatchingStatus(1, 1)}, + {CpuThreadMatchingStatus(false, false, true, true, false), RpcMatchingStatus(1, 1)}, + {CpuThreadMatchingStatus(false, false, true, true, true), RpcMatchingStatus(1, 1)}, + + {CpuThreadMatchingStatus(false, true, false, false, false), RpcMatchingStatus(0, 0)}, + {CpuThreadMatchingStatus(false, true, false, false, true), RpcMatchingStatus(0, 0)}, + {CpuThreadMatchingStatus(false, true, false, true, false), RpcMatchingStatus(0, 0)}, + {CpuThreadMatchingStatus(false, true, false, true, true), RpcMatchingStatus(0, 0)}, + + {CpuThreadMatchingStatus(false, true, false, false, false), RpcMatchingStatus(1, 2)}, + {CpuThreadMatchingStatus(false, true, false, false, true), RpcMatchingStatus(1, 2)}, + {CpuThreadMatchingStatus(false, true, false, true, false), RpcMatchingStatus(1, 2)}, + {CpuThreadMatchingStatus(false, true, false, true, true), RpcMatchingStatus(1, 2)}, + + {CpuThreadMatchingStatus(false, true, false, false, false), RpcMatchingStatus(1, 1)}, + {CpuThreadMatchingStatus(false, true, false, false, true), RpcMatchingStatus(1, 1)}, + {CpuThreadMatchingStatus(false, true, false, true, false), RpcMatchingStatus(1, 1)}, + {CpuThreadMatchingStatus(false, true, false, true, true), RpcMatchingStatus(1, 1)}, + + {CpuThreadMatchingStatus(false, true, true, false, false), RpcMatchingStatus(0, 0)}, + {CpuThreadMatchingStatus(false, true, true, false, true), RpcMatchingStatus(0, 0)}, + {CpuThreadMatchingStatus(false, true, true, true, false), 
RpcMatchingStatus(0, 0)}, + {CpuThreadMatchingStatus(false, true, true, true, true), RpcMatchingStatus(0, 0)}, + + {CpuThreadMatchingStatus(false, true, true, false, false), RpcMatchingStatus(1, 2)}, + {CpuThreadMatchingStatus(false, true, true, false, true), RpcMatchingStatus(1, 2)}, + {CpuThreadMatchingStatus(false, true, true, true, false), RpcMatchingStatus(1, 2)}, + {CpuThreadMatchingStatus(false, true, true, true, true), RpcMatchingStatus(1, 2)}, + + {CpuThreadMatchingStatus(false, true, true, false, false), RpcMatchingStatus(1, 1)}, + {CpuThreadMatchingStatus(false, true, true, false, true), RpcMatchingStatus(1, 1)}, + {CpuThreadMatchingStatus(false, true, true, true, false), RpcMatchingStatus(1, 1)}, + {CpuThreadMatchingStatus(false, true, true, true, true), RpcMatchingStatus(1, 1)}, + + {CpuThreadMatchingStatus(true, false, false, false, false), RpcMatchingStatus(0, 0)}, + {CpuThreadMatchingStatus(true, false, false, false, true), RpcMatchingStatus(0, 0)}, + {CpuThreadMatchingStatus(true, false, false, true, false), RpcMatchingStatus(0, 0)}, + {CpuThreadMatchingStatus(true, false, false, true, true), RpcMatchingStatus(0, 0)}, + + {CpuThreadMatchingStatus(true, false, false, false, false), RpcMatchingStatus(1, 2)}, + {CpuThreadMatchingStatus(true, false, false, false, true), RpcMatchingStatus(1, 2)}, + {CpuThreadMatchingStatus(true, false, false, true, false), RpcMatchingStatus(1, 2)}, + {CpuThreadMatchingStatus(true, false, false, true, true), RpcMatchingStatus(1, 2)}, + + {CpuThreadMatchingStatus(true, false, false, false, false), RpcMatchingStatus(1, 1)}, + {CpuThreadMatchingStatus(true, false, false, false, true), RpcMatchingStatus(1, 1)}, + {CpuThreadMatchingStatus(true, false, false, true, false), RpcMatchingStatus(1, 1)}, + {CpuThreadMatchingStatus(true, false, false, true, true), RpcMatchingStatus(1, 1)}, + + {CpuThreadMatchingStatus(true, false, true, false, false), RpcMatchingStatus(0, 0)}, + {CpuThreadMatchingStatus(true, false, true, false, true), RpcMatchingStatus(0, 0)}, + {CpuThreadMatchingStatus(true, false, true, true, false), RpcMatchingStatus(0, 0)}, + {CpuThreadMatchingStatus(true, false, true, true, true), RpcMatchingStatus(0, 0)}, + + {CpuThreadMatchingStatus(true, false, true, false, false), RpcMatchingStatus(1, 2)}, + {CpuThreadMatchingStatus(true, false, true, false, true), RpcMatchingStatus(1, 2)}, + {CpuThreadMatchingStatus(true, false, true, true, false), RpcMatchingStatus(1, 2)}, + {CpuThreadMatchingStatus(true, false, true, true, true), RpcMatchingStatus(1, 2)}, + + {CpuThreadMatchingStatus(true, false, true, false, false), RpcMatchingStatus(1, 1)}, + {CpuThreadMatchingStatus(true, false, true, false, true), RpcMatchingStatus(1, 1)}, + {CpuThreadMatchingStatus(true, false, true, true, false), RpcMatchingStatus(1, 1)}, + {CpuThreadMatchingStatus(true, false, true, true, true), RpcMatchingStatus(1, 1)}, + + {CpuThreadMatchingStatus(true, true, false, false, false), RpcMatchingStatus(0, 0)}, + {CpuThreadMatchingStatus(true, true, false, false, true), RpcMatchingStatus(0, 0)}, + {CpuThreadMatchingStatus(true, true, false, true, false), RpcMatchingStatus(0, 0)}, + {CpuThreadMatchingStatus(true, true, false, true, true), RpcMatchingStatus(0, 0)}, + + {CpuThreadMatchingStatus(true, true, false, false, false), RpcMatchingStatus(1, 2)}, + {CpuThreadMatchingStatus(true, true, false, false, true), RpcMatchingStatus(1, 2)}, + {CpuThreadMatchingStatus(true, true, false, true, false), RpcMatchingStatus(1, 2)}, + {CpuThreadMatchingStatus(true, true, false, true, 
true), RpcMatchingStatus(1, 2)}, + + {CpuThreadMatchingStatus(true, true, false, false, false), RpcMatchingStatus(1, 1)}, + {CpuThreadMatchingStatus(true, true, false, false, true), RpcMatchingStatus(1, 1)}, + {CpuThreadMatchingStatus(true, true, false, true, false), RpcMatchingStatus(1, 1)}, + {CpuThreadMatchingStatus(true, true, false, true, true), RpcMatchingStatus(1, 1)}, + + {CpuThreadMatchingStatus(true, true, true, false, false), RpcMatchingStatus(0, 0)}, + {CpuThreadMatchingStatus(true, true, true, false, true), RpcMatchingStatus(0, 0)}, + {CpuThreadMatchingStatus(true, true, true, true, false), RpcMatchingStatus(0, 0)}, + {CpuThreadMatchingStatus(true, true, true, true, true), RpcMatchingStatus(0, 0)}, + + {CpuThreadMatchingStatus(true, true, true, false, false), RpcMatchingStatus(1, 2)}, + {CpuThreadMatchingStatus(true, true, true, false, true), RpcMatchingStatus(1, 2)}, + {CpuThreadMatchingStatus(true, true, true, true, false), RpcMatchingStatus(1, 2)}, + {CpuThreadMatchingStatus(true, true, true, true, true), RpcMatchingStatus(1, 2)}, + + {CpuThreadMatchingStatus(true, true, true, false, false), RpcMatchingStatus(1, 1)}, + {CpuThreadMatchingStatus(true, true, true, false, true), RpcMatchingStatus(1, 1)}, + {CpuThreadMatchingStatus(true, true, true, true, false), RpcMatchingStatus(1, 1)}, + {CpuThreadMatchingStatus(true, true, true, true, true), RpcMatchingStatus(1, 1)}, + // clang-format on +}; +} // namespace + +class DeallocationProfiler { + private: + // Arena and allocator used to back STL objects used by DeallocationProfiler + // Shared between all instances of DeallocationProfiler + // TODO(b/248332543): Use TCMalloc's own arena allocator instead of defining a + // new one here. The need for refcount management could be the reason for + // using a custom allocator in the first place. + class MyAllocator { + public: + static void* Allocate(size_t n) { + return absl::base_internal::LowLevelAlloc::AllocWithArena(n, arena_); + } + static void Free(const void* p, size_t /* n */) { + absl::base_internal::LowLevelAlloc::Free(const_cast(p)); + } + + // The lifetime of the arena is managed using a reference count and + // determined by how long at least one emitted Profile remains alive. + struct LowLevelArenaReference { + LowLevelArenaReference() { + AllocationGuardSpinLockHolder h(&arena_lock_); + if ((refcount_++) == 0) { + TC_CHECK_EQ(arena_, nullptr); + arena_ = absl::base_internal::LowLevelAlloc::NewArena(0); + } + } + + ~LowLevelArenaReference() { + AllocationGuardSpinLockHolder h(&arena_lock_); + if ((--refcount_) == 0) { + TC_CHECK(absl::base_internal::LowLevelAlloc::DeleteArena(arena_)); + arena_ = nullptr; + } + } + }; + + private: + // We need to protect the arena with a mutex and ensure that every thread + // acquires that mutex before it uses the arena for the first time. Once + // it has acquired the mutex, it is guaranteed that arena won't change + // between that point in time and when the thread stops accessing it (as + // enforced by LowLevelArenaReference below). + ABSL_CONST_INIT static SpinLock arena_lock_; + static absl::base_internal::LowLevelAlloc::Arena* arena_; + + // We assume that launching a new deallocation profiler takes too long + // to cause this to overflow within the sampling period. The reason this + // is not using std::shared_ptr is that we do not only need to protect the + // value of the reference count but also the pointer itself (and therefore + // need a separate mutex either way). 
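The shape of that lifetime pattern, reduced to standard-library primitives, is an RAII handle whose constructor creates the shared resource on the first reference and whose destructor tears it down on the last one, with the count and the pointer guarded by the same mutex. The names below (ArenaRef, the int stand-in for the arena) are illustrative only:

#include <mutex>

class ArenaRef {
 public:
  ArenaRef() {
    std::lock_guard<std::mutex> lock(mu_);
    if (refcount_++ == 0) resource_ = new int(42);  // stand-in for NewArena()
  }
  ~ArenaRef() {
    std::lock_guard<std::mutex> lock(mu_);
    if (--refcount_ == 0) {
      delete resource_;   // stand-in for DeleteArena()
      resource_ = nullptr;
    }
  }

 private:
  static std::mutex mu_;
  static unsigned refcount_;
  static int* resource_;
};

std::mutex ArenaRef::mu_;
unsigned ArenaRef::refcount_ = 0;
int* ArenaRef::resource_ = nullptr;

int main() {
  ArenaRef a;      // first reference creates the resource
  { ArenaRef b; }  // second reference shares it; does not destroy it
}                  // last reference destroys it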
+ static uint32_t refcount_; + }; + + // This must be the first member of the class to be initialized. The + // underlying arena must stay alive as long as the profiler. + MyAllocator::LowLevelArenaReference arena_ref_; + + // All active profilers are stored in a list. + DeallocationProfiler* next_; + DeallocationProfilerList* list_ = nullptr; + friend class DeallocationProfilerList; + + using AllocsTable = absl::flat_hash_map< + tcmalloc_internal::AllocHandle, DeallocationSampleRecord, + absl::Hash, + std::equal_to, + AllocAdaptor, + MyAllocator>>; + + class DeallocationStackTraceTable final + : public tcmalloc_internal::ProfileBase { + public: + // We define the dtor to ensure it is placed in the desired text section. + ~DeallocationStackTraceTable() override = default; + void AddTrace(const DeallocationSampleRecord& alloc_trace, + const DeallocationSampleRecord& dealloc_trace); + + void Iterate( + absl::FunctionRef func) const override; + + ProfileType Type() const override { + return tcmalloc::ProfileType::kLifetimes; + } + + std::optional StartTime() const override { return start_time_; } + + absl::Duration Duration() const override { + return stop_time_ - start_time_; + } + + void StopAndRecord(const AllocsTable& allocs); + + private: + // This must be the first member of the class to be initialized. The + // underlying arena must stay alive as long as the profile. + MyAllocator::LowLevelArenaReference arena_ref_; + + static constexpr int kNumCases = ABSL_ARRAYSIZE(kAllCases); + + struct Key { + DeallocationSampleRecord alloc; + DeallocationSampleRecord dealloc; + + Key(const DeallocationSampleRecord& alloc, + const DeallocationSampleRecord& dealloc) + : alloc(alloc), dealloc(dealloc) {} + + template + friend H AbslHashValue(H h, const Key& c) { + return H::combine(std::move(h), c.alloc, c.dealloc); + } + + bool operator==(const Key& other) const { + return (alloc == other.alloc) && (dealloc == other.dealloc); + } + }; + + struct Value { + // for each possible cases, we collect repetition count and avg lifetime + // we also collect the minimum and maximum lifetimes, as well as the sum + // of squares (to calculate the standard deviation). + double counts[kNumCases] = {0.0}; + double mean_life_times_ns[kNumCases] = {0.0}; + double variance_life_times_ns[kNumCases] = {0.0}; + double min_life_times_ns[kNumCases] = {0.0}; + double max_life_times_ns[kNumCases] = {0.0}; + + Value() { + std::fill_n(min_life_times_ns, kNumCases, + std::numeric_limits::max()); + } + }; + + absl::flat_hash_map, std::equal_to, + AllocAdaptor, MyAllocator>> + table_; + + absl::Time start_time_ = absl::Now(); + absl::Time stop_time_; + }; + + // Keep track of allocations that are in flight + AllocsTable allocs_; + + // Table to store lifetime information collected by this profiler + std::unique_ptr reports_ = nullptr; + + public: + explicit DeallocationProfiler(DeallocationProfilerList* list) : list_(list) { + reports_ = std::make_unique(); + list_->Add(this); + } + + ~DeallocationProfiler() { + if (reports_ != nullptr) { + Stop(); + } + } + + const tcmalloc::Profile Stop() { + if (reports_ != nullptr) { + // We first remove the profiler from the list to avoid racing with + // potential allocations which may modify the allocs_ table. 
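Remove(), defined further down, unlinks the profiler with a pointer-to-pointer walk so the list head and interior next pointers are updated through the same code path. A standalone sketch of that unlink (Node and Unlink are illustrative names):

#include <cassert>

struct Node { int value; Node* next; };

// Unlinks `target` from a singly-linked list. Assumes `target` is present,
// as the real code verifies with TC_CHECK_NE.
void Unlink(Node** head, Node* target) {
  Node** link = head;
  while (*link != target) link = &(*link)->next;
  *link = target->next;
}

int main() {
  Node c{3, nullptr}, b{2, &c}, a{1, &b};
  Node* head = &a;
  Unlink(&head, &b);
  assert(head == &a && a.next == &c);  // b is spliced out, head untouched
}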
+ list_->Remove(this); + reports_->StopAndRecord(allocs_); + return tcmalloc_internal::ProfileAccessor::MakeProfile( + std::move(reports_)); + } + return tcmalloc::Profile(); + } + + void ReportMalloc(const tcmalloc_internal::StackTrace& stack_trace) { + // store sampled alloc in the hashmap + DeallocationSampleRecord& allocation = + allocs_[stack_trace.sampled_alloc_handle]; + + allocation.allocated_size = stack_trace.allocated_size; + allocation.requested_size = stack_trace.requested_size; + allocation.requested_alignment = stack_trace.requested_alignment; + allocation.depth = stack_trace.depth; + memcpy(allocation.stack, stack_trace.stack, + sizeof(void*) * std::min(static_cast(stack_trace.depth), + kMaxStackDepth)); + // TODO(mmaas): Do we need to worry about b/65384231 anymore? + allocation.creation_time = stack_trace.allocation_time; + allocation.cpu_id = tcmalloc_internal::subtle::percpu::GetRealCpu(); + allocation.vcpu_id = tcmalloc_internal::subtle::percpu::VirtualCpu::get(); + allocation.l3_id = GetL3Id(allocation.cpu_id); + allocation.numa_id = GetNumaId(allocation.cpu_id); + allocation.thread_id = absl::base_internal::GetTID(); + // We divide by the requested size to obtain the number of allocations. + // TODO(b/248332543): Consider using AllocatedBytes from sampler.h. + allocation.weight = static_cast(stack_trace.weight) / + (stack_trace.requested_size + 1); + } + + void ReportFree(tcmalloc_internal::AllocHandle handle) { + auto it = allocs_.find(handle); + + // Handle the case that we observed the deallocation but not the allocation + if (it == allocs_.end()) { + return; + } + + DeallocationSampleRecord sample = it->second; + allocs_.erase(it); + + DeallocationSampleRecord deallocation; + deallocation.allocated_size = sample.allocated_size; + deallocation.requested_alignment = sample.requested_alignment; + deallocation.requested_size = sample.requested_size; + deallocation.creation_time = absl::Now(); + deallocation.cpu_id = tcmalloc_internal::subtle::percpu::GetRealCpu(); + deallocation.vcpu_id = tcmalloc_internal::subtle::percpu::VirtualCpu::get(); + deallocation.l3_id = GetL3Id(deallocation.cpu_id); + deallocation.numa_id = GetNumaId(deallocation.cpu_id); + deallocation.thread_id = absl::base_internal::GetTID(); + deallocation.depth = + absl::GetStackTrace(deallocation.stack, kMaxStackDepth, 1); + + reports_->AddTrace(sample, deallocation); + } +}; + +void DeallocationProfilerList::Add(DeallocationProfiler* profiler) { + AllocationGuardSpinLockHolder h(&profilers_lock_); + profiler->next_ = first_; + first_ = profiler; + + // Whenever a new profiler is created, we seed it with live allocations. 
+ tcmalloc_internal::tc_globals.sampled_allocation_recorder().Iterate( + [profiler]( + const tcmalloc_internal::SampledAllocation& sampled_allocation) { + profiler->ReportMalloc(sampled_allocation.sampled_stack); + }); +} + +// This list is very short and we're nowhere near a hot path, just walk +void DeallocationProfilerList::Remove(DeallocationProfiler* profiler) { + AllocationGuardSpinLockHolder h(&profilers_lock_); + DeallocationProfiler** link = &first_; + DeallocationProfiler* cur = first_; + while (cur != profiler) { + TC_CHECK_NE(cur, nullptr); + link = &cur->next_; + cur = cur->next_; + } + *link = profiler->next_; +} + +void DeallocationProfilerList::ReportMalloc( + const tcmalloc_internal::StackTrace& stack_trace) { + AllocationGuardSpinLockHolder h(&profilers_lock_); + DeallocationProfiler* cur = first_; + while (cur != nullptr) { + cur->ReportMalloc(stack_trace); + cur = cur->next_; + } +} + +void DeallocationProfilerList::ReportFree( + tcmalloc_internal::AllocHandle handle) { + AllocationGuardSpinLockHolder h(&profilers_lock_); + DeallocationProfiler* cur = first_; + while (cur != nullptr) { + cur->ReportFree(handle); + cur = cur->next_; + } +} + +// Initialize static variables +absl::base_internal::LowLevelAlloc::Arena* + DeallocationProfiler::MyAllocator::arena_ = nullptr; +uint32_t DeallocationProfiler::MyAllocator::refcount_ = 0; +ABSL_CONST_INIT SpinLock DeallocationProfiler::MyAllocator::arena_lock_( + absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY); + +void DeallocationProfiler::DeallocationStackTraceTable::StopAndRecord( + const AllocsTable& allocs) { + stop_time_ = absl::Now(); + + // Insert a dummy DeallocationSampleRecord since the table stores pairs. This + // allows us to make minimal changes to the rest of the sample processing + // steps reducing special casing for censored samples. This also allows us to + // aggregate censored samples just like regular deallocation samples. + const DeallocationSampleRecord censored{ + .creation_time = stop_time_, + }; + for (const auto& [unused, alloc] : allocs) { + AddTrace(alloc, censored); + } +} + +void DeallocationProfiler::DeallocationStackTraceTable::AddTrace( + const DeallocationSampleRecord& alloc_trace, + const DeallocationSampleRecord& dealloc_trace) { + CpuThreadMatchingStatus status = + CpuThreadMatchingStatus(alloc_trace.cpu_id == dealloc_trace.cpu_id, + alloc_trace.vcpu_id == dealloc_trace.vcpu_id, + alloc_trace.l3_id == dealloc_trace.l3_id, + alloc_trace.numa_id == dealloc_trace.numa_id, + alloc_trace.thread_id == dealloc_trace.thread_id); + + // Initialize a default rpc matched status. + RpcMatchingStatus rpc_status(/*alloc=*/0, /*dealloc=*/0); + + const int index = ComputeIndex(status, rpc_status); + + DeallocationStackTraceTable::Value& v = + table_[DeallocationStackTraceTable::Key(alloc_trace, dealloc_trace)]; + + const absl::Duration life_time = + dealloc_trace.creation_time - alloc_trace.creation_time; + double life_time_ns = absl::ToDoubleNanoseconds(life_time); + + // Update mean and variance using Welford’s online algorithm. 
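For reference, the textbook form of Welford's single-pass recurrence tracks the running mean together with the running sum of squared deviations (often called M2) and derives the standard deviation from M2/count at the end. A self-contained sketch of that recurrence with illustrative names; it illustrates the recurrence only and is not a drop-in for the per-index bookkeeping below:

#include <cmath>
#include <cstdio>

// Single-pass mean/variance accumulator (Welford's recurrence).
struct RunningStats {
  long long n = 0;
  double mean = 0.0;
  double m2 = 0.0;  // running sum of squared deviations from the mean

  void Add(double x) {
    ++n;
    const double delta = x - mean;
    mean += delta / n;
    m2 += delta * (x - mean);
  }
  double StdDev() const { return n > 0 ? std::sqrt(m2 / n) : 0.0; }
};

int main() {
  RunningStats s;
  for (double x : {2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0}) s.Add(x);
  std::printf("mean=%.1f stddev=%.1f\n", s.mean, s.StdDev());  // mean=5.0 stddev=2.0
}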
+ TC_ASSERT_LT(index, ABSL_ARRAYSIZE(v.counts)); + + double old_mean_ns = v.mean_life_times_ns[index]; + v.mean_life_times_ns[index] += + (life_time_ns - old_mean_ns) / static_cast(v.counts[index] + 1); + v.variance_life_times_ns[index] += + (life_time_ns - v.mean_life_times_ns[index]) * + (v.mean_life_times_ns[index] - old_mean_ns); + + v.min_life_times_ns[index] = + std::min(v.min_life_times_ns[index], life_time_ns); + v.max_life_times_ns[index] = + std::max(v.max_life_times_ns[index], life_time_ns); + v.counts[index]++; +} + +void DeallocationProfiler::DeallocationStackTraceTable::Iterate( + absl::FunctionRef func) const { + uint64_t pair_id = 1; + + for (auto& it : table_) { + const Key& k = it.first; + const Value& v = it.second; + + // Report total bytes that are a multiple of the object size. + size_t allocated_size = k.alloc.allocated_size; + + for (const auto& matching_case : kAllCases) { + const int index = ComputeIndex(matching_case.first, matching_case.second); + if (v.counts[index] == 0) { + continue; + } + + uintptr_t bytes = + std::lround(v.counts[index] * k.alloc.weight * allocated_size); + int64_t count = (bytes + allocated_size - 1) / allocated_size; + int64_t sum = count * allocated_size; + + // The variance should be >= 0, but it's not impossible that it drops + // below 0 for numerical reasons. We don't want to crash in this case, + // so we ensure to return 0 if this happens. + double stddev_life_time_ns = + sqrt(std::max(0.0, v.variance_life_times_ns[index] / + static_cast((v.counts[index])))); + + const auto bucketize = internal::LifetimeNsToBucketedDuration; + Profile::Sample sample; + sample.sum = sum, sample.requested_size = k.alloc.requested_size, + sample.requested_alignment = k.alloc.requested_alignment, + sample.allocated_size = allocated_size, sample.profile_id = pair_id++, + // Set the is_censored flag so that when we create a proto + // sample later we can treat the *_lifetime accordingly. + sample.is_censored = (k.dealloc.depth == 0), + sample.avg_lifetime = bucketize(v.mean_life_times_ns[index]), + sample.stddev_lifetime = bucketize(stddev_life_time_ns), + sample.min_lifetime = bucketize(v.min_life_times_ns[index]), + sample.max_lifetime = bucketize(v.max_life_times_ns[index]); + // Only set the cpu and thread matched flags if the sample is not + // censored. + if (!sample.is_censored) { + sample.allocator_deallocator_physical_cpu_matched = + matching_case.first.physical_cpu_matched; + sample.allocator_deallocator_virtual_cpu_matched = + matching_case.first.virtual_cpu_matched; + sample.allocator_deallocator_l3_matched = + matching_case.first.l3_matched; + sample.allocator_deallocator_numa_matched = + matching_case.first.numa_matched; + sample.allocator_deallocator_thread_matched = + matching_case.first.thread_matched; + } + + // first for allocation + sample.count = count; + sample.depth = k.alloc.depth; + std::copy(k.alloc.stack, k.alloc.stack + k.alloc.depth, sample.stack); + func(sample); + + // If this is a right-censored allocation (i.e. we did not observe the + // deallocation) then do not emit a deallocation sample pair. 
+ if (sample.is_censored) { + continue; + } + + // second for deallocation + static_assert( + std::is_signed::value, + "Deallocation samples are tagged with negative count values."); + sample.count = -1 * count; + sample.depth = k.dealloc.depth; + std::copy(k.dealloc.stack, k.dealloc.stack + k.dealloc.depth, + sample.stack); + func(sample); + } + } +} + +DeallocationSample::DeallocationSample(DeallocationProfilerList* list) { + profiler_ = std::make_unique(list); +} + +tcmalloc::Profile DeallocationSample::Stop() && { + if (profiler_ != nullptr) { + tcmalloc::Profile profile = profiler_->Stop(); + profiler_.reset(); + return profile; + } + return tcmalloc::Profile(); +} + +namespace internal { + +// Lifetimes below 1ns are truncated to 1ns. Lifetimes between 1ns and 1ms +// are rounded to the next smaller power of 10. Lifetimes above 1ms are rounded +// down to the nearest millisecond. +absl::Duration LifetimeNsToBucketedDuration(double lifetime_ns) { + if (lifetime_ns < 1000000.0) { + if (lifetime_ns <= 1) { + // Avoid negatives. We can't allocate in a negative amount of time or + // even as quickly as a nanosecond (microbenchmarks of + // allocation/deallocation in a tight loop are several nanoseconds), so + // results this small indicate probable clock skew or other confounding + // factors in the data. + return absl::Nanoseconds(1); + } + + for (uint64_t cutoff_ns = 10; cutoff_ns <= 1000000; cutoff_ns *= 10) { + if (lifetime_ns < cutoff_ns) { + return absl::Nanoseconds(cutoff_ns / 10); + } + } + } + + // Round down to nearest millisecond. + return absl::Nanoseconds(static_cast(lifetime_ns / 1000000.0) * + 1000000L); +} + +} // namespace internal +} // namespace deallocationz +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/deallocation_profiler.h b/contrib/libs/tcmalloc/tcmalloc/deallocation_profiler.h new file mode 100644 index 000000000000..0f0d47120d46 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/deallocation_profiler.h @@ -0,0 +1,69 @@ +#pragma clang system_header +// Copyright 2022 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
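For intuition about LifetimeNsToBucketedDuration defined in deallocation_profiler.cc above: lifetimes below 1ms are floored at 1ns and otherwise rounded down to the next lower power of ten nanoseconds, while lifetimes of 1ms or more are rounded down to a whole millisecond. A few spot checks of that behavior, assuming the new header is on the include path:

#include <cassert>

#include "absl/time/time.h"
#include "tcmalloc/deallocation_profiler.h"

int main() {
  using tcmalloc::deallocationz::internal::LifetimeNsToBucketedDuration;
  assert(LifetimeNsToBucketedDuration(0.5) == absl::Nanoseconds(1));       // floor at 1ns
  assert(LifetimeNsToBucketedDuration(42.0) == absl::Nanoseconds(10));     // 10 <= 42 < 100
  assert(LifetimeNsToBucketedDuration(999999.0) == absl::Nanoseconds(100000));
  assert(LifetimeNsToBucketedDuration(3.4e6) == absl::Milliseconds(3));    // >= 1ms: whole ms
}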
+ +#ifndef TCMALLOC_DEALLOCATION_PROFILER_H_ +#define TCMALLOC_DEALLOCATION_PROFILER_H_ + +#include + +#include "absl/base/const_init.h" +#include "absl/base/internal/spinlock.h" +#include "absl/time/time.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/malloc_extension.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace deallocationz { + +class DeallocationProfiler; + +class DeallocationProfilerList { + public: + constexpr DeallocationProfilerList() = default; + + void ReportMalloc(const tcmalloc_internal::StackTrace& stack_trace); + void ReportFree(tcmalloc_internal::AllocHandle handle); + void Add(DeallocationProfiler* profiler); + void Remove(DeallocationProfiler* profiler); + + private: + DeallocationProfiler* first_ = nullptr; + absl::base_internal::SpinLock profilers_lock_{ + absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY}; +}; + +class DeallocationSample final + : public tcmalloc_internal::AllocationProfilingTokenBase { + public: + explicit DeallocationSample(DeallocationProfilerList* list); + // We define the dtor to ensure it is placed in the desired text section. + ~DeallocationSample() override = default; + + tcmalloc::Profile Stop() && override; + + private: + std::unique_ptr profiler_; +}; + +namespace internal { +absl::Duration LifetimeNsToBucketedDuration(double lifetime_ns); +} // namespace internal +} // namespace deallocationz +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_DEALLOCATION_PROFILER_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/experiment.cc b/contrib/libs/tcmalloc/tcmalloc/experiment.cc index 1c425fbf9ed1..bd131db4f7a1 100644 --- a/contrib/libs/tcmalloc/tcmalloc/experiment.cc +++ b/contrib/libs/tcmalloc/tcmalloc/experiment.cc @@ -16,11 +16,28 @@ #include -#include "absl/base/macros.h" +#include +#include +#include +#include +#include +#include +#include + +#include "absl/base/attributes.h" +#include "absl/base/call_once.h" +#include "absl/base/internal/cycleclock.h" +#include "absl/functional/function_ref.h" +#include "absl/hash/hash.h" +#include "absl/strings/match.h" #include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "tcmalloc/experiment_config.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/environment.h" #include "tcmalloc/internal/logging.h" +#include "tcmalloc/malloc_extension.h" GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { @@ -33,6 +50,28 @@ const char kDisableExperiments[] = "BORG_DISABLE_EXPERIMENTS"; constexpr absl::string_view kEnableAll = "enable-all-known-experiments"; constexpr absl::string_view kDisableAll = "all"; +// Experiments that have known issues with brittle tests, are not enabled +// involuntarily in tests, and shouldn't be enabled widely. 
+bool HasBrittleTestFailures(Experiment exp) { + if (exp == Experiment::TEST_ONLY_TCMALLOC_POW2_SIZECLASS) { + return true; + } + + if (exp == Experiment::TEST_ONLY_TCMALLOC_SHARDED_TRANSFER_CACHE) { + return true; + } + + return false; +} + +bool IsCompilerExperiment(Experiment exp) { +#ifdef NPX_COMPILER_ENABLED_EXPERIMENT + return exp == Experiment::NPX_COMPILER_EXPERIMENT; +#else + return false; +#endif +} + bool LookupExperimentID(absl::string_view label, Experiment* exp) { for (auto config : experiments) { if (config.name == label) { @@ -45,16 +84,20 @@ bool LookupExperimentID(absl::string_view label, Experiment* exp) { } const bool* GetSelectedExperiments() { - static bool by_id[kNumExperiments]; + ABSL_CONST_INIT static bool by_id[kNumExperiments]; + ABSL_CONST_INIT static absl::once_flag flag; - static const bool* status = [&]() { + absl::base_internal::LowLevelCallOnce(&flag, [&]() { + const char* test_target = thread_safe_getenv("TEST_TARGET"); const char* active_experiments = thread_safe_getenv(kExperiments); const char* disabled_experiments = thread_safe_getenv(kDisableExperiments); - return SelectExperiments(by_id, - active_experiments ? active_experiments : "", - disabled_experiments ? disabled_experiments : ""); - }(); - return status; + SelectExperiments( + by_id, test_target ? test_target : "", + active_experiments ? active_experiments : "", + disabled_experiments ? disabled_experiments : "", + active_experiments == nullptr && disabled_experiments == nullptr); + }); + return by_id; } template @@ -77,8 +120,9 @@ void ParseExperiments(absl::string_view labels, F f) { } // namespace -const bool* SelectExperiments(bool* buffer, absl::string_view active, - absl::string_view disabled) { +const bool* SelectExperiments(bool* buffer, absl::string_view test_target, + absl::string_view active, + absl::string_view disabled, bool unset) { memset(buffer, 0, sizeof(*buffer) * kNumExperiments); if (active == kEnableAll) { @@ -92,70 +136,114 @@ const bool* SelectExperiments(bool* buffer, absl::string_view active, } }); + // The compiler experiments should be env variable independent. +#ifdef NPX_COMPILER_ENABLED_EXPERIMENT + if (!absl::StrContains(active, NPX_COMPILER_ENABLED_EXPERIMENT)) { + Experiment id; + if (LookupExperimentID(NPX_COMPILER_ENABLED_EXPERIMENT, &id)) { + buffer[static_cast(id)] = true; + } + } +#endif + if (disabled == kDisableAll) { - memset(buffer, 0, sizeof(*buffer) * kNumExperiments); + for (auto config : experiments) { + // Exclude compile-time experiments + if (!IsCompilerExperiment(config.id)) { + buffer[static_cast(config.id)] = false; + } + } } + // disable non-compiler experiments ParseExperiments(disabled, [buffer](absl::string_view token) { Experiment id; - if (LookupExperimentID(token, &id)) { + if (LookupExperimentID(token, &id) && !IsCompilerExperiment(id)) { buffer[static_cast(id)] = false; } }); - return buffer; -} - -void PrintExperiments(Printer* printer) { - // Index experiments by their positions in the experiments array, rather than - // by experiment ID. - static bool active[ABSL_ARRAYSIZE(experiments)]; - static const bool* status = []() { - memset(active, 0, sizeof(active)); - const bool* by_id = GetSelectedExperiments(); - - for (int i = 0; i < ABSL_ARRAYSIZE(experiments); i++) { - const auto& config = experiments[i]; - active[i] = by_id[static_cast(config.id)]; + // Enable some random combination of experiments for tests that don't + // explicitly set any of the experiment env vars. 
This allows to get better + // test coverage of experiments before production. + // Tests can opt out by exporting BORG_EXPERIMENTS="". + // Enabled experiments are selected based on the stable test target name hash, + // this allows get a wide range of experiment permutations on a large test + // base, but at the same time avoids flaky test failures (if a particular + // test fails only with a particular experiment combination). + // It would be nice to print what experiments we enable, but printing even + // to stderr breaks some tests that capture subprocess output. + if (unset && !test_target.empty()) { + TC_CHECK(active.empty() && disabled.empty()); + uint64_t seed = + static_cast(absl::base_internal::CycleClock::Now()); + const size_t target_hash = absl::HashOf(test_target, seed); + constexpr size_t kVanillaOneOf = 11; + constexpr size_t kEnableOneOf = 3; + if ((target_hash % kVanillaOneOf) == 0) { + return buffer; } - return active; - }(); - - printer->printf("MALLOC EXPERIMENTS:"); - for (int i = 0; i < ABSL_ARRAYSIZE(experiments); i++) { - const char* value = status[i] ? "1" : "0"; - printer->printf(" %s=%s", experiments[i].name, value); + int num_enabled_experiments = 0; + Experiment experiment_id = Experiment::kMaxExperimentID; + for (auto config : experiments) { + if (IsCompilerExperiment(config.id) || + HasBrittleTestFailures(config.id)) { + continue; + } + TC_CHECK(!buffer[static_cast(config.id)]); + experiment_id = config.id; + + // Enabling is specifically based on the experiment name so that it's + // stable when experiments are added/removed. + bool enabled = + ((target_hash ^ absl::HashOf(config.name)) % kEnableOneOf) == 0; + buffer[static_cast(config.id)] = enabled; + num_enabled_experiments += enabled; + } + // In case the hash-based selection above did not work out, select the last + // experiment. + if (num_enabled_experiments == 0 && + experiment_id != Experiment::kMaxExperimentID) { + TC_CHECK(!buffer[static_cast(experiment_id)]); + buffer[static_cast(experiment_id)] = true; + } } - printer->printf("\n"); -} - -void FillExperimentProperties( - std::map* result) { - for (const auto& config : experiments) { - (*result)[absl::StrCat("tcmalloc.experiment.", config.name)].value = - IsExperimentActive(config.id) ? 
1 : 0; - } + return buffer; } } // namespace tcmalloc_internal bool IsExperimentActive(Experiment exp) { - ASSERT(static_cast(exp) >= 0); - ASSERT(exp < Experiment::kMaxExperimentID); + TC_ASSERT_GE(static_cast(exp), 0); + TC_ASSERT_LT(exp, Experiment::kMaxExperimentID); return tcmalloc_internal::GetSelectedExperiments()[static_cast(exp)]; } -absl::optional FindExperimentByName(absl::string_view name) { +std::optional FindExperimentByName(absl::string_view name) { for (const auto& config : experiments) { if (name == config.name) { return config.id; } } - return absl::nullopt; + return std::nullopt; +} + +void WalkExperiments( + absl::FunctionRef callback) { + for (const auto& config : experiments) { + callback(config.name, IsExperimentActive(config.id)); + } +} + +extern "C" void MallocExtension_Internal_GetExperiments( + std::map* result) { + WalkExperiments([&](absl::string_view name, bool active) { + (*result)[absl::StrCat("tcmalloc.experiment.", name)].value = active; + }); } } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/experiment.h b/contrib/libs/tcmalloc/tcmalloc/experiment.h index 90b3049df117..75ee9d64f2ce 100644 --- a/contrib/libs/tcmalloc/tcmalloc/experiment.h +++ b/contrib/libs/tcmalloc/tcmalloc/experiment.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,14 +18,13 @@ #include -#include -#include +#include +#include "absl/functional/function_ref.h" #include "absl/strings/string_view.h" #include "absl/types/optional.h" #include "tcmalloc/experiment_config.h" -#include "tcmalloc/internal/logging.h" -#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/internal/config.h" // TCMalloc Experiment Controller // @@ -51,19 +51,18 @@ constexpr size_t kNumExperiments = // buffer must be sized for kMaxExperimentID entries. // // This is exposed for testing purposes only. -const bool* SelectExperiments(bool* buffer, absl::string_view active, - absl::string_view disabled); - -void FillExperimentProperties( - std::map* result); - -void PrintExperiments(Printer* printer); +const bool* SelectExperiments(bool* buffer, absl::string_view test_target, + absl::string_view active, + absl::string_view disabled, bool unset); } // namespace tcmalloc_internal bool IsExperimentActive(Experiment exp); -absl::optional FindExperimentByName(absl::string_view name); +std::optional FindExperimentByName(absl::string_view name); + +void WalkExperiments( + absl::FunctionRef callback); } // namespace tcmalloc GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/experiment_config.h b/contrib/libs/tcmalloc/tcmalloc/experiment_config.h index 294c0374e415..382b3388061a 100644 --- a/contrib/libs/tcmalloc/tcmalloc/experiment_config.h +++ b/contrib/libs/tcmalloc/tcmalloc/experiment_config.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -21,13 +22,18 @@ namespace tcmalloc { enum class Experiment : int { - TCMALLOC_TEMERAIRE, - TCMALLOC_SANS_56_SIZECLASS, + // clang-format off + // go/keep-sorted start + TCMALLOC_L3_AWARE_VCPUS, // TODO(b/239977380): Complete experiment. + TCMALLOC_MIN_HOT_ACCESS_HINT_ABLATION, // TODO(b/376902157): Complete experiment. + TEST_ONLY_L3_AWARE, // TODO(b/239977380): Complete experiment. + TEST_ONLY_TCMALLOC_DENSE_TRACKERS_SORTED_ON_SPANS_ALLOCATED, // TODO(b/348043731): Complete experiment. 
+ TEST_ONLY_TCMALLOC_HUGE_CACHE_RELEASE_30S, // TODO(b/319872040): Complete experiment. TEST_ONLY_TCMALLOC_POW2_SIZECLASS, - TEST_ONLY_TCMALLOC_POW2_BELOW64_SIZECLASS, - TEST_ONLY_TCMALLOC_RING_BUFFER_TRANSFER_CACHE, TEST_ONLY_TCMALLOC_SHARDED_TRANSFER_CACHE, + // go/keep-sorted end kMaxExperimentID, + // clang-format on }; struct ExperimentConfig { @@ -37,12 +43,15 @@ struct ExperimentConfig { // clang-format off inline constexpr ExperimentConfig experiments[] = { - {Experiment::TCMALLOC_TEMERAIRE, "TCMALLOC_TEMERAIRE"}, - {Experiment::TCMALLOC_SANS_56_SIZECLASS, "TCMALLOC_SANS_56_SIZECLASS"}, + // go/keep-sorted start + {Experiment::TCMALLOC_L3_AWARE_VCPUS, "TCMALLOC_L3_AWARE_VCPUS"}, + {Experiment::TCMALLOC_MIN_HOT_ACCESS_HINT_ABLATION, "TCMALLOC_MIN_HOT_ACCESS_HINT_ABLATION"}, + {Experiment::TEST_ONLY_L3_AWARE, "TEST_ONLY_L3_AWARE"}, + {Experiment::TEST_ONLY_TCMALLOC_DENSE_TRACKERS_SORTED_ON_SPANS_ALLOCATED, "TEST_ONLY_TCMALLOC_DENSE_TRACKERS_SORTED_ON_SPANS_ALLOCATED"}, + {Experiment::TEST_ONLY_TCMALLOC_HUGE_CACHE_RELEASE_30S, "TEST_ONLY_TCMALLOC_HUGE_CACHE_RELEASE_30S"}, {Experiment::TEST_ONLY_TCMALLOC_POW2_SIZECLASS, "TEST_ONLY_TCMALLOC_POW2_SIZECLASS"}, - {Experiment::TEST_ONLY_TCMALLOC_POW2_BELOW64_SIZECLASS, "TEST_ONLY_TCMALLOC_POW2_BELOW64_SIZECLASS"}, - {Experiment::TEST_ONLY_TCMALLOC_RING_BUFFER_TRANSFER_CACHE, "TEST_ONLY_TCMALLOC_RING_BUFFER_TRANSFER_CACHE"}, {Experiment::TEST_ONLY_TCMALLOC_SHARDED_TRANSFER_CACHE, "TEST_ONLY_TCMALLOC_SHARDED_TRANSFER_CACHE"}, + // go/keep-sorted end }; // clang-format on diff --git a/contrib/libs/tcmalloc/tcmalloc/experiment_fuzz.cc b/contrib/libs/tcmalloc/tcmalloc/experiment_fuzz.cc index 2a7afe9b85a9..87fa5c26a07a 100644 --- a/contrib/libs/tcmalloc/tcmalloc/experiment_fuzz.cc +++ b/contrib/libs/tcmalloc/tcmalloc/experiment_fuzz.cc @@ -16,23 +16,26 @@ #include #include +#include "fuzztest/fuzztest.h" #include "absl/strings/string_view.h" #include "tcmalloc/experiment.h" -extern "C" int LLVMFuzzerTestOneInput(const uint8_t* d, size_t size) { - const char* data = reinterpret_cast(d); +namespace tcmalloc::tcmalloc_internal { +namespace { - bool buffer[tcmalloc::tcmalloc_internal::kNumExperiments]; - absl::string_view active, disabled; - - const char* split = static_cast(memchr(data, ';', size)); - if (split == nullptr) { - active = absl::string_view(data, size); - } else { - active = absl::string_view(data, split - data); - disabled = absl::string_view(split + 1, size - (split - data + 1)); +void FuzzSelectExperiments(absl::string_view test_target, + absl::string_view active, absl::string_view disabled, + bool unset) { + if (unset && !test_target.empty() && (!active.empty() || !disabled.empty())) { + return; } - tcmalloc::tcmalloc_internal::SelectExperiments(buffer, active, disabled); - return 0; + bool buffer[tcmalloc::tcmalloc_internal::kNumExperiments]; + + SelectExperiments(buffer, test_target, active, disabled, unset); } + +FUZZ_TEST(ExperimentTest, FuzzSelectExperiments); + +} // namespace +} // namespace tcmalloc::tcmalloc_internal diff --git a/contrib/libs/tcmalloc/tcmalloc/experimental_56_size_class.cc b/contrib/libs/tcmalloc/tcmalloc/experimental_56_size_class.cc deleted file mode 100644 index c582cdb9baf5..000000000000 --- a/contrib/libs/tcmalloc/tcmalloc/experimental_56_size_class.cc +++ /dev/null @@ -1,706 +0,0 @@ -// Copyright 2019 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tcmalloc/common.h" - -namespace tcmalloc { - -// is fixed per-size-class overhead due to end-of-span fragmentation -// and other factors. For instance, if we have a 96 byte size class, and use a -// single 8KiB page, then we will hold 85 objects per span, and have 32 bytes -// left over. There is also a fixed component of 48 bytes of TCMalloc metadata -// per span. Together, the fixed overhead would be wasted/allocated = -// (32 + 48) / (8192 - 32) ~= 0.98%. -// There is also a dynamic component to overhead based on mismatches between the -// number of bytes requested and the number of bytes provided by the size class. -// Together they sum to the total overhead; for instance if you asked for a -// 50-byte allocation that rounds up to a 64-byte size class, the dynamic -// overhead would be 28%, and if were 22% it would mean (on average) -// 25 bytes of overhead for allocations of that size. - -// clang-format off -#if defined(__cpp_aligned_new) && __STDCPP_DEFAULT_NEW_ALIGNMENT__ <= 8 -#if TCMALLOC_PAGE_SHIFT == 13 -static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 86; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalSizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalSizeClasses[SizeMap::kExperimentalSizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.59% - { 16, 1, 32}, // 0.59% - { 24, 1, 32}, // 0.68% - { 32, 1, 32}, // 0.59% - { 40, 1, 32}, // 0.98% - { 48, 1, 32}, // 0.98% - { 64, 1, 32}, // 0.59% - { 72, 1, 32}, // 1.28% - { 80, 1, 32}, // 0.98% - { 88, 1, 32}, // 0.68% - { 96, 1, 32}, // 0.98% - { 104, 1, 32}, // 1.58% - { 112, 1, 32}, // 0.78% - { 120, 1, 32}, // 0.98% - { 128, 1, 32}, // 0.59% - { 136, 1, 32}, // 0.98% - { 144, 1, 32}, // 2.18% - { 160, 1, 32}, // 0.98% - { 176, 1, 32}, // 1.78% - { 184, 1, 32}, // 1.78% - { 192, 1, 32}, // 2.18% - { 208, 1, 32}, // 1.58% - { 224, 1, 32}, // 2.18% - { 240, 1, 32}, // 0.98% - { 256, 1, 32}, // 0.59% - { 272, 1, 32}, // 0.98% - { 288, 1, 32}, // 2.18% - { 312, 1, 32}, // 1.58% - { 336, 1, 32}, // 2.18% - { 352, 1, 32}, // 1.78% - { 384, 1, 32}, // 2.18% - { 408, 1, 32}, // 0.98% - { 424, 1, 32}, // 2.28% - { 448, 1, 32}, // 2.18% - { 480, 1, 32}, // 0.98% - { 512, 1, 32}, // 0.59% - { 576, 1, 32}, // 2.18% - { 640, 1, 32}, // 7.29% - { 704, 1, 32}, // 6.40% - { 768, 1, 32}, // 7.29% - { 896, 1, 32}, // 2.18% - { 1024, 1, 32}, // 0.59% - { 1152, 2, 32}, // 1.88% - { 1280, 2, 32}, // 6.98% - { 1408, 2, 32}, // 6.10% - { 1536, 2, 32}, // 6.98% - { 1792, 2, 32}, // 1.88% - { 2048, 2, 32}, // 0.29% - { 2304, 2, 28}, // 1.88% - { 2688, 2, 24}, // 1.88% - { 2816, 3, 23}, // 9.30% - { 3200, 2, 20}, // 2.70% - { 3456, 3, 18}, // 1.79% - { 3584, 4, 18}, // 1.74% - { 4096, 2, 16}, // 0.29% - { 4736, 3, 13}, // 3.99% - { 5376, 2, 12}, // 1.88% - { 6144, 3, 10}, // 0.20% - { 6528, 4, 10}, // 0.54% - { 7168, 7, 9}, // 0.08% - { 8192, 2, 8}, // 0.29% - { 9472, 5, 6}, // 8.23% - { 10240, 4, 6}, // 6.82% - { 12288, 3, 5}, // 0.20% - { 14336, 7, 4}, // 0.08% - { 16384, 2, 4}, // 0.29% - { 20480, 5, 3}, // 0.12% - { 24576, 3, 
2}, // 0.20% - { 28672, 7, 2}, // 0.08% - { 32768, 4, 2}, // 0.15% - { 40960, 5, 2}, // 0.12% - { 49152, 6, 2}, // 0.10% - { 57344, 7, 2}, // 0.08% - { 65536, 8, 2}, // 0.07% - { 73728, 9, 2}, // 0.07% - { 81920, 10, 2}, // 0.06% - { 98304, 12, 2}, // 0.05% - { 106496, 13, 2}, // 0.05% - { 131072, 16, 2}, // 0.04% - { 147456, 18, 2}, // 0.03% - { 163840, 20, 2}, // 0.03% - { 180224, 22, 2}, // 0.03% - { 204800, 25, 2}, // 0.02% - { 229376, 28, 2}, // 0.02% - { 262144, 32, 2}, // 0.02% -}; -#elif TCMALLOC_PAGE_SHIFT == 15 -static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 78; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalSizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalSizeClasses[SizeMap::kExperimentalSizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.15% - { 16, 1, 32}, // 0.15% - { 24, 1, 32}, // 0.17% - { 32, 1, 32}, // 0.15% - { 40, 1, 32}, // 0.17% - { 48, 1, 32}, // 0.24% - { 64, 1, 32}, // 0.15% - { 72, 1, 32}, // 0.17% - { 80, 1, 32}, // 0.29% - { 88, 1, 32}, // 0.24% - { 96, 1, 32}, // 0.24% - { 104, 1, 32}, // 0.17% - { 112, 1, 32}, // 0.34% - { 120, 1, 32}, // 0.17% - { 128, 1, 32}, // 0.15% - { 144, 1, 32}, // 0.39% - { 160, 1, 32}, // 0.54% - { 176, 1, 32}, // 0.24% - { 192, 1, 32}, // 0.54% - { 208, 1, 32}, // 0.49% - { 224, 1, 32}, // 0.34% - { 240, 1, 32}, // 0.54% - { 256, 1, 32}, // 0.15% - { 280, 1, 32}, // 0.17% - { 304, 1, 32}, // 0.89% - { 336, 1, 32}, // 0.69% - { 368, 1, 32}, // 0.20% - { 416, 1, 32}, // 1.13% - { 456, 1, 32}, // 1.36% - { 488, 1, 32}, // 0.37% - { 512, 1, 32}, // 0.15% - { 576, 1, 32}, // 1.74% - { 640, 1, 32}, // 0.54% - { 704, 1, 32}, // 1.33% - { 768, 1, 32}, // 1.74% - { 832, 1, 32}, // 1.13% - { 896, 1, 32}, // 1.74% - { 1024, 1, 32}, // 0.15% - { 1152, 1, 32}, // 1.74% - { 1280, 1, 32}, // 2.55% - { 1408, 1, 32}, // 1.33% - { 1664, 1, 32}, // 3.80% - { 2048, 1, 32}, // 0.15% - { 2176, 1, 30}, // 0.54% - { 2304, 1, 28}, // 1.74% - { 2432, 1, 26}, // 3.80% - { 2688, 1, 24}, // 1.74% - { 2944, 1, 22}, // 1.33% - { 3200, 1, 20}, // 2.55% - { 3584, 1, 18}, // 1.74% - { 4096, 1, 16}, // 0.15% - { 4608, 1, 14}, // 1.74% - { 5376, 1, 12}, // 1.74% - { 6528, 1, 10}, // 0.54% - { 8192, 1, 8}, // 0.15% - { 9344, 2, 7}, // 0.27% - { 10880, 1, 6}, // 0.54% - { 13056, 2, 5}, // 0.47% - { 13952, 3, 4}, // 0.70% - { 16384, 1, 4}, // 0.15% - { 19072, 3, 3}, // 3.14% - { 21760, 2, 3}, // 0.47% - { 24576, 3, 2}, // 0.05% - { 28032, 6, 2}, // 0.22% - { 32768, 1, 2}, // 0.15% - { 40960, 4, 2}, // 6.71% - { 49152, 3, 2}, // 0.05% - { 57344, 7, 2}, // 0.02% - { 65536, 2, 2}, // 0.07% - { 81920, 5, 2}, // 0.03% - { 98304, 3, 2}, // 0.05% - { 114688, 7, 2}, // 0.02% - { 131072, 4, 2}, // 0.04% - { 163840, 5, 2}, // 0.03% - { 196608, 6, 2}, // 0.02% - { 229376, 7, 2}, // 0.02% - { 262144, 8, 2}, // 0.02% -}; -#elif TCMALLOC_PAGE_SHIFT == 18 -static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 89; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalSizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalSizeClasses[SizeMap::kExperimentalSizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.02% - { 16, 1, 32}, // 0.02% - { 24, 1, 32}, // 0.02% - { 32, 1, 32}, // 0.02% - { 40, 1, 32}, // 0.03% - { 48, 1, 32}, // 0.02% - { 64, 1, 32}, // 0.02% - { 72, 1, 32}, // 0.04% - { 80, 1, 32}, // 0.04% - { 88, 1, 32}, // 0.05% - { 96, 1, 32}, // 0.04% - { 112, 1, 32}, // 0.04% - { 128, 1, 32}, // 
0.02% - { 144, 1, 32}, // 0.04% - { 160, 1, 32}, // 0.04% - { 176, 1, 32}, // 0.05% - { 192, 1, 32}, // 0.04% - { 216, 1, 32}, // 0.07% - { 240, 1, 32}, // 0.04% - { 256, 1, 32}, // 0.02% - { 288, 1, 32}, // 0.04% - { 312, 1, 32}, // 0.04% - { 344, 1, 32}, // 0.02% - { 360, 1, 32}, // 0.04% - { 416, 1, 32}, // 0.04% - { 464, 1, 32}, // 0.19% - { 512, 1, 32}, // 0.02% - { 576, 1, 32}, // 0.04% - { 640, 1, 32}, // 0.17% - { 704, 1, 32}, // 0.12% - { 832, 1, 32}, // 0.04% - { 1024, 1, 32}, // 0.02% - { 1152, 1, 32}, // 0.26% - { 1280, 1, 32}, // 0.41% - { 1408, 1, 32}, // 0.12% - { 1664, 1, 32}, // 0.36% - { 1792, 1, 32}, // 0.21% - { 1920, 1, 32}, // 0.41% - { 2048, 1, 32}, // 0.02% - { 2176, 1, 30}, // 0.41% - { 2304, 1, 28}, // 0.71% - { 2432, 1, 26}, // 0.76% - { 2688, 1, 24}, // 0.56% - { 2944, 1, 22}, // 0.07% - { 3072, 1, 21}, // 0.41% - { 3328, 1, 19}, // 1.00% - { 3584, 1, 18}, // 0.21% - { 3840, 1, 17}, // 0.41% - { 4096, 1, 16}, // 0.02% - { 4608, 1, 14}, // 1.61% - { 5120, 1, 12}, // 0.41% - { 5504, 1, 11}, // 1.35% - { 5760, 1, 11}, // 1.15% - { 6144, 1, 10}, // 1.61% - { 6656, 1, 9}, // 1.00% - { 7168, 1, 9}, // 1.61% - { 7680, 1, 8}, // 0.41% - { 8192, 1, 8}, // 0.02% - { 9344, 1, 7}, // 0.21% - { 9984, 1, 6}, // 1.00% - { 10880, 1, 6}, // 0.41% - { 11904, 1, 5}, // 0.12% - { 13056, 1, 5}, // 0.41% - { 14464, 1, 4}, // 0.71% - { 16384, 1, 4}, // 0.02% - { 17408, 1, 3}, // 0.41% - { 20096, 1, 3}, // 0.36% - { 21760, 1, 3}, // 0.41% - { 23808, 1, 2}, // 0.12% - { 26112, 1, 2}, // 0.41% - { 29056, 1, 2}, // 0.26% - { 32768, 1, 2}, // 0.02% - { 37376, 1, 2}, // 0.21% - { 43648, 1, 2}, // 0.12% - { 45568, 2, 2}, // 4.61% - { 52352, 1, 2}, // 0.17% - { 56064, 2, 2}, // 3.92% - { 65536, 1, 2}, // 0.02% - { 74880, 2, 2}, // 0.03% - { 87296, 1, 2}, // 0.12% - { 104832, 2, 2}, // 0.03% - { 112256, 3, 2}, // 0.09% - { 131072, 1, 2}, // 0.02% - { 149760, 3, 2}, // 5.03% - { 174720, 2, 2}, // 0.03% - { 196608, 3, 2}, // 0.01% - { 209664, 4, 2}, // 0.03% - { 262144, 1, 2}, // 0.02% -}; -#elif TCMALLOC_PAGE_SHIFT == 12 -static_assert(kMaxSize == 8192, "kMaxSize mismatch"); -static const int kCount = 46; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalSizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalSizeClasses[SizeMap::kExperimentalSizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 1.17% - { 16, 1, 32}, // 1.17% - { 24, 1, 32}, // 1.57% - { 32, 1, 32}, // 1.17% - { 40, 1, 32}, // 1.57% - { 48, 1, 32}, // 1.57% - { 64, 1, 32}, // 1.17% - { 72, 1, 32}, // 2.78% - { 80, 1, 32}, // 1.57% - { 88, 1, 32}, // 2.37% - { 96, 1, 32}, // 2.78% - { 104, 1, 32}, // 2.17% - { 112, 1, 32}, // 2.78% - { 128, 1, 32}, // 1.17% - { 144, 1, 32}, // 2.78% - { 160, 1, 32}, // 3.60% - { 176, 1, 32}, // 2.37% - { 192, 1, 32}, // 2.78% - { 208, 1, 32}, // 4.86% - { 240, 1, 32}, // 1.57% - { 256, 1, 32}, // 1.17% - { 272, 1, 32}, // 1.57% - { 312, 1, 32}, // 2.17% - { 336, 1, 32}, // 2.78% - { 368, 1, 32}, // 2.37% - { 408, 1, 32}, // 1.57% - { 448, 1, 32}, // 2.78% - { 512, 1, 32}, // 1.17% - { 576, 2, 32}, // 2.18% - { 640, 2, 32}, // 7.29% - { 768, 2, 32}, // 7.29% - { 896, 2, 32}, // 2.18% - { 1024, 2, 32}, // 0.59% - { 1152, 3, 32}, // 7.08% - { 1280, 3, 32}, // 7.08% - { 1536, 3, 32}, // 0.39% - { 2048, 4, 32}, // 0.29% - { 2304, 4, 28}, // 1.88% - { 2688, 4, 24}, // 1.88% - { 3200, 4, 20}, // 2.70% - { 4096, 4, 16}, // 0.29% - { 4736, 5, 13}, // 8.36% - { 6144, 3, 10}, // 0.39% - { 7168, 7, 9}, // 0.17% - { 8192, 4, 8}, // 0.29% -}; -#else 
-#error "Unsupported TCMALLOC_PAGE_SHIFT value!" -#endif -#else -#if TCMALLOC_PAGE_SHIFT == 13 -static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 86; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalSizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalSizeClasses[SizeMap::kExperimentalSizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.59% - { 16, 1, 32}, // 0.59% - { 32, 1, 32}, // 0.59% - { 48, 1, 32}, // 0.98% - { 64, 1, 32}, // 0.59% - { 80, 1, 32}, // 0.98% - { 96, 1, 32}, // 0.98% - { 112, 1, 32}, // 0.78% - { 128, 1, 32}, // 0.59% - { 144, 1, 32}, // 2.18% - { 160, 1, 32}, // 0.98% - { 176, 1, 32}, // 1.78% - { 192, 1, 32}, // 2.18% - { 208, 1, 32}, // 1.58% - { 224, 1, 32}, // 2.18% - { 240, 1, 32}, // 0.98% - { 256, 1, 32}, // 0.59% - { 272, 1, 32}, // 0.98% - { 288, 1, 32}, // 2.18% - { 304, 1, 32}, // 4.25% - { 320, 1, 32}, // 3.00% - { 336, 1, 32}, // 2.18% - { 352, 1, 32}, // 1.78% - { 368, 1, 32}, // 1.78% - { 384, 1, 32}, // 2.18% - { 400, 1, 32}, // 3.00% - { 416, 1, 32}, // 4.25% - { 448, 1, 32}, // 2.18% - { 480, 1, 32}, // 0.98% - { 512, 1, 32}, // 0.59% - { 576, 1, 32}, // 2.18% - { 640, 1, 32}, // 7.29% - { 704, 1, 32}, // 6.40% - { 768, 1, 32}, // 7.29% - { 896, 1, 32}, // 2.18% - { 1024, 1, 32}, // 0.59% - { 1152, 2, 32}, // 1.88% - { 1280, 2, 32}, // 6.98% - { 1408, 2, 32}, // 6.10% - { 1536, 2, 32}, // 6.98% - { 1792, 2, 32}, // 1.88% - { 2048, 2, 32}, // 0.29% - { 2304, 2, 28}, // 1.88% - { 2688, 2, 24}, // 1.88% - { 2816, 3, 23}, // 9.30% - { 3200, 2, 20}, // 2.70% - { 3456, 3, 18}, // 1.79% - { 3584, 4, 18}, // 1.74% - { 4096, 2, 16}, // 0.29% - { 4736, 3, 13}, // 3.99% - { 5376, 2, 12}, // 1.88% - { 6144, 3, 10}, // 0.20% - { 6528, 4, 10}, // 0.54% - { 6784, 5, 9}, // 0.75% - { 7168, 7, 9}, // 0.08% - { 8192, 2, 8}, // 0.29% - { 9472, 5, 6}, // 8.23% - { 10240, 4, 6}, // 6.82% - { 12288, 3, 5}, // 0.20% - { 13568, 5, 4}, // 0.75% - { 14336, 7, 4}, // 0.08% - { 16384, 2, 4}, // 0.29% - { 20480, 5, 3}, // 0.12% - { 24576, 3, 2}, // 0.20% - { 28672, 7, 2}, // 0.08% - { 32768, 4, 2}, // 0.15% - { 40960, 5, 2}, // 0.12% - { 49152, 6, 2}, // 0.10% - { 57344, 7, 2}, // 0.08% - { 65536, 8, 2}, // 0.07% - { 73728, 9, 2}, // 0.07% - { 81920, 10, 2}, // 0.06% - { 90112, 11, 2}, // 0.05% - { 98304, 12, 2}, // 0.05% - { 106496, 13, 2}, // 0.05% - { 122880, 15, 2}, // 0.04% - { 131072, 16, 2}, // 0.04% - { 139264, 17, 2}, // 0.03% - { 155648, 19, 2}, // 0.03% - { 163840, 20, 2}, // 0.03% - { 180224, 22, 2}, // 0.03% - { 204800, 25, 2}, // 0.02% - { 221184, 27, 2}, // 0.02% - { 237568, 29, 2}, // 0.02% - { 262144, 32, 2}, // 0.02% -}; -#elif TCMALLOC_PAGE_SHIFT == 15 -static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 78; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalSizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalSizeClasses[SizeMap::kExperimentalSizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.15% - { 16, 1, 32}, // 0.15% - { 32, 1, 32}, // 0.15% - { 48, 1, 32}, // 0.24% - { 64, 1, 32}, // 0.15% - { 80, 1, 32}, // 0.29% - { 96, 1, 32}, // 0.24% - { 112, 1, 32}, // 0.34% - { 128, 1, 32}, // 0.15% - { 144, 1, 32}, // 0.39% - { 160, 1, 32}, // 0.54% - { 176, 1, 32}, // 0.24% - { 192, 1, 32}, // 0.54% - { 208, 1, 32}, // 0.49% - { 224, 1, 32}, // 0.34% - { 240, 1, 32}, // 0.54% - { 256, 1, 32}, // 0.15% - { 272, 1, 32}, // 0.54% - { 288, 1, 32}, // 0.84% - { 304, 1, 32}, // 0.89% - 
{ 336, 1, 32}, // 0.69% - { 368, 1, 32}, // 0.20% - { 416, 1, 32}, // 1.13% - { 448, 1, 32}, // 0.34% - { 480, 1, 32}, // 0.54% - { 512, 1, 32}, // 0.15% - { 576, 1, 32}, // 1.74% - { 640, 1, 32}, // 0.54% - { 704, 1, 32}, // 1.33% - { 768, 1, 32}, // 1.74% - { 832, 1, 32}, // 1.13% - { 896, 1, 32}, // 1.74% - { 1024, 1, 32}, // 0.15% - { 1152, 1, 32}, // 1.74% - { 1280, 1, 32}, // 2.55% - { 1408, 1, 32}, // 1.33% - { 1536, 1, 32}, // 1.74% - { 1664, 1, 32}, // 3.80% - { 1920, 1, 32}, // 0.54% - { 2048, 1, 32}, // 0.15% - { 2176, 1, 30}, // 0.54% - { 2304, 1, 28}, // 1.74% - { 2432, 1, 26}, // 3.80% - { 2688, 1, 24}, // 1.74% - { 2944, 1, 22}, // 1.33% - { 3200, 1, 20}, // 2.55% - { 3584, 1, 18}, // 1.74% - { 4096, 1, 16}, // 0.15% - { 4608, 1, 14}, // 1.74% - { 5376, 1, 12}, // 1.74% - { 5632, 2, 11}, // 5.86% - { 6528, 1, 10}, // 0.54% - { 7168, 2, 9}, // 1.66% - { 8192, 1, 8}, // 0.15% - { 9344, 2, 7}, // 0.27% - { 10880, 1, 6}, // 0.54% - { 13056, 2, 5}, // 0.47% - { 13952, 3, 4}, // 0.70% - { 16384, 1, 4}, // 0.15% - { 19072, 3, 3}, // 3.14% - { 21760, 2, 3}, // 0.47% - { 24576, 3, 2}, // 0.05% - { 28032, 6, 2}, // 0.22% - { 32768, 1, 2}, // 0.15% - { 38144, 5, 2}, // 7.41% - { 40960, 4, 2}, // 6.71% - { 49152, 3, 2}, // 0.05% - { 57344, 7, 2}, // 0.02% - { 65536, 2, 2}, // 0.07% - { 81920, 5, 2}, // 0.03% - { 98304, 3, 2}, // 0.05% - { 114688, 7, 2}, // 0.02% - { 131072, 4, 2}, // 0.04% - { 163840, 5, 2}, // 0.03% - { 196608, 6, 2}, // 0.02% - { 229376, 7, 2}, // 0.02% - { 262144, 8, 2}, // 0.02% -}; -#elif TCMALLOC_PAGE_SHIFT == 18 -static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 89; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalSizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalSizeClasses[SizeMap::kExperimentalSizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.02% - { 16, 1, 32}, // 0.02% - { 32, 1, 32}, // 0.02% - { 48, 1, 32}, // 0.02% - { 64, 1, 32}, // 0.02% - { 80, 1, 32}, // 0.04% - { 96, 1, 32}, // 0.04% - { 112, 1, 32}, // 0.04% - { 128, 1, 32}, // 0.02% - { 144, 1, 32}, // 0.04% - { 160, 1, 32}, // 0.04% - { 176, 1, 32}, // 0.05% - { 192, 1, 32}, // 0.04% - { 224, 1, 32}, // 0.04% - { 240, 1, 32}, // 0.04% - { 256, 1, 32}, // 0.02% - { 288, 1, 32}, // 0.04% - { 320, 1, 32}, // 0.04% - { 352, 1, 32}, // 0.12% - { 368, 1, 32}, // 0.07% - { 416, 1, 32}, // 0.04% - { 464, 1, 32}, // 0.19% - { 512, 1, 32}, // 0.02% - { 576, 1, 32}, // 0.04% - { 640, 1, 32}, // 0.17% - { 704, 1, 32}, // 0.12% - { 832, 1, 32}, // 0.04% - { 896, 1, 32}, // 0.21% - { 1024, 1, 32}, // 0.02% - { 1152, 1, 32}, // 0.26% - { 1280, 1, 32}, // 0.41% - { 1408, 1, 32}, // 0.12% - { 1536, 1, 32}, // 0.41% - { 1664, 1, 32}, // 0.36% - { 1792, 1, 32}, // 0.21% - { 1920, 1, 32}, // 0.41% - { 2048, 1, 32}, // 0.02% - { 2176, 1, 30}, // 0.41% - { 2304, 1, 28}, // 0.71% - { 2432, 1, 26}, // 0.76% - { 2688, 1, 24}, // 0.56% - { 2944, 1, 22}, // 0.07% - { 3072, 1, 21}, // 0.41% - { 3328, 1, 19}, // 1.00% - { 3584, 1, 18}, // 0.21% - { 3840, 1, 17}, // 0.41% - { 4096, 1, 16}, // 0.02% - { 4608, 1, 14}, // 1.61% - { 5120, 1, 12}, // 0.41% - { 5504, 1, 11}, // 1.35% - { 5760, 1, 11}, // 1.15% - { 6144, 1, 10}, // 1.61% - { 6528, 1, 10}, // 0.41% - { 7040, 1, 9}, // 0.66% - { 7168, 1, 9}, // 1.61% - { 7680, 1, 8}, // 0.41% - { 8192, 1, 8}, // 0.02% - { 8704, 1, 7}, // 0.41% - { 9344, 1, 7}, // 0.21% - { 9984, 1, 6}, // 1.00% - { 10880, 1, 6}, // 0.41% - { 11904, 1, 5}, // 0.12% - { 13056, 1, 5}, // 0.41% - { 14464, 1, 
4}, // 0.71% - { 16384, 1, 4}, // 0.02% - { 17408, 1, 3}, // 0.41% - { 20096, 1, 3}, // 0.36% - { 21760, 1, 3}, // 0.41% - { 23808, 1, 2}, // 0.12% - { 26112, 1, 2}, // 0.41% - { 29056, 1, 2}, // 0.26% - { 32768, 1, 2}, // 0.02% - { 37376, 1, 2}, // 0.21% - { 43648, 1, 2}, // 0.12% - { 45568, 2, 2}, // 4.61% - { 52352, 1, 2}, // 0.17% - { 56064, 2, 2}, // 3.92% - { 65536, 1, 2}, // 0.02% - { 74880, 2, 2}, // 0.03% - { 87296, 1, 2}, // 0.12% - { 104832, 2, 2}, // 0.03% - { 112256, 3, 2}, // 0.09% - { 131072, 1, 2}, // 0.02% - { 149760, 3, 2}, // 5.03% - { 174720, 2, 2}, // 0.03% - { 196608, 3, 2}, // 0.01% - { 209664, 4, 2}, // 0.03% - { 262144, 1, 2}, // 0.02% -}; -#elif TCMALLOC_PAGE_SHIFT == 12 -static_assert(kMaxSize == 8192, "kMaxSize mismatch"); -static const int kCount = 46; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalSizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalSizeClasses[SizeMap::kExperimentalSizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 1.17% - { 16, 1, 32}, // 1.17% - { 32, 1, 32}, // 1.17% - { 48, 1, 32}, // 1.57% - { 64, 1, 32}, // 1.17% - { 80, 1, 32}, // 1.57% - { 96, 1, 32}, // 2.78% - { 112, 1, 32}, // 2.78% - { 128, 1, 32}, // 1.17% - { 144, 1, 32}, // 2.78% - { 160, 1, 32}, // 3.60% - { 176, 1, 32}, // 2.37% - { 192, 1, 32}, // 2.78% - { 208, 1, 32}, // 4.86% - { 224, 1, 32}, // 2.78% - { 240, 1, 32}, // 1.57% - { 256, 1, 32}, // 1.17% - { 272, 1, 32}, // 1.57% - { 288, 1, 32}, // 2.78% - { 304, 1, 32}, // 4.86% - { 336, 1, 32}, // 2.78% - { 368, 1, 32}, // 2.37% - { 400, 1, 32}, // 3.60% - { 448, 1, 32}, // 2.78% - { 512, 1, 32}, // 1.17% - { 576, 2, 32}, // 2.18% - { 640, 2, 32}, // 7.29% - { 704, 2, 32}, // 6.40% - { 768, 2, 32}, // 7.29% - { 896, 2, 32}, // 2.18% - { 1024, 2, 32}, // 0.59% - { 1152, 3, 32}, // 7.08% - { 1280, 3, 32}, // 7.08% - { 1536, 3, 32}, // 0.39% - { 1792, 4, 32}, // 1.88% - { 2048, 4, 32}, // 0.29% - { 2304, 4, 28}, // 1.88% - { 2688, 4, 24}, // 1.88% - { 3200, 4, 20}, // 2.70% - { 3584, 7, 18}, // 0.17% - { 4096, 4, 16}, // 0.29% - { 4736, 5, 13}, // 8.36% - { 6144, 3, 10}, // 0.39% - { 7168, 7, 9}, // 0.17% - { 8192, 4, 8}, // 0.29% -}; -#else -#error "Unsupported TCMALLOC_PAGE_SHIFT value!" -#endif -#endif -// clang-format on - -} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/experimental_pow2_below64_size_class.cc b/contrib/libs/tcmalloc/tcmalloc/experimental_pow2_below64_size_class.cc deleted file mode 100755 index c6769f450ed9..000000000000 --- a/contrib/libs/tcmalloc/tcmalloc/experimental_pow2_below64_size_class.cc +++ /dev/null @@ -1,679 +0,0 @@ -// Copyright 2019 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tcmalloc/common.h" - -GOOGLE_MALLOC_SECTION_BEGIN -namespace tcmalloc { - -namespace tcmalloc_internal { - -// is fixed per-size-class overhead due to end-of-span fragmentation -// and other factors. 
For instance, if we have a 96 byte size class, and use a -// single 8KiB page, then we will hold 85 objects per span, and have 32 bytes -// left over. There is also a fixed component of 48 bytes of TCMalloc metadata -// per span. Together, the fixed overhead would be wasted/allocated = -// (32 + 48) / (8192 - 32) ~= 0.98%. -// There is also a dynamic component to overhead based on mismatches between the -// number of bytes requested and the number of bytes provided by the size class. -// Together they sum to the total overhead; for instance if you asked for a -// 50-byte allocation that rounds up to a 64-byte size class, the dynamic -// overhead would be 28%, and if were 22% it would mean (on average) -// 25 bytes of overhead for allocations of that size. - -// clang-format off -#if defined(__cpp_aligned_new) && __STDCPP_DEFAULT_NEW_ALIGNMENT__ <= 8 -#if TCMALLOC_PAGE_SHIFT == 13 -static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 82; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.59% - { 16, 1, 32}, // 0.59% - { 32, 1, 32}, // 0.59% - { 64, 1, 32}, // 0.59% - { 72, 1, 32}, // 1.28% - { 80, 1, 32}, // 0.98% - { 88, 1, 32}, // 0.68% - { 96, 1, 32}, // 0.98% - { 104, 1, 32}, // 1.58% - { 112, 1, 32}, // 0.78% - { 120, 1, 32}, // 0.98% - { 128, 1, 32}, // 0.59% - { 136, 1, 32}, // 0.98% - { 144, 1, 32}, // 2.18% - { 160, 1, 32}, // 0.98% - { 176, 1, 32}, // 1.78% - { 192, 1, 32}, // 2.18% - { 208, 1, 32}, // 1.58% - { 224, 1, 32}, // 2.18% - { 240, 1, 32}, // 0.98% - { 256, 1, 32}, // 0.59% - { 272, 1, 32}, // 0.98% - { 296, 1, 32}, // 3.10% - { 312, 1, 32}, // 1.58% - { 336, 1, 32}, // 2.18% - { 352, 1, 32}, // 1.78% - { 368, 1, 32}, // 1.78% - { 408, 1, 32}, // 0.98% - { 448, 1, 32}, // 2.18% - { 480, 1, 32}, // 0.98% - { 512, 1, 32}, // 0.59% - { 576, 1, 32}, // 2.18% - { 640, 1, 32}, // 7.29% - { 704, 1, 32}, // 6.40% - { 768, 1, 32}, // 7.29% - { 896, 1, 32}, // 2.18% - { 1024, 1, 32}, // 0.59% - { 1152, 2, 32}, // 1.88% - { 1280, 2, 32}, // 6.98% - { 1408, 2, 32}, // 6.10% - { 1536, 2, 32}, // 6.98% - { 1792, 2, 32}, // 1.88% - { 2048, 2, 32}, // 0.29% - { 2304, 2, 28}, // 1.88% - { 2688, 2, 24}, // 1.88% - { 2816, 3, 23}, // 9.30% - { 3200, 2, 20}, // 2.70% - { 3456, 3, 18}, // 1.79% - { 3584, 4, 18}, // 1.74% - { 4096, 1, 16}, // 0.29% - { 4736, 3, 13}, // 3.99% - { 5376, 2, 12}, // 1.88% - { 6144, 3, 10}, // 0.20% - { 6528, 4, 10}, // 0.54% - { 7168, 7, 9}, // 0.08% - { 8192, 1, 8}, // 0.29% - { 9472, 5, 6}, // 8.23% - { 10240, 4, 6}, // 6.82% - { 12288, 3, 5}, // 0.20% - { 13568, 5, 4}, // 0.75% - { 14336, 7, 4}, // 0.08% - { 16384, 2, 4}, // 0.29% - { 20480, 5, 3}, // 0.12% - { 24576, 3, 2}, // 0.20% - { 28672, 7, 2}, // 0.08% - { 32768, 4, 2}, // 0.15% - { 40960, 5, 2}, // 0.12% - { 49152, 6, 2}, // 0.10% - { 57344, 7, 2}, // 0.08% - { 65536, 8, 2}, // 0.07% - { 73728, 9, 2}, // 0.07% - { 81920, 10, 2}, // 0.06% - { 98304, 12, 2}, // 0.05% - { 114688, 14, 2}, // 0.04% - { 131072, 16, 2}, // 0.04% - { 147456, 18, 2}, // 0.03% - { 163840, 20, 2}, // 0.03% - { 180224, 22, 2}, // 0.03% - { 204800, 25, 2}, // 0.02% - { 237568, 29, 2}, // 0.02% - { 262144, 32, 2}, // 0.02% -}; -#elif TCMALLOC_PAGE_SHIFT == 15 -static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 74; -static_assert(kCount <= 
kNumClasses); -const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.15% - { 16, 1, 32}, // 0.15% - { 32, 1, 32}, // 0.15% - { 64, 1, 32}, // 0.15% - { 72, 1, 32}, // 0.17% - { 80, 1, 32}, // 0.29% - { 88, 1, 32}, // 0.24% - { 96, 1, 32}, // 0.24% - { 104, 1, 32}, // 0.17% - { 112, 1, 32}, // 0.34% - { 128, 1, 32}, // 0.15% - { 144, 1, 32}, // 0.39% - { 160, 1, 32}, // 0.54% - { 176, 1, 32}, // 0.24% - { 192, 1, 32}, // 0.54% - { 208, 1, 32}, // 0.49% - { 224, 1, 32}, // 0.34% - { 240, 1, 32}, // 0.54% - { 256, 1, 32}, // 0.15% - { 280, 1, 32}, // 0.17% - { 304, 1, 32}, // 0.89% - { 328, 1, 32}, // 1.06% - { 352, 1, 32}, // 0.24% - { 384, 1, 32}, // 0.54% - { 416, 1, 32}, // 1.13% - { 448, 1, 32}, // 0.34% - { 488, 1, 32}, // 0.37% - { 512, 1, 32}, // 0.15% - { 576, 1, 32}, // 1.74% - { 640, 1, 32}, // 0.54% - { 704, 1, 32}, // 1.33% - { 832, 1, 32}, // 1.13% - { 896, 1, 32}, // 1.74% - { 1024, 1, 32}, // 0.15% - { 1152, 1, 32}, // 1.74% - { 1280, 1, 32}, // 2.55% - { 1536, 1, 32}, // 1.74% - { 1792, 1, 32}, // 1.74% - { 2048, 1, 32}, // 0.15% - { 2176, 1, 30}, // 0.54% - { 2304, 1, 28}, // 1.74% - { 2688, 1, 24}, // 1.74% - { 2944, 1, 22}, // 1.33% - { 3200, 1, 20}, // 2.55% - { 3584, 1, 18}, // 1.74% - { 4096, 1, 16}, // 0.15% - { 4608, 1, 14}, // 1.74% - { 5376, 1, 12}, // 1.74% - { 6528, 1, 10}, // 0.54% - { 7168, 2, 9}, // 1.66% - { 8192, 1, 8}, // 0.15% - { 9344, 2, 7}, // 0.27% - { 10880, 1, 6}, // 0.54% - { 13952, 3, 4}, // 0.70% - { 16384, 1, 4}, // 0.15% - { 19072, 3, 3}, // 3.14% - { 21760, 2, 3}, // 0.47% - { 24576, 3, 2}, // 0.05% - { 28032, 6, 2}, // 0.22% - { 32768, 1, 2}, // 0.15% - { 38144, 5, 2}, // 7.41% - { 40960, 4, 2}, // 6.71% - { 49152, 3, 2}, // 0.05% - { 57344, 7, 2}, // 0.02% - { 65536, 2, 2}, // 0.07% - { 81920, 5, 2}, // 0.03% - { 98304, 3, 2}, // 0.05% - { 114688, 7, 2}, // 0.02% - { 131072, 4, 2}, // 0.04% - { 163840, 5, 2}, // 0.03% - { 196608, 6, 2}, // 0.02% - { 229376, 7, 2}, // 0.02% - { 262144, 8, 2}, // 0.02% -}; -#elif TCMALLOC_PAGE_SHIFT == 18 -static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 85; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.02% - { 16, 1, 32}, // 0.02% - { 32, 1, 32}, // 0.02% - { 64, 1, 32}, // 0.02% - { 72, 1, 32}, // 0.04% - { 80, 1, 32}, // 0.04% - { 88, 1, 32}, // 0.05% - { 96, 1, 32}, // 0.04% - { 104, 1, 32}, // 0.04% - { 112, 1, 32}, // 0.04% - { 128, 1, 32}, // 0.02% - { 144, 1, 32}, // 0.04% - { 160, 1, 32}, // 0.04% - { 176, 1, 32}, // 0.05% - { 192, 1, 32}, // 0.04% - { 208, 1, 32}, // 0.04% - { 240, 1, 32}, // 0.04% - { 256, 1, 32}, // 0.02% - { 304, 1, 32}, // 0.05% - { 336, 1, 32}, // 0.04% - { 360, 1, 32}, // 0.04% - { 408, 1, 32}, // 0.10% - { 456, 1, 32}, // 0.17% - { 512, 1, 32}, // 0.02% - { 576, 1, 32}, // 0.04% - { 640, 1, 32}, // 0.17% - { 704, 1, 32}, // 0.12% - { 768, 1, 32}, // 0.12% - { 832, 1, 32}, // 0.04% - { 896, 1, 32}, // 0.21% - { 1024, 1, 32}, // 0.02% - { 1152, 1, 32}, // 0.26% - { 1280, 1, 32}, // 0.41% - { 1536, 1, 32}, // 0.41% - { 1664, 1, 32}, // 0.36% - { 1792, 1, 32}, // 0.21% - { 1920, 1, 32}, // 0.41% - { 2048, 1, 32}, // 0.02% - { 2176, 
1, 30}, // 0.41% - { 2304, 1, 28}, // 0.71% - { 2432, 1, 26}, // 0.76% - { 2560, 1, 25}, // 0.41% - { 2688, 1, 24}, // 0.56% - { 2816, 1, 23}, // 0.12% - { 2944, 1, 22}, // 0.07% - { 3072, 1, 21}, // 0.41% - { 3328, 1, 19}, // 1.00% - { 3584, 1, 18}, // 0.21% - { 3840, 1, 17}, // 0.41% - { 4096, 1, 16}, // 0.02% - { 4736, 1, 13}, // 0.66% - { 5504, 1, 11}, // 1.35% - { 6144, 1, 10}, // 1.61% - { 6528, 1, 10}, // 0.41% - { 6784, 1, 9}, // 1.71% - { 7168, 1, 9}, // 1.61% - { 7680, 1, 8}, // 0.41% - { 8192, 1, 8}, // 0.02% - { 8704, 1, 7}, // 0.41% - { 9344, 1, 7}, // 0.21% - { 10880, 1, 6}, // 0.41% - { 11904, 1, 5}, // 0.12% - { 13056, 1, 5}, // 0.41% - { 14464, 1, 4}, // 0.71% - { 16384, 1, 4}, // 0.02% - { 18688, 1, 3}, // 0.21% - { 21760, 1, 3}, // 0.41% - { 26112, 1, 2}, // 0.41% - { 29056, 1, 2}, // 0.26% - { 32768, 1, 2}, // 0.02% - { 37376, 1, 2}, // 0.21% - { 43648, 1, 2}, // 0.12% - { 52352, 1, 2}, // 0.17% - { 56064, 2, 2}, // 3.92% - { 65536, 1, 2}, // 0.02% - { 74880, 2, 2}, // 0.03% - { 87296, 1, 2}, // 0.12% - { 104832, 2, 2}, // 0.03% - { 112256, 3, 2}, // 0.09% - { 131072, 1, 2}, // 0.02% - { 149760, 3, 2}, // 5.03% - { 174720, 2, 2}, // 0.03% - { 209664, 4, 2}, // 0.03% - { 262144, 1, 2}, // 0.02% -}; -#elif TCMALLOC_PAGE_SHIFT == 12 -static_assert(kMaxSize == 8192, "kMaxSize mismatch"); -static const int kCount = 42; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 1.17% - { 16, 1, 32}, // 1.17% - { 32, 1, 32}, // 1.17% - { 64, 1, 32}, // 1.17% - { 72, 1, 32}, // 2.78% - { 80, 1, 32}, // 1.57% - { 88, 1, 32}, // 2.37% - { 96, 1, 32}, // 2.78% - { 104, 1, 32}, // 2.17% - { 120, 1, 32}, // 1.57% - { 128, 1, 32}, // 1.17% - { 144, 1, 32}, // 2.78% - { 160, 1, 32}, // 3.60% - { 184, 1, 32}, // 2.37% - { 208, 1, 32}, // 4.86% - { 240, 1, 32}, // 1.57% - { 256, 1, 32}, // 1.17% - { 272, 1, 32}, // 1.57% - { 312, 1, 32}, // 2.17% - { 336, 1, 32}, // 2.78% - { 368, 1, 32}, // 2.37% - { 408, 1, 32}, // 1.57% - { 512, 1, 32}, // 1.17% - { 576, 2, 32}, // 2.18% - { 704, 2, 32}, // 6.40% - { 768, 2, 32}, // 7.29% - { 896, 2, 32}, // 2.18% - { 1024, 2, 32}, // 0.59% - { 1152, 3, 32}, // 7.08% - { 1280, 3, 32}, // 7.08% - { 1536, 3, 32}, // 0.39% - { 1792, 4, 32}, // 1.88% - { 2048, 4, 32}, // 0.29% - { 2304, 4, 28}, // 1.88% - { 2688, 4, 24}, // 1.88% - { 3456, 6, 18}, // 1.79% - { 4096, 4, 16}, // 0.29% - { 5376, 4, 12}, // 1.88% - { 6144, 3, 10}, // 0.39% - { 7168, 7, 9}, // 0.17% - { 8192, 4, 8}, // 0.29% -}; -#else -#error "Unsupported TCMALLOC_PAGE_SHIFT value!" 
-#endif -#else -#if TCMALLOC_PAGE_SHIFT == 13 -static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 82; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.59% - { 16, 1, 32}, // 0.59% - { 32, 1, 32}, // 0.59% - { 64, 1, 32}, // 0.59% - { 80, 1, 32}, // 0.98% - { 96, 1, 32}, // 0.98% - { 112, 1, 32}, // 0.78% - { 128, 1, 32}, // 0.59% - { 144, 1, 32}, // 2.18% - { 160, 1, 32}, // 0.98% - { 176, 1, 32}, // 1.78% - { 192, 1, 32}, // 2.18% - { 208, 1, 32}, // 1.58% - { 224, 1, 32}, // 2.18% - { 240, 1, 32}, // 0.98% - { 256, 1, 32}, // 0.59% - { 272, 1, 32}, // 0.98% - { 288, 1, 32}, // 2.18% - { 304, 1, 32}, // 4.25% - { 320, 1, 32}, // 3.00% - { 336, 1, 32}, // 2.18% - { 352, 1, 32}, // 1.78% - { 368, 1, 32}, // 1.78% - { 384, 1, 32}, // 2.18% - { 400, 1, 32}, // 3.00% - { 416, 1, 32}, // 4.25% - { 448, 1, 32}, // 2.18% - { 480, 1, 32}, // 0.98% - { 512, 1, 32}, // 0.59% - { 576, 1, 32}, // 2.18% - { 640, 1, 32}, // 7.29% - { 704, 1, 32}, // 6.40% - { 768, 1, 32}, // 7.29% - { 896, 1, 32}, // 2.18% - { 1024, 1, 32}, // 0.59% - { 1152, 2, 32}, // 1.88% - { 1280, 2, 32}, // 6.98% - { 1408, 2, 32}, // 6.10% - { 1536, 2, 32}, // 6.98% - { 1792, 2, 32}, // 1.88% - { 2048, 2, 32}, // 0.29% - { 2304, 2, 28}, // 1.88% - { 2688, 2, 24}, // 1.88% - { 2816, 3, 23}, // 9.30% - { 3200, 2, 20}, // 2.70% - { 3456, 3, 18}, // 1.79% - { 3584, 4, 18}, // 1.74% - { 4096, 1, 16}, // 0.29% - { 4736, 3, 13}, // 3.99% - { 5376, 2, 12}, // 1.88% - { 6144, 3, 10}, // 0.20% - { 6528, 4, 10}, // 0.54% - { 7168, 7, 9}, // 0.08% - { 8192, 1, 8}, // 0.29% - { 9472, 5, 6}, // 8.23% - { 10240, 4, 6}, // 6.82% - { 12288, 3, 5}, // 0.20% - { 13568, 5, 4}, // 0.75% - { 14336, 7, 4}, // 0.08% - { 16384, 2, 4}, // 0.29% - { 20480, 5, 3}, // 0.12% - { 24576, 3, 2}, // 0.20% - { 28672, 7, 2}, // 0.08% - { 32768, 4, 2}, // 0.15% - { 40960, 5, 2}, // 0.12% - { 49152, 6, 2}, // 0.10% - { 57344, 7, 2}, // 0.08% - { 65536, 8, 2}, // 0.07% - { 73728, 9, 2}, // 0.07% - { 81920, 10, 2}, // 0.06% - { 90112, 11, 2}, // 0.05% - { 98304, 12, 2}, // 0.05% - { 106496, 13, 2}, // 0.05% - { 114688, 14, 2}, // 0.04% - { 131072, 16, 2}, // 0.04% - { 147456, 18, 2}, // 0.03% - { 163840, 20, 2}, // 0.03% - { 180224, 22, 2}, // 0.03% - { 204800, 25, 2}, // 0.02% - { 237568, 29, 2}, // 0.02% - { 262144, 32, 2}, // 0.02% -}; -#elif TCMALLOC_PAGE_SHIFT == 15 -static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 74; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.15% - { 16, 1, 32}, // 0.15% - { 32, 1, 32}, // 0.15% - { 64, 1, 32}, // 0.15% - { 80, 1, 32}, // 0.29% - { 96, 1, 32}, // 0.24% - { 112, 1, 32}, // 0.34% - { 128, 1, 32}, // 0.15% - { 144, 1, 32}, // 0.39% - { 160, 1, 32}, // 0.54% - { 176, 1, 32}, // 0.24% - { 192, 1, 32}, // 0.54% - { 208, 1, 32}, // 0.49% - { 224, 1, 32}, // 0.34% - { 240, 1, 32}, // 0.54% - { 256, 1, 32}, // 0.15% - { 272, 1, 32}, // 0.54% - { 288, 1, 32}, // 0.84% - { 304, 1, 32}, // 0.89% - { 320, 1, 32}, // 0.54% - { 352, 1, 32}, // 0.24% - { 384, 1, 32}, // 0.54% - { 416, 1, 32}, // 1.13% - { 448, 1, 
32}, // 0.34% - { 480, 1, 32}, // 0.54% - { 512, 1, 32}, // 0.15% - { 576, 1, 32}, // 1.74% - { 640, 1, 32}, // 0.54% - { 704, 1, 32}, // 1.33% - { 768, 1, 32}, // 1.74% - { 832, 1, 32}, // 1.13% - { 896, 1, 32}, // 1.74% - { 1024, 1, 32}, // 0.15% - { 1152, 1, 32}, // 1.74% - { 1280, 1, 32}, // 2.55% - { 1408, 1, 32}, // 1.33% - { 1536, 1, 32}, // 1.74% - { 1792, 1, 32}, // 1.74% - { 2048, 1, 32}, // 0.15% - { 2176, 1, 30}, // 0.54% - { 2304, 1, 28}, // 1.74% - { 2688, 1, 24}, // 1.74% - { 2944, 1, 22}, // 1.33% - { 3200, 1, 20}, // 2.55% - { 3584, 1, 18}, // 1.74% - { 4096, 1, 16}, // 0.15% - { 4608, 1, 14}, // 1.74% - { 5376, 1, 12}, // 1.74% - { 6528, 1, 10}, // 0.54% - { 7168, 2, 9}, // 1.66% - { 8192, 1, 8}, // 0.15% - { 9344, 2, 7}, // 0.27% - { 10880, 1, 6}, // 0.54% - { 13952, 3, 4}, // 0.70% - { 16384, 1, 4}, // 0.15% - { 19072, 3, 3}, // 3.14% - { 21760, 2, 3}, // 0.47% - { 24576, 3, 2}, // 0.05% - { 28032, 6, 2}, // 0.22% - { 32768, 1, 2}, // 0.15% - { 38144, 5, 2}, // 7.41% - { 40960, 4, 2}, // 6.71% - { 49152, 3, 2}, // 0.05% - { 57344, 7, 2}, // 0.02% - { 65536, 2, 2}, // 0.07% - { 81920, 5, 2}, // 0.03% - { 98304, 3, 2}, // 0.05% - { 114688, 7, 2}, // 0.02% - { 131072, 4, 2}, // 0.04% - { 163840, 5, 2}, // 0.03% - { 196608, 6, 2}, // 0.02% - { 229376, 7, 2}, // 0.02% - { 262144, 8, 2}, // 0.02% -}; -#elif TCMALLOC_PAGE_SHIFT == 18 -static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 85; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.02% - { 16, 1, 32}, // 0.02% - { 32, 1, 32}, // 0.02% - { 64, 1, 32}, // 0.02% - { 80, 1, 32}, // 0.04% - { 96, 1, 32}, // 0.04% - { 112, 1, 32}, // 0.04% - { 128, 1, 32}, // 0.02% - { 144, 1, 32}, // 0.04% - { 160, 1, 32}, // 0.04% - { 176, 1, 32}, // 0.05% - { 192, 1, 32}, // 0.04% - { 208, 1, 32}, // 0.04% - { 240, 1, 32}, // 0.04% - { 256, 1, 32}, // 0.02% - { 304, 1, 32}, // 0.05% - { 336, 1, 32}, // 0.04% - { 368, 1, 32}, // 0.07% - { 416, 1, 32}, // 0.04% - { 464, 1, 32}, // 0.19% - { 512, 1, 32}, // 0.02% - { 576, 1, 32}, // 0.04% - { 640, 1, 32}, // 0.17% - { 704, 1, 32}, // 0.12% - { 768, 1, 32}, // 0.12% - { 832, 1, 32}, // 0.04% - { 896, 1, 32}, // 0.21% - { 1024, 1, 32}, // 0.02% - { 1152, 1, 32}, // 0.26% - { 1280, 1, 32}, // 0.41% - { 1408, 1, 32}, // 0.12% - { 1536, 1, 32}, // 0.41% - { 1664, 1, 32}, // 0.36% - { 1792, 1, 32}, // 0.21% - { 1920, 1, 32}, // 0.41% - { 2048, 1, 32}, // 0.02% - { 2176, 1, 30}, // 0.41% - { 2304, 1, 28}, // 0.71% - { 2432, 1, 26}, // 0.76% - { 2560, 1, 25}, // 0.41% - { 2688, 1, 24}, // 0.56% - { 2816, 1, 23}, // 0.12% - { 2944, 1, 22}, // 0.07% - { 3072, 1, 21}, // 0.41% - { 3200, 1, 20}, // 1.15% - { 3328, 1, 19}, // 1.00% - { 3584, 1, 18}, // 0.21% - { 3840, 1, 17}, // 0.41% - { 4096, 1, 16}, // 0.02% - { 4736, 1, 13}, // 0.66% - { 5504, 1, 11}, // 1.35% - { 6144, 1, 10}, // 1.61% - { 6528, 1, 10}, // 0.41% - { 6784, 1, 9}, // 1.71% - { 7168, 1, 9}, // 1.61% - { 7680, 1, 8}, // 0.41% - { 8192, 1, 8}, // 0.02% - { 8704, 1, 7}, // 0.41% - { 9344, 1, 7}, // 0.21% - { 10368, 1, 6}, // 1.15% - { 11392, 1, 5}, // 0.07% - { 12416, 1, 5}, // 0.56% - { 13696, 1, 4}, // 0.76% - { 14464, 1, 4}, // 0.71% - { 16384, 1, 4}, // 0.02% - { 18688, 1, 3}, // 0.21% - { 21760, 1, 3}, // 0.41% - { 26112, 1, 2}, // 0.41% - { 29056, 1, 2}, // 0.26% - { 32768, 
1, 2}, // 0.02% - { 37376, 1, 2}, // 0.21% - { 43648, 1, 2}, // 0.12% - { 52352, 1, 2}, // 0.17% - { 56064, 2, 2}, // 3.92% - { 65536, 1, 2}, // 0.02% - { 74880, 2, 2}, // 0.03% - { 87296, 1, 2}, // 0.12% - { 104832, 2, 2}, // 0.03% - { 112256, 3, 2}, // 0.09% - { 131072, 1, 2}, // 0.02% - { 149760, 3, 2}, // 5.03% - { 174720, 2, 2}, // 0.03% - { 209664, 4, 2}, // 0.03% - { 262144, 1, 2}, // 0.02% -}; -#elif TCMALLOC_PAGE_SHIFT == 12 -static_assert(kMaxSize == 8192, "kMaxSize mismatch"); -static const int kCount = 42; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 1.17% - { 16, 1, 32}, // 1.17% - { 32, 1, 32}, // 1.17% - { 64, 1, 32}, // 1.17% - { 80, 1, 32}, // 1.57% - { 96, 1, 32}, // 2.78% - { 112, 1, 32}, // 2.78% - { 128, 1, 32}, // 1.17% - { 144, 1, 32}, // 2.78% - { 160, 1, 32}, // 3.60% - { 176, 1, 32}, // 2.37% - { 192, 1, 32}, // 2.78% - { 208, 1, 32}, // 4.86% - { 240, 1, 32}, // 1.57% - { 256, 1, 32}, // 1.17% - { 272, 1, 32}, // 1.57% - { 304, 1, 32}, // 4.86% - { 336, 1, 32}, // 2.78% - { 368, 1, 32}, // 2.37% - { 400, 1, 32}, // 3.60% - { 448, 1, 32}, // 2.78% - { 512, 1, 32}, // 1.17% - { 576, 2, 32}, // 2.18% - { 640, 2, 32}, // 7.29% - { 704, 2, 32}, // 6.40% - { 768, 2, 32}, // 7.29% - { 896, 2, 32}, // 2.18% - { 1024, 2, 32}, // 0.59% - { 1152, 3, 32}, // 7.08% - { 1280, 3, 32}, // 7.08% - { 1536, 3, 32}, // 0.39% - { 1792, 4, 32}, // 1.88% - { 2048, 4, 32}, // 0.29% - { 2304, 4, 28}, // 1.88% - { 2688, 4, 24}, // 1.88% - { 3456, 6, 18}, // 1.79% - { 4096, 4, 16}, // 0.29% - { 5376, 4, 12}, // 1.88% - { 6144, 3, 10}, // 0.39% - { 7168, 7, 9}, // 0.17% - { 8192, 4, 8}, // 0.29% -}; -#else -#error "Unsupported TCMALLOC_PAGE_SHIFT value!" -#endif -#endif -// clang-format on - -} // namespace tcmalloc_internal -} // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/experimental_pow2_size_class.cc b/contrib/libs/tcmalloc/tcmalloc/experimental_pow2_size_class.cc old mode 100755 new mode 100644 index 1e6da051cae2..a24e95d9aeb2 --- a/contrib/libs/tcmalloc/tcmalloc/experimental_pow2_size_class.cc +++ b/contrib/libs/tcmalloc/tcmalloc/experimental_pow2_size_class.cc @@ -12,122 +12,153 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "absl/types/span.h" #include "tcmalloc/common.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/size_class_info.h" GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { - namespace tcmalloc_internal { -// is fixed per-size-class overhead due to end-of-span fragmentation -// and other factors. For instance, if we have a 96 byte size class, and use a -// single 8KiB page, then we will hold 85 objects per span, and have 32 bytes -// left over. There is also a fixed component of 48 bytes of TCMalloc metadata -// per span. Together, the fixed overhead would be wasted/allocated = -// (32 + 48) / (8192 - 32) ~= 0.98%. -// There is also a dynamic component to overhead based on mismatches between the -// number of bytes requested and the number of bytes provided by the size class. 
-// Together they sum to the total overhead; for instance if you asked for a -// 50-byte allocation that rounds up to a 64-byte size class, the dynamic -// overhead would be 28%, and if were 22% it would mean (on average) -// 25 bytes of overhead for allocations of that size. +// Columns in the following tables: +// - bytes: size of the size class +// - pages: number of pages per span +// - batch: preferred number of objects for transfers between caches +// - class: size class number +// - objs: number of objects per span +// - waste/fixed: fixed per-size-class overhead due to end-of-span fragmentation +// and other factors. For instance, if we have a 96 byte size class, and use +// a single 8KiB page, then we will hold 85 objects per span, and have 32 +// bytes left over. There is also a fixed component of 48 bytes of TCMalloc +// metadata per span. Together, the fixed overhead would be wasted/allocated +// = (32 + 48) / (8192 - 32) ~= 0.98%. +// - waste/sampling: overhead due to heap sampling +// (rounding to page size, proxy object, metadata). +// - inc: increment from the previous size class. This caps the dynamic +// overhead component based on mismatches between the number of bytes +// requested and the number of bytes provided by the size class. Together +// they sum to the total overhead; for instance if you asked for a 50-byte +// allocation that rounds up to a 64-byte size class, the dynamic overhead +// would be 28%, and if waste were 22% it would mean (on average) 25 bytes +// of overhead for allocations of that size. // clang-format off #if defined(__cpp_aligned_new) && __STDCPP_DEFAULT_NEW_ALIGNMENT__ <= 8 #if TCMALLOC_PAGE_SHIFT == 13 static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 17; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.59% - { 16, 1, 32}, // 0.59% - { 32, 1, 32}, // 0.59% - { 64, 1, 32}, // 0.59% - { 128, 1, 32}, // 0.59% - { 256, 1, 32}, // 0.59% - { 512, 1, 32}, // 0.59% - { 1024, 1, 32}, // 0.59% - { 2048, 2, 32}, // 0.29% - { 4096, 1, 16}, // 0.29% - { 8192, 1, 8}, // 0.29% - { 16384, 2, 4}, // 0.29% - { 32768, 4, 2}, // 0.15% - { 65536, 8, 2}, // 0.07% - { 131072, 16, 2}, // 0.04% - { 262144, 32, 2}, // 0.02% +static constexpr SizeClassAssumptions Assumptions{ + .has_expanded_classes = true, + .span_size = 48, + .sampling_interval = 2097152, + .large_size = 1024, + .large_size_alignment = 128, +}; +static constexpr SizeClassInfo List[] = { +// | waste | +// bytes pages batch class objs |fixed sampling| inc + { 0, 0, 0}, // 0 0 0.00% 0.00% 0.00% + { 8, 1, 32}, // 0 1024 0.58% 0.42% 0.00% + { 16, 1, 32}, // 1 512 0.58% 0.42% 100.00% + { 32, 1, 32}, // 2 256 0.58% 0.42% 100.00% + { 64, 1, 32}, // 3 128 0.58% 0.42% 100.00% + { 128, 1, 32}, // 4 64 0.58% 0.42% 100.00% + { 256, 1, 32}, // 5 32 0.58% 0.42% 100.00% + { 512, 1, 32}, // 6 16 0.58% 0.42% 100.00% + { 1024, 1, 32}, // 7 8 0.58% 0.42% 100.00% + { 2048, 2, 32}, // 8 8 0.29% 0.42% 100.00% + { 4096, 1, 16}, // 9 2 0.58% 0.43% 100.00% + { 8192, 1, 8}, // 10 1 0.58% 0.03% 100.00% + { 16384, 2, 4}, // 11 1 0.29% 0.03% 100.00% + { 32768, 4, 2}, // 12 1 0.15% 0.03% 100.00% + { 65536, 8, 2}, // 13 1 0.07% 0.03% 100.00% + {131072, 16, 2}, // 14 1 0.04% 0.03% 100.00% + {262144, 32, 2}, // 15 1 0.02% 0.03% 100.00% }; #elif TCMALLOC_PAGE_SHIFT == 15 static_assert(kMaxSize 
== 262144, "kMaxSize mismatch"); -static const int kCount = 17; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.15% - { 16, 1, 32}, // 0.15% - { 32, 1, 32}, // 0.15% - { 64, 1, 32}, // 0.15% - { 128, 1, 32}, // 0.15% - { 256, 1, 32}, // 0.15% - { 512, 1, 32}, // 0.15% - { 1024, 1, 32}, // 0.15% - { 2048, 1, 32}, // 0.15% - { 4096, 1, 16}, // 0.15% - { 8192, 1, 8}, // 0.15% - { 16384, 1, 4}, // 0.15% - { 32768, 1, 2}, // 0.15% - { 65536, 2, 2}, // 0.07% - { 131072, 4, 2}, // 0.04% - { 262144, 8, 2}, // 0.02% +static constexpr SizeClassAssumptions Assumptions{ + .has_expanded_classes = true, + .span_size = 48, + .sampling_interval = 2097152, + .large_size = 1024, + .large_size_alignment = 128, +}; +static constexpr SizeClassInfo List[] = { +// | waste | +// bytes pages batch class objs |fixed sampling| inc + { 0, 0, 0}, // 0 0 0.00% 0.00% 0.00% + { 8, 1, 32}, // 0 4096 0.15% 1.60% 0.00% + { 16, 1, 32}, // 1 2048 0.15% 1.60% 100.00% + { 32, 1, 32}, // 2 1024 0.15% 1.60% 100.00% + { 64, 1, 32}, // 3 512 0.15% 1.60% 100.00% + { 128, 1, 32}, // 4 256 0.15% 1.60% 100.00% + { 256, 1, 32}, // 5 128 0.15% 1.60% 100.00% + { 512, 1, 32}, // 6 64 0.15% 1.60% 100.00% + { 1024, 1, 32}, // 7 32 0.15% 1.60% 100.00% + { 2048, 1, 32}, // 8 16 0.15% 1.60% 100.00% + { 4096, 1, 16}, // 9 8 0.15% 1.60% 100.00% + { 8192, 1, 8}, // 10 4 0.15% 1.60% 100.00% + { 16384, 1, 4}, // 11 2 0.15% 1.60% 100.00% + { 32768, 1, 2}, // 12 1 0.15% 0.03% 100.00% + { 65536, 2, 2}, // 13 1 0.07% 0.03% 100.00% + {131072, 4, 2}, // 14 1 0.04% 0.03% 100.00% + {262144, 8, 2}, // 15 1 0.02% 0.03% 100.00% }; #elif TCMALLOC_PAGE_SHIFT == 18 static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 17; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.02% - { 16, 1, 32}, // 0.02% - { 32, 1, 32}, // 0.02% - { 64, 1, 32}, // 0.02% - { 128, 1, 32}, // 0.02% - { 256, 1, 32}, // 0.02% - { 512, 1, 32}, // 0.02% - { 1024, 1, 32}, // 0.02% - { 2048, 1, 32}, // 0.02% - { 4096, 1, 16}, // 0.02% - { 8192, 1, 8}, // 0.02% - { 16384, 1, 4}, // 0.02% - { 32768, 1, 2}, // 0.02% - { 65536, 1, 2}, // 0.02% - { 131072, 1, 2}, // 0.02% - { 262144, 1, 2}, // 0.02% +static constexpr SizeClassAssumptions Assumptions{ + .has_expanded_classes = true, + .span_size = 48, + .sampling_interval = 2097152, + .large_size = 1024, + .large_size_alignment = 128, +}; +static constexpr SizeClassInfo List[] = { +// | waste | +// bytes pages batch class objs |fixed sampling| inc + { 0, 0, 0}, // 0 0 0.00% 0.00% 0.00% + { 8, 1, 32}, // 0 32768 0.02% 12.53% 0.00% + { 16, 1, 32}, // 1 16384 0.02% 12.53% 100.00% + { 32, 1, 32}, // 2 8192 0.02% 12.53% 100.00% + { 64, 1, 32}, // 3 4096 0.02% 12.53% 100.00% + { 128, 1, 32}, // 4 2048 0.02% 12.53% 100.00% + { 256, 1, 32}, // 5 1024 0.02% 12.53% 100.00% + { 512, 1, 32}, // 6 512 0.02% 12.53% 100.00% + { 1024, 1, 32}, // 7 256 0.02% 12.53% 100.00% + { 2048, 1, 32}, // 8 128 0.02% 12.53% 100.00% + { 4096, 1, 16}, // 9 64 0.02% 12.53% 100.00% + { 8192, 1, 8}, // 10 32 0.02% 12.53% 100.00% + { 16384, 1, 4}, // 11 16 0.02% 12.53% 100.00% + { 32768, 1, 2}, // 12 8 0.02% 12.53% 100.00% + { 65536, 1, 
2}, // 13 4 0.02% 12.53% 100.00% + {131072, 1, 2}, // 14 2 0.02% 12.53% 100.00% + {262144, 1, 2}, // 15 1 0.02% 0.03% 100.00% }; #elif TCMALLOC_PAGE_SHIFT == 12 static_assert(kMaxSize == 8192, "kMaxSize mismatch"); -static const int kCount = 12; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 1.17% - { 16, 1, 32}, // 1.17% - { 32, 1, 32}, // 1.17% - { 64, 1, 32}, // 1.17% - { 128, 1, 32}, // 1.17% - { 256, 1, 32}, // 1.17% - { 512, 1, 32}, // 1.17% - { 1024, 2, 32}, // 0.59% - { 2048, 4, 32}, // 0.29% - { 4096, 4, 16}, // 0.29% - { 8192, 4, 8}, // 0.29% +static constexpr SizeClassAssumptions Assumptions{ + .has_expanded_classes = false, + .span_size = 48, + .sampling_interval = 524288, + .large_size = 1024, + .large_size_alignment = 128, +}; +static constexpr SizeClassInfo List[] = { +// | waste | +// bytes pages batch class objs |fixed sampling| inc + { 0, 0, 0}, // 0 0 0.00% 0.00% 0.00% + { 8, 1, 32}, // 0 512 1.16% 0.92% 0.00% + { 16, 1, 32}, // 1 256 1.16% 0.92% 100.00% + { 32, 1, 32}, // 2 128 1.16% 0.92% 100.00% + { 64, 1, 32}, // 3 64 1.16% 0.92% 100.00% + { 128, 1, 32}, // 4 32 1.16% 0.92% 100.00% + { 256, 1, 32}, // 5 16 1.16% 0.92% 100.00% + { 512, 1, 32}, // 6 8 1.16% 0.92% 100.00% + { 1024, 2, 32}, // 7 8 0.58% 0.92% 100.00% + { 2048, 4, 32}, // 8 8 0.29% 0.92% 100.00% + { 4096, 4, 16}, // 9 4 0.29% 0.92% 100.00% + { 8192, 4, 8}, // 10 2 0.29% 1.70% 100.00% }; #else #error "Unsupported TCMALLOC_PAGE_SHIFT value!" @@ -135,98 +166,118 @@ const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimental #else #if TCMALLOC_PAGE_SHIFT == 13 static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 17; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.59% - { 16, 1, 32}, // 0.59% - { 32, 1, 32}, // 0.59% - { 64, 1, 32}, // 0.59% - { 128, 1, 32}, // 0.59% - { 256, 1, 32}, // 0.59% - { 512, 1, 32}, // 0.59% - { 1024, 1, 32}, // 0.59% - { 2048, 2, 32}, // 0.29% - { 4096, 1, 16}, // 0.29% - { 8192, 1, 8}, // 0.29% - { 16384, 2, 4}, // 0.29% - { 32768, 4, 2}, // 0.15% - { 65536, 8, 2}, // 0.07% - { 131072, 16, 2}, // 0.04% - { 262144, 32, 2}, // 0.02% +static constexpr SizeClassAssumptions Assumptions{ + .has_expanded_classes = true, + .span_size = 48, + .sampling_interval = 2097152, + .large_size = 1024, + .large_size_alignment = 128, +}; +static constexpr SizeClassInfo List[] = { +// | waste | +// bytes pages batch class objs |fixed sampling| inc + { 0, 0, 0}, // 0 0 0.00% 0.00% 0.00% + { 8, 1, 32}, // 0 1024 0.58% 0.42% 0.00% + { 16, 1, 32}, // 1 512 0.58% 0.42% 100.00% + { 32, 1, 32}, // 2 256 0.58% 0.42% 100.00% + { 64, 1, 32}, // 3 128 0.58% 0.42% 100.00% + { 128, 1, 32}, // 4 64 0.58% 0.42% 100.00% + { 256, 1, 32}, // 5 32 0.58% 0.42% 100.00% + { 512, 1, 32}, // 6 16 0.58% 0.42% 100.00% + { 1024, 1, 32}, // 7 8 0.58% 0.42% 100.00% + { 2048, 2, 32}, // 8 8 0.29% 0.42% 100.00% + { 4096, 1, 16}, // 9 2 0.58% 0.43% 100.00% + { 8192, 1, 8}, // 10 1 0.58% 0.03% 100.00% + { 16384, 2, 4}, // 11 1 0.29% 0.03% 100.00% + { 32768, 4, 2}, // 12 1 0.15% 0.03% 100.00% + { 65536, 8, 2}, // 13 1 0.07% 0.03% 100.00% + {131072, 16, 2}, // 14 1 
0.04% 0.03% 100.00% + {262144, 32, 2}, // 15 1 0.02% 0.03% 100.00% }; #elif TCMALLOC_PAGE_SHIFT == 15 static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 17; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.15% - { 16, 1, 32}, // 0.15% - { 32, 1, 32}, // 0.15% - { 64, 1, 32}, // 0.15% - { 128, 1, 32}, // 0.15% - { 256, 1, 32}, // 0.15% - { 512, 1, 32}, // 0.15% - { 1024, 1, 32}, // 0.15% - { 2048, 1, 32}, // 0.15% - { 4096, 1, 16}, // 0.15% - { 8192, 1, 8}, // 0.15% - { 16384, 1, 4}, // 0.15% - { 32768, 1, 2}, // 0.15% - { 65536, 2, 2}, // 0.07% - { 131072, 4, 2}, // 0.04% - { 262144, 8, 2}, // 0.02% +static constexpr SizeClassAssumptions Assumptions{ + .has_expanded_classes = true, + .span_size = 48, + .sampling_interval = 2097152, + .large_size = 1024, + .large_size_alignment = 128, +}; +static constexpr SizeClassInfo List[] = { +// | waste | +// bytes pages batch class objs |fixed sampling| inc + { 0, 0, 0}, // 0 0 0.00% 0.00% 0.00% + { 8, 1, 32}, // 0 4096 0.15% 1.60% 0.00% + { 16, 1, 32}, // 1 2048 0.15% 1.60% 100.00% + { 32, 1, 32}, // 2 1024 0.15% 1.60% 100.00% + { 64, 1, 32}, // 3 512 0.15% 1.60% 100.00% + { 128, 1, 32}, // 4 256 0.15% 1.60% 100.00% + { 256, 1, 32}, // 5 128 0.15% 1.60% 100.00% + { 512, 1, 32}, // 6 64 0.15% 1.60% 100.00% + { 1024, 1, 32}, // 7 32 0.15% 1.60% 100.00% + { 2048, 1, 32}, // 8 16 0.15% 1.60% 100.00% + { 4096, 1, 16}, // 9 8 0.15% 1.60% 100.00% + { 8192, 1, 8}, // 10 4 0.15% 1.60% 100.00% + { 16384, 1, 4}, // 11 2 0.15% 1.60% 100.00% + { 32768, 1, 2}, // 12 1 0.15% 0.03% 100.00% + { 65536, 2, 2}, // 13 1 0.07% 0.03% 100.00% + {131072, 4, 2}, // 14 1 0.04% 0.03% 100.00% + {262144, 8, 2}, // 15 1 0.02% 0.03% 100.00% }; #elif TCMALLOC_PAGE_SHIFT == 18 static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 17; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.02% - { 16, 1, 32}, // 0.02% - { 32, 1, 32}, // 0.02% - { 64, 1, 32}, // 0.02% - { 128, 1, 32}, // 0.02% - { 256, 1, 32}, // 0.02% - { 512, 1, 32}, // 0.02% - { 1024, 1, 32}, // 0.02% - { 2048, 1, 32}, // 0.02% - { 4096, 1, 16}, // 0.02% - { 8192, 1, 8}, // 0.02% - { 16384, 1, 4}, // 0.02% - { 32768, 1, 2}, // 0.02% - { 65536, 1, 2}, // 0.02% - { 131072, 1, 2}, // 0.02% - { 262144, 1, 2}, // 0.02% +static constexpr SizeClassAssumptions Assumptions{ + .has_expanded_classes = true, + .span_size = 48, + .sampling_interval = 2097152, + .large_size = 1024, + .large_size_alignment = 128, +}; +static constexpr SizeClassInfo List[] = { +// | waste | +// bytes pages batch class objs |fixed sampling| inc + { 0, 0, 0}, // 0 0 0.00% 0.00% 0.00% + { 8, 1, 32}, // 0 32768 0.02% 12.53% 0.00% + { 16, 1, 32}, // 1 16384 0.02% 12.53% 100.00% + { 32, 1, 32}, // 2 8192 0.02% 12.53% 100.00% + { 64, 1, 32}, // 3 4096 0.02% 12.53% 100.00% + { 128, 1, 32}, // 4 2048 0.02% 12.53% 100.00% + { 256, 1, 32}, // 5 1024 0.02% 12.53% 100.00% + { 512, 1, 32}, // 6 512 0.02% 12.53% 100.00% + { 1024, 1, 32}, // 7 256 0.02% 12.53% 100.00% + { 2048, 1, 32}, // 8 128 0.02% 12.53% 100.00% + { 4096, 1, 16}, // 9 64 0.02% 12.53% 100.00% + { 8192, 1, 8}, // 10 32 0.02% 
12.53% 100.00% + { 16384, 1, 4}, // 11 16 0.02% 12.53% 100.00% + { 32768, 1, 2}, // 12 8 0.02% 12.53% 100.00% + { 65536, 1, 2}, // 13 4 0.02% 12.53% 100.00% + {131072, 1, 2}, // 14 2 0.02% 12.53% 100.00% + {262144, 1, 2}, // 15 1 0.02% 0.03% 100.00% }; #elif TCMALLOC_PAGE_SHIFT == 12 static_assert(kMaxSize == 8192, "kMaxSize mismatch"); -static const int kCount = 12; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 1.17% - { 16, 1, 32}, // 1.17% - { 32, 1, 32}, // 1.17% - { 64, 1, 32}, // 1.17% - { 128, 1, 32}, // 1.17% - { 256, 1, 32}, // 1.17% - { 512, 1, 32}, // 1.17% - { 1024, 2, 32}, // 0.59% - { 2048, 4, 32}, // 0.29% - { 4096, 4, 16}, // 0.29% - { 8192, 4, 8}, // 0.29% +static constexpr SizeClassAssumptions Assumptions{ + .has_expanded_classes = false, + .span_size = 48, + .sampling_interval = 524288, + .large_size = 1024, + .large_size_alignment = 128, +}; +static constexpr SizeClassInfo List[] = { +// | waste | +// bytes pages batch class objs |fixed sampling| inc + { 0, 0, 0}, // 0 0 0.00% 0.00% 0.00% + { 8, 1, 32}, // 0 512 1.16% 0.92% 0.00% + { 16, 1, 32}, // 1 256 1.16% 0.92% 100.00% + { 32, 1, 32}, // 2 128 1.16% 0.92% 100.00% + { 64, 1, 32}, // 3 64 1.16% 0.92% 100.00% + { 128, 1, 32}, // 4 32 1.16% 0.92% 100.00% + { 256, 1, 32}, // 5 16 1.16% 0.92% 100.00% + { 512, 1, 32}, // 6 8 1.16% 0.92% 100.00% + { 1024, 2, 32}, // 7 8 0.58% 0.92% 100.00% + { 2048, 4, 32}, // 8 8 0.29% 0.92% 100.00% + { 4096, 4, 16}, // 9 4 0.29% 0.92% 100.00% + { 8192, 4, 8}, // 10 2 0.29% 1.70% 100.00% }; #else #error "Unsupported TCMALLOC_PAGE_SHIFT value!" @@ -234,6 +285,9 @@ const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimental #endif // clang-format on +static_assert(sizeof(List) / sizeof(List[0]) <= kNumBaseClasses); +extern constexpr SizeClasses kExperimentalPow2SizeClasses{List, Assumptions}; + } // namespace tcmalloc_internal } // namespace tcmalloc GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/global_stats.cc b/contrib/libs/tcmalloc/tcmalloc/global_stats.cc new file mode 100644 index 000000000000..0d5244429b7f --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/global_stats.cc @@ -0,0 +1,1029 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
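
A note on the size-class tables above: each SizeClassInfo row is {object bytes, pages per span, batch size}, and the comment columns give the class index, objects per span, and estimated waste. The patch does not show how the generator derives the "fixed" waste column, but the figures are consistent with charging a fixed per-span overhead (the 48-byte span_size listed in SizeClassAssumptions) plus any leftover span bytes against the span footprint. A rough standalone sketch under that assumption (not part of this patch; the real generator and its rounding may differ):

// Rough sketch, NOT from this patch: reproduces the "fixed" waste column of
// the TCMALLOC_PAGE_SHIFT == 13 table above under the assumed formula
//   waste = (per-span overhead + leftover bytes) / (span bytes + overhead),
// with the 48-byte per-span overhead taken from Assumptions.span_size.
#include <cstdio>

int main() {
  constexpr int kSpanOverhead = 48;       // Assumptions.span_size above
  constexpr int kTcmallocPage = 8 << 10;  // TCMALLOC_PAGE_SHIFT == 13
  struct Row { int size, pages; } rows[] = {{8, 1}, {2048, 2}, {262144, 32}};
  for (const Row& r : rows) {
    const int span_bytes = r.pages * kTcmallocPage;
    const int leftover = span_bytes % r.size;  // 0 for these power-of-two classes
    const double waste =
        100.0 * (kSpanOverhead + leftover) / (span_bytes + kSpanOverhead);
    std::printf("size %6d pages %2d fixed waste %.2f%%\n", r.size, r.pages, waste);
  }
  return 0;
}

Run against the three rows picked here, this prints 0.58%, 0.29% and 0.02%, matching the table, but treat it as a reading of the numbers rather than the generator's actual code.
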
+ +#include "tcmalloc/global_stats.h" + +#include +#include +#include +#include +#include + +#include "absl/strings/match.h" +#include "absl/strings/string_view.h" +#include "absl/strings/strip.h" +#include "absl/time/time.h" +#include "absl/types/optional.h" +#include "tcmalloc/central_freelist.h" +#include "tcmalloc/common.h" +#include "tcmalloc/cpu_cache.h" +#include "tcmalloc/experiment.h" +#include "tcmalloc/experiment_config.h" +#include "tcmalloc/guarded_page_allocator.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/cpu_utils.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/memory_stats.h" +#include "tcmalloc/internal/optimization.h" +#include "tcmalloc/internal/percpu.h" +#include "tcmalloc/metadata_object_allocator.h" +#include "tcmalloc/page_allocator.h" +#include "tcmalloc/pagemap.h" +#include "tcmalloc/pages.h" +#include "tcmalloc/parameters.h" +#include "tcmalloc/selsan/selsan.h" +#include "tcmalloc/span.h" +#include "tcmalloc/span_stats.h" +#include "tcmalloc/stack_trace_table.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/stats.h" +#include "tcmalloc/system-alloc.h" +#include "tcmalloc/thread_cache.h" +#include "tcmalloc/transfer_cache.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +using subtle::percpu::RseqVcpuMode; + +static absl::string_view MadviseString() { + MadvisePreference pref = tc_globals.system_allocator().madvise_preference(); + + switch (pref) { + case MadvisePreference::kNever: + return "MADVISE_NEVER"; + case MadvisePreference::kDontNeed: + return "MADVISE_DONTNEED"; + case MadvisePreference::kFreeOnly: + return "MADVISE_FREE_ONLY"; + case MadvisePreference::kFreeAndDontNeed: + return "MADVISE_FREE_AND_DONTNEED"; + } + + ABSL_UNREACHABLE(); +} + +// Get stats into "r". Also, if class_count != NULL, class_count[k] +// will be set to the total number of objects of size class k in the +// central cache, transfer cache, and per-thread and per-CPU caches. +// If small_spans is non-NULL, it is filled. Same for large_spans. +// The boolean report_residence determines whether residence information +// should be captured or not. Residence info requires a potentially +// costly OS call, and is not necessary in all situations. +void ExtractStats(TCMallocStats* r, uint64_t* class_count, + SpanStats* span_stats, SmallSpanStats* small_spans, + LargeSpanStats* large_spans, bool report_residence) { + r->central_bytes = 0; + r->transfer_bytes = 0; + for (int size_class = 0; size_class < kNumClasses; ++size_class) { + const size_t length = tc_globals.central_freelist(size_class).length(); + const size_t tc_length = tc_globals.transfer_cache().tc_length(size_class); + const size_t sharded_tc_length = + tc_globals.sharded_transfer_cache().TotalObjectsOfClass(size_class); + const size_t cache_overhead = + tc_globals.central_freelist(size_class).OverheadBytes(); + const size_t size = tc_globals.sizemap().class_to_size(size_class); + r->central_bytes += (size * length) + cache_overhead; + r->transfer_bytes += (size * tc_length); + if (class_count) { + // Sum the lengths of all per-class freelists, except the per-thread + // freelists, which get counted when we call GetThreadStats(), below. 
+ class_count[size_class] = length + tc_length + sharded_tc_length; + if (UsePerCpuCache(tc_globals)) { + class_count[size_class] += + tc_globals.cpu_cache().TotalObjectsOfClass(size_class); + } + } + if (span_stats) { + span_stats[size_class] = + tc_globals.central_freelist(size_class).GetSpanStats(); + } + } + + // Add stats from per-thread heaps + r->thread_bytes = 0; + + r->span_stats = tc_globals.span_allocator().stats(); + r->stack_stats = tc_globals.sampledallocation_allocator().stats(); + r->linked_sample_stats = tc_globals.linked_sample_allocator().stats(); + r->tc_stats = ThreadCache::GetStats(&r->thread_bytes, class_count); + + { // scope + PageHeapSpinLockHolder l; + r->metadata_bytes = tc_globals.metadata_bytes(); + r->pagemap_bytes = tc_globals.pagemap().bytes(); + r->pageheap = tc_globals.page_allocator().stats(); + r->peak_stats = tc_globals.page_allocator().peak_stats(); + if (small_spans != nullptr) { + tc_globals.page_allocator().GetSmallSpanStats(small_spans); + } + if (large_spans != nullptr) { + tc_globals.page_allocator().GetLargeSpanStats(large_spans); + } + + // TODO(b/207622377): Arena is thread-safe, but we take the pageheap_lock + // to present a consistent view of memory usage. + r->arena = tc_globals.arena().stats(); + if (!report_residence) { + r->metadata_bytes += r->arena.bytes_nonresident; + } + + const PageReleaseStats release_stats = + tc_globals.page_allocator().GetReleaseStats(); + + r->num_released_total = release_stats.total; + r->num_released_release_memory_to_system = + release_stats.release_memory_to_system; + r->num_released_process_background_actions = + release_stats.process_background_actions; + r->num_released_soft_limit_exceeded = release_stats.soft_limit_exceeded; + r->num_released_hard_limit_exceeded = release_stats.hard_limit_exceeded; + + r->per_cpu_bytes = 0; + r->sharded_transfer_bytes = 0; + r->percpu_metadata_bytes_res = 0; + r->percpu_metadata_bytes = 0; + if (UsePerCpuCache(tc_globals)) { + r->per_cpu_bytes = tc_globals.cpu_cache().TotalUsedBytes(); + r->sharded_transfer_bytes = + tc_globals.sharded_transfer_cache().TotalBytes(); + + if (report_residence) { + auto percpu_metadata = tc_globals.cpu_cache().MetadataMemoryUsage(); + r->percpu_metadata_bytes_res = percpu_metadata.resident_size; + r->percpu_metadata_bytes = percpu_metadata.virtual_size; + + TC_ASSERT_GE(r->metadata_bytes, r->percpu_metadata_bytes); + r->metadata_bytes = r->metadata_bytes - r->percpu_metadata_bytes + + r->percpu_metadata_bytes_res; + } + } + } + // We can access the pagemap without holding the pageheap_lock since it + // is static data, and we are only taking address and size which are + // constants. + if (report_residence) { + auto resident_bytes = tc_globals.pagemap_residence(); + r->pagemap_root_bytes_res = resident_bytes; + TC_ASSERT_GE(r->metadata_bytes, r->pagemap_bytes); + r->metadata_bytes = r->metadata_bytes - r->pagemap_bytes + resident_bytes; + } else { + r->pagemap_root_bytes_res = 0; + } +} + +void ExtractTCMallocStats(TCMallocStats* r, bool report_residence) { + ExtractStats(r, nullptr, nullptr, nullptr, nullptr, report_residence); +} + +// Because different fields of stats are computed from state protected +// by different locks, they may be inconsistent. Prevent underflow +// when subtracting to avoid gigantic results. +static uint64_t StatSub(uint64_t a, uint64_t b) { + return (a >= b) ? (a - b) : 0; +} + +// Return approximate number of bytes in use by app. 
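
Because the snapshot is assembled under several different locks, the subtraction helper above clamps at zero instead of wrapping around, and InUseByApp() below treats "in use by the application" as page-heap system bytes minus everything TCMalloc itself is holding (thread, central, transfer, per-CPU and sharded caches, the page-heap free list, and unmapped bytes). A small worked example with invented numbers, mirroring that shape:

// Worked example (numbers are invented, not measured) of the clamped
// accounting used by StatSub() and InUseByApp() in this file.
#include <cstdint>
#include <cstdio>

static uint64_t StatSub(uint64_t a, uint64_t b) { return a >= b ? a - b : 0; }

int main() {
  const uint64_t system_bytes = 1'000'000'000;  // pageheap.system_bytes
  const uint64_t held_by_tcmalloc =
      40'000'000 +       // thread + central + transfer cache freelists
      30'000'000 +       // per-CPU and sharded transfer caches
      100'000'000 +      // page heap free list
      200'000'000;       // unmapped (released back to the OS)
  // Clamping means an inconsistent snapshot can at worst report 0, never a
  // huge bogus value produced by unsigned underflow.
  std::printf("bytes in use by app ~= %llu\n",
              static_cast<unsigned long long>(
                  StatSub(system_bytes, held_by_tcmalloc)));
  return 0;
}
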
+uint64_t InUseByApp(const TCMallocStats& stats) { + return StatSub(stats.pageheap.system_bytes, + stats.thread_bytes + stats.central_bytes + + stats.transfer_bytes + stats.per_cpu_bytes + + stats.sharded_transfer_bytes + stats.pageheap.free_bytes + + stats.pageheap.unmapped_bytes); +} + +uint64_t VirtualMemoryUsed(const TCMallocStats& stats) { + return stats.pageheap.system_bytes + stats.metadata_bytes + + stats.arena.bytes_unallocated + stats.arena.bytes_unavailable + + stats.arena.bytes_nonresident; +} + +uint64_t UnmappedBytes(const TCMallocStats& stats) { + return stats.pageheap.unmapped_bytes + stats.arena.bytes_nonresident; +} + +uint64_t PhysicalMemoryUsed(const TCMallocStats& stats) { + return StatSub(VirtualMemoryUsed(stats), UnmappedBytes(stats)); +} + +// The number of bytes either in use by the app or fragmented so that +// it cannot be (arbitrarily) reused. +uint64_t RequiredBytes(const TCMallocStats& stats) { + return StatSub(PhysicalMemoryUsed(stats), stats.pageheap.free_bytes); +} + +size_t ExternalBytes(const TCMallocStats& stats) { + return stats.pageheap.free_bytes + stats.central_bytes + stats.per_cpu_bytes + + stats.sharded_transfer_bytes + stats.transfer_bytes + + stats.thread_bytes + stats.metadata_bytes + + stats.arena.bytes_unavailable + stats.arena.bytes_unallocated; +} + +size_t HeapSizeBytes(const BackingStats& stats) { + return StatSub(stats.system_bytes, stats.unmapped_bytes); +} + +size_t LocalBytes(const TCMallocStats& stats) { + return stats.thread_bytes + stats.per_cpu_bytes + + stats.sharded_transfer_bytes; +} + +size_t SlackBytes(const BackingStats& stats) { + return stats.free_bytes + stats.unmapped_bytes; +} + +static int CountAllowedCpus() { + CpuSet allowed_cpus; + if (!allowed_cpus.GetAffinity(0)) { + return 0; + } + + return allowed_cpus.Count(); +} + +static absl::string_view SizeClassConfigurationString( + SizeClassConfiguration config) { + switch (config) { + case SizeClassConfiguration::kPow2Below64: + return "SIZE_CLASS_POW2_BELOW_64"; + case SizeClassConfiguration::kPow2Only: + return "SIZE_CLASS_POW2_ONLY"; + case SizeClassConfiguration::kLegacy: + // TODO(b/242710633): remove this opt out. + return "SIZE_CLASS_LEGACY"; + case SizeClassConfiguration::kReuse: + return "SIZE_CLASS_REUSE"; + } + + ASSUME(false); + return "SIZE_CLASS_UNKNOWN"; +} + +static absl::string_view PerCpuTypeString(RseqVcpuMode mode) { + switch (mode) { + case RseqVcpuMode::kNone: + return "NONE"; + } + + ASSUME(false); + return "NONE"; +} + +void DumpStats(Printer& out, int level) { + TCMallocStats stats; + uint64_t class_count[kNumClasses]; + SpanStats span_stats[kNumClasses]; + if (level >= 2) { + ExtractStats(&stats, class_count, span_stats, nullptr, nullptr, true); + } else { + ExtractTCMallocStats(&stats, true); + } + + static const double MiB = 1048576.0; + + out.printf( + "See https://github.com/google/tcmalloc/tree/master/docs/stats.md for an explanation of " + "this page\n"); + + const uint64_t virtual_memory_used = VirtualMemoryUsed(stats); + const uint64_t physical_memory_used = PhysicalMemoryUsed(stats); + const uint64_t unmapped_bytes = UnmappedBytes(stats); + const uint64_t bytes_in_use_by_app = InUseByApp(stats); + +#ifdef TCMALLOC_INTERNAL_SMALL_BUT_SLOW + out.printf("NOTE: SMALL MEMORY MODEL IS IN USE, PERFORMANCE MAY SUFFER.\n"); +#endif + // clang-format off + // Avoid clang-format complaining about the way that this text is laid out. 
+ out.printf( + "------------------------------------------------\n" + "MALLOC: %12u (%7.1f MiB) Bytes in use by application\n" + "MALLOC: + %12u (%7.1f MiB) Bytes in page heap freelist\n" + "MALLOC: + %12u (%7.1f MiB) Bytes in central cache freelist\n" + "MALLOC: + %12u (%7.1f MiB) Bytes in per-CPU cache freelist\n" + "MALLOC: + %12u (%7.1f MiB) Bytes in Sharded cache freelist\n" + "MALLOC: + %12u (%7.1f MiB) Bytes in transfer cache freelist\n" + "MALLOC: + %12u (%7.1f MiB) Bytes in thread cache freelists\n" + "MALLOC: + %12u (%7.1f MiB) Bytes in malloc metadata\n" + "MALLOC: + %12u (%7.1f MiB) Bytes in malloc metadata Arena unallocated\n" + "MALLOC: + %12u (%7.1f MiB) Bytes in malloc metadata Arena unavailable\n" + + "MALLOC: ------------\n" + "MALLOC: = %12u (%7.1f MiB) Actual memory used (physical + swap)\n" + "MALLOC: + %12u (%7.1f MiB) Bytes released to OS (aka unmapped)\n" + "MALLOC: ------------\n" + "MALLOC: = %12u (%7.1f MiB) Virtual address space used\n" + "MALLOC:\n" + "MALLOC: %12u Spans in use\n" + "MALLOC: %12u (%7.1f MiB) Spans created\n" + "MALLOC: %12u Thread heaps in use\n" + "MALLOC: %12u (%7.1f MiB) Thread heaps created\n" + "MALLOC: %12u Stack traces in use\n" + "MALLOC: %12u (%7.1f MiB) Stack traces created\n" + "MALLOC: %12u Table buckets in use\n" + "MALLOC: %12u (%7.1f MiB) Table buckets created\n" + "MALLOC: %12u (%7.1f MiB) Pagemap bytes used\n" + "MALLOC: %12u (%7.1f MiB) Pagemap root resident bytes\n" + "MALLOC: %12u (%7.1f MiB) per-CPU slab bytes used\n" + "MALLOC: %12u (%7.1f MiB) per-CPU slab resident bytes\n" + "MALLOC: %12u (%7.1f MiB) malloc metadata Arena non-resident bytes\n" + "MALLOC: %12u (%7.1f MiB) Actual memory used at peak\n" + "MALLOC: %12u (%7.1f MiB) Estimated in-use at peak\n" + "MALLOC: %12.4f Realized fragmentation (%%)\n" + "MALLOC: %12u Tcmalloc page size\n" + "MALLOC: %12u Tcmalloc hugepage size\n" + "MALLOC: %12u CPUs Allowed in Mask\n" + "MALLOC: %12u Arena blocks\n", + bytes_in_use_by_app, bytes_in_use_by_app / MiB, + stats.pageheap.free_bytes, stats.pageheap.free_bytes / MiB, + stats.central_bytes, stats.central_bytes / MiB, + stats.per_cpu_bytes, stats.per_cpu_bytes / MiB, + stats.sharded_transfer_bytes, stats.sharded_transfer_bytes / MiB, + stats.transfer_bytes, stats.transfer_bytes / MiB, + stats.thread_bytes, stats.thread_bytes / MiB, + stats.metadata_bytes, stats.metadata_bytes / MiB, + stats.arena.bytes_unallocated, stats.arena.bytes_unallocated / MiB, + stats.arena.bytes_unavailable, stats.arena.bytes_unavailable / MiB, + physical_memory_used, physical_memory_used / MiB, + unmapped_bytes, unmapped_bytes / MiB, + virtual_memory_used, virtual_memory_used / MiB, + uint64_t(stats.span_stats.in_use), + uint64_t(stats.span_stats.total), + (stats.span_stats.total * Span::CalcSizeOf(Parameters::max_span_cache_array_size())) / MiB, + uint64_t(stats.tc_stats.in_use), + uint64_t(stats.tc_stats.total), + (stats.tc_stats.total * sizeof(ThreadCache)) / MiB, + uint64_t(stats.stack_stats.in_use), + uint64_t(stats.stack_stats.total), + (stats.stack_stats.total * sizeof(StackTrace)) / MiB, + uint64_t(stats.linked_sample_stats.in_use), + uint64_t(stats.linked_sample_stats.total), + (stats.linked_sample_stats.total * sizeof(StackTraceTable::LinkedSample)) / MiB, + uint64_t(stats.pagemap_bytes), + stats.pagemap_bytes / MiB, + stats.pagemap_root_bytes_res, stats.pagemap_root_bytes_res / MiB, + uint64_t(stats.percpu_metadata_bytes), + stats.percpu_metadata_bytes / MiB, + stats.percpu_metadata_bytes_res, stats.percpu_metadata_bytes_res / MiB, + 
stats.arena.bytes_nonresident, stats.arena.bytes_nonresident / MiB, + uint64_t(stats.peak_stats.backed_bytes), + stats.peak_stats.backed_bytes / MiB, + uint64_t(stats.peak_stats.sampled_application_bytes), + stats.peak_stats.sampled_application_bytes / MiB, + 100. * safe_div(stats.peak_stats.backed_bytes - stats.peak_stats.sampled_application_bytes, stats.peak_stats.sampled_application_bytes), + uint64_t(kPageSize), + uint64_t(kHugePageSize), + CountAllowedCpus(), + stats.arena.blocks + ); + // clang-format on + + out.printf("MALLOC EXPERIMENTS:"); + WalkExperiments([&](absl::string_view name, bool active) { + const char* value = active ? "1" : "0"; + out.printf(" %s=%s", name, value); + }); + out.printf("\n"); + + out.printf( + "MALLOC SAMPLED PROFILES: %zu bytes (current), %zu bytes (internal " + "fragmentation), %zu bytes (peak), %zu count (total)\n", + static_cast(tc_globals.sampled_objects_size_.value()), + tc_globals.sampled_internal_fragmentation_.value(), + tc_globals.peak_heap_tracker().CurrentPeakSize(), + tc_globals.total_sampled_count_.value()); + + MemoryStats memstats; + if (GetMemoryStats(&memstats)) { + uint64_t rss = memstats.rss; + uint64_t vss = memstats.vss; + // clang-format off + out.printf( + "\n" + "Total process stats (inclusive of non-malloc sources):\n" + "TOTAL: %12u (%7.1f MiB) Bytes resident (physical memory used)\n" + "TOTAL: %12u (%7.1f MiB) Bytes mapped (virtual memory used)\n", + rss, rss / MiB, vss, vss / MiB); + // clang-format on + } + + out.printf( + "------------------------------------------------\n" + "Call ReleaseMemoryToSystem() to release freelist memory to the OS" + " (via madvise()).\n" + "Bytes released to the OS take up virtual address space" + " but no physical memory.\n"); + if (level >= 2) { + out.printf("------------------------------------------------\n"); + out.printf("Total size of freelists for per-thread and per-CPU caches,\n"); + out.printf("transfer cache, and central cache, as well as number of\n"); + out.printf("live pages, returned/requested spans by size class\n"); + out.printf("------------------------------------------------\n"); + + uint64_t cumulative = 0; + for (int size_class = 1; size_class < kNumClasses; ++size_class) { + uint64_t class_bytes = class_count[size_class] * + tc_globals.sizemap().class_to_size(size_class); + + cumulative += class_bytes; + out.printf( + // clang-format off + "class %3d [ %8zu bytes ] : %8u objs; %5.1f MiB; %6.1f cum MiB; " + "%8u live pages; spans: %10zu ret / %10zu req = %5.4f;\n", + // clang-format on + size_class, tc_globals.sizemap().class_to_size(size_class), + class_count[size_class], class_bytes / MiB, cumulative / MiB, + span_stats[size_class].num_live_spans() * + tc_globals.sizemap().class_to_pages(size_class), + span_stats[size_class].num_spans_returned, + span_stats[size_class].num_spans_requested, + span_stats[size_class].prob_returned()); + } + +#ifndef TCMALLOC_INTERNAL_SMALL_BUT_SLOW + out.printf("------------------------------------------------\n"); + out.printf("Central cache freelist: Span utilization histogram\n"); + out.printf("Non-cumulative number of spans with allocated objects < N\n"); + out.printf("------------------------------------------------\n"); + for (int size_class = 1; size_class < kNumClasses; ++size_class) { + tc_globals.central_freelist(size_class).PrintSpanUtilStats(out); + } + + out.printf("\n"); + out.printf("------------------------------------------------\n"); + out.printf("Central cache freelist: Span lifetime histogram\n"); + 
out.printf("Non-cumulative number of spans lifetime a < N\n"); + out.printf("------------------------------------------------\n"); + for (int size_class = 1; size_class < kNumClasses; ++size_class) { + tc_globals.central_freelist(size_class).PrintSpanLifetimeStats(out); + } +#endif + + tc_globals.transfer_cache().Print(out); + tc_globals.sharded_transfer_cache().Print(out); + + if (UsePerCpuCache(tc_globals)) { + tc_globals.cpu_cache().Print(out); + } + + tc_globals.page_allocator().Print(out, MemoryTag::kNormal); + if (tc_globals.numa_topology().active_partitions() > 1) { + tc_globals.page_allocator().Print(out, MemoryTag::kNormalP1); + } + tc_globals.page_allocator().Print(out, MemoryTag::kSampled); + tc_globals.page_allocator().Print(out, MemoryTag::kCold); + if (selsan::IsEnabled()) { + tc_globals.page_allocator().Print(out, MemoryTag::kSelSan); + } + tc_globals.guardedpage_allocator().Print(out); + selsan::PrintTextStats(out); + + out.printf("------------------------------------------------\n"); + out.printf("Configured limits and related statistics\n"); + out.printf("------------------------------------------------\n"); + uint64_t soft_limit_bytes = + tc_globals.page_allocator().limit(PageAllocator::kSoft); + uint64_t hard_limit_bytes = + tc_globals.page_allocator().limit(PageAllocator::kHard); + + out.printf("PARAMETER desired_usage_limit_bytes %u\n", soft_limit_bytes); + out.printf("PARAMETER hard_usage_limit_bytes %u\n", hard_limit_bytes); + out.printf("Number of times soft limit was hit: %lld\n", + tc_globals.page_allocator().limit_hits(PageAllocator::kSoft)); + out.printf("Number of times hard limit was hit: %lld\n", + tc_globals.page_allocator().limit_hits(PageAllocator::kHard)); + out.printf("Number of times memory shrank below soft limit: %lld\n", + tc_globals.page_allocator().successful_shrinks_after_limit_hit( + PageAllocator::kSoft)); + out.printf("Number of times memory shrank below hard limit: %lld\n", + tc_globals.page_allocator().successful_shrinks_after_limit_hit( + PageAllocator::kHard)); + + out.printf("Total number of pages released: %llu (%7.1f MiB)\n", + stats.num_released_total.in_pages().raw_num(), + stats.num_released_total.in_mib()); + out.printf( + "Number of pages released by ReleaseMemoryToSystem: %llu (%7.1f " + "MiB)\n", + stats.num_released_release_memory_to_system.in_pages().raw_num(), + stats.num_released_release_memory_to_system.in_mib()); + out.printf( + "Number of pages released by ProcessBackgroundActions: %llu " + "(%7.1f MiB)\n", + stats.num_released_process_background_actions.in_pages().raw_num(), + stats.num_released_process_background_actions.in_mib()); + out.printf( + "Number of pages released after soft limit hits: %llu (%7.1f " + "MiB)\n", + stats.num_released_soft_limit_exceeded.in_pages().raw_num(), + stats.num_released_soft_limit_exceeded.in_mib()); + out.printf( + "Number of pages released after hard limit hits: %llu (%7.1f " + "MiB)\n", + stats.num_released_hard_limit_exceeded.in_pages().raw_num(), + stats.num_released_hard_limit_exceeded.in_mib()); + + out.printf("------------------------------------------------\n"); + out.printf("Parameters\n"); + out.printf("------------------------------------------------\n"); + out.printf("PARAMETER tcmalloc_per_cpu_caches %d\n", + Parameters::per_cpu_caches() ? 
1 : 0); + out.printf("PARAMETER tcmalloc_max_per_cpu_cache_size %d\n", + Parameters::max_per_cpu_cache_size()); + out.printf("PARAMETER tcmalloc_max_total_thread_cache_bytes %lld\n", + Parameters::max_total_thread_cache_bytes()); + out.printf("PARAMETER malloc_release_bytes_per_sec %llu\n", + Parameters::background_release_rate()); + out.printf("PARAMETER tcmalloc_skip_subrelease_short_interval %s\n", + absl::FormatDuration( + Parameters::filler_skip_subrelease_short_interval())); + out.printf("PARAMETER tcmalloc_skip_subrelease_long_interval %s\n", + absl::FormatDuration( + Parameters::filler_skip_subrelease_long_interval())); + out.printf("PARAMETER tcmalloc_cache_demand_release_short_interval %s\n", + absl::FormatDuration( + Parameters::cache_demand_release_short_interval())); + out.printf( + "PARAMETER tcmalloc_cache_demand_release_long_interval %s\n", + absl::FormatDuration(Parameters::cache_demand_release_long_interval())); + out.printf("PARAMETER tcmalloc_release_partial_alloc_pages %d\n", + Parameters::release_partial_alloc_pages() ? 1 : 0); + out.printf("PARAMETER tcmalloc_huge_cache_demand_based_release %d\n", + Parameters::huge_cache_demand_based_release() ? 1 : 0); + out.printf("PARAMETER tcmalloc_huge_region_demand_based_release %d\n", + Parameters::huge_region_demand_based_release() ? 1 : 0); + out.printf("PARAMETER tcmalloc_release_pages_from_huge_region %d\n", + Parameters::release_pages_from_huge_region() ? 1 : 0); + out.printf("PARAMETER tcmalloc_use_wider_slabs %d\n", + tc_globals.cpu_cache().UseWiderSlabs() ? 1 : 0); + + out.printf( + "PARAMETER size_class_config %s\n", + SizeClassConfigurationString(tc_globals.size_class_configuration())); + out.printf("PARAMETER percpu_vcpu_type %s\n", + PerCpuTypeString(subtle::percpu::GetRseqVcpuMode())); + out.printf("PARAMETER max_span_cache_size %d\n", + Parameters::max_span_cache_size()); + out.printf("PARAMETER max_span_cache_array_size %d\n", + Parameters::max_span_cache_array_size()); + out.printf("PARAMETER madvise %s\n", MadviseString()); + out.printf("PARAMETER tcmalloc_resize_size_class_max_capacity %d\n", + Parameters::resize_size_class_max_capacity() ? 1 : 0); + out.printf( + "PARAMETER tcmalloc_dense_trackers_sorted_on_spans_allocated %d\n", + Parameters::dense_trackers_sorted_on_spans_allocated() ? 
1 : 0); + out.printf("PARAMETER min_hot_access_hint %d\n", + static_cast(Parameters::min_hot_access_hint())); + } +} + +void DumpStatsInPbtxt(Printer& out, int level) { + TCMallocStats stats; + uint64_t class_count[kNumClasses]; + SpanStats span_stats[kNumClasses]; + if (level >= 2) { + ExtractStats(&stats, class_count, span_stats, nullptr, nullptr, true); + } else { + ExtractTCMallocStats(&stats, true); + } + + const uint64_t bytes_in_use_by_app = InUseByApp(stats); + const uint64_t virtual_memory_used = VirtualMemoryUsed(stats); + const uint64_t physical_memory_used = PhysicalMemoryUsed(stats); + const uint64_t unmapped_bytes = UnmappedBytes(stats); + + PbtxtRegion region(out, kTop); + region.PrintI64("in_use_by_app", bytes_in_use_by_app); + region.PrintI64("page_heap_freelist", stats.pageheap.free_bytes); + region.PrintI64("central_cache_freelist", stats.central_bytes); + region.PrintI64("per_cpu_cache_freelist", stats.per_cpu_bytes); + region.PrintI64("sharded_transfer_cache_freelist", + stats.sharded_transfer_bytes); + region.PrintI64("transfer_cache_freelist", stats.transfer_bytes); + region.PrintI64("thread_cache_freelists", stats.thread_bytes); + region.PrintI64("malloc_metadata", stats.metadata_bytes); + region.PrintI64("malloc_metadata_arena_unavailable", + stats.arena.bytes_unavailable); + region.PrintI64("malloc_metadata_arena_unallocated", + stats.arena.bytes_unallocated); + region.PrintI64("actual_mem_used", physical_memory_used); + region.PrintI64("unmapped", unmapped_bytes); + region.PrintI64("virtual_address_space_used", virtual_memory_used); + region.PrintI64("num_spans", uint64_t(stats.span_stats.in_use)); + region.PrintI64("num_spans_created", uint64_t(stats.span_stats.total)); + region.PrintI64("num_thread_heaps", uint64_t(stats.tc_stats.in_use)); + region.PrintI64("num_thread_heaps_created", uint64_t(stats.tc_stats.total)); + region.PrintI64("num_stack_traces", uint64_t(stats.stack_stats.in_use)); + region.PrintI64("num_stack_traces_created", + uint64_t(stats.stack_stats.total)); + region.PrintI64("num_table_buckets", + uint64_t(stats.linked_sample_stats.in_use)); + region.PrintI64("num_table_buckets_created", + uint64_t(stats.linked_sample_stats.total)); + region.PrintI64("pagemap_size", uint64_t(stats.pagemap_bytes)); + region.PrintI64("pagemap_root_residence", stats.pagemap_root_bytes_res); + region.PrintI64("percpu_slab_size", stats.percpu_metadata_bytes); + region.PrintI64("percpu_slab_residence", stats.percpu_metadata_bytes_res); + region.PrintI64("peak_backed", stats.peak_stats.backed_bytes); + region.PrintI64("peak_application_demand", + stats.peak_stats.sampled_application_bytes); + region.PrintI64("tcmalloc_page_size", uint64_t(kPageSize)); + region.PrintI64("tcmalloc_huge_page_size", uint64_t(kHugePageSize)); + region.PrintI64("cpus_allowed", CountAllowedCpus()); + region.PrintI64("arena_blocks", stats.arena.blocks); + + { + auto sampled_profiles = region.CreateSubRegion("sampled_profiles"); + sampled_profiles.PrintI64("current_bytes", + tc_globals.sampled_objects_size_.value()); + sampled_profiles.PrintI64( + "current_fragmentation_bytes", + tc_globals.sampled_internal_fragmentation_.value()); + sampled_profiles.PrintI64("peak_bytes", + tc_globals.peak_heap_tracker().CurrentPeakSize()); + } + + // Print total process stats (inclusive of non-malloc sources). 
+ MemoryStats memstats; + if (GetMemoryStats(&memstats)) { + region.PrintI64("total_resident", uint64_t(memstats.rss)); + region.PrintI64("total_mapped", uint64_t(memstats.vss)); + } + + region.PrintI64("total_sampled_count", + tc_globals.total_sampled_count_.value()); + + if (level >= 2) { + { +#ifndef TCMALLOC_INTERNAL_SMALL_BUT_SLOW + for (int size_class = 1; size_class < kNumClasses; ++size_class) { + uint64_t class_bytes = class_count[size_class] * + tc_globals.sizemap().class_to_size(size_class); + PbtxtRegion entry = region.CreateSubRegion("freelist"); + entry.PrintI64("sizeclass", + tc_globals.sizemap().class_to_size(size_class)); + entry.PrintI64("bytes", class_bytes); + entry.PrintI64("num_spans_requested", + span_stats[size_class].num_spans_requested); + entry.PrintI64("num_spans_returned", + span_stats[size_class].num_spans_returned); + entry.PrintI64("obj_capacity", span_stats[size_class].obj_capacity); + tc_globals.central_freelist(size_class) + .PrintSpanUtilStatsInPbtxt(entry); + tc_globals.central_freelist(size_class) + .PrintSpanLifetimeStatsInPbtxt(entry); + } +#endif + } + + tc_globals.transfer_cache().PrintInPbtxt(region); + tc_globals.sharded_transfer_cache().PrintInPbtxt(region); + + if (UsePerCpuCache(tc_globals)) { + tc_globals.cpu_cache().PrintInPbtxt(region); + } + } + tc_globals.page_allocator().PrintInPbtxt(region, MemoryTag::kNormal); + if (tc_globals.numa_topology().active_partitions() > 1) { + tc_globals.page_allocator().PrintInPbtxt(region, MemoryTag::kNormalP1); + } + tc_globals.page_allocator().PrintInPbtxt(region, MemoryTag::kSampled); + tc_globals.page_allocator().PrintInPbtxt(region, MemoryTag::kCold); + if (selsan::IsEnabled()) { + tc_globals.page_allocator().PrintInPbtxt(region, MemoryTag::kSelSan); + } + // We do not collect tracking information in pbtxt. 
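
For reference, the "Realized fragmentation" figure printed earlier, and exposed below as generic.realized_fragmentation, is 100 * (peak backed bytes - peak sampled application bytes) / peak sampled application bytes, with safe_div guarding the zero-denominator case. A tiny worked example with made-up peak values:

// Made-up numbers illustrating the realized-fragmentation formula used in
// DumpStats() above and GetNumericProperty() below (not tcmalloc code).
#include <cstdio>

int main() {
  const double backed = 12.0e9;  // stands in for peak_stats.backed_bytes
  const double app = 10.0e9;     // stands in for peak_stats.sampled_application_bytes
  const double frag = app > 0 ? 100.0 * (backed - app) / app : 0.0;  // safe_div-style guard
  std::printf("realized fragmentation = %.4f%%\n", frag);  // prints 20.0000%
  return 0;
}
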
+ + size_t soft_limit_bytes = + tc_globals.page_allocator().limit(PageAllocator::kSoft); + size_t hard_limit_bytes = + tc_globals.page_allocator().limit(PageAllocator::kHard); + + region.PrintI64("desired_usage_limit_bytes", soft_limit_bytes); + region.PrintI64("hard_usage_limit_bytes", hard_limit_bytes); + region.PrintI64("soft_limit_hits", + tc_globals.page_allocator().limit_hits(PageAllocator::kSoft)); + region.PrintI64("hard_limit_hits", + tc_globals.page_allocator().limit_hits(PageAllocator::kHard)); + region.PrintI64( + "successful_shrinks_after_soft_limit_hit", + tc_globals.page_allocator().successful_shrinks_after_limit_hit( + PageAllocator::kSoft)); + region.PrintI64( + "successful_shrinks_after_hard_limit_hit", + tc_globals.page_allocator().successful_shrinks_after_limit_hit( + PageAllocator::kHard)); + + region.PrintI64("num_released_total_pages", + stats.num_released_total.in_pages().raw_num()); + region.PrintI64( + "num_released_release_memory_to_system_pages", + stats.num_released_release_memory_to_system.in_pages().raw_num()); + region.PrintI64( + "num_released_process_background_actions_pages", + stats.num_released_process_background_actions.in_pages().raw_num()); + region.PrintI64("num_released_soft_limit_exceeded_pages", + stats.num_released_soft_limit_exceeded.in_pages().raw_num()); + region.PrintI64("num_released_hard_limit_exceeded_pages", + stats.num_released_hard_limit_exceeded.in_pages().raw_num()); + + { + auto gwp_asan = region.CreateSubRegion("gwp_asan"); + tc_globals.guardedpage_allocator().PrintInPbtxt(gwp_asan); + } + selsan::PrintPbtxtStats(region); + + region.PrintI64("memory_release_failures", + tc_globals.system_allocator().release_errors()); + + region.PrintBool("tcmalloc_per_cpu_caches", Parameters::per_cpu_caches()); + region.PrintI64("tcmalloc_max_per_cpu_cache_size", + Parameters::max_per_cpu_cache_size()); + region.PrintI64("tcmalloc_max_total_thread_cache_bytes", + Parameters::max_total_thread_cache_bytes()); + region.PrintI64("malloc_release_bytes_per_sec", + static_cast(Parameters::background_release_rate())); + region.PrintI64("tcmalloc_skip_subrelease_short_interval_ns", + absl::ToInt64Nanoseconds( + Parameters::filler_skip_subrelease_short_interval())); + region.PrintI64("tcmalloc_skip_subrelease_long_interval_ns", + absl::ToInt64Nanoseconds( + Parameters::filler_skip_subrelease_long_interval())); + region.PrintI64("tcmalloc_cache_demand_release_short_interval_ns", + absl::ToInt64Nanoseconds( + Parameters::cache_demand_release_short_interval())); + region.PrintI64("tcmalloc_cache_demand_release_long_interval_ns", + absl::ToInt64Nanoseconds( + Parameters::cache_demand_release_long_interval())); + region.PrintBool("tcmalloc_release_partial_alloc_pages", + Parameters::release_partial_alloc_pages()); + region.PrintBool("tcmalloc_huge_cache_demand_based_release", + Parameters::huge_cache_demand_based_release()); + region.PrintBool("tcmalloc_huge_region_demand_based_release", + Parameters::huge_region_demand_based_release()); + region.PrintBool("tcmalloc_release_pages_from_huge_region", + Parameters::release_pages_from_huge_region()); + region.PrintI64("profile_sampling_interval", + Parameters::profile_sampling_interval()); + region.PrintRaw("percpu_vcpu_type", + PerCpuTypeString(subtle::percpu::GetRseqVcpuMode())); + region.PrintBool("tcmalloc_use_wider_slabs", + tc_globals.cpu_cache().UseWiderSlabs()); + region.PrintI64("span_max_cache_size", Parameters::max_span_cache_size()); + region.PrintI64("span_max_cache_array_size", + 
Parameters::max_span_cache_array_size()); + region.PrintBool("tcmalloc_dense_trackers_sorted_on_spans_allocated", + Parameters::dense_trackers_sorted_on_spans_allocated()); + region.PrintI64("min_hot_access_hint", + static_cast(Parameters::min_hot_access_hint())); + + region.PrintRaw( + "size_class_config", + SizeClassConfigurationString(tc_globals.size_class_configuration())); + region.PrintRaw("madvise", MadviseString()); + region.PrintBool("tcmalloc_resize_size_class_max_capacity", + Parameters::resize_size_class_max_capacity()); +} + +bool GetNumericProperty(const char* name_data, size_t name_size, + size_t* value) { + // LINT.IfChange + TC_ASSERT_NE(name_data, nullptr); + TC_ASSERT_NE(value, nullptr); + const absl::string_view name(name_data, name_size); + + // This is near the top since ReleasePerCpuMemoryToOS() calls it frequently. + if (name == "tcmalloc.per_cpu_caches_active") { + *value = tc_globals.CpuCacheActive(); + return true; + } + + if (name == "generic.virtual_memory_used") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + *value = VirtualMemoryUsed(stats); + return true; + } + + if (name == "generic.physical_memory_used") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + *value = PhysicalMemoryUsed(stats); + return true; + } + + if (name == "generic.current_allocated_bytes" || + name == "generic.bytes_in_use_by_app") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + *value = InUseByApp(stats); + return true; + } + + if (name == "generic.peak_memory_usage") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + *value = static_cast(stats.peak_stats.sampled_application_bytes); + return true; + } + + if (name == "generic.realized_fragmentation") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + *value = static_cast( + 100. * safe_div(stats.peak_stats.backed_bytes - + stats.peak_stats.sampled_application_bytes, + stats.peak_stats.sampled_application_bytes)); + + return true; + } + + if (name == "generic.heap_size") { + PageHeapSpinLockHolder l; + BackingStats stats = tc_globals.page_allocator().stats(); + *value = HeapSizeBytes(stats); + return true; + } + + if (name == "tcmalloc.central_cache_free") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + *value = stats.central_bytes; + return true; + } + + if (name == "tcmalloc.cpu_free") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + *value = stats.per_cpu_bytes; + return true; + } + + if (name == "tcmalloc.sharded_transfer_cache_free") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + *value = stats.sharded_transfer_bytes; + return true; + } + + if (name == "tcmalloc.slack_bytes") { + // Kept for backwards compatibility. Now defined externally as: + // pageheap_free_bytes + pageheap_unmapped_bytes. + PageHeapSpinLockHolder l; + BackingStats stats = tc_globals.page_allocator().stats(); + *value = SlackBytes(stats); + return true; + } + + if (name == "tcmalloc.pageheap_free_bytes" || + name == "tcmalloc.page_heap_free") { + PageHeapSpinLockHolder l; + *value = tc_globals.page_allocator().stats().free_bytes; + return true; + } + + if (name == "tcmalloc.pageheap_unmapped_bytes" || + name == "tcmalloc.page_heap_unmapped") { + PageHeapSpinLockHolder l; + // Arena non-resident bytes aren't on the page heap, but they are unmapped. 
+ *value = tc_globals.page_allocator().stats().unmapped_bytes + + tc_globals.arena().stats().bytes_nonresident; + return true; + } + + if (name == "tcmalloc.sampled_internal_fragmentation") { + *value = tc_globals.sampled_internal_fragmentation_.value(); + return true; + } + + if (name == "tcmalloc.max_total_thread_cache_bytes") { + *value = ThreadCache::overall_thread_cache_size(); + return true; + } + + if (name == "tcmalloc.current_total_thread_cache_bytes" || + name == "tcmalloc.thread_cache_free") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + *value = stats.thread_bytes; + return true; + } + + if (name == "tcmalloc.thread_cache_count") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + *value = stats.tc_stats.in_use; + return true; + } + + if (name == "tcmalloc.local_bytes") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + *value = LocalBytes(stats); + return true; + } + + if (name == "tcmalloc.external_fragmentation_bytes") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + *value = ExternalBytes(stats); + return true; + } + + if (name == "tcmalloc.metadata_bytes") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, true); + *value = stats.metadata_bytes; + return true; + } + + if (name == "tcmalloc.transfer_cache_free") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + *value = stats.transfer_bytes; + return true; + } + + auto limit_kind = (name == "tcmalloc.hard_usage_limit_bytes") + ? PageAllocator::kHard + : PageAllocator::kSoft; + if (limit_kind == PageAllocator::kHard || + name == "tcmalloc.desired_usage_limit_bytes") { + *value = tc_globals.page_allocator().limit(limit_kind); + return true; + } + if (name == "tcmalloc.soft_limit_hits") { + *value = tc_globals.page_allocator().limit_hits(PageAllocator::kSoft); + return true; + } + if (name == "tcmalloc.hard_limit_hits") { + *value = tc_globals.page_allocator().limit_hits(PageAllocator::kHard); + return true; + } + if (name == "tcmalloc.successful_shrinks_after_soft_limit_hit") { + *value = tc_globals.page_allocator().successful_shrinks_after_limit_hit( + PageAllocator::kSoft); + return true; + } + if (name == "tcmalloc.successful_shrinks_after_hard_limit_hit") { + *value = tc_globals.page_allocator().successful_shrinks_after_limit_hit( + PageAllocator::kHard); + return true; + } + + for (const auto& [property_name, field] : + std::initializer_list>{ + {"tcmalloc.num_released_total_bytes", &PageReleaseStats::total}, + {"tcmalloc.num_released_release_memory_to_system_bytes", + &PageReleaseStats::release_memory_to_system}, + {"tcmalloc.num_released_process_background_actions_bytes", + &PageReleaseStats::process_background_actions}, + {"tcmalloc.num_released_soft_limit_exceeded_bytes", + &PageReleaseStats::soft_limit_exceeded}, + {"tcmalloc.num_released_hard_limit_exceeded_bytes", + &PageReleaseStats::hard_limit_exceeded}}) { + if (name == property_name) { + const PageHeapSpinLockHolder l; + *value = + (tc_globals.page_allocator().GetReleaseStats().*field).in_bytes(); + return true; + } + } + + if (name == "tcmalloc.required_bytes") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + *value = RequiredBytes(stats); + return true; + } + + const absl::string_view kExperimentPrefix = "tcmalloc.experiment."; + if (absl::StartsWith(name, kExperimentPrefix)) { + std::optional exp = + FindExperimentByName(absl::StripPrefix(name, kExperimentPrefix)); + if (exp.has_value()) { + *value = IsExperimentActive(*exp) ? 
1 : 0; + return true; + } + } + + // LINT.ThenChange(//depot/google3/tcmalloc/testing/malloc_extension_test.cc) + return false; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/global_stats.h b/contrib/libs/tcmalloc/tcmalloc/global_stats.h new file mode 100644 index 000000000000..80d3c81e2152 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/global_stats.h @@ -0,0 +1,87 @@ +#pragma clang system_header +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_GLOBAL_STATS_H_ +#define TCMALLOC_GLOBAL_STATS_H_ + +#include +#include + +#include "tcmalloc/arena.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/metadata_object_allocator.h" +#include "tcmalloc/page_allocator.h" +#include "tcmalloc/pages.h" +#include "tcmalloc/stats.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Extract interesting stats +struct TCMallocStats { + uint64_t thread_bytes; // Bytes in thread caches + uint64_t central_bytes; // Bytes in central cache + uint64_t transfer_bytes; // Bytes in central transfer cache + uint64_t metadata_bytes; // Bytes alloced for metadata + uint64_t sharded_transfer_bytes; // Bytes in per-CCX cache + uint64_t per_cpu_bytes; // Bytes in per-CPU cache + uint64_t pagemap_root_bytes_res; // Resident bytes of pagemap root node + uint64_t percpu_metadata_bytes_res; // Resident bytes of the per-CPU metadata + AllocatorStats tc_stats; // ThreadCache objects + AllocatorStats span_stats; // Span objects + AllocatorStats stack_stats; // StackTrace objects + AllocatorStats linked_sample_stats; // StackTraceTable::LinkedSample objects + size_t pagemap_bytes; // included in metadata bytes + size_t percpu_metadata_bytes; // included in metadata bytes + BackingStats pageheap; // Stats from page heap + PageAllocator::PeakStats peak_stats; + + Length num_released_total; + Length num_released_release_memory_to_system; + Length num_released_process_background_actions; + Length num_released_soft_limit_exceeded; + Length num_released_hard_limit_exceeded; + + ArenaStats arena; // Stats from the metadata Arena + + // Explicitly declare the ctor to put it in the google_malloc section. 
+ TCMallocStats() = default; +}; + +void ExtractTCMallocStats(TCMallocStats* r, bool report_residence); + +uint64_t InUseByApp(const TCMallocStats& stats); +uint64_t VirtualMemoryUsed(const TCMallocStats& stats); +uint64_t UnmappedBytes(const TCMallocStats& stats); +uint64_t PhysicalMemoryUsed(const TCMallocStats& stats); +uint64_t RequiredBytes(const TCMallocStats& stats); +size_t ExternalBytes(const TCMallocStats& stats); +size_t HeapSizeBytes(const BackingStats& stats); +size_t LocalBytes(const TCMallocStats& stats); +size_t SlackBytes(const BackingStats& stats); + +// WRITE stats to "out" +void DumpStats(Printer& out, int level); +void DumpStatsInPbtxt(Printer& out, int level); + +bool GetNumericProperty(const char* name_data, size_t name_size, size_t* value); + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_GLOBAL_STATS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/guarded_allocations.h b/contrib/libs/tcmalloc/tcmalloc/guarded_allocations.h new file mode 100644 index 000000000000..ecf10d87cc36 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/guarded_allocations.h @@ -0,0 +1,62 @@ +#pragma clang system_header +// Copyright 2023 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
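
The property lookup implemented and declared above is what the public numeric-property interface ultimately resolves. A hedged usage sketch, assuming the usual tcmalloc::MallocExtension::GetNumericProperty entry point (which is not shown in this patch) and reusing property names handled by the code above:

// Hedged usage sketch: querying a few of the properties handled above through
// the public MallocExtension wrapper, assumed to return an optional value
// when the property name is recognized by the running allocator.
#include <cstdio>

#include "tcmalloc/malloc_extension.h"

int main() {
  for (const char* prop : {"generic.current_allocated_bytes",
                           "generic.heap_size",
                           "generic.realized_fragmentation",
                           "tcmalloc.pageheap_free_bytes"}) {
    if (auto v = tcmalloc::MallocExtension::GetNumericProperty(prop)) {
      std::printf("%s = %zu\n", prop, *v);
    } else {
      std::printf("%s is not recognized by this build\n", prop);
    }
  }
  return 0;
}
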
+ +#ifndef TCMALLOC_GUARDED_ALLOCATIONS_H_ +#define TCMALLOC_GUARDED_ALLOCATIONS_H_ + +#include + +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/malloc_extension.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +struct GuardedAllocationsStackTrace { + void* stack[kMaxStackDepth]; + size_t depth = 0; + pid_t thread_id = 0; +}; + +enum class WriteFlag : int { Unknown, Read, Write }; + +enum class GuardedAllocationsErrorType { + kUseAfterFree, + kUseAfterFreeRead, + kUseAfterFreeWrite, + kBufferUnderflow, + kBufferUnderflowRead, + kBufferUnderflowWrite, + kBufferOverflow, + kBufferOverflowRead, + kBufferOverflowWrite, + kDoubleFree, + kBufferOverflowOnDealloc, + kUnknown, +}; + +struct GuardedAllocWithStatus { + void* alloc = nullptr; + Profile::Sample::GuardedStatus status = + Profile::Sample::GuardedStatus::Unknown; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_GUARDED_ALLOCATIONS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.cc b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.cc index cc02ed7a05df..9e2a54a45d92 100644 --- a/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.cc +++ b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.cc @@ -14,30 +14,34 @@ #include "tcmalloc/guarded_page_allocator.h" -#include -#include #include -#include #include -#include +#include #include -#include -#include +#include +#include +#include #include -#include "absl/base/call_once.h" +#include "absl/base/attributes.h" +#include "absl/base/internal/cycleclock.h" #include "absl/base/internal/spinlock.h" #include "absl/base/internal/sysinfo.h" +#include "absl/base/optimization.h" #include "absl/debugging/stacktrace.h" #include "absl/numeric/bits.h" -#include "absl/strings/string_view.h" #include "tcmalloc/common.h" -#include "tcmalloc/internal/environment.h" +#include "tcmalloc/guarded_allocations.h" +#include "tcmalloc/internal/allocation_guard.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" -#include "tcmalloc/internal/util.h" +#include "tcmalloc/internal/memory_tag.h" +#include "tcmalloc/internal/page_size.h" +#include "tcmalloc/malloc_extension.h" #include "tcmalloc/pagemap.h" -#include "tcmalloc/sampler.h" +#include "tcmalloc/pages.h" +#include "tcmalloc/parameters.h" #include "tcmalloc/static_vars.h" #include "tcmalloc/system-alloc.h" @@ -45,191 +49,346 @@ GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { -const size_t GuardedPageAllocator::kMagicSize; // NOLINT - -void GuardedPageAllocator::Init(size_t max_alloced_pages, size_t total_pages) { - CHECK_CONDITION(max_alloced_pages > 0); - CHECK_CONDITION(max_alloced_pages <= total_pages); - CHECK_CONDITION(total_pages <= kGpaMaxPages); - max_alloced_pages_ = max_alloced_pages; +void GuardedPageAllocator::Init(size_t max_allocated_pages, + size_t total_pages) { + TC_CHECK_GT(max_allocated_pages, 0); + TC_CHECK_LE(max_allocated_pages, total_pages); + TC_CHECK_LE(total_pages, kGpaMaxPages); + max_allocated_pages_ = max_allocated_pages; total_pages_ = total_pages; // If the system page size is larger than kPageSize, we need to use the // system page size for this allocator since mprotect operates on full pages // only. This case happens on PPC. 
- page_size_ = std::max(kPageSize, static_cast(getpagesize())); - ASSERT(page_size_ % kPageSize == 0); + page_size_ = std::max(kPageSize, static_cast(GetPageSize())); + TC_ASSERT_EQ(page_size_ % kPageSize, 0); - rand_ = reinterpret_cast(this); // Initialize RNG seed. + rand_.Reset(static_cast(absl::base_internal::CycleClock::Now()) + + reinterpret_cast(this)); MapPages(); } void GuardedPageAllocator::Destroy() { - absl::base_internal::SpinLockHolder h(&guarded_page_lock); + AllocationGuardSpinLockHolder h(&guarded_page_lock_); if (initialized_) { size_t len = pages_end_addr_ - pages_base_addr_; - int err = munmap(reinterpret_cast(pages_base_addr_), len); - ASSERT(err != -1); + int err = munmap(reinterpret_cast(pages_base_addr_), len); + TC_ASSERT_NE(err, -1); (void)err; initialized_ = false; } } -void *GuardedPageAllocator::Allocate(size_t size, size_t alignment) { - if (size == 0) return nullptr; - ssize_t free_slot = ReserveFreeSlot(); - if (free_slot == -1) return nullptr; // All slots are reserved. +// Reset is used by tests to ensure that subsequent allocations will be sampled. +void GuardedPageAllocator::Reset() { + // Reset sampled/guarded counters so that that we don't skip guarded sampling + // for a prolonged time due to accumulated stats. + tc_globals.total_sampled_count_.Add(-tc_globals.total_sampled_count_.value()); + successful_allocations_.Add(-successful_allocations_.value()); + // Allow allocations that are not currently covered by an existing allocation. + // Fully resetting the stack trace filter is a bad idea, because the pool may + // not be empty: a later deallocation would try to remove a non-existent entry + // from the filter. + stacktrace_filter_.DecayAll(); +} + +void GuardedPageAllocator::AcquireInternalLocks() { + guarded_page_lock_.Lock(); +} + +void GuardedPageAllocator::ReleaseInternalLocks() { + guarded_page_lock_.Unlock(); +} + +GuardedAllocWithStatus GuardedPageAllocator::TrySample( + size_t size, size_t alignment, Length num_pages, + const StackTrace& stack_trace) { + if (num_pages != Length(1)) { + skipped_allocations_toolarge_.Add(1); + return {nullptr, Profile::Sample::GuardedStatus::LargerThanOnePage}; + } + + const int64_t guarded_sampling_interval = + tcmalloc::tcmalloc_internal::Parameters::guarded_sampling_interval(); + // Guarded sampling is disabled if guarded_sampling_interval is negative. + if (guarded_sampling_interval < 0) { + return {nullptr, Profile::Sample::GuardedStatus::Disabled}; + } + // Never filter if guarded_sampling_interval == 0, or no samples yet. + const size_t num_guarded = successful_allocations(); + if (guarded_sampling_interval > 0 && num_guarded > 0) { + // The guarded page allocator should not exceed the desired sampling rate. + // To do so, we need to filter allocations while this condition holds: + // + // num_guarded * guarded_interval > num_sampled * profile_interval + // + // I.e. if the next guarded allocation should occur at total bytes allocated + // later than the next sampled allocation. Recall that sampled allocations + // are a superset of guarded sampled allocations, and num_sampled is always + // incremented _after_ num_guarded. 
+ // + // Assuming that the number of total samples (num_sampled) must always be + // larger or equal to the guarded samples (num_guarded), and allow for a + // target num_sampled:num_guarded ratio with up to 1 decimal place, the + // above can be rewritten as: + // + // guarded_interval * 10 > + // ((num_sampled * 10) / num_guarded) * profile_interval + // + // This avoids possible overflow if num_sampled or num_guarded grows larger, + // when individually multiplied by the intervals. We can avoid floating + // point math as well. + const int64_t profile_sampling_interval = + tcmalloc::tcmalloc_internal::Parameters::profile_sampling_interval(); + const int64_t num_sampled = tc_globals.total_sampled_count_.value(); + const int64_t ratio = (num_sampled * 10) / num_guarded; + if (guarded_sampling_interval * 10 > ratio * profile_sampling_interval) { + return {nullptr, Profile::Sample::GuardedStatus::RateLimited}; + } + + if (stacktrace_filter_.Contains({stack_trace.stack, stack_trace.depth})) { + // The probability that we skip a currently covered allocation scales + // proportional to pool utilization, with pool utilization of 50% or more + // resulting in always filtering currently covered allocations. + const size_t usage_pct = (allocated_pages() * 100) / max_allocated_pages_; + if (rand_.Next() % 50 <= usage_pct) { + // Decay even if the current allocation is filtered, so that we keep + // sampling even if we only see the same allocations over and over. + stacktrace_filter_.Decay(); + skipped_allocations_filtered_.Add(1); + return {nullptr, Profile::Sample::GuardedStatus::Filtered}; + } + } + } + // The num_pages == 1 constraint ensures that size <= kPageSize. And + // since alignments above kPageSize cause size_class == 0, we're also + // guaranteed alignment <= kPageSize + // + // In all cases kPageSize <= GPA::page_size_, so Allocate's preconditions + // are met. + return Allocate(size, alignment, stack_trace); +} + +GuardedAllocWithStatus GuardedPageAllocator::Allocate( + size_t size, size_t alignment, const StackTrace& stack_trace) { + if (size == 0) { + return {nullptr, Profile::Sample::GuardedStatus::TooSmall}; + } + const ssize_t free_slot = ReserveFreeSlot(); + if (free_slot == -1) { + // All slots are reserved. + return {nullptr, Profile::Sample::GuardedStatus::NoAvailableSlots}; + } - ASSERT(size <= page_size_); - ASSERT(alignment <= page_size_); - ASSERT(alignment == 0 || absl::has_single_bit(alignment)); - void *result = reinterpret_cast(SlotToAddr(free_slot)); + TC_ASSERT_LE(size, page_size_); + TC_ASSERT_LE(alignment, page_size_); + TC_ASSERT(alignment == 0 || absl::has_single_bit(alignment)); + void* result = reinterpret_cast(SlotToAddr(free_slot)); if (mprotect(result, page_size_, PROT_READ | PROT_WRITE) == -1) { - ASSERT(false && "mprotect failed"); - absl::base_internal::SpinLockHolder h(&guarded_page_lock); - num_failed_allocations_++; + TC_ASSERT(false, "mprotect(.., PROT_READ|PROT_WRITE) failed"); + AllocationGuardSpinLockHolder h(&guarded_page_lock_); + failed_allocations_.LossyAdd(1); + successful_allocations_.LossyAdd(-1); FreeSlot(free_slot); - return nullptr; + return {nullptr, Profile::Sample::GuardedStatus::MProtectFailed}; } // Place some allocations at end of page for better overflow detection. MaybeRightAlign(free_slot, size, alignment, &result); // Record stack trace. - SlotMetadata &d = data_[free_slot]; + SlotMetadata& d = data_[free_slot]; + // Count the number of pages that have been used at least once. 
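The coverage-based filtering above skips already-covered stack traces with a probability that grows with pool utilization and saturates once half of the slots are in use. A rough sketch of just that decision, with std::mt19937 standing in for the patch's Random helper:

```cpp
#include <cstddef>
#include <random>

// Chance of skipping an allocation whose stack trace is already covered:
// proportional to pool utilization, always skipping at >= 50% usage.
bool SkipCoveredAllocation(size_t allocated_pages, size_t max_allocated_pages,
                           std::mt19937& rng) {
  const size_t usage_pct = (allocated_pages * 100) / max_allocated_pages;
  return rng() % 50 <= usage_pct;
}
```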
+ if (ABSL_PREDICT_FALSE(d.allocation_start == 0)) { + pages_touched_.Add(1); + } + + static_assert(sizeof(d.alloc_trace.stack) == sizeof(stack_trace.stack)); + memcpy(d.alloc_trace.stack, stack_trace.stack, + stack_trace.depth * sizeof(stack_trace.stack[0])); + d.alloc_trace.depth = stack_trace.depth; + d.alloc_trace.thread_id = absl::base_internal::GetTID(); d.dealloc_trace.depth = 0; - d.alloc_trace.depth = absl::GetStackTrace(d.alloc_trace.stack, kMaxStackDepth, - /*skip_count=*/3); - d.alloc_trace.tid = absl::base_internal::GetTID(); d.requested_size = size; d.allocation_start = reinterpret_cast(result); + d.dealloc_count.store(0, std::memory_order_relaxed); + TC_ASSERT(!d.write_overflow_detected); + TC_ASSERT(!alignment || d.allocation_start % alignment == 0); - ASSERT(!alignment || d.allocation_start % alignment == 0); - return result; + stacktrace_filter_.Add({stack_trace.stack, stack_trace.depth}, 1); + return {result, Profile::Sample::GuardedStatus::Guarded}; } -void GuardedPageAllocator::Deallocate(void *ptr) { - ASSERT(PointerIsMine(ptr)); +// To trigger SEGV handler. +static ABSL_ATTRIBUTE_NOINLINE ABSL_ATTRIBUTE_NORETURN void ForceTouchPage( + void* ptr) { + // Spin, in case this thread is waiting for concurrent mprotect() to finish. + for (;;) { + *reinterpret_cast(ptr) = 'X'; + } +} + +void GuardedPageAllocator::Deallocate(void* ptr) { + TC_ASSERT(PointerIsMine(ptr)); const uintptr_t page_addr = GetPageAddr(reinterpret_cast(ptr)); - size_t slot = AddrToSlot(page_addr); + const size_t slot = AddrToSlot(page_addr); + SlotMetadata& d = data_[slot]; - absl::base_internal::SpinLockHolder h(&guarded_page_lock); - if (IsFreed(slot)) { - double_free_detected_ = true; - } else if (WriteOverflowOccurred(slot)) { - write_overflow_detected_ = true; + // On double-free, do not overwrite the original deallocation metadata, so + // that the report produced shows the original deallocation stack trace. + if (d.dealloc_count.fetch_add(1, std::memory_order_relaxed) != 0) { + ForceTouchPage(ptr); } - CHECK_CONDITION(mprotect(reinterpret_cast(page_addr), page_size_, - PROT_NONE) != -1); + // Record stack trace. Unwinding the stack is expensive, and holding the + // guarded_page_lock_ should be avoided here. + d.dealloc_trace.depth = + absl::GetStackTrace(d.dealloc_trace.stack, kMaxStackDepth, + /*skip_count=*/2); + d.dealloc_trace.thread_id = absl::base_internal::GetTID(); - if (write_overflow_detected_ || double_free_detected_) { - *reinterpret_cast(ptr) = 'X'; // Trigger SEGV handler. - CHECK_CONDITION(false); // Unreachable. + // Remove allocation (based on allocation stack trace) from filter. + stacktrace_filter_.Add({d.alloc_trace.stack, d.alloc_trace.depth}, -1); + + // Needs to be done before mprotect() because it accesses the object page to + // check canary bytes. + if (WriteOverflowOccurred(slot)) { + d.write_overflow_detected = true; } - // Record stack trace. - GpaStackTrace &trace = data_[slot].dealloc_trace; - trace.depth = absl::GetStackTrace(trace.stack, kMaxStackDepth, - /*skip_count=*/2); - trace.tid = absl::base_internal::GetTID(); + // Calling mprotect() should also be done outside the guarded_page_lock_ + // critical section, since mprotect() can have relatively large latency. 
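A simplified sketch of the double-free detection used in Deallocate() above: the first free observes a previous dealloc_count of 0, records metadata, and protects the page; a second free observes a non-zero count and deliberately touches the now-protected page so the SEGV handler can produce a report. The names below are illustrative, not the patch's real types:

```cpp
#include <sys/mman.h>

#include <atomic>
#include <cstddef>

struct SlotSketch {
  std::atomic<int> dealloc_count{0};
};

[[noreturn]] void ForceFault(void* page) {
  // Spin in case a concurrent mprotect() has not landed yet.
  for (;;) *static_cast<volatile char*>(page) = 'X';
}

void DeallocateSketch(SlotSketch& slot, void* page, size_t page_size) {
  if (slot.dealloc_count.fetch_add(1, std::memory_order_relaxed) != 0) {
    ForceFault(page);  // double free: fault on the already-protected page
  }
  mprotect(page, page_size, PROT_NONE);  // quarantine the slot
}
```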
+ TC_CHECK_EQ( + 0, mprotect(reinterpret_cast(page_addr), page_size_, PROT_NONE)); + if (d.write_overflow_detected) { + ForceTouchPage(ptr); + } + + AllocationGuardSpinLockHolder h(&guarded_page_lock_); FreeSlot(slot); } -size_t GuardedPageAllocator::GetRequestedSize(const void *ptr) const { - ASSERT(PointerIsMine(ptr)); +size_t GuardedPageAllocator::GetRequestedSize(const void* ptr) const { + TC_ASSERT(PointerIsMine(ptr)); size_t slot = AddrToSlot(GetPageAddr(reinterpret_cast(ptr))); return data_[slot].requested_size; } std::pair GuardedPageAllocator::GetAllocationOffsetAndSize( - const void *ptr) const { - ASSERT(PointerIsMine(ptr)); + const void* ptr) const { + TC_ASSERT(PointerIsMine(ptr)); const uintptr_t addr = reinterpret_cast(ptr); const size_t slot = GetNearestSlot(addr); return {addr - data_[slot].allocation_start, data_[slot].requested_size}; } -GuardedPageAllocator::ErrorType GuardedPageAllocator::GetStackTraces( - const void *ptr, GpaStackTrace *alloc_trace, - GpaStackTrace *dealloc_trace) const { - ASSERT(PointerIsMine(ptr)); +GuardedAllocationsErrorType GuardedPageAllocator::GetStackTraces( + const void* ptr, GuardedAllocationsStackTrace** alloc_trace, + GuardedAllocationsStackTrace** dealloc_trace) const { + TC_ASSERT(PointerIsMine(ptr)); const uintptr_t addr = reinterpret_cast(ptr); size_t slot = GetNearestSlot(addr); - *alloc_trace = data_[slot].alloc_trace; - *dealloc_trace = data_[slot].dealloc_trace; + *alloc_trace = &data_[slot].alloc_trace; + *dealloc_trace = &data_[slot].dealloc_trace; return GetErrorType(addr, data_[slot]); } // We take guarded samples during periodic profiling samples. Computes the // mean number of profiled samples made for every guarded sample. -static int GetChainedRate() { - auto guarded_rate = Parameters::guarded_sampling_rate(); - auto sample_rate = Parameters::profile_sampling_rate(); - if (guarded_rate < 0 || sample_rate <= 0) { - return guarded_rate; +static int GetChainedInterval() { + auto guarded_interval = Parameters::guarded_sampling_interval(); + auto sample_interval = Parameters::profile_sampling_interval(); + if (guarded_interval < 0 || sample_interval <= 0) { + return guarded_interval; } else { - return std::ceil(static_cast(guarded_rate) / - static_cast(sample_rate)); + return std::ceil(static_cast(guarded_interval) / + static_cast(sample_interval)); } } -void GuardedPageAllocator::Print(Printer *out) { - absl::base_internal::SpinLockHolder h(&guarded_page_lock); - out->printf( +void GuardedPageAllocator::Print(Printer& out) { + out.printf( "\n" "------------------------------------------------\n" "GWP-ASan Status\n" "------------------------------------------------\n" "Successful Allocations: %zu\n" "Failed Allocations: %zu\n" - "Slots Currently Allocated: %zu\n" - "Slots Currently Quarantined: %zu\n" - "Maximum Slots Allocated: %zu / %zu\n" + "Skipped Allocations (No Slots): %zu\n" + "Skipped Allocations (Filtered): %zu\n" + "Skipped Allocations (Too Large): %zu\n" + "Currently Allocated: %zu / %zu\n" + "Allocated High-Watermark: %zu / %zu\n" + "Object Pages Touched: %zu / %zu\n" + "Currently Quarantined: %zu\n" "PARAMETER tcmalloc_guarded_sample_parameter %d\n", - num_allocation_requests_ - num_failed_allocations_, - num_failed_allocations_, num_alloced_pages_, - total_pages_ - num_alloced_pages_, num_alloced_pages_max_, - max_alloced_pages_, GetChainedRate()); -} - -void GuardedPageAllocator::PrintInPbtxt(PbtxtRegion *gwp_asan) const { - absl::base_internal::SpinLockHolder h(&guarded_page_lock); - 
gwp_asan->PrintI64("successful_allocations", - num_allocation_requests_ - num_failed_allocations_); - gwp_asan->PrintI64("failed_allocations", num_failed_allocations_); - gwp_asan->PrintI64("current_slots_allocated", num_alloced_pages_); - gwp_asan->PrintI64("current_slots_quarantined", - total_pages_ - num_alloced_pages_); - gwp_asan->PrintI64("max_slots_allocated", num_alloced_pages_max_); - gwp_asan->PrintI64("allocated_slot_limit", max_alloced_pages_); - gwp_asan->PrintI64("tcmalloc_guarded_sample_parameter", GetChainedRate()); + // Successful Allocations + successful_allocations_.value(), + // Failed Allocations + failed_allocations_.value(), + // Skipped Allocations (No Slots) + skipped_allocations_noslots_.value(), + // Skipped Allocations (Filtered) + skipped_allocations_filtered_.value(), + // Skipped Allocations (Too Large) + skipped_allocations_toolarge_.value(), + // Currently Allocated + allocated_pages(), max_allocated_pages_, + // Allocated High-Watermark + high_allocated_pages_.load(std::memory_order_relaxed), + max_allocated_pages_, + // Object Pages Touched + pages_touched_.value(), total_pages_, + // Currently Quarantined + total_pages_ - allocated_pages(), + // PARAMETER + GetChainedInterval()); +} + +void GuardedPageAllocator::PrintInPbtxt(PbtxtRegion& gwp_asan) { + gwp_asan.PrintI64("successful_allocations", successful_allocations_.value()); + gwp_asan.PrintI64("failed_allocations", failed_allocations_.value()); + gwp_asan.PrintI64("skipped_allocations_noslots", + skipped_allocations_noslots_.value()); + gwp_asan.PrintI64("skipped_allocations_filtered", + skipped_allocations_filtered_.value()); + gwp_asan.PrintI64("skipped_allocations_toolarge", + skipped_allocations_toolarge_.value()); + gwp_asan.PrintI64("allocated_pages", allocated_pages()); + gwp_asan.PrintI64("quarantine_pages", total_pages_ - allocated_pages()); + gwp_asan.PrintI64("high_allocated_pages", + high_allocated_pages_.load(std::memory_order_relaxed)); + gwp_asan.PrintI64("max_allocated_pages", max_allocated_pages_); + gwp_asan.PrintI64("pages_touched", pages_touched_.value()); + gwp_asan.PrintI64("total_pages", total_pages_); + gwp_asan.PrintI64("tcmalloc_guarded_sample_parameter", GetChainedInterval()); } // Maps 2 * total_pages_ + 1 pages so that there are total_pages_ unique pages // we can return from Allocate with guard pages before and after them. void GuardedPageAllocator::MapPages() { - absl::base_internal::SpinLockHolder h(&guarded_page_lock); - ASSERT(!first_page_addr_); - ASSERT(page_size_ % getpagesize() == 0); + AllocationGuardSpinLockHolder h(&guarded_page_lock_); + TC_ASSERT(!first_page_addr_); + TC_ASSERT_EQ(page_size_ % GetPageSize(), 0); size_t len = (2 * total_pages_ + 1) * page_size_; - auto base_addr = reinterpret_cast( - MmapAligned(len, page_size_, MemoryTag::kSampled)); - ASSERT(base_addr); + auto base_addr = + reinterpret_cast(tc_globals.system_allocator().MmapAligned( + len, page_size_, MemoryTag::kSampled)); + TC_ASSERT(base_addr); if (!base_addr) return; // Tell TCMalloc's PageMap about the memory we own. 
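For reference, the geometry that MapPages() establishes above looks like the sketch below (shown for a hypothetical total_pages of 3); the address conversions mirror SlotToAddr()/AddrToSlot() further down in this file:

```cpp
#include <cstddef>
#include <cstdint>

// Layout for total_pages = 3 (G = guard page, U = usable page):
//
//   base:  [G][U][G][U][G][U][G]      -> 2 * 3 + 1 = 7 pages mapped
//
// first_page_addr points at the first U; usable pages are two pages apart.
uintptr_t SlotToAddrSketch(uintptr_t first_page_addr, size_t page_size,
                           size_t slot) {
  return first_page_addr + 2 * slot * page_size;
}

size_t AddrToSlotSketch(uintptr_t first_page_addr, size_t page_size,
                        uintptr_t page_addr) {
  return (page_addr - first_page_addr) / page_size / 2;
}
```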
- const PageId page = PageIdContaining(reinterpret_cast(base_addr)); + const PageId page = PageIdContaining(reinterpret_cast(base_addr)); const Length page_len = BytesToLengthFloor(len); - if (!Static::pagemap().Ensure(page, page_len)) { - ASSERT(false && "Failed to notify page map of page-guarded memory."); + if (!tc_globals.pagemap().Ensure(Range(page, page_len))) { + TC_ASSERT(false, "Failed to notify page map of page-guarded memory."); return; } // Allocate memory for slot metadata. - data_ = reinterpret_cast( - Static::arena().Alloc(sizeof(*data_) * total_pages_)); + data_ = reinterpret_cast( + tc_globals.arena().Alloc(sizeof(*data_) * total_pages_)); for (size_t i = 0; i < total_pages_; ++i) { new (&data_[i]) SlotMetadata; } @@ -240,45 +399,53 @@ void GuardedPageAllocator::MapPages() { // Align first page to page_size_. first_page_addr_ = GetPageAddr(pages_base_addr_ + page_size_); - std::fill_n(free_pages_, total_pages_, true); initialized_ = true; } -// Selects a random slot in O(total_pages_) time. +// Selects a random slot in O(1) time. ssize_t GuardedPageAllocator::ReserveFreeSlot() { - absl::base_internal::SpinLockHolder h(&guarded_page_lock); + AllocationGuardSpinLockHolder h(&guarded_page_lock_); if (!initialized_ || !allow_allocations_) return -1; - num_allocation_requests_++; - if (num_alloced_pages_ == max_alloced_pages_) { - num_failed_allocations_++; + if (GetNumAvailablePages() == 0) { + skipped_allocations_noslots_.Add(1); return -1; } - - rand_ = Sampler::NextRandom(rand_); - size_t num_free_pages = total_pages_ - num_alloced_pages_; - size_t slot = GetIthFreeSlot(rand_ % num_free_pages); - ASSERT(free_pages_[slot]); - free_pages_[slot] = false; - num_alloced_pages_++; - num_alloced_pages_max_ = std::max(num_alloced_pages_, num_alloced_pages_max_); + successful_allocations_.LossyAdd(1); + + const size_t slot = GetFreeSlot(); + TC_ASSERT(!used_pages_.GetBit(slot)); + used_pages_.SetBit(slot); + + // Both writes to allocated_pages_ happen under the guarded_page_lock_, so + // we do not have to use an atomic fetch_add(), which is more expensive due to + // typically imposing a full memory barrier when lowered on e.g. x86. Recent + // compiler optimizations will also turn the store(load(relaxed) + N, relaxed) + // into a simple add instruction. + const size_t nalloced = allocated_pages_.load(std::memory_order_relaxed) + 1; + allocated_pages_.store(nalloced, std::memory_order_relaxed); + if (nalloced > high_allocated_pages_.load(std::memory_order_relaxed)) { + high_allocated_pages_.store(nalloced, std::memory_order_relaxed); + } return slot; } -size_t GuardedPageAllocator::GetIthFreeSlot(size_t ith_free_slot) { - ASSERT(ith_free_slot < total_pages_ - num_alloced_pages_); - for (size_t free_slot_count = 0, j = 0;; j++) { - if (free_pages_[j]) { - if (free_slot_count == ith_free_slot) return j; - free_slot_count++; - } - } +size_t GuardedPageAllocator::GetFreeSlot() { + const size_t idx = rand_.Next() % total_pages_; + // Find the closest adjacent free slot to the random index. + ssize_t slot = used_pages_.FindClearBackwards(idx); + if (slot >= 0) return slot; + slot = used_pages_.FindClear(idx); + TC_ASSERT_LT(slot, total_pages_); + return slot; } void GuardedPageAllocator::FreeSlot(size_t slot) { - ASSERT(slot < total_pages_); - ASSERT(!free_pages_[slot]); - free_pages_[slot] = true; - num_alloced_pages_--; + TC_ASSERT_LT(slot, total_pages_); + TC_ASSERT(used_pages_.GetBit(slot)); + used_pages_.ClearBit(slot); + // Cheaper decrement - see above. 
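The "cheaper decrement" just below and the increment in ReserveFreeSlot() rely on the same idea: every writer already holds guarded_page_lock_, so a relaxed load/store pair (which compilers can lower to a plain add) replaces an atomic fetch_add, while lock-free readers still get an approximately current value. A sketch of the pattern, with std::mutex standing in for the spinlock:

```cpp
#include <atomic>
#include <cstddef>
#include <mutex>

class GuardedPageCounter {
 public:
  void Increment() {
    std::lock_guard<std::mutex> lock(mu_);
    pages_.store(pages_.load(std::memory_order_relaxed) + 1,
                 std::memory_order_relaxed);
  }
  void Decrement() {
    std::lock_guard<std::mutex> lock(mu_);
    pages_.store(pages_.load(std::memory_order_relaxed) - 1,
                 std::memory_order_relaxed);
  }
  // Readers do not take the lock; slight staleness is acceptable here.
  size_t Value() const { return pages_.load(std::memory_order_relaxed); }

 private:
  std::mutex mu_;
  std::atomic<size_t> pages_{0};
};
```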
+ allocated_pages_.store(allocated_pages_.load(std::memory_order_relaxed) - 1, + std::memory_order_relaxed); } uintptr_t GuardedPageAllocator::GetPageAddr(uintptr_t addr) const { @@ -308,10 +475,6 @@ size_t GuardedPageAllocator::GetNearestSlot(uintptr_t addr) const { return AddrToSlot(GetPageAddr(GetNearestValidPage(addr))); } -bool GuardedPageAllocator::IsFreed(size_t slot) const { - return free_pages_[slot]; -} - bool GuardedPageAllocator::WriteOverflowOccurred(size_t slot) const { if (!ShouldRightAlign(slot)) return false; uint8_t magic = GetWriteOverflowMagic(slot); @@ -320,40 +483,46 @@ bool GuardedPageAllocator::WriteOverflowOccurred(size_t slot) const { uintptr_t page_end = SlotToAddr(slot) + page_size_; uintptr_t magic_end = std::min(page_end, alloc_end + kMagicSize); for (uintptr_t p = alloc_end; p < magic_end; ++p) { - if (*reinterpret_cast(p) != magic) return true; + if (*reinterpret_cast(p) != magic) return true; } return false; } -GuardedPageAllocator::ErrorType GuardedPageAllocator::GetErrorType( - uintptr_t addr, const SlotMetadata &d) const { - if (!d.allocation_start) return ErrorType::kUnknown; - if (double_free_detected_) return ErrorType::kDoubleFree; - if (write_overflow_detected_) return ErrorType::kBufferOverflowOnDealloc; - if (d.dealloc_trace.depth) return ErrorType::kUseAfterFree; - if (addr < d.allocation_start) return ErrorType::kBufferUnderflow; +GuardedAllocationsErrorType GuardedPageAllocator::GetErrorType( + uintptr_t addr, const SlotMetadata& d) const { + if (!d.allocation_start) return GuardedAllocationsErrorType::kUnknown; + if (d.dealloc_count.load(std::memory_order_relaxed) >= 2) + return GuardedAllocationsErrorType::kDoubleFree; + if (d.write_overflow_detected) + return GuardedAllocationsErrorType::kBufferOverflowOnDealloc; + if (d.dealloc_trace.depth > 0) { + return GuardedAllocationsErrorType::kUseAfterFree; + } + if (addr < d.allocation_start) { + return GuardedAllocationsErrorType::kBufferUnderflow; + } if (addr >= d.allocation_start + d.requested_size) { - return ErrorType::kBufferOverflow; + return GuardedAllocationsErrorType::kBufferOverflow; } - return ErrorType::kUnknown; + return GuardedAllocationsErrorType::kUnknown; } uintptr_t GuardedPageAllocator::SlotToAddr(size_t slot) const { - ASSERT(slot < total_pages_); + TC_ASSERT_LT(slot, total_pages_); return first_page_addr_ + 2 * slot * page_size_; } size_t GuardedPageAllocator::AddrToSlot(uintptr_t addr) const { uintptr_t offset = addr - first_page_addr_; - ASSERT(offset % page_size_ == 0); - ASSERT((offset / page_size_) % 2 == 0); + TC_ASSERT_EQ(offset % page_size_, 0); + TC_ASSERT_EQ((offset / page_size_) % 2, 0); int slot = offset / page_size_ / 2; - ASSERT(slot >= 0 && slot < total_pages_); + TC_ASSERT(slot >= 0 && slot < total_pages_); return slot; } void GuardedPageAllocator::MaybeRightAlign(size_t slot, size_t size, - size_t alignment, void **ptr) { + size_t alignment, void** ptr) { if (!ShouldRightAlign(slot)) return; uintptr_t adjusted_ptr = reinterpret_cast(*ptr) + page_size_ - size; @@ -366,7 +535,7 @@ void GuardedPageAllocator::MaybeRightAlign(size_t slot, size_t size, // __STDCPP_DEFAULT_NEW_ALIGNMENT__, we're safe aligning to that value. size_t default_alignment = std::min(absl::bit_ceil(size), - std::max(kAlignment, + std::max(static_cast(kAlignment), static_cast(__STDCPP_DEFAULT_NEW_ALIGNMENT__))); // Ensure valid alignment. 
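A compact sketch of the canary scheme used by WriteOverflowOccurred()/MaybeRightAlign() above: alignment padding after a right-aligned object is filled with a slot-specific byte at allocation time and re-checked at deallocation. The magic-byte value here is illustrative; the patch derives it per slot via GetWriteOverflowMagic():

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>

constexpr size_t kCanaryBytes = 32;  // mirrors kMagicSize in the patch

void WriteCanary(unsigned char* alloc_end, size_t padding, uint8_t magic) {
  std::memset(alloc_end, magic, std::min(padding, kCanaryBytes));
}

bool CanaryClobbered(const unsigned char* alloc_end, size_t padding,
                     uint8_t magic) {
  const size_t n = std::min(padding, kCanaryBytes);
  for (size_t i = 0; i < n; ++i) {
    if (alloc_end[i] != magic) return true;  // small overflow write detected
  }
  return false;
}
```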
@@ -376,185 +545,9 @@ void GuardedPageAllocator::MaybeRightAlign(size_t slot, size_t size, // Write magic bytes in alignment padding to detect small overflow writes. size_t magic_size = std::min(alignment_padding, kMagicSize); - memset(reinterpret_cast(adjusted_ptr + size), + memset(reinterpret_cast(adjusted_ptr + size), GetWriteOverflowMagic(slot), magic_size); - *ptr = reinterpret_cast(adjusted_ptr); -} - -// If this failure occurs during "bazel test", writes a warning for Bazel to -// display. -static void RecordBazelWarning(absl::string_view error) { - const char *warning_file = thread_safe_getenv("TEST_WARNINGS_OUTPUT_FILE"); - if (!warning_file) return; // Not a bazel test. - - constexpr char warning[] = "GWP-ASan error detected: "; - int fd = open(warning_file, O_CREAT | O_WRONLY | O_APPEND, 0644); - if (fd == -1) return; - (void)write(fd, warning, sizeof(warning) - 1); - (void)write(fd, error.data(), error.size()); - (void)write(fd, "\n", 1); - close(fd); -} - -// If this failure occurs during a gUnit test, writes an XML file describing the -// error type. Note that we cannot use ::testing::Test::RecordProperty() -// because it doesn't write the XML file if a test crashes (which we're about to -// do here). So we write directly to the XML file instead. -// -static void RecordTestFailure(absl::string_view error) { - const char *xml_file = thread_safe_getenv("XML_OUTPUT_FILE"); - if (!xml_file) return; // Not a gUnit test. - - // Record test failure for Sponge. - constexpr char xml_text_header[] = - "" - "" - " " - " " - " " - " " - " GWP-ASan detected a memory error. See the test log for full report." - " " - ""; - - int fd = open(xml_file, O_CREAT | O_WRONLY | O_TRUNC, 0644); - if (fd == -1) return; - (void)write(fd, xml_text_header, sizeof(xml_text_header) - 1); - (void)write(fd, error.data(), error.size()); - (void)write(fd, xml_text_footer, sizeof(xml_text_footer) - 1); - close(fd); -} -// -// If this crash occurs in a test, records test failure summaries. -// -// error contains the type of error to record. -static void RecordCrash(absl::string_view error) { - - RecordBazelWarning(error); - RecordTestFailure(error); -} - -static void PrintStackTrace(void **stack_frames, size_t depth) { - for (size_t i = 0; i < depth; ++i) { - Log(kLog, __FILE__, __LINE__, " @ ", stack_frames[i]); - } -} - -static void PrintStackTraceFromSignalHandler(void *context) { - void *stack_frames[kMaxStackDepth]; - size_t depth = absl::GetStackTraceWithContext(stack_frames, kMaxStackDepth, 1, - context, nullptr); - PrintStackTrace(stack_frames, depth); -} - -// A SEGV handler that prints stack traces for the allocation and deallocation -// of relevant memory as well as the location of the memory error. 
-static void SegvHandler(int signo, siginfo_t *info, void *context) { - if (signo != SIGSEGV) return; - void *fault = info->si_addr; - if (!Static::guardedpage_allocator().PointerIsMine(fault)) return; - GuardedPageAllocator::GpaStackTrace alloc_trace, dealloc_trace; - GuardedPageAllocator::ErrorType error = - Static::guardedpage_allocator().GetStackTraces(fault, &alloc_trace, - &dealloc_trace); - if (error == GuardedPageAllocator::ErrorType::kUnknown) return; - pid_t current_thread = absl::base_internal::GetTID(); - off_t offset; - size_t size; - std::tie(offset, size) = - Static::guardedpage_allocator().GetAllocationOffsetAndSize(fault); - - Log(kLog, __FILE__, __LINE__, - "*** GWP-ASan " - "(https://google.github.io/tcmalloc/gwp-asan.html) " - "has detected a memory error ***"); - Log(kLog, __FILE__, __LINE__, ">>> Access at offset", offset, - "into buffer of length", size); - Log(kLog, __FILE__, __LINE__, - "Error originates from memory allocated in thread", alloc_trace.tid, - "at:"); - PrintStackTrace(alloc_trace.stack, alloc_trace.depth); - - switch (error) { - case GuardedPageAllocator::ErrorType::kUseAfterFree: - Log(kLog, __FILE__, __LINE__, "The memory was freed in thread", - dealloc_trace.tid, "at:"); - PrintStackTrace(dealloc_trace.stack, dealloc_trace.depth); - Log(kLog, __FILE__, __LINE__, "Use-after-free occurs in thread", - current_thread, "at:"); - RecordCrash("use-after-free"); - break; - case GuardedPageAllocator::ErrorType::kBufferUnderflow: - Log(kLog, __FILE__, __LINE__, "Buffer underflow occurs in thread", - current_thread, "at:"); - RecordCrash("buffer-underflow"); - break; - case GuardedPageAllocator::ErrorType::kBufferOverflow: - Log(kLog, __FILE__, __LINE__, "Buffer overflow occurs in thread", - current_thread, "at:"); - RecordCrash("buffer-overflow"); - break; - case GuardedPageAllocator::ErrorType::kDoubleFree: - Log(kLog, __FILE__, __LINE__, "The memory was freed in thread", - dealloc_trace.tid, "at:"); - PrintStackTrace(dealloc_trace.stack, dealloc_trace.depth); - Log(kLog, __FILE__, __LINE__, "Double free occurs in thread", - current_thread, "at:"); - RecordCrash("double-free"); - break; - case GuardedPageAllocator::ErrorType::kBufferOverflowOnDealloc: - Log(kLog, __FILE__, __LINE__, - "Buffer overflow (write) detected in thread", current_thread, - "at free:"); - RecordCrash("buffer-overflow-detected-at-free"); - break; - case GuardedPageAllocator::ErrorType::kUnknown: - Crash(kCrash, __FILE__, __LINE__, "Unexpected ErrorType::kUnknown"); - } - PrintStackTraceFromSignalHandler(context); - if (error == GuardedPageAllocator::ErrorType::kBufferOverflowOnDealloc) { - Log(kLog, __FILE__, __LINE__, - "*** Try rerunning with --config=asan to get stack trace of overflow " - "***"); - } -} - -static struct sigaction old_sa; - -static void ForwardSignal(int signo, siginfo_t *info, void *context) { - if (old_sa.sa_flags & SA_SIGINFO) { - old_sa.sa_sigaction(signo, info, context); - } else if (old_sa.sa_handler == SIG_DFL) { - // No previous handler registered. Re-raise signal for core dump. - int err = sigaction(signo, &old_sa, nullptr); - if (err == -1) { - Log(kLog, __FILE__, __LINE__, "Couldn't restore previous sigaction!"); - } - raise(signo); - } else if (old_sa.sa_handler == SIG_IGN) { - return; // Previous sigaction ignored signal, so do the same. 
- } else { - old_sa.sa_handler(signo); - } -} - -static void HandleSegvAndForward(int signo, siginfo_t *info, void *context) { - SegvHandler(signo, info, context); - ForwardSignal(signo, info, context); -} - -extern "C" void MallocExtension_Internal_ActivateGuardedSampling() { - static absl::once_flag flag; - absl::call_once(flag, []() { - struct sigaction action = {}; - action.sa_sigaction = HandleSegvAndForward; - sigemptyset(&action.sa_mask); - action.sa_flags = SA_SIGINFO; - sigaction(SIGSEGV, &action, &old_sa); - Static::guardedpage_allocator().AllowAllocations(); - }); + *ptr = reinterpret_cast(adjusted_ptr); } } // namespace tcmalloc_internal diff --git a/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.h b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.h index e5a6118c081c..295a9d756f4d 100644 --- a/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.h +++ b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,24 +16,29 @@ #ifndef TCMALLOC_GUARDED_PAGE_ALLOCATOR_H_ #define TCMALLOC_GUARDED_PAGE_ALLOCATOR_H_ -#include -#include -#include - +#include +#include +#include #include #include "absl/base/attributes.h" +#include "absl/base/const_init.h" #include "absl/base/internal/spinlock.h" #include "absl/base/thread_annotations.h" #include "tcmalloc/common.h" +#include "tcmalloc/guarded_allocations.h" +#include "tcmalloc/internal/atomic_stats_counter.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/exponential_biased.h" #include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/range_tracker.h" +#include "tcmalloc/internal/stacktrace_filter.h" +#include "tcmalloc/pages.h" GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { -ABSL_CONST_INIT extern absl::base_internal::SpinLock guarded_page_lock; - // An allocator that gives each allocation a new region, with guard pages on // either side of the allocated region. If a buffer is overflowed to the next // guard page or underflowed to the previous guard page, a segfault occurs. @@ -44,16 +50,7 @@ ABSL_CONST_INIT extern absl::base_internal::SpinLock guarded_page_lock; // exception of calls to Init() and Destroy() (see corresponding function // comments). // -// SYNCHRONIZATION -// Requires the SpinLock guarded_page_lock to be defined externally. This is -// required so that this class may be instantiated with static storage -// duration. The lock is held by this class during initialization and when -// accessing the internal free page map. -// // Example: -// ABSL_CONST_INIT absl::base_internal::SpinLock -// guarded_page_lock(absl::kConstInit, -// absl::base_internal::SCHEDULE_KERNEL_ONLY); // ABSL_CONST_INIT GuardedPageAllocator gpa; // // void foo() { @@ -75,57 +72,39 @@ ABSL_CONST_INIT extern absl::base_internal::SpinLock guarded_page_lock; // } class GuardedPageAllocator { public: - struct GpaStackTrace { - void *stack[kMaxStackDepth]; - size_t depth = 0; - pid_t tid = 0; - }; - // Maximum number of pages this class can allocate. 
static constexpr size_t kGpaMaxPages = 512; - enum class ErrorType { - kUseAfterFree, - kBufferUnderflow, - kBufferOverflow, - kDoubleFree, - kBufferOverflowOnDealloc, - kUnknown, - }; - constexpr GuardedPageAllocator() - : free_pages_{}, - num_alloced_pages_(0), - num_alloced_pages_max_(0), - num_allocation_requests_(0), - num_failed_allocations_(0), + : guarded_page_lock_(absl::kConstInit, + absl::base_internal::SCHEDULE_KERNEL_ONLY), + allocated_pages_(0), + high_allocated_pages_(0), data_(nullptr), pages_base_addr_(0), pages_end_addr_(0), first_page_addr_(0), - max_alloced_pages_(0), + max_allocated_pages_(0), total_pages_(0), page_size_(0), rand_(0), initialized_(false), - allow_allocations_(false), - double_free_detected_(false), - write_overflow_detected_(false) {} + allow_allocations_(false) {} - GuardedPageAllocator(const GuardedPageAllocator &) = delete; - GuardedPageAllocator &operator=(const GuardedPageAllocator &) = delete; + GuardedPageAllocator(const GuardedPageAllocator&) = delete; + GuardedPageAllocator& operator=(const GuardedPageAllocator&) = delete; ~GuardedPageAllocator() = default; - // Configures this allocator to allocate up to max_alloced_pages pages at a + // Configures this allocator to allocate up to max_allocated_pages pages at a // time from a pool of total_pages pages, where: - // 1 <= max_alloced_pages <= total_pages <= kGpaMaxPages + // 1 <= max_allocated_pages <= total_pages <= kGpaMaxPages // // This method should be called non-concurrently and only once to complete // initialization. Dynamic initialization is deliberately done here and not // in the constructor, thereby allowing the constructor to be constexpr and // avoiding static initialization order issues. - void Init(size_t max_alloced_pages, size_t total_pages) + void Init(size_t max_allocated_pages, size_t total_pages) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); // Unmaps memory allocated by this class. @@ -136,27 +115,45 @@ class GuardedPageAllocator { // and avoiding use-after-destruction issues for static/global instances. void Destroy(); - // On success, returns a pointer to size bytes of page-guarded memory, aligned - // to alignment. On failure, returns nullptr. The returned pointer is - // guaranteed to be tagged. Failure can occur if memory could not be mapped - // or protected, if all guarded pages are already allocated, or if size is 0. + void AcquireInternalLocks() ABSL_LOCKS_EXCLUDED(guarded_page_lock_); + void ReleaseInternalLocks() ABSL_LOCKS_EXCLUDED(guarded_page_lock_); + + + // If this allocation can be guarded, and if it's time to do a guarded sample, + // returns an instance of GuardedAllocWithStatus, that includes guarded + // allocation Span and guarded status. Otherwise, returns nullptr and the + // status indicating why the allocation may not be guarded. + GuardedAllocWithStatus TrySample(size_t size, size_t alignment, + Length num_pages, + const StackTrace& stack_trace); + + // On success, returns an instance of GuardedAllocWithStatus which includes a + // pointer to size bytes of page-guarded memory, aligned to alignment. The + // member 'alloc' is a pointer that is guaranteed to be tagged. The 'status' + // member is set to GuardedStatus::Guarded. On failure, returns an instance + // of GuardedAllocWithStatus (the 'alloc' member is set to 'nullptr'). + // Failure can occur if memory could not be mapped or protected, if all + // guarded pages are already allocated, or if size is 0. 
These conditions are + // reflected in the 'status' member of the GuardedAllocWithStatus return + // value. // // Precondition: size and alignment <= page_size_ // Precondition: alignment is 0 or a power of 2 - void *Allocate(size_t size, size_t alignment) - ABSL_LOCKS_EXCLUDED(guarded_page_lock); + GuardedAllocWithStatus Allocate(size_t size, size_t alignment, + const StackTrace& stack_trace) + ABSL_LOCKS_EXCLUDED(guarded_page_lock_); // Deallocates memory pointed to by ptr. ptr must have been previously // returned by a call to Allocate. - void Deallocate(void *ptr) ABSL_LOCKS_EXCLUDED(guarded_page_lock); + void Deallocate(void* ptr) ABSL_LOCKS_EXCLUDED(guarded_page_lock_); // Returns the size requested when ptr was allocated. ptr must have been // previously returned by a call to Allocate. - size_t GetRequestedSize(const void *ptr) const; + size_t GetRequestedSize(const void* ptr) const; // Returns ptr's offset from the beginning of its allocation along with the // allocation's size. - std::pair GetAllocationOffsetAndSize(const void *ptr) const; + std::pair GetAllocationOffsetAndSize(const void* ptr) const; // Records stack traces in alloc_trace and dealloc_trace for the page nearest // to ptr. alloc_trace is the trace at the time the page was allocated. If @@ -167,56 +164,71 @@ class GuardedPageAllocator { // Returns the likely error type for an access at ptr. // // Requires that ptr points to memory mapped by this class. - ErrorType GetStackTraces(const void *ptr, GpaStackTrace *alloc_trace, - GpaStackTrace *dealloc_trace) const; + GuardedAllocationsErrorType GetStackTraces( + const void* ptr, GuardedAllocationsStackTrace** alloc_trace, + GuardedAllocationsStackTrace** dealloc_trace) const; // Writes a human-readable summary of GuardedPageAllocator's internal state to // *out. - void Print(Printer *out) ABSL_LOCKS_EXCLUDED(guarded_page_lock); - void PrintInPbtxt(PbtxtRegion *gwp_asan) const - ABSL_LOCKS_EXCLUDED(guarded_page_lock); + void Print(Printer& out) ABSL_LOCKS_EXCLUDED(guarded_page_lock_); + void PrintInPbtxt(PbtxtRegion& gwp_asan) + ABSL_LOCKS_EXCLUDED(guarded_page_lock_); // Returns true if ptr points to memory managed by this class. inline bool ABSL_ATTRIBUTE_ALWAYS_INLINE - PointerIsMine(const void *ptr) const { + PointerIsMine(const void* ptr) const { uintptr_t addr = reinterpret_cast(ptr); return pages_base_addr_ <= addr && addr < pages_end_addr_; } // Allows Allocate() to start returning allocations. - void AllowAllocations() ABSL_LOCKS_EXCLUDED(guarded_page_lock) { - absl::base_internal::SpinLockHolder h(&guarded_page_lock); + void AllowAllocations() ABSL_LOCKS_EXCLUDED(guarded_page_lock_) { + AllocationGuardSpinLockHolder h(&guarded_page_lock_); allow_allocations_ = true; } + // Returns the number of pages available for allocation, based on how many are + // currently in use. (Should only be used in testing.) + size_t GetNumAvailablePages() const { + return max_allocated_pages_ - allocated_pages(); + } + + // Resets sampling state. + void Reset(); + + size_t page_size() const { return page_size_; } + size_t successful_allocations() const { + return successful_allocations_.value(); + } + private: // Structure for storing data about a slot. 
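Taken together, the public API documented above is meant to be driven from the sampling path. A rough caller-side sketch under that assumption; FallBackToNormalAllocation() is a hypothetical stand-in for the regular allocator, and the surrounding sampler plumbing is not part of this patch:

```cpp
#include "tcmalloc/guarded_allocations.h"
#include "tcmalloc/guarded_page_allocator.h"
#include "tcmalloc/internal/logging.h"
#include "tcmalloc/pages.h"

namespace tcmalloc::tcmalloc_internal {

// Hypothetical stand-in for the regular (non-guarded) allocation path.
void* FallBackToNormalAllocation(size_t size, size_t alignment);

void* SampleOrFallBack(GuardedPageAllocator& gpa, size_t size,
                       size_t alignment, const StackTrace& stack_trace) {
  const GuardedAllocWithStatus result =
      gpa.TrySample(size, alignment, Length(1), stack_trace);
  if (result.status == Profile::Sample::GuardedStatus::Guarded) {
    return result.alloc;  // page-guarded; must be freed via gpa.Deallocate()
  }
  // Filtered, RateLimited, NoAvailableSlots, ...: serve from the normal path.
  return FallBackToNormalAllocation(size, alignment);
}

}  // namespace tcmalloc::tcmalloc_internal
```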
struct SlotMetadata { - GpaStackTrace alloc_trace; - GpaStackTrace dealloc_trace; - size_t requested_size = 0; - uintptr_t allocation_start = 0; + GuardedAllocationsStackTrace alloc_trace; + GuardedAllocationsStackTrace dealloc_trace; + size_t requested_size = 0; // requested allocaton size + uintptr_t allocation_start = 0; // allocation start address + std::atomic dealloc_count = 0; // deallocation counter + bool write_overflow_detected = false; // write overflow detected }; // Max number of magic bytes we use to detect write-overflows at deallocation. static constexpr size_t kMagicSize = 32; // Maps pages into memory. - void MapPages() ABSL_LOCKS_EXCLUDED(guarded_page_lock) + void MapPages() ABSL_LOCKS_EXCLUDED(guarded_page_lock_) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); // Reserves and returns a slot randomly selected from the free slots in - // free_pages_. Returns -1 if no slots available, or if AllowAllocations() + // used_pages_. Returns -1 if no slots available, or if AllowAllocations() // hasn't been called yet. - ssize_t ReserveFreeSlot() ABSL_LOCKS_EXCLUDED(guarded_page_lock); + ssize_t ReserveFreeSlot() ABSL_LOCKS_EXCLUDED(guarded_page_lock_); - // Returns the i-th free slot of free_pages_. i must be in the range [0, - // total_pages_ - num_alloced_pages_). - size_t GetIthFreeSlot(size_t i) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(guarded_page_lock); + // Returns a random free slot in used_pages_. + size_t GetFreeSlot() ABSL_EXCLUSIVE_LOCKS_REQUIRED(guarded_page_lock_); // Marks the specified slot as unreserved. - void FreeSlot(size_t slot) ABSL_EXCLUSIVE_LOCKS_REQUIRED(guarded_page_lock); + void FreeSlot(size_t slot) ABSL_EXCLUSIVE_LOCKS_REQUIRED(guarded_page_lock_); // Returns the address of the page that addr resides on. uintptr_t GetPageAddr(uintptr_t addr) const; @@ -227,16 +239,13 @@ class GuardedPageAllocator { // Returns the slot number for the page nearest to addr. size_t GetNearestSlot(uintptr_t addr) const; - // Returns true if the specified slot has already been freed. - bool IsFreed(size_t slot) const - ABSL_EXCLUSIVE_LOCKS_REQUIRED(guarded_page_lock); - // Returns true if magic bytes for slot were overwritten. bool WriteOverflowOccurred(size_t slot) const; // Returns the likely error type for the given access address and metadata // associated with the nearest slot. - ErrorType GetErrorType(uintptr_t addr, const SlotMetadata &d) const; + GuardedAllocationsErrorType GetErrorType(uintptr_t addr, + const SlotMetadata& d) const; // Magic constant used for detecting write-overflows at deallocation time. static uint8_t GetWriteOverflowMagic(size_t slot) { @@ -250,58 +259,67 @@ class GuardedPageAllocator { // If slot is marked for right alignment, moves the allocation in *ptr to the // right end of the slot, maintaining the specified size and alignment. Magic // bytes are written in any alignment padding. - void MaybeRightAlign(size_t slot, size_t size, size_t alignment, void **ptr); + void MaybeRightAlign(size_t slot, size_t size, size_t alignment, void** ptr); uintptr_t SlotToAddr(size_t slot) const; size_t AddrToSlot(uintptr_t addr) const; - // Maps each bool to one page. - // true: Free. false: Reserved. - bool free_pages_[kGpaMaxPages] ABSL_GUARDED_BY(guarded_page_lock); - - // Number of currently-allocated pages. - size_t num_alloced_pages_ ABSL_GUARDED_BY(guarded_page_lock); + size_t allocated_pages() const { + return allocated_pages_.load(std::memory_order_relaxed); + } - // The high-water mark for num_alloced_pages_. 
- size_t num_alloced_pages_max_ ABSL_GUARDED_BY(guarded_page_lock); + // DecayingStackTraceFilter instance to limit allocations already covered by a + // current or recent allocation. + // + // With the chosen configuration, assuming 90% unique allocations in a fully + // utilized pool (in the worst case), the CBF will have a false positive + // probability of 20%. In more moderate scenarios with unique allocations of + // 80% or below, the probability of false positives will be below 10%. + DecayingStackTraceFilter stacktrace_filter_; - // Number of calls to Allocate. - size_t num_allocation_requests_ ABSL_GUARDED_BY(guarded_page_lock); + absl::base_internal::SpinLock guarded_page_lock_; - // Number of times Allocate has failed. - size_t num_failed_allocations_ ABSL_GUARDED_BY(guarded_page_lock); + // Maps each bool to one page. + // true: reserved. false: freed. + Bitmap used_pages_ ABSL_GUARDED_BY(guarded_page_lock_); + + // Number of currently allocated pages. Atomic so it may be accessed outside + // the guarded_page_lock_ to calculate heuristics based on pool utilization. + std::atomic allocated_pages_; + // The high-water mark for allocated_pages_. + std::atomic high_allocated_pages_; + + // Number of successful allocations. + tcmalloc_internal::StatsCounter successful_allocations_; + // Number of times an allocation failed due to an internal error. + tcmalloc_internal::StatsCounter failed_allocations_; + // Number of times an allocation was skipped (no available slots). + tcmalloc_internal::StatsCounter skipped_allocations_noslots_; + // Number of times an allocation was skipped (filtered). + tcmalloc_internal::StatsCounter skipped_allocations_filtered_; + // Number of times an allocation was skipped (too large). + tcmalloc_internal::StatsCounter skipped_allocations_toolarge_; + // Number of pages allocated at least once from page pool. + tcmalloc_internal::StatsCounter pages_touched_; // A dynamically-allocated array of stack trace data captured when each page // is allocated/deallocated. Printed by the SEGV handler when a memory error // is detected. - SlotMetadata *data_; + SlotMetadata* data_; - uintptr_t pages_base_addr_; // Points to start of mapped region. - uintptr_t pages_end_addr_; // Points to the end of mapped region. - uintptr_t first_page_addr_; // Points to first page returnable by Allocate. - size_t max_alloced_pages_; // Max number of pages to allocate at once. - size_t total_pages_; // Size of the page pool to allocate from. - size_t page_size_; // Size of pages we allocate. - uint64_t rand_; // RNG seed. + uintptr_t pages_base_addr_; // Points to start of mapped region. + uintptr_t pages_end_addr_; // Points to the end of mapped region. + uintptr_t first_page_addr_; // Points to first page returnable by Allocate. + size_t max_allocated_pages_; // Max number of pages to allocate at once. + size_t total_pages_; // Size of the page pool to allocate from. + size_t page_size_; // Size of pages we allocate. + Random rand_; // True if this object has been fully initialized. - bool initialized_ ABSL_GUARDED_BY(guarded_page_lock); + bool initialized_ ABSL_GUARDED_BY(guarded_page_lock_); // Flag to control whether we can return allocations or not. - bool allow_allocations_ ABSL_GUARDED_BY(guarded_page_lock); - - // Set to true if a double free has occurred. - bool double_free_detected_; - - // Set to true if a write overflow was detected on deallocation. 
- bool write_overflow_detected_; - - friend struct ConstexprCheck; -}; - -struct ConstexprCheck { - static_assert(GuardedPageAllocator().rand_ || true, - "GuardedPageAllocator must have a constexpr constructor"); + bool allow_allocations_ ABSL_GUARDED_BY(guarded_page_lock_); }; } // namespace tcmalloc_internal diff --git a/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_benchmark.cc b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_benchmark.cc index fb6d0ea265d5..b19a00bb41ba 100644 --- a/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_benchmark.cc +++ b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_benchmark.cc @@ -12,48 +12,170 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include +#include +#include +#include +#include +#include -#include "absl/base/internal/spinlock.h" #include "benchmark/benchmark.h" +#include "tcmalloc/common.h" #include "tcmalloc/guarded_page_allocator.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/page_size.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/pages.h" +#include "tcmalloc/static_vars.h" GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { namespace { +using GuardedStatus = Profile::Sample::GuardedStatus; -static constexpr size_t kMaxGpaPages = GuardedPageAllocator::kGpaMaxPages; +constexpr size_t kMaxGpaPages = GuardedPageAllocator::kGpaMaxPages; -// Size of pages used by GuardedPageAllocator. -static size_t PageSize() { +// Size of pages used by GuardedPageAllocator. See GuardedPageAllocator::Init(). +size_t GetGpaPageSize() { static const size_t page_size = - std::max(kPageSize, static_cast(getpagesize())); + std::max(kPageSize, static_cast(GetPageSize())); return page_size; } -void BM_AllocDealloc(benchmark::State& state) { - static GuardedPageAllocator* gpa = []() { +inline auto& GetStackTrace(size_t first_frame) { + thread_local StackTrace* s = new StackTrace; + s->stack[0] = reinterpret_cast(first_frame + 42); + s->depth = 1; + return *s; +} + +std::unique_ptr +GetGuardedPageAllocator() { + static GuardedPageAllocator* gpa = [] { auto gpa = new GuardedPageAllocator; - absl::base_internal::SpinLockHolder h(&pageheap_lock); + PageHeapSpinLockHolder l; gpa->Init(kMaxGpaPages, kMaxGpaPages); gpa->AllowAllocations(); + // Benchmark should always sample. + MallocExtension::SetProfileSamplingInterval(1); + MallocExtension::SetGuardedSamplingInterval(1); return gpa; }(); - size_t alloc_size = state.range(0); + return {gpa, +[](GuardedPageAllocator* gpa) { + // We can't reset GuardedPageAllocator before the benchmark in + // multi-threaded mode as it might race with pre-initialization and + // concurrent Reset() from all other benchmark threads. Instead, + // just reset after each benchmark. + gpa->Reset(); + }}; +} + +// Benchmark for guarded page allocation overhead only, to focus on free slot +// selection, mprotect() overhead, and allocation metadata storage. +void BM_AllocDealloc(benchmark::State& state) { + const size_t alloc_size = state.range(0); + auto gpa = GetGuardedPageAllocator(); for (auto _ : state) { - char* ptr = reinterpret_cast(gpa->Allocate(alloc_size, 0)); - CHECK_CONDITION(ptr != nullptr); + char* ptr = reinterpret_cast( + gpa->Allocate(alloc_size, 0, GetStackTrace(0)).alloc); + TC_CHECK_NE(ptr, nullptr); ptr[0] = 'X'; // Page fault first page. ptr[alloc_size - 1] = 'X'; // Page fault last page. 
gpa->Deallocate(ptr); } } -BENCHMARK(BM_AllocDealloc)->Range(1, PageSize()); +BENCHMARK(BM_AllocDealloc)->Range(1, GetGpaPageSize()); BENCHMARK(BM_AllocDealloc)->Arg(1)->ThreadRange(1, kMaxGpaPages); +auto& GetReserved() { + static auto* ret = + new std::vector>>; + return *ret; +} + +// Exhaust the pool first so we do not profile allocation overhead. +void ReservePool(const benchmark::State&) { + TC_CHECK(GetReserved().empty()); + auto* gpa = GetGuardedPageAllocator().release(); // do not Reset() + auto deleter = [gpa](void* p) { gpa->Deallocate(p); }; + + for (size_t stack_idx = 0;;) { + auto alloc = gpa->TrySample(1, 0, Length(1), GetStackTrace(stack_idx)); + switch (alloc.status) { + case GuardedStatus::NoAvailableSlots: + TC_CHECK(!GetReserved().empty()); + return; + case GuardedStatus::RateLimited: + // Emulate that non-guarded sampling happened. + tc_globals.total_sampled_count_.Add(1); + break; + case GuardedStatus::Filtered: + // The filter is rejecting the stack trace, give it one more unique + // stack trace. + stack_idx++; + break; + default: + if (alloc.alloc) GetReserved().emplace_back(alloc.alloc, deleter); + break; + } + } +} + +void ReleasePool(const benchmark::State&) { + TC_CHECK(!GetReserved().empty()); + GetReserved().clear(); +} + +// Benchmark that includes sampling-decision overhead. +void BM_TrySample(benchmark::State& state) { + TC_CHECK(!GetReserved().empty()); + // Shared between benchmark threads. We don't care if one of the threads + // doesn't see one of the statuses. + static std::atomic seen_filtered; + seen_filtered = false; + + const size_t alloc_size = state.range(0); + auto gpa = GetGuardedPageAllocator(); + size_t stack_idx = 0; + + for (auto _ : state) { + StackTrace& stack_trace = GetStackTrace(stack_idx++ % GetReserved().size()); + auto alloc = gpa->TrySample(alloc_size, 0, Length(1), stack_trace); + + switch (alloc.status) { + case GuardedStatus::RateLimited: + tc_globals.total_sampled_count_.Add(1); + break; + case GuardedStatus::Filtered: + seen_filtered.store(true, std::memory_order_relaxed); + break; + default: + TC_CHECK_NE(alloc.status, GuardedStatus::Guarded); + TC_CHECK_NE(alloc.status, GuardedStatus::LargerThanOnePage); + TC_CHECK_NE(alloc.status, GuardedStatus::Disabled); + TC_CHECK_NE(alloc.status, GuardedStatus::MProtectFailed); + TC_CHECK_NE(alloc.status, GuardedStatus::TooSmall); + break; + } + TC_CHECK_EQ(alloc.alloc, nullptr); + } + + if (state.iterations() > 1000) TC_CHECK(seen_filtered); +} + +BENCHMARK(BM_TrySample) + ->Range(1, GetGpaPageSize()) + ->Setup(ReservePool) + ->Teardown(ReleasePool); +BENCHMARK(BM_TrySample) + ->Arg(1) + ->ThreadRange(1, kMaxGpaPages) + ->Setup(ReservePool) + ->Teardown(ReleasePool); + } // namespace } // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_profile_test.cc b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_profile_test.cc new file mode 100644 index 000000000000..961bf77b652f --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_profile_test.cc @@ -0,0 +1,371 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +#include "benchmark/benchmark.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/container/flat_hash_set.h" +#include "absl/functional/function_ref.h" +#include "absl/log/check.h" +#include "tcmalloc/common.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/parameters.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/testing/testutil.h" + +namespace tcmalloc { +namespace tcmalloc_internal { + +class GuardedPageAllocatorProfileTest : public testing::Test { + public: + struct NextSteps { + bool stop = true; // stop allocating + bool free = true; // free allocation + }; + + void SetUp() override { MallocExtension::ActivateGuardedSampling(); } + + // Return the number of allocations + int AllocateUntil(size_t size, + absl::FunctionRef evaluate_alloc) { + // The test harness may allocate, so move Reset() close to where we do the + // allocations we want to test. + tc_globals.guardedpage_allocator().Reset(); + + int alloc_count = 0; + while (true) { + void* alloc = ::operator new(size); + ++alloc_count; + benchmark::DoNotOptimize(alloc); + auto result = evaluate_alloc(alloc); + // evaluate_alloc takes responsibility for delete/free if result.free is + // set to false. + if (result.free) { + ::operator delete(alloc); + } + if (result.stop) { + break; + } + } + return alloc_count; + } + + int AllocateGuardableUntil( + size_t size, absl::FunctionRef evaluate_alloc) { + CHECK_LE(size, tc_globals.guardedpage_allocator().page_size()); + return AllocateUntil(size, evaluate_alloc); + } + + // Allocate until sample is guarded + // Called to reduce the internal counter to -1, which will trigger resetting + // the counter to the configured rate. 
+ void AllocateUntilGuarded() { + AllocateGuardableUntil(968, [&](void* alloc) -> NextSteps { + return {!IsNormalMemory(alloc) && + tc_globals.guardedpage_allocator().PointerIsMine(alloc), + true}; + }); + } + + void ExamineSamples( + Profile& profile, Profile::Sample::GuardedStatus sought_status, + absl::FunctionRef verify = + [](const Profile::Sample& s) { /* do nothing */ }) { + absl::flat_hash_set found_statuses; + int samples = 0; + profile.Iterate([&](const Profile::Sample& s) { + ++samples; + found_statuses.insert(s.guarded_status); + verify(s); + }); + EXPECT_THAT(found_statuses, ::testing::Contains(sought_status)); + } +}; + +namespace { + +TEST_F(GuardedPageAllocatorProfileTest, Guarded) { + ScopedAlwaysSample always_sample; + AllocateUntilGuarded(); + auto token = MallocExtension::StartAllocationProfiling(); + + AllocateGuardableUntil( + 1051, [&](void* alloc) -> NextSteps { return {true, true}; }); + + auto profile = std::move(token).Stop(); + ExamineSamples(profile, Profile::Sample::GuardedStatus::Guarded); +} + +TEST_F(GuardedPageAllocatorProfileTest, NotAttempted) { + ScopedProfileSamplingInterval profile_sampling_interval(4096); + auto token = MallocExtension::StartAllocationProfiling(); + + constexpr size_t alloc_size = 2 * 1024 * 1024; + AllocateUntil(alloc_size, + [&](void* alloc) -> NextSteps { return {true, true}; }); + + auto profile = std::move(token).Stop(); + ExamineSamples(profile, Profile::Sample::GuardedStatus::NotAttempted, + [&](const Profile::Sample& s) { + switch (s.guarded_status) { + case Profile::Sample::GuardedStatus::Guarded: + EXPECT_NE(alloc_size, s.requested_size); + break; + default: + break; + } + }); +} + +TEST_F(GuardedPageAllocatorProfileTest, LargerThanOnePage) { + ScopedAlwaysSample always_sample; + AllocateUntilGuarded(); + auto token = MallocExtension::StartAllocationProfiling(); + + constexpr size_t alloc_size = kPageSize + 1; + AllocateUntil(alloc_size, + [&](void* alloc) -> NextSteps { return {true, true}; }); + + auto profile = std::move(token).Stop(); + ExamineSamples(profile, Profile::Sample::GuardedStatus::LargerThanOnePage, + [&](const Profile::Sample& s) { + switch (s.guarded_status) { + case Profile::Sample::GuardedStatus::Guarded: + EXPECT_NE(alloc_size, s.requested_size); + break; + default: + break; + } + }); +} + +TEST_F(GuardedPageAllocatorProfileTest, Disabled) { + ScopedGuardedSamplingInterval guarded_sampling_interval(-1); + ScopedProfileSamplingInterval profile_sampling_interval(1); + auto token = MallocExtension::StartAllocationProfiling(); + + AllocateGuardableUntil( + 1024, [&](void* alloc) -> NextSteps { return {true, true}; }); + + auto profile = std::move(token).Stop(); + ExamineSamples(profile, Profile::Sample::GuardedStatus::Disabled); +} + +TEST_F(GuardedPageAllocatorProfileTest, RateLimited) { + // For every 2 sampled allocations, have just 1 guarded allocation. + ScopedProfileSamplingInterval profile_sampling_interval(1); + ScopedGuardedSamplingInterval guarded_sampling_interval(2); + auto token = MallocExtension::StartAllocationProfiling(); + + // Keep allocating until something is sampled + constexpr size_t kAllocSize = 1033; + size_t num_guarded = 0; + size_t num_sampled = 0; + AllocateGuardableUntil(kAllocSize, [&](void* alloc) -> NextSteps { + if (!IsNormalMemory(alloc)) { + num_sampled++; + if (tc_globals.guardedpage_allocator().PointerIsMine(alloc)) { + num_guarded++; + } + // The expectation is that as soon as there are more sampled allocations + // than guarded, at least once the rate limiter kicked in. 
+ return {num_guarded > 0 && num_sampled > num_guarded, true}; + } + return {false, true}; + }); + + // Ensure Guarded and RateLimited both occur for the alloc_size + bool success_found = false; + bool ratelimited_found = false; + auto profile = std::move(token).Stop(); + ExamineSamples(profile, Profile::Sample::GuardedStatus::RateLimited, + [&](const Profile::Sample& s) { + if (s.requested_size != kAllocSize) return; + switch (s.guarded_status) { + case Profile::Sample::GuardedStatus::Guarded: + success_found = true; + break; + case Profile::Sample::GuardedStatus::RateLimited: + ratelimited_found = true; + break; + default: + break; + } + }); + EXPECT_TRUE(success_found); + EXPECT_TRUE(ratelimited_found); +} + +TEST_F(GuardedPageAllocatorProfileTest, NeverRateLimited) { + ScopedProfileSamplingInterval profile_sampling_interval(42); + ScopedGuardedSamplingInterval guarded_sampling_interval(42); + ASSERT_EQ(MallocExtension::GetGuardedSamplingInterval(), + MallocExtension::GetProfileSamplingInterval()); + auto token = MallocExtension::StartAllocationProfiling(); + + constexpr size_t kAllocSize = 1033; + size_t num_guarded = 0; + AllocateGuardableUntil(kAllocSize, [&](void* alloc) -> NextSteps { + if (!IsNormalMemory(alloc)) { + // Stack trace filter may still filter. + if (tc_globals.guardedpage_allocator().PointerIsMine(alloc)) + num_guarded++; + return {num_guarded > 100, true}; + } + return {false, true}; + }); + auto profile = std::move(token).Stop(); + ExamineSamples(profile, Profile::Sample::GuardedStatus::Guarded, + [&](const Profile::Sample& s) { + if (s.requested_size != kAllocSize) return; + EXPECT_NE(s.guarded_status, + Profile::Sample::GuardedStatus::RateLimited); + }); +} + +TEST_F(GuardedPageAllocatorProfileTest, TooSmall) { + ScopedAlwaysSample always_sample; + AllocateUntilGuarded(); + auto token = MallocExtension::StartAllocationProfiling(); + + // Next sampled allocation should be too small + constexpr size_t alloc_size = 0; + AllocateGuardableUntil( + alloc_size, [&](void* alloc) -> NextSteps { return {true, true}; }); + + auto profile = std::move(token).Stop(); + ExamineSamples(profile, Profile::Sample::GuardedStatus::TooSmall, + [&](const Profile::Sample& s) { + switch (s.guarded_status) { + case Profile::Sample::GuardedStatus::Guarded: + EXPECT_NE(alloc_size, s.requested_size); + break; + case Profile::Sample::GuardedStatus::TooSmall: + EXPECT_EQ(alloc_size, s.requested_size); + break; + default: + break; + } + }); +} + +TEST_F(GuardedPageAllocatorProfileTest, NoAvailableSlots) { + ScopedAlwaysSample always_sample; + AllocateUntilGuarded(); + + std::vector> allocs; + // Guard until there are no slots available. 
+ AllocateGuardableUntil(1039, [&](void* alloc) -> NextSteps { + if (tc_globals.guardedpage_allocator().PointerIsMine(alloc)) { + allocs.emplace_back(alloc, + static_cast(::operator delete)); + return {tc_globals.guardedpage_allocator().GetNumAvailablePages() == 0, + false}; + } + return {false, true}; + }); + + auto token = MallocExtension::StartAllocationProfiling(); + // This should fail for lack of slots + AllocateGuardableUntil(1055, [&](void* alloc) -> NextSteps { + return {!tc_globals.guardedpage_allocator().PointerIsMine(alloc), true}; + }); + + auto profile = std::move(token).Stop(); + ExamineSamples(profile, Profile::Sample::GuardedStatus::NoAvailableSlots); +} + +TEST_F(GuardedPageAllocatorProfileTest, NeverSample) { + ScopedProfileSamplingInterval profile_sampling_interval(0); + auto token = MallocExtension::StartAllocationProfiling(); + + // This will not succeed in guarding anything. + int alloc_count = AllocateGuardableUntil( + 1025, [&](void* alloc) -> NextSteps { return {true, true}; }); + ASSERT_EQ(alloc_count, 1); + + auto profile = std::move(token).Stop(); + int samples = 0; + profile.Iterate([&](const Profile::Sample& s) { ++samples; }); + EXPECT_EQ(samples, 0); +} + +TEST_F(GuardedPageAllocatorProfileTest, Filtered) { + auto token = MallocExtension::StartAllocationProfiling(); + int guarded_count = 0; + AllocateGuardableUntil(1058, [&](void* alloc) -> NextSteps { + guarded_count += tc_globals.guardedpage_allocator().PointerIsMine(alloc); + return {guarded_count == 1000, true}; + }); + + auto profile = std::move(token).Stop(); + ExamineSamples(profile, Profile::Sample::GuardedStatus::Filtered); +} + +TEST_F(GuardedPageAllocatorProfileTest, FilteredWithRateLimiting) { + // Have to have a rate that is less than every single one. + ScopedGuardedSamplingInterval scoped_guarded_sampling_interval( + 2 * tcmalloc::tcmalloc_internal::Parameters::profile_sampling_interval()); + AllocateUntilGuarded(); + + auto token = MallocExtension::StartAllocationProfiling(); + // Obtain a few sample guarding candidates, which will eventually yield at + // least one that is filtered. + int guarded_count = 0; + int sampled_count = 0; + AllocateGuardableUntil(1062, [&](void* alloc) -> NextSteps { + if (!IsNormalMemory(alloc)) { + if (tc_globals.guardedpage_allocator().PointerIsMine(alloc)) { + ++guarded_count; + } + ++sampled_count; + } + return {guarded_count == 1000, true}; + }); + + EXPECT_GT(sampled_count, guarded_count); + + auto profile = std::move(token).Stop(); + ExamineSamples(profile, Profile::Sample::GuardedStatus::Filtered); +} + +TEST_F(GuardedPageAllocatorProfileTest, DynamicParamChange) { + ScopedGuardedSamplingInterval scoped_guarded_sampling_interval( + 2 * tcmalloc::tcmalloc_internal::Parameters::profile_sampling_interval()); + for (int loop_count = 0; loop_count < 10; ++loop_count) { + AllocateUntilGuarded(); + + // Accumulate at least 2 guarded allocations. 
+ auto token = MallocExtension::StartAllocationProfiling(); + int guarded_count = 0; + AllocateGuardableUntil(1063, [&](void* alloc) -> NextSteps { + if (tc_globals.guardedpage_allocator().PointerIsMine(alloc)) { + ++guarded_count; + } + return {guarded_count > 1, true}; + }); + + auto profile = std::move(token).Stop(); + ExamineSamples(profile, Profile::Sample::GuardedStatus::Guarded); + } +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_test.cc b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_test.cc index 0d603de6901b..29e389576ce2 100644 --- a/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_test.cc @@ -14,31 +14,26 @@ #include "tcmalloc/guarded_page_allocator.h" -#include -#include -#include - #include +#include +#include +#include #include -#include -#include +#include #include // NOLINT(build/c++11) #include #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "absl/base/casts.h" -#include "absl/base/internal/spinlock.h" -#include "absl/base/internal/sysinfo.h" +#include "absl/base/attributes.h" #include "absl/container/flat_hash_set.h" -#include "absl/memory/memory.h" -#include "absl/numeric/bits.h" -#include "absl/strings/str_cat.h" -#include "absl/time/clock.h" -#include "absl/time/time.h" #include "tcmalloc/common.h" #include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/page_size.h" +#include "tcmalloc/internal/sysinfo.h" +#include "tcmalloc/malloc_extension.h" #include "tcmalloc/static_vars.h" +#include "tcmalloc/testing/testutil.h" namespace tcmalloc { namespace tcmalloc_internal { @@ -49,20 +44,28 @@ static constexpr size_t kMaxGpaPages = GuardedPageAllocator::kGpaMaxPages; // Size of pages used by GuardedPageAllocator. 
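 // (Editorial note: guarded allocations occupy whole pages, so the tests use
 // the larger of TCMalloc's logical kPageSize and the system page size
 // reported by GetPageSize() as the effective page size.)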
static size_t PageSize() { static const size_t page_size = - std::max(kPageSize, static_cast(getpagesize())); + std::max(kPageSize, static_cast(GetPageSize())); return page_size; } +inline auto GetStackTrace() { +self: + StackTrace s; + s.stack[0] = reinterpret_cast(&&self); + s.depth = 1; + return s; +} + class GuardedPageAllocatorTest : public testing::Test { protected: GuardedPageAllocatorTest() { - absl::base_internal::SpinLockHolder h(&pageheap_lock); + PageHeapSpinLockHolder l; gpa_.Init(kMaxGpaPages, kMaxGpaPages); gpa_.AllowAllocations(); } explicit GuardedPageAllocatorTest(size_t num_pages) { - absl::base_internal::SpinLockHolder h(&pageheap_lock); + PageHeapSpinLockHolder l; gpa_.Init(num_pages, kMaxGpaPages); gpa_.AllowAllocations(); } @@ -80,7 +83,10 @@ class GuardedPageAllocatorParamTest }; TEST_F(GuardedPageAllocatorTest, SingleAllocDealloc) { - char *buf = reinterpret_cast(gpa_.Allocate(PageSize(), 0)); + auto alloc_with_status = gpa_.Allocate(PageSize(), 0, GetStackTrace()); + EXPECT_EQ(alloc_with_status.status, Profile::Sample::GuardedStatus::Guarded); + EXPECT_EQ(gpa_.successful_allocations(), 1); + char* buf = static_cast(alloc_with_status.alloc); EXPECT_NE(buf, nullptr); EXPECT_TRUE(gpa_.PointerIsMine(buf)); memset(buf, 'A', PageSize()); @@ -93,58 +99,78 @@ TEST_F(GuardedPageAllocatorTest, SingleAllocDealloc) { } TEST_F(GuardedPageAllocatorTest, NoAlignmentProvided) { - constexpr size_t kLargeObjectAlignment = std::max( - kAlignment, static_cast(__STDCPP_DEFAULT_NEW_ALIGNMENT__)); + constexpr size_t kLargeObjectAlignment = + std::max(static_cast(kAlignment), + static_cast(__STDCPP_DEFAULT_NEW_ALIGNMENT__)); + int allocation_count = 0; for (size_t base_size = 1; base_size <= 64; base_size <<= 1) { for (size_t size : {base_size, base_size + 1}) { SCOPED_TRACE(size); constexpr int kElements = 10; - std::array ptrs; + std::array ptrs; // Make several allocation attempts to encounter left/right-alignment in // the guarded region. 
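       // (Editorial note: the guarded allocator places each allocation against
       // either the left or the right edge of its page, so that both
       // underflows and overflows can be caught; several attempts are needed
       // to see both placements, and the alignment expectation below must hold
       // for either one.)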
for (int i = 0; i < kElements; i++) { - ptrs[i] = gpa_.Allocate(size, 0); + auto alloc_with_status = gpa_.Allocate(size, 0, GetStackTrace()); + EXPECT_EQ(alloc_with_status.status, + Profile::Sample::GuardedStatus::Guarded); + ptrs[i] = alloc_with_status.alloc; EXPECT_NE(ptrs[i], nullptr); EXPECT_TRUE(gpa_.PointerIsMine(ptrs[i])); + ++allocation_count; size_t observed_alignment = 1 << absl::countr_zero(absl::bit_cast(ptrs[i])); EXPECT_GE(observed_alignment, std::min(size, kLargeObjectAlignment)); } - for (void *ptr : ptrs) { + for (void* ptr : ptrs) { gpa_.Deallocate(ptr); } } } + EXPECT_EQ(gpa_.successful_allocations(), allocation_count); } TEST_F(GuardedPageAllocatorTest, AllocDeallocAligned) { for (size_t align = 1; align <= PageSize(); align <<= 1) { constexpr size_t alloc_size = 1; - void *p = gpa_.Allocate(alloc_size, align); - EXPECT_NE(p, nullptr); - EXPECT_TRUE(gpa_.PointerIsMine(p)); - EXPECT_EQ(reinterpret_cast(p) % align, 0); + auto alloc_with_status = gpa_.Allocate(alloc_size, align, GetStackTrace()); + EXPECT_EQ(alloc_with_status.status, + Profile::Sample::GuardedStatus::Guarded); + EXPECT_NE(alloc_with_status.alloc, nullptr); + EXPECT_TRUE(gpa_.PointerIsMine(alloc_with_status.alloc)); + EXPECT_EQ(reinterpret_cast(alloc_with_status.alloc) % align, 0); } + EXPECT_EQ(gpa_.successful_allocations(), (32 - __builtin_clz(PageSize()))); } TEST_P(GuardedPageAllocatorParamTest, AllocDeallocAllPages) { size_t num_pages = GetParam(); - char *bufs[kMaxGpaPages]; + char* bufs[kMaxGpaPages]; for (size_t i = 0; i < num_pages; i++) { - bufs[i] = reinterpret_cast(gpa_.Allocate(1, 0)); + auto alloc_with_status = gpa_.Allocate(1, 0, GetStackTrace()); + EXPECT_EQ(alloc_with_status.status, + Profile::Sample::GuardedStatus::Guarded); + bufs[i] = reinterpret_cast(alloc_with_status.alloc); EXPECT_NE(bufs[i], nullptr); EXPECT_TRUE(gpa_.PointerIsMine(bufs[i])); } - EXPECT_EQ(gpa_.Allocate(1, 0), nullptr); + EXPECT_EQ(gpa_.successful_allocations(), num_pages); + auto alloc_with_status = gpa_.Allocate(1, 0, GetStackTrace()); + EXPECT_EQ(alloc_with_status.status, + Profile::Sample::GuardedStatus::NoAvailableSlots); + EXPECT_EQ(alloc_with_status.alloc, nullptr); gpa_.Deallocate(bufs[0]); - bufs[0] = reinterpret_cast(gpa_.Allocate(1, 0)); + alloc_with_status = gpa_.Allocate(1, 0, GetStackTrace()); + EXPECT_EQ(alloc_with_status.status, Profile::Sample::GuardedStatus::Guarded); + bufs[0] = reinterpret_cast(alloc_with_status.alloc); EXPECT_NE(bufs[0], nullptr); EXPECT_TRUE(gpa_.PointerIsMine(bufs[0])); + EXPECT_EQ(gpa_.successful_allocations(), num_pages + 1); for (size_t i = 0; i < num_pages; i++) { bufs[i][0] = 'A'; gpa_.Deallocate(bufs[i]); @@ -154,9 +180,12 @@ INSTANTIATE_TEST_SUITE_P(VaryNumPages, GuardedPageAllocatorParamTest, testing::Values(1, kMaxGpaPages / 2, kMaxGpaPages)); TEST_F(GuardedPageAllocatorTest, PointerIsMine) { - void *buf = gpa_.Allocate(1, 0); + auto alloc_with_status = gpa_.Allocate(1, 0, GetStackTrace()); + EXPECT_EQ(alloc_with_status.status, Profile::Sample::GuardedStatus::Guarded); + EXPECT_EQ(gpa_.successful_allocations(), 1); + void* buf = alloc_with_status.alloc; int stack_var; - auto malloc_ptr = absl::make_unique(); + auto malloc_ptr = std::make_unique(); EXPECT_TRUE(gpa_.PointerIsMine(buf)); EXPECT_FALSE(gpa_.PointerIsMine(&stack_var)); EXPECT_FALSE(gpa_.PointerIsMine(malloc_ptr.get())); @@ -165,7 +194,7 @@ TEST_F(GuardedPageAllocatorTest, PointerIsMine) { TEST_F(GuardedPageAllocatorTest, Print) { char buf[1024] = {}; Printer out(buf, sizeof(buf)); - gpa_.Print(&out); + 
gpa_.Print(out); EXPECT_THAT(buf, testing::ContainsRegex("GWP-ASan Status")); } @@ -173,23 +202,23 @@ TEST_F(GuardedPageAllocatorTest, Print) { // extra pages are allocated when there's concurrent calls to Allocate(). TEST_F(GuardedPageAllocatorTest, ThreadedAllocCount) { constexpr size_t kNumThreads = 2; - void *allocations[kNumThreads][kMaxGpaPages]; + void* allocations[kNumThreads][kMaxGpaPages]; { std::vector threads; threads.reserve(kNumThreads); for (size_t i = 0; i < kNumThreads; i++) { threads.push_back(std::thread([this, &allocations, i]() { for (size_t j = 0; j < kMaxGpaPages; j++) { - allocations[i][j] = gpa_.Allocate(1, 0); + allocations[i][j] = gpa_.Allocate(1, 0, GetStackTrace()).alloc; } })); } - for (auto &t : threads) { + for (auto& t : threads) { t.join(); } } - absl::flat_hash_set allocations_set; + absl::flat_hash_set allocations_set; for (size_t i = 0; i < kNumThreads; i++) { for (size_t j = 0; j < kMaxGpaPages; j++) { allocations_set.insert(allocations[i][j]); @@ -197,20 +226,27 @@ TEST_F(GuardedPageAllocatorTest, ThreadedAllocCount) { } allocations_set.erase(nullptr); EXPECT_EQ(allocations_set.size(), kMaxGpaPages); + EXPECT_EQ(gpa_.successful_allocations(), kMaxGpaPages); } // Test that allocator remains in consistent state under high contention and // doesn't double-allocate pages or fail to deallocate pages. TEST_F(GuardedPageAllocatorTest, ThreadedHighContention) { - const size_t kNumThreads = 4 * absl::base_internal::NumCPUs(); + const size_t kNumThreads = 4 * NumCPUs(); { std::vector threads; threads.reserve(kNumThreads); for (size_t i = 0; i < kNumThreads; i++) { threads.push_back(std::thread([this]() { - char *buf; - while ((buf = reinterpret_cast(gpa_.Allocate(1, 0))) == - nullptr) { + char* buf; + while (true) { + auto alloc_with_status = gpa_.Allocate(1, 0, GetStackTrace()); + if (alloc_with_status.status == + Profile::Sample::GuardedStatus::Guarded) { + buf = reinterpret_cast(alloc_with_status.alloc); + EXPECT_NE(buf, nullptr); + break; + } absl::SleepFor(absl::Nanoseconds(5000)); } @@ -228,16 +264,119 @@ TEST_F(GuardedPageAllocatorTest, ThreadedHighContention) { })); } - for (auto &t : threads) { + for (auto& t : threads) { t.join(); } } // Verify all pages have been deallocated now that all threads are done. for (size_t i = 0; i < kMaxGpaPages; i++) { - EXPECT_NE(gpa_.Allocate(1, 0), nullptr); + auto alloc_with_status = gpa_.Allocate(1, 0, GetStackTrace()); + EXPECT_EQ(alloc_with_status.status, + Profile::Sample::GuardedStatus::Guarded); + EXPECT_NE(alloc_with_status.alloc, nullptr); + } +} + +class SampledAllocationWithFilterTest + : public GuardedPageAllocatorTest, + public testing::WithParamInterface> { + protected: + void SetUp() override { +#ifndef __cpp_sized_deallocation + GTEST_SKIP() << "requires sized delete support"; +#endif + // Sanitizers override malloc/free with their own. 
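+    // (Editorial note: when ASan/MSan/TSan interpose the allocator, the
+    // allocations below never reach TCMalloc's guarded page allocator, so the
+    // mismatched-size-delete death tests in this suite cannot trigger.)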
+#if defined(ABSL_HAVE_ADDRESS_SANITIZER) || \ + defined(ABSL_HAVE_MEMORY_SANITIZER) || defined(ABSL_HAVE_THREAD_SANITIZER) + GTEST_SKIP() << "skipping tests on sanitizers"; +#endif } +}; + +TEST_P(SampledAllocationWithFilterTest, MismatchedSizeDelete) { + constexpr int kIter = 1000000; + const auto& filter = GetParam(); + + for (int i = 0; i < kIter; ++i) { + auto deleter = [](void* ptr) { ::operator delete(ptr); }; + std::unique_ptr ptr(::operator new(1000), deleter); + if (!filter(ptr.get())) continue; + ASSERT_TRUE(!IsNormalMemory(ptr.get())); + + EXPECT_DEATH(sized_delete(ptr.get(), 2000), + "Mismatched-size-delete.*mismatched-sized-delete.md.*of 2000 " + "bytes \\(expected 1000 bytes\\) at"); + + return; + } + + GTEST_SKIP() << "can't get a sampled allocation, giving up"; } +TEST_P(SampledAllocationWithFilterTest, MismatchedSizeDeleteZero) { + constexpr int kIter = 1000000; + const auto& filter = GetParam(); + + for (int i = 0; i < kIter; ++i) { + auto deleter = [](void* ptr) { ::operator delete(ptr); }; + std::unique_ptr ptr(::operator new(1000), deleter); + if (!filter(ptr.get())) continue; + ASSERT_TRUE(!IsNormalMemory(ptr.get())); + + EXPECT_DEATH(sized_delete(ptr.get(), 0), + "Mismatched-size-delete.*mismatched-sized-delete.md.*of 0 " + "bytes \\(expected 1000 bytes\\) at"); + + return; + } + + GTEST_SKIP() << "can't get a sampled allocation, giving up"; +} + +TEST_P(SampledAllocationWithFilterTest, SizedNewMismatchedSizeDelete) { + constexpr int kIter = 1000000; + const auto& filter = GetParam(); + + for (int i = 0; i < kIter; ++i) { + auto sized_ptr = tcmalloc_size_returning_operator_new(1000); + auto deleter = [](void* ptr) { ::operator delete(ptr); }; + std::unique_ptr ptr(sized_ptr.p, deleter); + if (!filter(ptr.get())) continue; + ASSERT_TRUE(!IsNormalMemory(ptr.get())); + + if (tc_globals.guardedpage_allocator().PointerIsMine(ptr.get())) + EXPECT_DEATH( // Guarded page allocation will return exactly as requested + sized_delete(ptr.get(), 2000), + "Mismatched-size-delete.*mismatched-sized-delete.md.*of 2000 bytes " + "\\(expected 1000 bytes\\) at"); + else + EXPECT_DEATH(sized_delete(ptr.get(), 2000), + "Mismatched-size-delete.*mismatched-sized-delete.md.*of " + "2000 bytes \\(expected 1000 - " + "1024 bytes\\) at"); + + return; + } + + GTEST_SKIP() << "can't get a sampled allocation, giving up"; +} + +INSTANTIATE_TEST_SUITE_P( + VaryingSampleCases, SampledAllocationWithFilterTest, + testing::Values( + [](void* ptr) { + // Sampled page-guarded memory + return tc_globals.guardedpage_allocator().PointerIsMine(ptr); + }, + [](void* ptr) { + // Sampled memory only + return !IsNormalMemory(ptr) && + !tc_globals.guardedpage_allocator().PointerIsMine(ptr); + })); + +ABSL_CONST_INIT ABSL_ATTRIBUTE_UNUSED GuardedPageAllocator + gpa_is_constant_initializable; + } // namespace } // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/heap_profiling_test.cc b/contrib/libs/tcmalloc/tcmalloc/heap_profiling_test.cc deleted file mode 100644 index 5c2473ffedca..000000000000 --- a/contrib/libs/tcmalloc/tcmalloc/heap_profiling_test.cc +++ /dev/null @@ -1,122 +0,0 @@ -// Copyright 2019 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include -#include - -#include "gtest/gtest.h" -#include "benchmark/benchmark.h" -#include "tcmalloc/internal/logging.h" -#include "tcmalloc/internal/parameter_accessors.h" -#include "tcmalloc/malloc_extension.h" -#include "tcmalloc/static_vars.h" - -namespace tcmalloc { -namespace { - -int64_t ProfileSize(ProfileType type) { - int64_t total = 0; - - MallocExtension::SnapshotCurrent(type).Iterate( - [&](const Profile::Sample &e) { total += e.sum; }); - return total; -} - -class ScopedPeakGrowthFraction { - public: - explicit ScopedPeakGrowthFraction(double temporary_value) - : previous_(TCMalloc_Internal_GetPeakSamplingHeapGrowthFraction()) { - TCMalloc_Internal_SetPeakSamplingHeapGrowthFraction(temporary_value); - } - - ~ScopedPeakGrowthFraction() { - TCMalloc_Internal_SetPeakSamplingHeapGrowthFraction(previous_); - } - - private: - double previous_; -}; - -TEST(HeapProfilingTest, PeakHeapTracking) { - // Adjust high watermark threshold for our scenario, to be independent of - // changes to the default. As we use a random value for choosing our next - // sampling point, we may overweight some allocations above their true size. - ScopedPeakGrowthFraction s(1.25); - - int64_t start_peak_sz = ProfileSize(ProfileType::kPeakHeap); - - // make a large allocation to force a new peak heap sample - // (total live: 50MiB) - void *first = ::operator new(50 << 20); - // TODO(b/183453911): Remove workaround for GCC 10.x deleting operator new, - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94295. 
- benchmark::DoNotOptimize(first); - int64_t peak_after_first = ProfileSize(ProfileType::kPeakHeap); - EXPECT_NEAR(peak_after_first, start_peak_sz + (50 << 20), 10 << 20); - - // a small allocation shouldn't increase the peak - // (total live: 54MiB) - void *second = ::operator new(4 << 20); - benchmark::DoNotOptimize(second); - int64_t peak_after_second = ProfileSize(ProfileType::kPeakHeap); - EXPECT_EQ(peak_after_second, peak_after_first); - - // but a large one should - // (total live: 254MiB) - void *third = ::operator new(200 << 20); - benchmark::DoNotOptimize(third); - int64_t peak_after_third = ProfileSize(ProfileType::kPeakHeap); - EXPECT_NEAR(peak_after_third, peak_after_second + (200 << 20), 10 << 20); - - // freeing everything shouldn't affect the peak - // (total live: 0MiB) - ::operator delete(first); - EXPECT_EQ(ProfileSize(ProfileType::kPeakHeap), peak_after_third); - - ::operator delete(second); - EXPECT_EQ(ProfileSize(ProfileType::kPeakHeap), peak_after_third); - - ::operator delete(third); - EXPECT_EQ(ProfileSize(ProfileType::kPeakHeap), peak_after_third); - - // going back up less than previous peak shouldn't affect the peak - // (total live: 200MiB) - void *fourth = ::operator new(100 << 20); - benchmark::DoNotOptimize(fourth); - void *fifth = ::operator new(100 << 20); - benchmark::DoNotOptimize(fifth); - EXPECT_EQ(ProfileSize(ProfileType::kPeakHeap), peak_after_third); - - // passing the old peak significantly, even with many small allocations, - // should generate a new one - // (total live: 200MiB + 256MiB = 456MiB, 80% over the 254MiB peak) - void *bitsy[1 << 10]; - for (int i = 0; i < 1 << 10; i++) { - bitsy[i] = ::operator new(1 << 18); - benchmark::DoNotOptimize(bitsy[i]); - } - EXPECT_GT(ProfileSize(ProfileType::kPeakHeap), peak_after_third); - - ::operator delete(fourth); - ::operator delete(fifth); - for (int i = 0; i < 1 << 10; i++) { - ::operator delete(bitsy[i]); - } -} - -} // namespace -} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/hinted_tracker_lists.h b/contrib/libs/tcmalloc/tcmalloc/hinted_tracker_lists.h new file mode 100644 index 000000000000..9df21ec83712 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/hinted_tracker_lists.h @@ -0,0 +1,131 @@ +#pragma clang system_header +// Copyright 2022 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_HINTED_TRACKER_LISTS_H_ +#define TCMALLOC_HINTED_TRACKER_LISTS_H_ + +#include + +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/linked_list.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/range_tracker.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// This class wraps an array of N TrackerLists and a Bitmap storing which +// elements are non-empty. 
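+//
+// A minimal usage sketch (editorial addition; `MyTracker` stands for a
+// hypothetical type that can live on a TList):
+//
+//   HintedTrackerLists<MyTracker, /*N=*/64> lists;
+//   // given a MyTracker* tracker obtained elsewhere:
+//   lists.Add(tracker, /*i=*/5);             // bucket 5 becomes non-empty
+//   MyTracker* t = lists.GetLeast(/*n=*/3);  // first tracker in a bucket >= 3
+//   if (t != nullptr) {
+//     // t has been removed from its list; Add() it back if it should remain
+//     // tracked.
+//   }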
+template <typename TrackerType, size_t N>
+class HintedTrackerLists {
+ public:
+  using TrackerList = TList<TrackerType>;
+
+  constexpr HintedTrackerLists() : size_{} {}
+
+  // Removes a TrackerType from the first non-empty freelist with index at
+  // least n and returns it. Returns nullptr if there is none.
+  TrackerType* GetLeast(const size_t n) {
+    TC_ASSERT_LT(n, N);
+    size_t i = nonempty_.FindSet(n);
+    if (i == N) {
+      return nullptr;
+    }
+    TC_ASSERT(!lists_[i].empty());
+    TrackerType* pt = lists_[i].first();
+    if (lists_[i].remove(pt)) {
+      nonempty_.ClearBit(i);
+    }
+    --size_;
+    return pt;
+  }
+
+  // Returns a pointer to the TrackerType in the first non-empty freelist with
+  // index at least n. Returns nullptr if there is none.
+  //
+  // Unlike GetLeast, this does not remove the pointer from the list when it is
+  // found.
+  TrackerType* PeekLeast(const size_t n) {
+    TC_ASSERT_LT(n, N);
+    size_t i = nonempty_.FindSet(n);
+    if (i == N) {
+      return nullptr;
+    }
+    TC_ASSERT(!lists_[i].empty());
+    return lists_[i].first();
+  }
+
+  // Adds the pointer to lists_[i] and marks index i as non-empty.
+  // REQUIRES: i < N && pt != nullptr.
+  void Add(TrackerType* pt, const size_t i) {
+    TC_ASSERT_LT(i, N);
+    TC_ASSERT_NE(pt, nullptr);
+    lists_[i].prepend(pt);
+    ++size_;
+    nonempty_.SetBit(i);
+  }
+
+  // Removes the pointer from lists_[i], clearing the non-empty bit for index i
+  // if the list becomes empty.
+  // REQUIRES: i < N && pt != nullptr.
+  void Remove(TrackerType* pt, const size_t i) {
+    TC_ASSERT_LT(i, N);
+    TC_ASSERT_NE(pt, nullptr);
+    if (lists_[i].remove(pt)) {
+      nonempty_.ClearBit(i);
+    }
+    --size_;
+  }
+  const TrackerList& operator[](const size_t n) const {
+    TC_ASSERT_LT(n, N);
+    return lists_[n];
+  }
+  size_t size() const { return size_; }
+  bool empty() const { return size_ == 0; }
+
+  // Returns the length of the list at index n.
+  // REQUIRES: n < N.
+  size_t SizeOfList(const size_t n) const {
+    TC_ASSERT_LT(n, N);
+    return lists_[n].length();
+  }
+  // Runs a functor on all pointers in the TrackerLists.
+  // This method is const but the Functor gets passed a non-const pointer.
+  // This quirk is inherited from TrackerList.
+ template + void Iter(const Functor& func, size_t start) const { + size_t i = nonempty_.FindSet(start); + while (i < N) { + auto& list = lists_[i]; + TC_ASSERT(!list.empty()); + for (TrackerType* pt : list) { + func(*pt); + } + i++; + if (i < N) i = nonempty_.FindSet(i); + } + } + + private: + TrackerList lists_[N]; + size_t size_; + Bitmap nonempty_; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_HINTED_TRACKER_LISTS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_address_map.cc b/contrib/libs/tcmalloc/tcmalloc/huge_address_map.cc index 898c6d934a83..a979e2f9387c 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_address_map.cc +++ b/contrib/libs/tcmalloc/tcmalloc/huge_address_map.cc @@ -14,27 +14,29 @@ #include "tcmalloc/huge_address_map.h" -#include - #include -#include +#include +#include #include "absl/base/internal/cycleclock.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/exponential_biased.h" #include "tcmalloc/internal/logging.h" GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { -const HugeAddressMap::Node *HugeAddressMap::Node::next() const { - const Node *n = right_; +const HugeAddressMap::Node* HugeAddressMap::Node::next() const { + const Node* n = right_; if (n) { while (n->left_) n = n->left_; return n; } n = parent_; - const Node *last = this; + const Node* last = this; while (n) { if (n->left_ == last) return n; last = n; @@ -44,49 +46,49 @@ const HugeAddressMap::Node *HugeAddressMap::Node::next() const { return nullptr; } -HugeAddressMap::Node *HugeAddressMap::Node::next() { - const Node *n = static_cast(this)->next(); - return const_cast(n); +HugeAddressMap::Node* HugeAddressMap::Node::next() { + const Node* n = static_cast(this)->next(); + return const_cast(n); } -void HugeAddressMap::Node::Check(size_t *num_nodes, HugeLength *size) const { +void HugeAddressMap::Node::Check(size_t* num_nodes, HugeLength* size) const { HugeLength longest = range_.len(); *num_nodes += 1; *size += range_.len(); if (left_) { // tree - CHECK_CONDITION(left_->range_.start() < range_.start()); + TC_CHECK_LT(left_->range_.start(), range_.start()); // disjoint - CHECK_CONDITION(left_->range_.end_addr() < range_.start_addr()); + TC_CHECK_LT(left_->range_.end_addr(), range_.start_addr()); // well-formed - CHECK_CONDITION(left_->parent_ == this); + TC_CHECK_EQ(left_->parent_, this); // heap - CHECK_CONDITION(left_->prio_ <= prio_); + TC_CHECK_LE(left_->prio_, prio_); left_->Check(num_nodes, size); if (left_->longest_ > longest) longest = left_->longest_; } if (right_) { // tree - CHECK_CONDITION(right_->range_.start() > range_.start()); + TC_CHECK_GT(right_->range_.start(), range_.start()); // disjoint - CHECK_CONDITION(right_->range_.start_addr() > range_.end_addr()); + TC_CHECK_GT(right_->range_.start_addr(), range_.end_addr()); // well-formed - CHECK_CONDITION(right_->parent_ == this); + TC_CHECK_EQ(right_->parent_, this); // heap - CHECK_CONDITION(right_->prio_ <= prio_); + TC_CHECK_LE(right_->prio_, prio_); right_->Check(num_nodes, size); if (right_->longest_ > longest) longest = right_->longest_; } - CHECK_CONDITION(longest_ == longest); + TC_CHECK_EQ(longest_, longest); } -const HugeAddressMap::Node *HugeAddressMap::first() const { - const Node *n = root(); +const HugeAddressMap::Node* HugeAddressMap::first() const { + const Node* n = root(); if (!n) return nullptr; - const Node *left = n->left_; + const Node* left = n->left_; while (left) 
{ n = left; left = n->left_; @@ -95,44 +97,44 @@ const HugeAddressMap::Node *HugeAddressMap::first() const { return n; } -HugeAddressMap::Node *HugeAddressMap::first() { - const Node *f = static_cast(this)->first(); - return const_cast(f); +HugeAddressMap::Node* HugeAddressMap::first() { + const Node* f = static_cast(this)->first(); + return const_cast(f); } void HugeAddressMap::Check() { size_t nodes = 0; HugeLength size = NHugePages(0); if (root_) { - CHECK_CONDITION(root_->parent_ == nullptr); + TC_CHECK_EQ(root_->parent_, nullptr); root_->Check(&nodes, &size); } - CHECK_CONDITION(nodes == nranges()); - CHECK_CONDITION(size == total_mapped()); - CHECK_CONDITION(total_nodes_ == used_nodes_ + freelist_size_); + TC_CHECK_EQ(nodes, nranges()); + TC_CHECK_EQ(size, total_mapped()); + TC_CHECK_EQ(total_nodes_, used_nodes_ + freelist_size_); } size_t HugeAddressMap::nranges() const { return used_nodes_; } HugeLength HugeAddressMap::total_mapped() const { return total_size_; } -void HugeAddressMap::Print(Printer *out) const { - out->printf("HugeAddressMap: treap %zu / %zu nodes used / created\n", - used_nodes_, total_nodes_); +void HugeAddressMap::Print(Printer& out) const { + out.printf("HugeAddressMap: treap %zu / %zu nodes used / created\n", + used_nodes_, total_nodes_); const size_t longest = root_ ? root_->longest_.raw_num() : 0; - out->printf("HugeAddressMap: %zu contiguous hugepages available\n", longest); + out.printf("HugeAddressMap: %zu contiguous hugepages available\n", longest); } -void HugeAddressMap::PrintInPbtxt(PbtxtRegion *hpaa) const { - hpaa->PrintI64("num_huge_address_map_treap_nodes_used", used_nodes_); - hpaa->PrintI64("num_huge_address_map_treap_nodes_created", total_nodes_); +void HugeAddressMap::PrintInPbtxt(PbtxtRegion& hpaa) const { + hpaa.PrintI64("num_huge_address_map_treap_nodes_used", used_nodes_); + hpaa.PrintI64("num_huge_address_map_treap_nodes_created", total_nodes_); const size_t longest = root_ ? root_->longest_.in_bytes() : 0; - hpaa->PrintI64("contiguous_free_bytes", longest); + hpaa.PrintI64("contiguous_free_bytes", longest); } -HugeAddressMap::Node *HugeAddressMap::Predecessor(HugePage p) { - Node *n = root(); - Node *best = nullptr; +HugeAddressMap::Node* HugeAddressMap::Predecessor(HugePage p) { + Node* n = root(); + Node* best = nullptr; while (n) { HugeRange here = n->range_; if (here.contains(p)) return n; @@ -151,7 +153,7 @@ HugeAddressMap::Node *HugeAddressMap::Predecessor(HugePage p) { return best; } -void HugeAddressMap::Merge(Node *b, HugeRange r, Node *a) { +void HugeAddressMap::Merge(Node* b, HugeRange r, Node* a) { auto merge_when = [](HugeRange x, int64_t x_when, HugeRange y, int64_t y_when) { // avoid overflow with floating-point @@ -195,10 +197,10 @@ void HugeAddressMap::Insert(HugeRange r) { total_size_ += r.len(); // First, try to merge if necessary. Note there are three possibilities: // we might need to merge before with r, r with after, or all three together. - Node *before = Predecessor(r.start()); - CHECK_CONDITION(!before || !before->range_.intersects(r)); - Node *after = before ? before->next() : first(); - CHECK_CONDITION(!after || !after->range_.intersects(r)); + Node* before = Predecessor(r.start()); + TC_CHECK(!before || !before->range_.intersects(r)); + Node* after = before ? 
before->next() : first(); + TC_CHECK(!after || !after->range_.intersects(r)); if (before && before->range_.precedes(r)) { if (after && r.precedes(after->range_)) { Merge(before, r, after); @@ -210,13 +212,13 @@ void HugeAddressMap::Insert(HugeRange r) { Merge(nullptr, r, after); return; } - CHECK_CONDITION(!before || !before->range_.precedes(r)); - CHECK_CONDITION(!after || !r.precedes(after->range_)); + TC_CHECK(!before || !before->range_.precedes(r)); + TC_CHECK(!after || !r.precedes(after->range_)); // No merging possible; just add a new node. - Node *n = Get(r); - Node *curr = root(); - Node *parent = nullptr; - Node **link = &root_; + Node* n = Get(r); + Node* curr = root(); + Node* parent = nullptr; + Node** link = &root_; // Walk down the tree to our correct location while (curr != nullptr && curr->prio_ >= n->prio_) { curr->longest_ = std::max(curr->longest_, r.len()); @@ -238,10 +240,10 @@ void HugeAddressMap::Insert(HugeRange r) { // We need to split the treap at curr into n's children. // This will be two treaps: one less than p, one greater, and has // a nice recursive structure. - Node **less = &n->left_; - Node *lp = n; - Node **more = &n->right_; - Node *mp = n; + Node** less = &n->left_; + Node* lp = n; + Node** more = &n->right_; + Node* mp = n; while (curr) { if (curr->range_.start() < p) { *less = curr; @@ -272,21 +274,21 @@ void HugeAddressMap::Node::FixLongest() { longest_ = new_longest; } -void HugeAddressMap::FixLongest(HugeAddressMap::Node *n) { +void HugeAddressMap::FixLongest(HugeAddressMap::Node* n) { while (n) { n->FixLongest(); n = n->parent_; } } -void HugeAddressMap::Remove(HugeAddressMap::Node *n) { +void HugeAddressMap::Remove(HugeAddressMap::Node* n) { total_size_ -= n->range_.len(); // We need to merge the left and right children of n into one // treap, then glue it into place wherever n was. - Node **link; - Node *parent = n->parent_; - Node *top = n->left_; - Node *bottom = n->right_; + Node** link; + Node* parent = n->parent_; + Node* top = n->left_; + Node* bottom = n->right_; const HugeLength child_longest = std::max(top ? top->longest_ : NHugePages(0), @@ -308,7 +310,7 @@ void HugeAddressMap::Remove(HugeAddressMap::Node *n) { // A routine op we'll need a lot: given two (possibly null) // children, put the root-ier one into top. 
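   // (Editorial clarification: "root-ier" means the node with the higher treap
   // priority; merging the two subtrees repeatedly promotes the
   // higher-priority root so the heap invariant on prio_ is preserved.)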
- auto reorder_maybe = [](Node **top, Node **bottom) { + auto reorder_maybe = [](Node** top, Node** bottom) { Node *b = *bottom, *t = *top; if (b && (!t || t->prio_ < b->prio_)) { *bottom = t; @@ -343,25 +345,26 @@ void HugeAddressMap::Remove(HugeAddressMap::Node *n) { Put(n); } -void HugeAddressMap::Put(Node *n) { +void HugeAddressMap::Put(Node* n) { freelist_size_++; used_nodes_--; n->left_ = freelist_; freelist_ = n; } -HugeAddressMap::Node *HugeAddressMap::Get(HugeRange r) { - CHECK_CONDITION((freelist_ == nullptr) == (freelist_size_ == 0)); +HugeAddressMap::Node* HugeAddressMap::Get(HugeRange r) { + TC_CHECK_EQ(freelist_ == nullptr, freelist_size_ == 0); used_nodes_++; - int prio = rand_r(&seed_); + seed_ = ExponentialBiased::NextRandom(seed_); + int prio = ExponentialBiased::GetRandom(seed_); if (freelist_size_ == 0) { total_nodes_++; - Node *ret = reinterpret_cast(meta_(sizeof(Node))); + Node* ret = reinterpret_cast(meta_(sizeof(Node))); return new (ret) Node(r, prio); } freelist_size_--; - Node *ret = freelist_; + Node* ret = freelist_; freelist_ = ret->left_; return new (ret) Node(r, prio); } diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_address_map.h b/contrib/libs/tcmalloc/tcmalloc/huge_address_map.h index 3c71f19a3fcc..7e449f63064c 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_address_map.h +++ b/contrib/libs/tcmalloc/tcmalloc/huge_address_map.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,8 +18,11 @@ #include #include +#include "absl/base/attributes.h" #include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" +#include "tcmalloc/metadata_allocator.h" GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { @@ -31,11 +35,10 @@ namespace tcmalloc_internal { // // This class scales well and is *reasonably* performant, but it is not intended // for use on extremely hot paths. -// TODO(b/134688982): extend to support other range-like types? class HugeAddressMap { public: - typedef void *(*MetadataAllocFunction)(size_t bytes); - explicit constexpr HugeAddressMap(MetadataAllocFunction meta); + explicit constexpr HugeAddressMap( + MetadataAllocator& meta ABSL_ATTRIBUTE_LIFETIME_BOUND); // IMPORTANT: DESTROYING A HUGE ADDRESS MAP DOES NOT MAKE ANY ATTEMPT // AT FREEING ALLOCATED METADATA. @@ -46,11 +49,11 @@ class HugeAddressMap { // the range stored at this point HugeRange range() const; // Tree structure - Node *left(); - Node *right(); + Node* left(); + Node* right(); // Iterate to the next node in address order - const Node *next() const; - Node *next(); + const Node* next() const; + Node* next(); // when were this node's content added (in // absl::base_internal::CycleClock::Now units)? int64_t when() const; @@ -64,28 +67,28 @@ class HugeAddressMap { HugeRange range_; int prio_; // chosen randomly Node *left_, *right_; - Node *parent_; + Node* parent_; HugeLength longest_; int64_t when_; // Expensive, recursive consistency check. // Accumulates node count and range sizes into passed arguments. - void Check(size_t *num_nodes, HugeLength *size) const; + void Check(size_t* num_nodes, HugeLength* size) const; // We've broken longest invariants somehow; fix them here. void FixLongest(); }; // Get root of the tree. 
- Node *root(); - const Node *root() const; + Node* root(); + const Node* root() const; // Get lowest-addressed node - const Node *first() const; - Node *first(); + const Node* first() const; + Node* first(); // Returns the highest-addressed range that does not lie completely // after p (if any). - Node *Predecessor(HugePage p); + Node* Predecessor(HugePage p); // Expensive consistency check. void Check(); @@ -93,51 +96,51 @@ class HugeAddressMap { // Statistics size_t nranges() const; HugeLength total_mapped() const; - void Print(Printer *out) const; - void PrintInPbtxt(PbtxtRegion *hpaa) const; + void Print(Printer& out) const; + void PrintInPbtxt(PbtxtRegion& hpaa) const; // Add to the map, merging with adjacent ranges as needed. void Insert(HugeRange r); // Delete n from the map. - void Remove(Node *n); + void Remove(Node* n); private: // our tree - Node *root_{nullptr}; + Node* root_{nullptr}; size_t used_nodes_{0}; HugeLength total_size_{NHugePages(0)}; // cache of unused nodes - Node *freelist_{nullptr}; + Node* freelist_{nullptr}; size_t freelist_size_{0}; // How we get more - MetadataAllocFunction meta_; - Node *Get(HugeRange r); - void Put(Node *n); + MetadataAllocator& meta_; + Node* Get(HugeRange r); + void Put(Node* n); size_t total_nodes_{0}; - void Merge(Node *b, HugeRange r, Node *a); - void FixLongest(Node *n); + void Merge(Node* b, HugeRange r, Node* a); + void FixLongest(Node* n); // Note that we always use the same seed, currently; this isn't very random. // In practice we're not worried about adversarial input and this works well // enough. - unsigned int seed_{0}; + uint64_t seed_{0}; }; -inline constexpr HugeAddressMap::HugeAddressMap(MetadataAllocFunction meta) +inline constexpr HugeAddressMap::HugeAddressMap(MetadataAllocator& meta) : meta_(meta) {} inline HugeRange HugeAddressMap::Node::range() const { return range_; } -inline HugeAddressMap::Node *HugeAddressMap::Node::left() { return left_; } -inline HugeAddressMap::Node *HugeAddressMap::Node::right() { return right_; } +inline HugeAddressMap::Node* HugeAddressMap::Node::left() { return left_; } +inline HugeAddressMap::Node* HugeAddressMap::Node::right() { return right_; } inline int64_t HugeAddressMap::Node::when() const { return when_; } inline HugeLength HugeAddressMap::Node::longest() const { return longest_; } -inline HugeAddressMap::Node *HugeAddressMap::root() { return root_; } -inline const HugeAddressMap::Node *HugeAddressMap::root() const { +inline HugeAddressMap::Node* HugeAddressMap::root() { return root_; } +inline const HugeAddressMap::Node* HugeAddressMap::root() const { return root_; } diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_address_map_test.cc b/contrib/libs/tcmalloc/tcmalloc/huge_address_map_test.cc index 455cd6380964..c4ec9f810e51 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_address_map_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/huge_address_map_test.cc @@ -14,12 +14,14 @@ #include "tcmalloc/huge_address_map.h" +#include #include #include #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "tcmalloc/mock_metadata_allocator.h" namespace tcmalloc { namespace tcmalloc_internal { @@ -27,13 +29,7 @@ namespace { class HugeAddressMapTest : public ::testing::Test { protected: - HugeAddressMapTest() : map_(MallocMetadata) { metadata_allocs_.clear(); } - - ~HugeAddressMapTest() override { - for (void* p : metadata_allocs_) { - free(p); - } - } + HugeAddressMapTest() : map_(malloc_metadata_) {} std::vector Contents() { std::vector ret; @@ -52,17 +48,9 @@ class HugeAddressMapTest : 
public ::testing::Test { HugeAddressMap map_; private: - static void* MallocMetadata(size_t size) { - void* ptr = malloc(size); - metadata_allocs_.push_back(ptr); - return ptr; - } - - static std::vector metadata_allocs_; + FakeMetadataAllocator malloc_metadata_; }; -std::vector HugeAddressMapTest::metadata_allocs_; - // This test verifies that HugeAddressMap merges properly. TEST_F(HugeAddressMapTest, Merging) { const HugeRange r1 = HugeRange::Make(hp(0), hl(1)); diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_allocator.cc b/contrib/libs/tcmalloc/tcmalloc/huge_allocator.cc index c77f4522ad54..d7d090c281bc 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_allocator.cc +++ b/contrib/libs/tcmalloc/tcmalloc/huge_allocator.cc @@ -17,34 +17,36 @@ #include #include "tcmalloc/huge_address_map.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" +#include "tcmalloc/stats.h" GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { -void HugeAllocator::Print(Printer *out) { - out->printf("HugeAllocator: contiguous, unbacked hugepage(s)\n"); +void HugeAllocator::Print(Printer& out) { + out.printf("HugeAllocator: contiguous, unbacked hugepage(s)\n"); free_.Print(out); - out->printf( - "HugeAllocator: %zu requested - %zu in use = %zu hugepages free\n", - from_system_.raw_num(), in_use_.raw_num(), - (from_system_ - in_use_).raw_num()); + out.printf("HugeAllocator: %zu requested - %zu in use = %zu hugepages free\n", + from_system_.raw_num(), in_use_.raw_num(), + (from_system_ - in_use_).raw_num()); } -void HugeAllocator::PrintInPbtxt(PbtxtRegion *hpaa) const { +void HugeAllocator::PrintInPbtxt(PbtxtRegion& hpaa) const { free_.PrintInPbtxt(hpaa); - hpaa->PrintI64("num_total_requested_huge_pages", from_system_.raw_num()); - hpaa->PrintI64("num_in_use_huge_pages", in_use_.raw_num()); + hpaa.PrintI64("num_total_requested_huge_pages", from_system_.raw_num()); + hpaa.PrintI64("num_in_use_huge_pages", in_use_.raw_num()); } -HugeAddressMap::Node *HugeAllocator::Find(HugeLength n) { - HugeAddressMap::Node *curr = free_.root(); +HugeAddressMap::Node* HugeAllocator::Find(HugeLength n) { + HugeAddressMap::Node* curr = free_.root(); // invariant: curr != nullptr && curr->longest >= n // we favor smaller gaps and lower nodes and lower addresses, in that // order. The net effect is that we are neither a best-fit nor a // lowest-address allocator but vaguely close to both. - HugeAddressMap::Node *best = nullptr; + HugeAddressMap::Node* best = nullptr; while (curr && curr->longest() >= n) { if (curr->range().len() >= n) { if (!best || best->range().len() > curr->range().len()) { @@ -90,34 +92,33 @@ void HugeAllocator::CheckFreelist() { size_t num_nodes = free_.nranges(); HugeLength n = free_.total_mapped(); free_.Check(); - CHECK_CONDITION(n == from_system_ - in_use_); + TC_CHECK_EQ(n, from_system_ - in_use_); LargeSpanStats large; - AddSpanStats(nullptr, &large, nullptr); - CHECK_CONDITION(num_nodes == large.spans); - CHECK_CONDITION(n.in_pages() == large.returned_pages); + AddSpanStats(nullptr, &large); + TC_CHECK_EQ(num_nodes, large.spans); + TC_CHECK_EQ(n.in_pages(), large.returned_pages); } HugeRange HugeAllocator::AllocateRange(HugeLength n) { if (n.overflows()) return HugeRange::Nil(); - size_t actual; size_t bytes = n.in_bytes(); size_t align = kHugePageSize; - void *ptr = allocate_(bytes, &actual, align); + auto [ptr, actual] = allocate_(bytes, align); if (ptr == nullptr) { // OOM... 
return HugeRange::Nil(); } - CHECK_CONDITION(ptr != nullptr); + TC_CHECK_NE(ptr, nullptr); // It's possible for a request to return extra hugepages. - CHECK_CONDITION(actual % kHugePageSize == 0); + TC_CHECK_EQ(actual % kHugePageSize, 0); n = HLFromBytes(actual); from_system_ += n; return HugeRange::Make(HugePageContaining(ptr), n); } HugeRange HugeAllocator::Get(HugeLength n) { - CHECK_CONDITION(n > NHugePages(0)); - auto *node = Find(n); + TC_CHECK_GT(n, NHugePages(0)); + auto* node = Find(n); if (!node) { // Get more memory, then "delete" it HugeRange r = AllocateRange(n); @@ -125,7 +126,7 @@ HugeRange HugeAllocator::Get(HugeLength n) { in_use_ += r.len(); Release(r); node = Find(n); - CHECK_CONDITION(node != nullptr); + TC_CHECK_NE(node, nullptr); } in_use_ += n; @@ -135,8 +136,8 @@ HugeRange HugeAllocator::Get(HugeLength n) { HugeLength before = r.len(); HugeRange extra = HugeRange::Make(r.start() + n, before - n); r = HugeRange::Make(r.start(), n); - ASSERT(r.precedes(extra)); - ASSERT(r.len() + extra.len() == before); + TC_ASSERT(r.precedes(extra)); + TC_ASSERT_EQ(r.len() + extra.len(), before); in_use_ += extra.len(); Release(extra); } else { @@ -154,19 +155,15 @@ void HugeAllocator::Release(HugeRange r) { DebugCheckFreelist(); } -void HugeAllocator::AddSpanStats(SmallSpanStats *small, LargeSpanStats *large, - PageAgeHistograms *ages) const { - for (const HugeAddressMap::Node *node = free_.first(); node != nullptr; +void HugeAllocator::AddSpanStats(SmallSpanStats* small, + LargeSpanStats* large) const { + for (const HugeAddressMap::Node* node = free_.first(); node != nullptr; node = node->next()) { HugeLength n = node->range().len(); if (large != nullptr) { large->spans++; large->returned_pages += n.in_pages(); } - - if (ages != nullptr) { - ages->RecordRange(n.in_pages(), true, node->when()); - } } } diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_allocator.h b/contrib/libs/tcmalloc/tcmalloc/huge_allocator.h index 6242805c49ac..baa4499d2b8c 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_allocator.h +++ b/contrib/libs/tcmalloc/tcmalloc/huge_allocator.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -19,27 +20,42 @@ #include -#include "tcmalloc/common.h" +#include "absl/base/attributes.h" #include "tcmalloc/huge_address_map.h" #include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/metadata_allocator.h" #include "tcmalloc/stats.h" +#include "tcmalloc/system-alloc.h" GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { // these typedefs allow replacement of tcmalloc::System* for tests. -typedef void *(*MemoryAllocFunction)(size_t bytes, size_t *actual, - size_t align); -typedef void *(*MetadataAllocFunction)(size_t bytes); +class VirtualAllocator { + public: + VirtualAllocator() = default; + virtual ~VirtualAllocator() = default; + + VirtualAllocator(const VirtualAllocator&) = delete; + VirtualAllocator(VirtualAllocator&&) = delete; + VirtualAllocator& operator=(const VirtualAllocator&) = delete; + VirtualAllocator& operator=(VirtualAllocator&&) = delete; + + // Allocates bytes of virtual address space with align alignment. + [[nodiscard]] virtual AddressRange operator()(size_t bytes, size_t align) = 0; +}; // This tracks available ranges of hugepages and fulfills requests for // usable memory, allocating more from the system as needed. 
All // hugepages are treated as (and assumed to be) unbacked. class HugeAllocator { public: - constexpr HugeAllocator(MemoryAllocFunction allocate, - MetadataAllocFunction meta_allocate) + constexpr HugeAllocator( + VirtualAllocator& allocate ABSL_ATTRIBUTE_LIFETIME_BOUND, + MetadataAllocator& meta_allocate ABSL_ATTRIBUTE_LIFETIME_BOUND) : free_(meta_allocate), allocate_(allocate) {} // Obtain a range of n unbacked hugepages, distinct from all other @@ -57,8 +73,7 @@ class HugeAllocator { // Unused memory in the allocator. HugeLength size() const { return from_system_ - in_use_; } - void AddSpanStats(SmallSpanStats *small, LargeSpanStats *large, - PageAgeHistograms *ages) const; + void AddSpanStats(SmallSpanStats* small, LargeSpanStats* large) const; BackingStats stats() const { BackingStats s; @@ -68,8 +83,8 @@ class HugeAllocator { return s; } - void Print(Printer *out); - void PrintInPbtxt(PbtxtRegion *hpaa) const; + void Print(Printer& out); + void PrintInPbtxt(PbtxtRegion& hpaa) const; private: // We're constrained in several ways by existing code. Hard requirements: @@ -85,7 +100,7 @@ class HugeAllocator { // don't matter, and most of the simple ideas can't hit all of the above // requirements. HugeAddressMap free_; - HugeAddressMap::Node *Find(HugeLength n); + HugeAddressMap::Node* Find(HugeLength n); void CheckFreelist(); void DebugCheckFreelist() { @@ -97,7 +112,7 @@ class HugeAllocator { HugeLength from_system_{NHugePages(0)}; HugeLength in_use_{NHugePages(0)}; - MemoryAllocFunction allocate_; + VirtualAllocator& allocate_; HugeRange AllocateRange(HugeLength n); }; diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_allocator_test.cc b/contrib/libs/tcmalloc/tcmalloc/huge_allocator_test.cc index 150075b88e32..a26c4b50e85f 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_allocator_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/huge_allocator_test.cc @@ -14,12 +14,12 @@ #include "tcmalloc/huge_allocator.h" +#include #include #include -#include #include -#include +#include #include #include @@ -29,54 +29,34 @@ #include "absl/time/clock.h" #include "absl/time/time.h" #include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" +#include "tcmalloc/mock_metadata_allocator.h" +#include "tcmalloc/mock_virtual_allocator.h" namespace tcmalloc { namespace tcmalloc_internal { namespace { class HugeAllocatorTest : public testing::TestWithParam { - private: - // Use a tiny fraction of actual size so we can test aggressively. - static void *AllocateFake(size_t bytes, size_t *actual, size_t align); - - static constexpr size_t kMaxBacking = 1024 * 1024; - // This isn't super good form but we'll never have more than one HAT - // extant at once. - static std::vector backing_; - - // We use actual malloc for metadata allocations, but we track them so they - // can be deleted. 
- static void *MallocMetadata(size_t size); - static std::vector metadata_allocs_; - static size_t metadata_bytes_; - static bool should_overallocate_; - static HugeLength huge_pages_requested_; - static HugeLength huge_pages_received_; - protected: - HugeLength HugePagesRequested() { return huge_pages_requested_; } - HugeLength HugePagesReceived() { return huge_pages_received_; } + HugeLength HugePagesRequested() { + return vm_allocator_.huge_pages_requested_; + } + HugeLength HugePagesReceived() { return vm_allocator_.huge_pages_received_; } HugeAllocatorTest() { - should_overallocate_ = GetParam(); - huge_pages_requested_ = NHugePages(0); - huge_pages_received_ = NHugePages(0); + vm_allocator_.should_overallocate_ = GetParam(); + vm_allocator_.huge_pages_requested_ = NHugePages(0); + vm_allocator_.huge_pages_received_ = NHugePages(0); // We don't use the first few bytes, because things might get weird // given zero pointers. - backing_.resize(1024); - metadata_bytes_ = 0; + vm_allocator_.backing_.resize(1024); } - ~HugeAllocatorTest() override { - for (void *p : metadata_allocs_) { - free(p); - } - metadata_allocs_.clear(); - backing_.clear(); - } + ~HugeAllocatorTest() override { vm_allocator_.backing_.clear(); } - size_t *GetActual(HugePage p) { return &backing_[p.index()]; } + size_t* GetActual(HugePage p) { return &vm_allocator_.backing_[p.index()]; } // We're dealing with a lot of memory, so we don't want to do full memset // and then check every byte for corruption. So set the first and last @@ -100,49 +80,11 @@ class HugeAllocatorTest : public testing::TestWithParam { EXPECT_EQ(used, expected_use); } - HugeAllocator allocator_{AllocateFake, MallocMetadata}; + FakeVirtualAllocator vm_allocator_; + FakeMetadataAllocator metadata_allocator_; + HugeAllocator allocator_{vm_allocator_, metadata_allocator_}; }; -// Use a tiny fraction of actual size so we can test aggressively. -void *HugeAllocatorTest::AllocateFake(size_t bytes, size_t *actual, - size_t align) { - CHECK_CONDITION(bytes % kHugePageSize == 0); - CHECK_CONDITION(align % kHugePageSize == 0); - HugeLength req = HLFromBytes(bytes); - huge_pages_requested_ += req; - // Test the case where our sys allocator provides too much. - if (should_overallocate_) ++req; - huge_pages_received_ += req; - *actual = req.in_bytes(); - // we'll actually provide hidden backing, one word per hugepage. - bytes = req / NHugePages(1); - align /= kHugePageSize; - size_t index = backing_.size(); - if (index % align != 0) { - index += (align - (index & align)); - } - if (index + bytes > kMaxBacking) return nullptr; - backing_.resize(index + bytes); - void *ptr = reinterpret_cast(index * kHugePageSize); - return ptr; -} - -// We use actual malloc for metadata allocations, but we track them so they -// can be deleted. 
-void *HugeAllocatorTest::MallocMetadata(size_t size) { - metadata_bytes_ += size; - void *ptr = malloc(size); - metadata_allocs_.push_back(ptr); - return ptr; -} - -std::vector HugeAllocatorTest::backing_; -std::vector HugeAllocatorTest::metadata_allocs_; -size_t HugeAllocatorTest::metadata_bytes_; -bool HugeAllocatorTest::should_overallocate_; -HugeLength HugeAllocatorTest::huge_pages_requested_; -HugeLength HugeAllocatorTest::huge_pages_received_; - TEST_P(HugeAllocatorTest, Basic) { std::vector> allocs; absl::BitGen rng; @@ -340,12 +282,11 @@ TEST_P(HugeAllocatorTest, Frugal) { TEST_P(HugeAllocatorTest, Stats) { struct Helper { - static void Stats(const HugeAllocator *huge, size_t *num_spans, - Length *pages, absl::Duration *avg_age) { + static void Stats(const HugeAllocator* huge, size_t* num_spans, + Length* pages) { SmallSpanStats small; LargeSpanStats large; - PageAgeHistograms ages(absl::base_internal::CycleClock::Now()); - huge->AddSpanStats(&small, &large, &ages); + huge->AddSpanStats(&small, &large); for (auto i = Length(0); i < kMaxPages; ++i) { EXPECT_EQ(0, small.normal_length[i.raw_num()]); EXPECT_EQ(0, small.returned_length[i.raw_num()]); @@ -353,8 +294,6 @@ TEST_P(HugeAllocatorTest, Stats) { *num_spans = large.spans; EXPECT_EQ(Length(0), large.normal_pages); *pages = large.returned_pages; - const PageAgeHistograms::Histogram *hist = ages.GetTotalHistogram(true); - *avg_age = absl::Seconds(hist->avg_age()); } }; @@ -375,58 +314,31 @@ TEST_P(HugeAllocatorTest, Stats) { size_t num_spans; Length pages; - absl::Duration avg_age; - Helper::Stats(&allocator_, &num_spans, &pages, &avg_age); + Helper::Stats(&allocator_, &num_spans, &pages); EXPECT_EQ(0, num_spans); EXPECT_EQ(Length(0), pages); - EXPECT_EQ(absl::ZeroDuration(), avg_age); allocator_.Release(r1); - constexpr absl::Duration kDelay = absl::Milliseconds(500); - absl::SleepFor(kDelay); - Helper::Stats(&allocator_, &num_spans, &pages, &avg_age); + Helper::Stats(&allocator_, &num_spans, &pages); EXPECT_EQ(1, num_spans); EXPECT_EQ(NHugePages(1).in_pages(), pages); - // We can only do >= testing, because we might be arbitrarily delayed. - // Since avg_age is computed in floating point, we may have round-off from - // TCMalloc's internal use of absl::base_internal::CycleClock down through - // computing the average age of the spans. kEpsilon allows for a tiny amount - // of slop. 
- constexpr absl::Duration kEpsilon = absl::Microseconds(200); - EXPECT_LE(kDelay - kEpsilon, avg_age); allocator_.Release(r2); - absl::SleepFor(absl::Milliseconds(250)); - Helper::Stats(&allocator_, &num_spans, &pages, &avg_age); + Helper::Stats(&allocator_, &num_spans, &pages); EXPECT_EQ(2, num_spans); EXPECT_EQ(NHugePages(3).in_pages(), pages); - EXPECT_LE( - (absl::Seconds(0.75) * 1 + absl::Seconds(0.25) * 2) / (1 + 2) - kEpsilon, - avg_age); allocator_.Release(r3); - absl::SleepFor(absl::Milliseconds(125)); - Helper::Stats(&allocator_, &num_spans, &pages, &avg_age); + Helper::Stats(&allocator_, &num_spans, &pages); EXPECT_EQ(3, num_spans); EXPECT_EQ(NHugePages(6).in_pages(), pages); - EXPECT_LE((absl::Seconds(0.875) * 1 + absl::Seconds(0.375) * 2 + - absl::Seconds(0.125) * 3) / - (1 + 2 + 3) - - kEpsilon, - avg_age); allocator_.Release(b1); allocator_.Release(b2); - absl::SleepFor(absl::Milliseconds(100)); - Helper::Stats(&allocator_, &num_spans, &pages, &avg_age); + Helper::Stats(&allocator_, &num_spans, &pages); EXPECT_EQ(1, num_spans); EXPECT_EQ(NHugePages(8).in_pages(), pages); - EXPECT_LE((absl::Seconds(0.975) * 1 + absl::Seconds(0.475) * 2 + - absl::Seconds(0.225) * 3 + absl::Seconds(0.1) * 2) / - (1 + 2 + 3 + 2) - - kEpsilon, - avg_age); } // Make sure we're well-behaved in the presence of OOM (and that we do @@ -440,7 +352,7 @@ TEST_P(HugeAllocatorTest, OOM) { INSTANTIATE_TEST_SUITE_P( NormalOverAlloc, HugeAllocatorTest, testing::Values(false, true), - +[](const testing::TestParamInfo &info) { + +[](const testing::TestParamInfo& info) { return info.param ? "overallocates" : "normal"; }); diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_cache.cc b/contrib/libs/tcmalloc/tcmalloc/huge_cache.cc index 0d25da2983fc..5f1d9d56c519 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_cache.cc +++ b/contrib/libs/tcmalloc/tcmalloc/huge_cache.cc @@ -14,13 +14,20 @@ #include "tcmalloc/huge_cache.h" +#include +#include +#include +#include #include +#include "absl/base/optimization.h" #include "absl/time/time.h" -#include "tcmalloc/common.h" #include "tcmalloc/huge_address_map.h" +#include "tcmalloc/huge_page_subrelease.h" #include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" +#include "tcmalloc/pages.h" #include "tcmalloc/stats.h" GOOGLE_MALLOC_SECTION_BEGIN @@ -36,9 +43,9 @@ template HugeLength MinMaxTracker::MaxOverTime(absl::Duration t) const { HugeLength m = NHugePages(0); size_t num_epochs = ceil(absl::FDivDuration(t, kEpochLength)); - timeseries_.IterBackwards([&](size_t offset, int64_t ts, - const Extrema &e) { m = std::max(m, e.max); }, - num_epochs); + timeseries_.IterBackwards( + [&](size_t offset, const Extrema& e) { m = std::max(m, e.max); }, + num_epochs); return m; } @@ -46,39 +53,40 @@ template HugeLength MinMaxTracker::MinOverTime(absl::Duration t) const { HugeLength m = kMaxVal; size_t num_epochs = ceil(absl::FDivDuration(t, kEpochLength)); - timeseries_.IterBackwards([&](size_t offset, int64_t ts, - const Extrema &e) { m = std::min(m, e.min); }, - num_epochs); + timeseries_.IterBackwards( + [&](size_t offset, const Extrema& e) { m = std::min(m, e.min); }, + num_epochs); return m; } template -void MinMaxTracker::Print(Printer *out) const { +void MinMaxTracker::Print(Printer& out) const { // Prints timestamp:min_pages:max_pages for each window with records. // Timestamp == kEpochs - 1 is the most recent measurement. 
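   // (Editorial note: each record below is printed as "offset:min:max" in raw
   // hugepage counts, e.g. "3:0:12," means epoch 3 saw between 0 and 12
   // hugepages over that window.)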
const int64_t millis = absl::ToInt64Milliseconds(kEpochLength); - out->printf("\nHugeCache: window %lldms * %zu", millis, kEpochs); + out.printf("\nHugeCache: window %lldms * %zu", millis, kEpochs); int written = 0; timeseries_.Iter( - [&](size_t offset, int64_t ts, const Extrema &e) { - if ((written++) % 100 == 0) - out->printf("\nHugeCache: Usage timeseries "); - out->printf("%zu:%zu:%zd,", offset, e.min.raw_num(), e.max.raw_num()); + [&](size_t offset, const Extrema& e) { + if ((written++) % 100 == 0) { + out.printf("\nHugeCache: Usage timeseries "); + } + out.printf("%zu:%zu:%zd,", offset, e.min.raw_num(), e.max.raw_num()); }, timeseries_.kSkipEmptyEntries); - out->printf("\n"); + out.printf("\n"); } template -void MinMaxTracker::PrintInPbtxt(PbtxtRegion *hpaa) const { +void MinMaxTracker::PrintInPbtxt(PbtxtRegion& hpaa) const { // Prints content of each non-empty epoch, from oldest to most recent data - auto huge_cache_history = hpaa->CreateSubRegion("huge_cache_history"); + auto huge_cache_history = hpaa.CreateSubRegion("huge_cache_history"); huge_cache_history.PrintI64("window_ms", absl::ToInt64Milliseconds(kEpochLength)); huge_cache_history.PrintI64("epochs", kEpochs); timeseries_.Iter( - [&](size_t offset, int64_t ts, const Extrema &e) { + [&](size_t offset, const Extrema& e) { auto m = huge_cache_history.CreateSubRegion("measurements"); m.PrintI64("epoch", offset); m.PrintI64("min_bytes", e.min.in_bytes()); @@ -88,7 +96,7 @@ void MinMaxTracker::PrintInPbtxt(PbtxtRegion *hpaa) const { } template -bool MinMaxTracker::Extrema::operator==(const Extrema &other) const { +bool MinMaxTracker::Extrema::operator==(const Extrema& other) const { return (other.max == max) && (other.min == min); } @@ -98,8 +106,8 @@ template class MinMaxTracker<600>; // The logic for actually allocating from the cache or backing, and keeping // the hit rates specified. -HugeRange HugeCache::DoGet(HugeLength n, bool *from_released) { - auto *node = Find(n); +HugeRange HugeCache::DoGet(HugeLength n, bool* from_released) { + auto* node = Find(n); if (!node) { misses_++; weighted_misses_ += n.raw_num(); @@ -131,19 +139,19 @@ void HugeCache::MaybeGrowCacheLimit(HugeLength missed) { // A "dip" being a case where usage shrinks, then increases back up // to previous levels (at least partially). // - // "brief" is "returns to normal usage in < kCacheTime." (In + // "brief" is "returns to normal usage in < cache_time_." (In // other words, we ideally want to be willing to cache memory for - // kCacheTime before expecting it to be used again--we are loose + // cache_time_ before expecting it to be used again--we are loose // on the timing..) // // The interesting part is finding those dips. // This is the downward slope: we lost some usage. (This in theory could - // be as much as 2 * kCacheTime old, which is fine.) - const HugeLength shrink = off_peak_tracker_.MaxOverTime(kCacheTime); + // be as much as 2 * cache_time_ old, which is fine.) + const HugeLength shrink = off_peak_tracker_.MaxOverTime(cache_time_); // This is the upward slope: we are coming back up. - const HugeLength grow = usage_ - usage_tracker_.MinOverTime(kCacheTime); + const HugeLength grow = usage_ - usage_tracker_.MinOverTime(cache_time_); // Ideally we now know that we dipped down by some amount, then came // up. 
Sadly our stats aren't quite good enough to guarantee things @@ -176,35 +184,26 @@ void HugeCache::IncUsage(HugeLength n) { usage_tracker_.Report(usage_); detailed_tracker_.Report(usage_); off_peak_tracker_.Report(NHugePages(0)); - if (size() + usage() > max_rss_) max_rss_ = size() + usage(); } void HugeCache::DecUsage(HugeLength n) { usage_ -= n; usage_tracker_.Report(usage_); detailed_tracker_.Report(usage_); - const HugeLength max = usage_tracker_.MaxOverTime(kCacheTime); - ASSERT(max >= usage_); + const HugeLength max = usage_tracker_.MaxOverTime(cache_time_); + TC_ASSERT_GE(max, usage_); const HugeLength off_peak = max - usage_; off_peak_tracker_.Report(off_peak); - if (size() + usage() > max_rss_) max_rss_ = size() + usage(); } -void HugeCache::UpdateSize(HugeLength size) { - size_tracker_.Report(size); - if (size > max_size_) max_size_ = size; - if (size + usage() > max_rss_) max_rss_ = size + usage(); - - // TODO(b/134691947): moving this inside the MinMaxTracker would save one call - // to clock_.now() but all MinMaxTrackers would track regret instead. - int64_t now = clock_.now(); - if (now > last_regret_update_) { - regret_ += size.raw_num() * (now - last_regret_update_); - last_regret_update_ = now; - } +void HugeCache::UpdateSize(HugeLength size) { size_tracker_.Report(size); } + +void HugeCache::UpdateStatsTracker() { + cachestats_tracker_.Report(GetSubreleaseStats()); + hugepage_release_stats_.reset(); } -HugeRange HugeCache::Get(HugeLength n, bool *from_released) { +HugeRange HugeCache::Get(HugeLength n, bool* from_released) { HugeRange r = DoGet(n, from_released); // failure to get a range should "never" "never" happen (VSS limits // or wildly incorrect allocation sizes only...) Don't deal with @@ -213,10 +212,11 @@ HugeRange HugeCache::Get(HugeLength n, bool *from_released) { const bool miss = r.valid() && *from_released; if (miss) MaybeGrowCacheLimit(n); + UpdateStatsTracker(); return r; } -void HugeCache::Release(HugeRange r) { +void HugeCache::Release(HugeRange r, bool demand_based_unback) { DecUsage(r.len()); cache_.Insert(r); @@ -226,28 +226,31 @@ void HugeCache::Release(HugeRange r) { } else { overflows_++; } - - // Shrink the limit, if we're going to do it, before we shrink to - // the max size. (This could reduce the number of regions we break - // in half to avoid overshrinking.) - if ((clock_.now() - last_limit_change_) > (cache_time_ticks_ * 2)) { - total_fast_unbacked_ += MaybeShrinkCacheLimit(); + // Performs a (quick) unback if the demand-based release is disabled. + if (!demand_based_unback) { + // Shrink the limit, if we're going to do it, before we shrink to + // the max size. (This could reduce the number of regions we break + // in half to avoid overshrinking.) + if ((clock_.now() - last_limit_change_) > (cache_time_ticks_ * 2)) { + total_fast_unbacked_ += MaybeShrinkCacheLimit(); + } + total_fast_unbacked_ += ShrinkCache(limit()); } - total_fast_unbacked_ += ShrinkCache(limit()); - UpdateSize(size()); + UpdateStatsTracker(); } void HugeCache::ReleaseUnbacked(HugeRange r) { DecUsage(r.len()); // No point in trying to cache it, just hand it back. 
allocator_->Release(r); + UpdateStatsTracker(); } HugeLength HugeCache::MaybeShrinkCacheLimit() { last_limit_change_ = clock_.now(); - const HugeLength min = size_tracker_.MinOverTime(kCacheTime * 2); + const HugeLength min = size_tracker_.MinOverTime(cache_time_ * 2); // If cache size has gotten down to at most 20% of max, we assume // we're close enough to the optimal size--we don't want to fiddle // too much/too often unless we have large gaps in usage. @@ -264,8 +267,8 @@ HugeLength HugeCache::ShrinkCache(HugeLength target) { HugeLength removed = NHugePages(0); while (size_ > target) { // Remove smallest-ish nodes, to avoid fragmentation where possible. - auto *node = Find(NHugePages(1)); - CHECK_CONDITION(node); + auto* node = Find(NHugePages(1)); + TC_CHECK_NE(node, nullptr); HugeRange r = node->range(); cache_.Remove(node); // Suppose we're 10 MiB over target but the smallest available node @@ -277,7 +280,7 @@ HugeLength HugeCache::ShrinkCache(HugeLength target) { if (r.len() > delta) { HugeRange to_remove, leftover; std::tie(to_remove, leftover) = Split(r, delta); - ASSERT(leftover.valid()); + TC_ASSERT(leftover.valid()); cache_.Insert(leftover); r = to_remove; } @@ -285,7 +288,13 @@ HugeLength HugeCache::ShrinkCache(HugeLength target) { size_ -= r.len(); // Note, actual unback implementation is temporarily dropping and // re-acquiring the page heap lock here. - unback_(r.start_addr(), r.byte_len()); + if (ABSL_PREDICT_FALSE(!unback_(r))) { + // We failed to release r. Retain it in the cache instead of returning it + // to the HugeAllocator. + size_ += r.len(); + cache_.Insert(r); + break; + } allocator_->Release(r); removed += r.len(); } @@ -302,36 +311,104 @@ HugeLength HugeCache::ReleaseCachedPages(HugeLength n) { const HugeLength target = n > size() ? NHugePages(0) : size() - n; released += ShrinkCache(target); } + UpdateSize(size()); + UpdateStatsTracker(); + total_periodic_unbacked_ += released; + return released; +} +HugeLength HugeCache::GetDesiredReleaseablePages( + HugeLength desired, SkipSubreleaseIntervals intervals) { + TC_CHECK(intervals.SkipSubreleaseEnabled()); + UpdateStatsTracker(); + HugeLength required_by_demand; + required_by_demand = HLFromPages(cachestats_tracker_.GetRecentDemand( + intervals.short_interval, intervals.long_interval, CapDemandInterval())); + + HugeLength current = usage() + size(); + if (required_by_demand != NHugePages(0)) { + HugeLength new_desired; + // We can only release if the current capacity is larger than the demand. + if (required_by_demand < current) { + new_desired = current - required_by_demand; + } + if (new_desired >= desired) { + return desired; + } + // Reports the amount of free hugepages that we didn't release due to this + // mechanism. As the initial release target is capped by the cache size, + // here we simply report the reduced amount. Note, only free pages in the + // smaller of the two (current and required_by_demand) are skipped, so we + // use that as the reporting peak. + HugeLength skipped = desired - new_desired; + cachestats_tracker_.ReportSkippedSubreleasePages( + skipped.in_pages(), + std::min(current.in_pages(), required_by_demand.in_pages())); + return new_desired; + } + return desired; +} + +HugeLength HugeCache::ReleaseCachedPagesByDemand( + HugeLength n, SkipSubreleaseIntervals intervals, bool hit_limit) { + // We get here when one of the three happened: A) hit limit, B) background + // release, or C) ReleaseMemoryToSystem(). 
+ HugeLength release_target = std::min(n, size()); + + // For all those three reasons, we want to release as much as possible to be + // efficient. However, we do not want to release a large number of hugepages + // at once because that may impact applications' performance. So we release a + // fraction of the cache. + if (size() > MinCacheLimit()) { + HugeLength increased_release_target = + std::min(HugeLength(kFractionToReleaseFromCache * size().raw_num()), + size() - MinCacheLimit()); + release_target = std::max(release_target, increased_release_target); + } + + if (release_target == NHugePages(0)) { + return NHugePages(0); + } + if (intervals.SkipSubreleaseEnabled() && !hit_limit) { + // This will reduce the target if the calculated (future) demand is higher + // than the current. In other words, we need to reserve some of the free + // hugepages to meet the future demand. It also makes sure we release the + // realized fragmentation. + release_target = GetDesiredReleaseablePages(release_target, intervals); + } + HugeLength released = ShrinkCache(size() - release_target); + hugepage_release_stats_.num_pages_subreleased += released.in_pages(); + hugepage_release_stats_.set_limit_hit(hit_limit); + if (hugepage_release_stats_.limit_hit()) { + hugepage_release_stats_.total_pages_subreleased_due_to_limit += + released.in_pages(); + } UpdateSize(size()); + UpdateStatsTracker(); total_periodic_unbacked_ += released; return released; } -void HugeCache::AddSpanStats(SmallSpanStats *small, LargeSpanStats *large, - PageAgeHistograms *ages) const { +void HugeCache::AddSpanStats(SmallSpanStats* small, + LargeSpanStats* large) const { static_assert(kPagesPerHugePage >= kMaxPages); - for (const HugeAddressMap::Node *node = cache_.first(); node != nullptr; + for (const HugeAddressMap::Node* node = cache_.first(); node != nullptr; node = node->next()) { HugeLength n = node->range().len(); if (large != nullptr) { large->spans++; large->normal_pages += n.in_pages(); } - - if (ages != nullptr) { - ages->RecordRange(n.in_pages(), false, node->when()); - } } } -HugeAddressMap::Node *HugeCache::Find(HugeLength n) { - HugeAddressMap::Node *curr = cache_.root(); +HugeAddressMap::Node* HugeCache::Find(HugeLength n) { + HugeAddressMap::Node* curr = cache_.root(); // invariant: curr != nullptr && curr->longest >= n // we favor smaller gaps and lower nodes and lower addresses, in that // order. The net effect is that we are neither a best-fit nor a // lowest-address allocator but vaguely close to both. 
- HugeAddressMap::Node *best = nullptr; + HugeAddressMap::Node* best = nullptr; while (curr && curr->longest() >= n) { if (curr->range().len() >= n) { if (!best || best->range().len() > curr->range().len()) { @@ -372,11 +449,11 @@ HugeAddressMap::Node *HugeCache::Find(HugeLength n) { return best; } -void HugeCache::Print(Printer *out) { - const int64_t millis = absl::ToInt64Milliseconds(kCacheTime); - out->printf( +void HugeCache::Print(Printer& out) { + const int64_t millis = absl::ToInt64Milliseconds(cache_time_); + out.printf( "HugeCache: contains unused, backed hugepage(s) " - "(kCacheTime = %lldms)\n", + "(cache_time = %lldms)\n", millis); // a / (a + b), avoiding division by zero auto safe_ratio = [](double a, double b) { @@ -388,45 +465,53 @@ void HugeCache::Print(Printer *out) { const double hit_rate = safe_ratio(hits_, misses_); const double overflow_rate = safe_ratio(overflows_, fills_); - out->printf( + out.printf( "HugeCache: %zu / %zu hugepages cached / cache limit " "(%.3f hit rate, %.3f overflow rate)\n", size_.raw_num(), limit().raw_num(), hit_rate, overflow_rate); - out->printf("HugeCache: %zu MiB fast unbacked, %zu MiB periodic\n", - total_fast_unbacked_.in_bytes() / 1024 / 1024, - total_periodic_unbacked_.in_bytes() / 1024 / 1024); + out.printf("HugeCache: %zu MiB fast unbacked, %zu MiB periodic\n", + total_fast_unbacked_.in_bytes() / 1024 / 1024, + total_periodic_unbacked_.in_bytes() / 1024 / 1024); UpdateSize(size()); - out->printf( - "HugeCache: %zu MiB*s cached since startup\n", - NHugePages(regret_).in_mib() / static_cast(clock_.freq())); usage_tracker_.Report(usage_); - const HugeLength usage_min = usage_tracker_.MinOverTime(kCacheTime); - const HugeLength usage_max = usage_tracker_.MaxOverTime(kCacheTime); - out->printf( + const HugeLength usage_min = usage_tracker_.MinOverTime(cache_time_); + const HugeLength usage_max = usage_tracker_.MaxOverTime(cache_time_); + out.printf( "HugeCache: recent usage range: %zu min - %zu curr - %zu max MiB\n", usage_min.in_mib(), usage_.in_mib(), usage_max.in_mib()); const HugeLength off_peak = usage_max - usage_; off_peak_tracker_.Report(off_peak); - const HugeLength off_peak_min = off_peak_tracker_.MinOverTime(kCacheTime); - const HugeLength off_peak_max = off_peak_tracker_.MaxOverTime(kCacheTime); - out->printf( + const HugeLength off_peak_min = off_peak_tracker_.MinOverTime(cache_time_); + const HugeLength off_peak_max = off_peak_tracker_.MaxOverTime(cache_time_); + out.printf( "HugeCache: recent offpeak range: %zu min - %zu curr - %zu max MiB\n", off_peak_min.in_mib(), off_peak.in_mib(), off_peak_max.in_mib()); - const HugeLength cache_min = size_tracker_.MinOverTime(kCacheTime); - const HugeLength cache_max = size_tracker_.MaxOverTime(kCacheTime); - out->printf( + const HugeLength cache_min = size_tracker_.MinOverTime(cache_time_); + const HugeLength cache_max = size_tracker_.MaxOverTime(cache_time_); + out.printf( "HugeCache: recent cache range: %zu min - %zu curr - %zu max MiB\n", cache_min.in_mib(), size_.in_mib(), cache_max.in_mib()); detailed_tracker_.Print(out); + + // Release stats tracked by the demand-based release mechanism. 
+ out.printf("\n"); + out.printf( + "HugeCache: Since startup, %zu hugepages released, " + "(%zu hugepages due to reaching tcmalloc limit)\n", + HLFromPages(hugepage_release_stats_.total_pages_subreleased).raw_num(), + HLFromPages(hugepage_release_stats_.total_pages_subreleased_due_to_limit) + .raw_num()); + + cachestats_tracker_.Print(out, "HugeCache"); } -void HugeCache::PrintInPbtxt(PbtxtRegion *hpaa) { - hpaa->PrintI64("huge_cache_time_const", - absl::ToInt64Milliseconds(kCacheTime)); +void HugeCache::PrintInPbtxt(PbtxtRegion& hpaa) { + hpaa.PrintI64("huge_cache_time_const", + absl::ToInt64Milliseconds(cache_time_)); // a / (a + b), avoiding division by zero auto safe_ratio = [](double a, double b) { @@ -439,28 +524,24 @@ void HugeCache::PrintInPbtxt(PbtxtRegion *hpaa) { const double overflow_rate = safe_ratio(overflows_, fills_); // number of bytes in HugeCache - hpaa->PrintI64("cached_huge_page_bytes", size_.in_bytes()); + hpaa.PrintI64("cached_huge_page_bytes", size_.in_bytes()); // max allowed bytes in HugeCache - hpaa->PrintI64("max_cached_huge_page_bytes", limit().in_bytes()); + hpaa.PrintI64("max_cached_huge_page_bytes", limit().in_bytes()); // lifetime cache hit rate - hpaa->PrintDouble("huge_cache_hit_rate", hit_rate); + hpaa.PrintDouble("huge_cache_hit_rate", hit_rate); // lifetime cache overflow rate - hpaa->PrintDouble("huge_cache_overflow_rate", overflow_rate); + hpaa.PrintDouble("huge_cache_overflow_rate", overflow_rate); // bytes eagerly unbacked by HugeCache - hpaa->PrintI64("fast_unbacked_bytes", total_fast_unbacked_.in_bytes()); + hpaa.PrintI64("fast_unbacked_bytes", total_fast_unbacked_.in_bytes()); // bytes unbacked by periodic releaser thread - hpaa->PrintI64("periodic_unbacked_bytes", - total_periodic_unbacked_.in_bytes()); + hpaa.PrintI64("periodic_unbacked_bytes", total_periodic_unbacked_.in_bytes()); UpdateSize(size()); - // memory cached since startup (in MiB*s) - hpaa->PrintI64("huge_cache_regret", NHugePages(regret_).in_mib() / - static_cast(clock_.freq())); usage_tracker_.Report(usage_); - const HugeLength usage_min = usage_tracker_.MinOverTime(kCacheTime); - const HugeLength usage_max = usage_tracker_.MaxOverTime(kCacheTime); + const HugeLength usage_min = usage_tracker_.MinOverTime(cache_time_); + const HugeLength usage_max = usage_tracker_.MaxOverTime(cache_time_); { - auto usage_stats = hpaa->CreateSubRegion("huge_cache_usage_stats"); + auto usage_stats = hpaa.CreateSubRegion("huge_cache_usage_stats"); usage_stats.PrintI64("min_bytes", usage_min.in_bytes()); usage_stats.PrintI64("current_bytes", usage_.in_bytes()); usage_stats.PrintI64("max_bytes", usage_max.in_bytes()); @@ -468,25 +549,35 @@ void HugeCache::PrintInPbtxt(PbtxtRegion *hpaa) { const HugeLength off_peak = usage_max - usage_; off_peak_tracker_.Report(off_peak); - const HugeLength off_peak_min = off_peak_tracker_.MinOverTime(kCacheTime); - const HugeLength off_peak_max = off_peak_tracker_.MaxOverTime(kCacheTime); + const HugeLength off_peak_min = off_peak_tracker_.MinOverTime(cache_time_); + const HugeLength off_peak_max = off_peak_tracker_.MaxOverTime(cache_time_); { - auto usage_stats = hpaa->CreateSubRegion("huge_cache_offpeak_stats"); + auto usage_stats = hpaa.CreateSubRegion("huge_cache_offpeak_stats"); usage_stats.PrintI64("min_bytes", off_peak_min.in_bytes()); usage_stats.PrintI64("current_bytes", off_peak.in_bytes()); usage_stats.PrintI64("max_bytes", off_peak_max.in_bytes()); } - const HugeLength cache_min = size_tracker_.MinOverTime(kCacheTime); - const HugeLength cache_max = 
size_tracker_.MaxOverTime(kCacheTime); + const HugeLength cache_min = size_tracker_.MinOverTime(cache_time_); + const HugeLength cache_max = size_tracker_.MaxOverTime(cache_time_); { - auto usage_stats = hpaa->CreateSubRegion("huge_cache_cache_stats"); + auto usage_stats = hpaa.CreateSubRegion("huge_cache_cache_stats"); usage_stats.PrintI64("min_bytes", cache_min.in_bytes()); usage_stats.PrintI64("current_bytes", size_.in_bytes()); usage_stats.PrintI64("max_bytes", cache_max.in_bytes()); } - + hpaa.PrintI64( + "cache_num_hugepages_released", + HLFromPages(hugepage_release_stats_.total_pages_subreleased).raw_num()); + hpaa.PrintI64( + "cache_num_hugepages_released_due_to_limit", + HLFromPages(hugepage_release_stats_.total_pages_subreleased_due_to_limit) + .raw_num()); detailed_tracker_.PrintInPbtxt(hpaa); + cachestats_tracker_.PrintTimeseriesStatsInPbtxt(hpaa, + "cache_stats_timeseries"); + cachestats_tracker_.PrintSubreleaseStatsInPbtxt(hpaa, + "cache_skipped_subrelease"); } } // namespace tcmalloc_internal diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_cache.h b/contrib/libs/tcmalloc/tcmalloc/huge_cache.h index 2ffda26cb227..e795d9df12ed 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_cache.h +++ b/contrib/libs/tcmalloc/tcmalloc/huge_cache.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -22,22 +23,33 @@ #include #include +#include "absl/base/attributes.h" +#include "absl/base/internal/cycleclock.h" #include "absl/time/time.h" -#include "tcmalloc/common.h" -#include "tcmalloc/experiment.h" -#include "tcmalloc/experiment_config.h" +#include "tcmalloc/huge_address_map.h" #include "tcmalloc/huge_allocator.h" +#include "tcmalloc/huge_page_subrelease.h" #include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/clock.h" #include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" #include "tcmalloc/internal/timeseries_tracker.h" +#include "tcmalloc/metadata_allocator.h" #include "tcmalloc/stats.h" GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { -typedef void (*MemoryModifyFunction)(void *start, size_t len); +class MemoryModifyFunction { + public: + virtual ~MemoryModifyFunction() = default; + + [[nodiscard]] virtual bool operator()(Range r) = 0; + [[nodiscard]] bool operator()(HugeRange r) { + return (*this)(Range{r.start().first_page(), r.len().in_pages()}); + } +}; // Track the extreme values of a HugeLength value over the past // kWindow (time ranges approximate.) @@ -48,8 +60,8 @@ class MinMaxTracker { : kEpochLength(w / kEpochs), timeseries_(clock, w) {} void Report(HugeLength val); - void Print(Printer *out) const; - void PrintInPbtxt(PbtxtRegion *hpaa) const; + void Print(Printer& out) const; + void PrintInPbtxt(PbtxtRegion& hpaa) const; // If t < kEpochLength, these functions return statistics for last epoch. The // granularity is kEpochLength (rounded up). 
@@ -78,7 +90,7 @@ class MinMaxTracker { bool empty() const { return (*this == Nil()); } - bool operator==(const Extrema &other) const; + bool operator==(const Extrema& other) const; }; TimeSeriesTracker timeseries_; @@ -88,60 +100,100 @@ class MinMaxTracker { extern template class MinMaxTracker<>; extern template class MinMaxTracker<600>; -template -constexpr HugeLength MinMaxTracker::kMaxVal; - class HugeCache { public: // For use in production - HugeCache(HugeAllocator *allocator, MetadataAllocFunction meta_allocate, - MemoryModifyFunction unback) - : HugeCache(allocator, meta_allocate, unback, + HugeCache(HugeAllocator* allocator, + MetadataAllocator& meta_allocate ABSL_ATTRIBUTE_LIFETIME_BOUND, + MemoryModifyFunction& unback ABSL_ATTRIBUTE_LIFETIME_BOUND, + absl::Duration cache_time) + : HugeCache(allocator, meta_allocate, unback, cache_time, Clock{.now = absl::base_internal::CycleClock::Now, .freq = absl::base_internal::CycleClock::Frequency}) {} - // For testing with mock clock - HugeCache(HugeAllocator *allocator, MetadataAllocFunction meta_allocate, - MemoryModifyFunction unback, Clock clock) + // For testing with mock clock. + // + // cache_time * 2 (default cache_time = 1s) looks like an arbitrary window; it + // mostly is. + // + // Suffice to say that the below code (see MaybeGrowCacheLimit) + // tries to make sure the cache is sized to protect a working set + // that ebbs for 1 second, as a reasonable heuristic. This means it + // needs 1s of historical data to examine. + // + // Why 2s duration, then? Two reasons: + // + // - (minor) granularity of epoch boundaries make me want to err towards + // keeping a bit too much data over a bit too little. + // + // - (major) hysteresis: in ReleaseCachedPages we try to detect + // mistaken cache expansion and reverse it. I hope that using a + // longer timescale than our expansion will increase stability + // here: I will take some caches staying a bit too big over caches + // oscillating back and forth between two size estimates, so we + // require stronger evidence (longer time) to reverse an expansion + // than to make it. + // + // We also tried other algorithms, but this one is simple and suffices to + // capture the empirical dynamics we've seen. See "Beyond Malloc + // Efficiency..." (https://research.google/pubs/pub50370/) for more + // information. + HugeCache(HugeAllocator* allocator, + MetadataAllocator& meta_allocate ABSL_ATTRIBUTE_LIFETIME_BOUND, + MemoryModifyFunction& unback ABSL_ATTRIBUTE_LIFETIME_BOUND, + absl::Duration cache_time, Clock clock) : allocator_(allocator), cache_(meta_allocate), clock_(clock), - cache_time_ticks_(clock_.freq() * absl::ToDoubleSeconds(kCacheTime)), + cache_time_ticks_(clock_.freq() * absl::ToDoubleSeconds(cache_time)), nanoseconds_per_tick_(absl::ToInt64Nanoseconds(absl::Seconds(1)) / clock_.freq()), last_limit_change_(clock.now()), - last_regret_update_(clock.now()), detailed_tracker_(clock, absl::Minutes(10)), - usage_tracker_(clock, kCacheTime * 2), - off_peak_tracker_(clock, kCacheTime * 2), - size_tracker_(clock, kCacheTime * 2), - unback_(unback) {} + usage_tracker_(clock, cache_time * 2), + off_peak_tracker_(clock, cache_time * 2), + size_tracker_(clock, cache_time * 2), + unback_(unback), + cache_time_(cache_time), + cachestats_tracker_(clock, absl::Minutes(10), absl::Minutes(5)) {} // Allocate a usable set of contiguous hugepages. Try to give out // memory that's currently backed from the kernel if we have it available. 
// *from_released is set to false if the return range is already backed; // otherwise, it is set to true (and the caller should back it.) - HugeRange Get(HugeLength n, bool *from_released); + HugeRange Get(HugeLength n, bool* from_released); // Deallocate (assumed to be backed by the kernel.) - void Release(HugeRange r); + // If demand_based_unback is set, HugeCache will not try to shrink the cache + // here but unback in ReleaseCachedPagesByDemand() instead. The flag is + // designed to separate the normal (quick) unbacking from the demand-based + // unbacking. + void Release(HugeRange r, bool demand_based_unback); + // As Release, but the range is assumed to _not_ be backed. void ReleaseUnbacked(HugeRange r); // Release to the system up to hugepages of cache contents; returns - // the number of hugepages released. + // the number of hugepages released. It also triggers cache shrinking if + // the cache becomes too big. HugeLength ReleaseCachedPages(HugeLength n); + // Release to the system up to hugepages of cache contents if recent + // demand allows; returns the number of hugepages released. The demand history + // is captured using the provided intervals, and the feature is disabled if + // either hit_limit is true or the intervals are not set. It also triggers + // cache shrinking if it has more than what demand needs. + HugeLength ReleaseCachedPagesByDemand(HugeLength n, + SkipSubreleaseIntervals intervals, + bool hit_limit); + // Backed memory available. HugeLength size() const { return size_; } - // Total memory cached (in HugeLength * nanoseconds) - uint64_t regret() const { return regret_ * nanoseconds_per_tick_; } // Current limit for how much backed memory we'll cache. HugeLength limit() const { return limit_; } // Sum total of unreleased requests. HugeLength usage() const { return usage_; } - void AddSpanStats(SmallSpanStats *small, LargeSpanStats *large, - PageAgeHistograms *ages) const; + void AddSpanStats(SmallSpanStats* small, LargeSpanStats* large) const; BackingStats stats() const { BackingStats s; @@ -151,11 +203,11 @@ class HugeCache { return s; } - void Print(Printer *out); - void PrintInPbtxt(PbtxtRegion *hpaa); + void Print(Printer& out); + void PrintInPbtxt(PbtxtRegion& hpaa); private: - HugeAllocator *allocator_; + HugeAllocator* allocator_; // We just cache-missed a request for pages; // should we grow? @@ -168,15 +220,20 @@ class HugeCache { // returning the number removed. HugeLength ShrinkCache(HugeLength target); - HugeRange DoGet(HugeLength n, bool *from_released); + // Calculates the desired releasing target according to the recent demand + // history, returns the updated (reduced) target if releasing the desired + // amount will cause possible future misses. + HugeLength GetDesiredReleaseablePages(HugeLength desired, + SkipSubreleaseIntervals intervals); - HugeAddressMap::Node *Find(HugeLength n); + HugeRange DoGet(HugeLength n, bool* from_released); + + HugeAddressMap::Node* Find(HugeLength n); HugeAddressMap cache_; HugeLength size_{NHugePages(0)}; HugeLength limit_{NHugePages(10)}; - const absl::Duration kCacheTime = absl::Seconds(1); size_t hits_{0}; size_t misses_{0}; @@ -203,8 +260,6 @@ class HugeCache { // However, we can go below it if we haven't used that much for 30 seconds. 
HugeLength MinCacheLimit() const { return NHugePages(10); } - uint64_t regret_{0}; // overflows if we cache 585 hugepages for 1 year - int64_t last_regret_update_; void UpdateSize(HugeLength size); MinMaxTracker<600> detailed_tracker_; @@ -212,13 +267,37 @@ class HugeCache { MinMaxTracker<> usage_tracker_; MinMaxTracker<> off_peak_tracker_; MinMaxTracker<> size_tracker_; - HugeLength max_size_{NHugePages(0)}; - HugeLength max_rss_{NHugePages(0)}; HugeLength total_fast_unbacked_{NHugePages(0)}; HugeLength total_periodic_unbacked_{NHugePages(0)}; - MemoryModifyFunction unback_; + MemoryModifyFunction& unback_; + absl::Duration cache_time_; + + // Interval used for capping demand calculated for demand-based release: + // making sure that it is not more than the maximum demand recorded in that + // period. When the cap applies, we also release the minimum amount of free + // hugepages that we have been consistently holding at anytime for 5 minutes + // (realized fragmentation). + absl::Duration CapDemandInterval() const { return absl::Minutes(5); } + + // The fraction of the cache that we are happy to return at a time. We use + // this to efficiently reduce the fragmenation. + static constexpr double kFractionToReleaseFromCache = 0.2; + + using StatsTrackerType = SubreleaseStatsTracker<600>; + StatsTrackerType::SubreleaseStats GetSubreleaseStats() const { + StatsTrackerType::SubreleaseStats stats; + stats.num_pages = usage().in_pages(); + stats.free_pages = size().in_pages(); + stats.huge_pages[StatsTrackerType::kRegular] = usage() + size(); + stats.num_pages_subreleased = hugepage_release_stats_.num_pages_subreleased; + return stats; + } + // Tracks recent demand history and demand-based release stats. + void UpdateStatsTracker(); + StatsTrackerType cachestats_tracker_; + SubreleaseStats hugepage_release_stats_; }; } // namespace tcmalloc_internal diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_cache_test.cc b/contrib/libs/tcmalloc/tcmalloc/huge_cache_test.cc index 2699b44303f3..b07451f3e8c5 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_cache_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/huge_cache_test.cc @@ -14,142 +14,99 @@ #include "tcmalloc/huge_cache.h" +#include +#include #include -#include +#include +#include #include #include +#include #include #include #include #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "absl/base/internal/cycleclock.h" #include "absl/memory/memory.h" #include "absl/random/random.h" #include "absl/strings/str_cat.h" -#include "absl/time/clock.h" #include "absl/time/time.h" +#include "tcmalloc/huge_page_subrelease.h" #include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/clock.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" +#include "tcmalloc/mock_metadata_allocator.h" +#include "tcmalloc/mock_virtual_allocator.h" +#include "tcmalloc/pages.h" #include "tcmalloc/stats.h" namespace tcmalloc { namespace tcmalloc_internal { namespace { -class HugeCacheTest : public testing::Test { +using testing::Return; + +class HugeCacheTest + : public testing::TestWithParam> { private: // Allow tests to modify the clock used by the cache. 
- static int64_t clock_offset_; - static double GetClockFrequency() { - return absl::base_internal::CycleClock::Frequency(); - } - static int64_t GetClock() { - return absl::base_internal::CycleClock::Now() + - clock_offset_ * GetClockFrequency() / - absl::ToDoubleNanoseconds(absl::Seconds(1)); + static int64_t clock_; + + static int64_t FakeClock() { return clock_; } + + static double GetFakeClockFrequency() { + return absl::ToDoubleNanoseconds(absl::Seconds(2)); } - // Use a tiny fraction of actual size so we can test aggressively. - static void* AllocateFake(size_t bytes, size_t* actual, size_t align) { - if (bytes % kHugePageSize != 0) { - Crash(kCrash, __FILE__, __LINE__, "not aligned", bytes, kHugePageSize); - } - if (align % kHugePageSize != 0) { - Crash(kCrash, __FILE__, __LINE__, "not aligned", align, kHugePageSize); - } - *actual = bytes; - // we'll actually provide hidden backing, one word per hugepage. - bytes /= kHugePageSize; - align /= kHugePageSize; - size_t index = backing.size(); - if (index % align != 0) { - index += (align - (index & align)); - } - backing.resize(index + bytes); - void* ptr = reinterpret_cast(index * kHugePageSize); - return ptr; - } - // This isn't super good form but we'll never have more than one HAT - // extant at once. - static std::vector backing; - - // We use actual malloc for metadata allocations, but we track them so they - // can be deleted. (TODO make this an arena if we care, which I doubt) - static void* MallocMetadata(size_t size) { - metadata_bytes += size; - void* ptr = calloc(size, 1); - metadata_allocs.push_back(ptr); - return ptr; - } - static std::vector metadata_allocs; - static size_t metadata_bytes; - - // This is wordy, but necessary for mocking: - class BackingInterface { - public: - virtual void Unback(void* p, size_t len) = 0; - virtual ~BackingInterface() {} - }; + static void ResetClock() { clock_ = 1234; } - class MockBackingInterface : public BackingInterface { + class MockBackingInterface : public MemoryModifyFunction { public: - MOCK_METHOD2(Unback, void(void* p, size_t len)); - }; + MOCK_METHOD(bool, Unback, (PageId p, Length len), ()); - static void MockUnback(void* p, size_t len) { mock_->Unback(p, len); } + bool operator()(Range r) override { return Unback(r.p, r.n); } + }; protected: - static std::unique_ptr> mock_; + testing::NiceMock mock_unback_; HugeCacheTest() { // We don't use the first few bytes, because things might get weird // given zero pointers. 
- backing.resize(1024); - metadata_bytes = 0; - mock_ = absl::make_unique>(); + vm_allocator_.backing_.resize(1024); + ResetClock(); } - ~HugeCacheTest() override { - for (void* p : metadata_allocs) { - free(p); - } - metadata_allocs.clear(); - backing.clear(); - mock_.reset(nullptr); - - clock_offset_ = 0; + static void Advance(absl::Duration d) { + clock_ += absl::ToDoubleSeconds(d) * GetFakeClockFrequency(); } - void Advance(absl::Duration d) { - clock_offset_ += absl::ToInt64Nanoseconds(d); - } + absl::Duration GetCacheTime() { return std::get<0>(GetParam()); } + bool GetDemandBasedRelease() { return std::get<1>(GetParam()); } + void Release(HugeRange r) { cache_.Release(r, GetDemandBasedRelease()); } - HugeAllocator alloc_{AllocateFake, MallocMetadata}; - HugeCache cache_{&alloc_, MallocMetadata, MockUnback, - Clock{.now = GetClock, .freq = GetClockFrequency}}; + FakeVirtualAllocator vm_allocator_; + FakeMetadataAllocator metadata_allocator_; + HugeAllocator alloc_{vm_allocator_, metadata_allocator_}; + HugeCache cache_{&alloc_, metadata_allocator_, mock_unback_, GetCacheTime(), + Clock{.now = FakeClock, .freq = GetFakeClockFrequency}}; }; -std::vector HugeCacheTest::backing; -std::vector HugeCacheTest::metadata_allocs; -size_t HugeCacheTest::metadata_bytes; -std::unique_ptr> - HugeCacheTest::mock_; +int64_t HugeCacheTest::clock_{1234}; -int64_t HugeCacheTest::clock_offset_ = 0; - -TEST_F(HugeCacheTest, Basic) { +TEST_P(HugeCacheTest, Basic) { bool from; for (int i = 0; i < 100 * 1000; ++i) { - cache_.Release(cache_.Get(NHugePages(1), &from)); + Release(cache_.Get(NHugePages(1), &from)); } } -TEST_F(HugeCacheTest, Backing) { +TEST_P(HugeCacheTest, Backing) { bool from; - cache_.Release(cache_.Get(NHugePages(4), &from)); + Release(cache_.Get(NHugePages(4), &from)); EXPECT_TRUE(from); // We should be able to split up a large range... HugeRange r1 = cache_.Get(NHugePages(3), &from); @@ -158,28 +115,28 @@ TEST_F(HugeCacheTest, Backing) { EXPECT_FALSE(from); // and then merge it back. 
- cache_.Release(r1); - cache_.Release(r2); + Release(r1); + Release(r2); HugeRange r = cache_.Get(NHugePages(4), &from); EXPECT_FALSE(from); - cache_.Release(r); + Release(r); } -TEST_F(HugeCacheTest, Release) { +TEST_P(HugeCacheTest, Release) { bool from; const HugeLength one = NHugePages(1); - cache_.Release(cache_.Get(NHugePages(5), &from)); + Release(cache_.Get(NHugePages(5), &from)); HugeRange r1, r2, r3, r4, r5; r1 = cache_.Get(one, &from); r2 = cache_.Get(one, &from); r3 = cache_.Get(one, &from); r4 = cache_.Get(one, &from); r5 = cache_.Get(one, &from); - cache_.Release(r1); - cache_.Release(r2); - cache_.Release(r3); - cache_.Release(r4); - cache_.Release(r5); + Release(r1); + Release(r2); + Release(r3); + Release(r4); + Release(r5); r1 = cache_.Get(one, &from); ASSERT_EQ(false, from); @@ -191,38 +148,68 @@ TEST_F(HugeCacheTest, Release) { ASSERT_EQ(false, from); r5 = cache_.Get(one, &from); ASSERT_EQ(false, from); - cache_.Release(r1); - cache_.Release(r2); - cache_.Release(r5); + Release(r1); + Release(r2); + Release(r5); ASSERT_EQ(NHugePages(3), cache_.size()); - EXPECT_CALL(*mock_, Unback(r5.start_addr(), kHugePageSize * 1)).Times(1); + EXPECT_CALL(mock_unback_, Unback(r5.start().first_page(), kPagesPerHugePage)) + .WillOnce(Return(true)); EXPECT_EQ(NHugePages(1), cache_.ReleaseCachedPages(NHugePages(1))); - cache_.Release(r3); - cache_.Release(r4); + Release(r3); + Release(r4); - EXPECT_CALL(*mock_, Unback(r1.start_addr(), 4 * kHugePageSize)).Times(1); + EXPECT_CALL(mock_unback_, + Unback(r1.start().first_page(), 4 * kPagesPerHugePage)) + .WillOnce(Return(true)); EXPECT_EQ(NHugePages(4), cache_.ReleaseCachedPages(NHugePages(200))); } -TEST_F(HugeCacheTest, Regret) { +TEST_P(HugeCacheTest, ReleaseFailure) { bool from; - HugeRange r = cache_.Get(NHugePages(20), &from); - cache_.Release(r); - HugeLength cached = cache_.size(); - absl::Duration d = absl::Seconds(20); - Advance(d); - char buf[512]; - Printer out(buf, 512); - cache_.Print(&out); // To update the regret - uint64_t expected_regret = absl::ToInt64Nanoseconds(d) * cached.raw_num(); - // Not exactly accurate since the mock clock advances with real time, and - // when we measure regret will be updated. 
- EXPECT_NEAR(cache_.regret(), expected_regret, expected_regret / 1000); - EXPECT_GE(cache_.regret(), expected_regret); + const HugeLength one = NHugePages(1); + Release(cache_.Get(NHugePages(5), &from)); + HugeRange r1, r2, r3, r4, r5; + r1 = cache_.Get(one, &from); + r2 = cache_.Get(one, &from); + r3 = cache_.Get(one, &from); + r4 = cache_.Get(one, &from); + r5 = cache_.Get(one, &from); + Release(r1); + Release(r2); + Release(r3); + Release(r4); + Release(r5); + + r1 = cache_.Get(one, &from); + ASSERT_EQ(false, from); + r2 = cache_.Get(one, &from); + ASSERT_EQ(false, from); + r3 = cache_.Get(one, &from); + ASSERT_EQ(false, from); + r4 = cache_.Get(one, &from); + ASSERT_EQ(false, from); + r5 = cache_.Get(one, &from); + ASSERT_EQ(false, from); + Release(r1); + Release(r2); + Release(r5); + + ASSERT_EQ(NHugePages(3), cache_.size()); + EXPECT_CALL(mock_unback_, + Unback(r5.start().first_page(), 1 * kPagesPerHugePage)) + .WillOnce(Return(false)); + EXPECT_EQ(NHugePages(0), cache_.ReleaseCachedPages(NHugePages(1))); + Release(r3); + Release(r4); + + EXPECT_CALL(mock_unback_, + Unback(r1.start().first_page(), 5 * kPagesPerHugePage)) + .WillOnce(Return(false)); + EXPECT_EQ(NHugePages(0), cache_.ReleaseCachedPages(NHugePages(200))); } -TEST_F(HugeCacheTest, Stats) { +TEST_P(HugeCacheTest, Stats) { bool from; HugeRange r = cache_.Get(NHugePages(1 + 1 + 2 + 1 + 3), &from); HugeRange r1, r2, r3, spacer1, spacer2; @@ -230,9 +217,9 @@ TEST_F(HugeCacheTest, Stats) { std::tie(spacer1, r2) = Split(spacer1, NHugePages(1)); std::tie(r2, spacer2) = Split(r2, NHugePages(2)); std::tie(spacer2, r3) = Split(spacer2, NHugePages(1)); - cache_.Release(r1); - cache_.Release(r2); - cache_.Release(r3); + Release(r1); + Release(r2); + Release(r3); ASSERT_EQ(NHugePages(6), cache_.size()); r1 = cache_.Get(NHugePages(1), &from); @@ -244,55 +231,51 @@ TEST_F(HugeCacheTest, Stats) { struct Helper { static void Stat(const HugeCache& cache, size_t* spans, - Length* pages_backed, Length* pages_unbacked, - double* avg_age) { - PageAgeHistograms ages(absl::base_internal::CycleClock::Now()); + Length* pages_backed, Length* pages_unbacked) { LargeSpanStats large; - cache.AddSpanStats(nullptr, &large, &ages); + cache.AddSpanStats(nullptr, &large); - const PageAgeHistograms::Histogram* hist = ages.GetTotalHistogram(false); *spans = large.spans; *pages_backed = large.normal_pages; *pages_unbacked = large.returned_pages; - *avg_age = hist->avg_age(); } }; - double avg_age; size_t spans; Length pages_backed; Length pages_unbacked; - cache_.Release(r1); - absl::SleepFor(absl::Microseconds(5000)); - Helper::Stat(cache_, &spans, &pages_backed, &pages_unbacked, &avg_age); + Release(r1); + Helper::Stat(cache_, &spans, &pages_backed, &pages_unbacked); EXPECT_EQ(Length(0), pages_unbacked); EXPECT_EQ(1, spans); EXPECT_EQ(NHugePages(1).in_pages(), pages_backed); - EXPECT_LE(0.005, avg_age); - cache_.Release(r2); - absl::SleepFor(absl::Microseconds(2500)); - Helper::Stat(cache_, &spans, &pages_backed, &pages_unbacked, &avg_age); + Release(r2); + Helper::Stat(cache_, &spans, &pages_backed, &pages_unbacked); EXPECT_EQ(Length(0), pages_unbacked); EXPECT_EQ(2, spans); EXPECT_EQ(NHugePages(3).in_pages(), pages_backed); - EXPECT_LE((0.0075 * 1 + 0.0025 * 2) / (1 + 2), avg_age); - cache_.Release(r3); - absl::SleepFor(absl::Microseconds(1250)); - Helper::Stat(cache_, &spans, &pages_backed, &pages_unbacked, &avg_age); + Release(r3); + Helper::Stat(cache_, &spans, &pages_backed, &pages_unbacked); EXPECT_EQ(Length(0), pages_unbacked); EXPECT_EQ(3, spans); 
EXPECT_EQ(NHugePages(6).in_pages(), pages_backed); - EXPECT_LE((0.00875 * 1 + 0.00375 * 2 + 0.00125 * 3) / (1 + 2 + 3), avg_age); } static double Frac(HugeLength num, HugeLength denom) { return static_cast(num.raw_num()) / denom.raw_num(); } -TEST_F(HugeCacheTest, Growth) { +// Tests that the cache can grow to fit a working set. The two cache shrinking +// mechanisms, demand-based release and limit-based release, use two different +// paths to shrink the cache (ReleaseCachedPagesByDemand vs. Release). We +// test both paths here. +TEST_P(HugeCacheTest, Growth) { + EXPECT_CALL(mock_unback_, Unback(testing::_, testing::_)) + .WillRepeatedly(Return(true)); + bool released; absl::BitGen rng; // fragmentation is a bit of a challenge @@ -306,13 +289,20 @@ TEST_F(HugeCacheTest, Growth) { } for (auto r : drop) { - cache_.Release(r); + Release(r); } // See the TODO in HugeCache::MaybeGrowCache; without this delay, // the above fragmentation plays merry havoc with our instrumentation. Advance(absl::Seconds(30)); - + // Requests a best-effort demand-based release to shrink the cache. + if (GetDemandBasedRelease()) { + cache_.ReleaseCachedPagesByDemand( + cache_.size(), + SkipSubreleaseIntervals{.short_interval = absl::Seconds(10), + .long_interval = absl::Seconds(10)}, + /*hit_limit=*/false); + } // Test that our cache can grow to fit a working set. HugeLength hot_set_sizes[] = {NHugePages(5), NHugePages(10), NHugePages(100), NHugePages(10000)}; @@ -334,7 +324,17 @@ TEST_F(HugeCacheTest, Growth) { if (released) needed_backing += l; } for (auto r : items) { - cache_.Release(r); + Release(r); + } + // Requests a demand-based release. The target will increase to + // kFractionToReleaseFromCache of the cache, and that is enough to trim + // the fragmentation. + if (GetDemandBasedRelease()) { + cache_.ReleaseCachedPagesByDemand( + NHugePages(0), + SkipSubreleaseIntervals{.short_interval = absl::Seconds(1), + .long_interval = absl::Seconds(1)}, + /*hit_limit=*/false); } return {needed_backing, got}; }; @@ -363,26 +363,35 @@ TEST_F(HugeCacheTest, Growth) { // approximately, given the randomized sizing... const double ratio = Frac(needed_backing, total); - EXPECT_LE(ratio, 0.2); + EXPECT_LE(ratio, 0.3); } } // If we repeatedly grow and shrink, but do so very slowly, we should *not* // cache the large variation. -TEST_F(HugeCacheTest, SlowGrowthUncached) { +TEST_P(HugeCacheTest, SlowGrowthUncached) { + // This test expects the cache to stay small when using unbacking with + // Release(). Hence we skip it when demand-based release is enabled. + if (GetDemandBasedRelease()) { + GTEST_SKIP(); + } + EXPECT_CALL(mock_unback_, Unback(testing::_, testing::_)) + .WillRepeatedly(Return(true)); + absl::Duration cache_time = GetCacheTime(); + absl::BitGen rng; std::uniform_int_distribution sizes(1, 10); for (int i = 0; i < 20; ++i) { std::vector rs; for (int j = 0; j < 20; ++j) { - Advance(absl::Milliseconds(600)); + Advance(cache_time); bool released; rs.push_back(cache_.Get(NHugePages(sizes(rng)), &released)); } HugeLength max_cached = NHugePages(0); for (auto r : rs) { - Advance(absl::Milliseconds(600)); - cache_.Release(r); + Advance(cache_time); + Release(r); max_cached = std::max(max_cached, cache_.size()); } EXPECT_GE(NHugePages(10), max_cached); @@ -390,7 +399,15 @@ TEST_F(HugeCacheTest, SlowGrowthUncached) { } // If very rarely we have a huge increase in usage, it shouldn't be cached. 
-TEST_F(HugeCacheTest, SpikesUncached) { +TEST_P(HugeCacheTest, SpikesUncached) { + // This test expects the cache to stay small when using unbacking with + // Release(). Hence we skip it when demand-based release is enabled. + if (GetDemandBasedRelease()) { + GTEST_SKIP(); + } + EXPECT_CALL(mock_unback_, Unback(testing::_, testing::_)) + .WillRepeatedly(Return(true)); + absl::Duration cache_time = GetCacheTime(); absl::BitGen rng; std::uniform_int_distribution sizes(1, 10); for (int i = 0; i < 20; ++i) { @@ -401,18 +418,50 @@ TEST_F(HugeCacheTest, SpikesUncached) { } HugeLength max_cached = NHugePages(0); for (auto r : rs) { - cache_.Release(r); + Release(r); max_cached = std::max(max_cached, cache_.size()); } EXPECT_GE(NHugePages(10), max_cached); - Advance(absl::Seconds(30)); + Advance(10 * cache_time); + } +} + +// If we allocate a spike occasionally but having demand-based release enabled, +// all freed hugepages will be cached even though the cache limit is low. This +// is because the cache shrinking mechanism in Release() is bypassed when +// demand-based release is enabled. +TEST_P(HugeCacheTest, SpikesCachedNoUnback) { + // This test expects no cache shirking in Release(). Hence we skip it when + // demand-based release is disabled. + if (!GetDemandBasedRelease()) { + GTEST_SKIP(); + } + absl::Duration cache_time = GetCacheTime(); + for (int i = 0; i < 20; ++i) { + std::vector rs; + for (int j = 0; j < 200; ++j) { + bool released; + rs.push_back(cache_.Get(NHugePages(5), &released)); + } + HugeLength max_cached = NHugePages(0); + for (auto r : rs) { + Release(r); + max_cached = std::max(max_cached, cache_.size()); + } + EXPECT_EQ(max_cached, NHugePages(1000)); + // The limit never changed as the growth mechanism sees no value in + // preparing for occasional peaks (i.e., shrink and grow in cache_time + // are not balanced). + EXPECT_EQ(cache_.limit(), NHugePages(10)); + Advance(10 * cache_time); } } // If very rarely we have a huge *decrease* in usage, it *should* be cached. -TEST_F(HugeCacheTest, DipsCached) { +TEST_P(HugeCacheTest, DipsCached) { absl::BitGen rng; std::uniform_int_distribution sizes(1, 10); + absl::Duration cache_time = GetCacheTime(); for (int i = 0; i < 20; ++i) { std::vector rs; HugeLength got = NHugePages(0); @@ -425,24 +474,32 @@ TEST_F(HugeCacheTest, DipsCached) { if (released) uncached += n; } // Most of our time is at high usage... - Advance(absl::Seconds(30)); + Advance(10 * cache_time); // Now immediately release and reallocate. for (auto r : rs) { - cache_.Release(r); + Release(r); } // warmup if (i >= 2) { - EXPECT_GE(0.06, Frac(uncached, got)); + EXPECT_GE(0.07, Frac(uncached, got)); } } } // Suppose in a previous era of behavior we needed a giant cache, // but now we don't. Do we figure this out promptly? -TEST_F(HugeCacheTest, Shrink) { +TEST_P(HugeCacheTest, Shrink) { + // This test expects the cache to shrink in Release() after the working set + // size is reduced. Hence we skip it when demand-based release is enabled. 
+ if (GetDemandBasedRelease()) { + GTEST_SKIP(); + } + EXPECT_CALL(mock_unback_, Unback(testing::_, testing::_)) + .WillRepeatedly(Return(true)); absl::BitGen rng; std::uniform_int_distribution sizes(1, 10); + absl::Duration cache_time = GetCacheTime(); for (int i = 0; i < 20; ++i) { std::vector rs; for (int j = 0; j < 2000; ++j) { @@ -451,30 +508,431 @@ TEST_F(HugeCacheTest, Shrink) { rs.push_back(cache_.Get(n, &released)); } for (auto r : rs) { - cache_.Release(r); + Release(r); } } ASSERT_LE(NHugePages(10000), cache_.size()); for (int i = 0; i < 30; ++i) { - // New working set <= 20 pages. - Advance(absl::Seconds(1)); + // New working set <= 20 pages, arranging the allocation rounds happen in + // different cache limit updating windows (> cache_time * 2) so we can + // shrink the cache gradually in each round. + Advance(cache_time * 3); // And do some work. for (int j = 0; j < 100; ++j) { bool released; HugeRange r1 = cache_.Get(NHugePages(sizes(rng)), &released); HugeRange r2 = cache_.Get(NHugePages(sizes(rng)), &released); - cache_.Release(r1); - cache_.Release(r2); + Release(r1); + Release(r2); } } - + // The cache should have shrunk to the working set size. + ASSERT_GE(NHugePages(25), cache_.size()); ASSERT_GE(NHugePages(25), cache_.limit()); } -TEST_F(HugeCacheTest, Usage) { +// In demand-based release, we want to release as much as possible when the +// hit_limit is set. +TEST_P(HugeCacheTest, ReleaseByDemandHardRelease) { + // This test is sensitive to the number of pages per hugepage, as we are + // printing raw stats. + if (!GetDemandBasedRelease() || (kPagesPerHugePage != Length(256))) { + GTEST_SKIP(); + } + EXPECT_CALL(mock_unback_, Unback(testing::_, testing::_)) + .WillRepeatedly(Return(true)); + bool released; + HugeRange r = cache_.Get(NHugePages(1000), &released); + Release(r); + ASSERT_EQ(cache_.size(), NHugePages(1000)); + // Releases half of the cache with hit_limit set. + HugeLength unbacked_1 = cache_.ReleaseCachedPagesByDemand( + NHugePages(500), SkipSubreleaseIntervals{}, /*hit_limit=*/true); + EXPECT_EQ(unbacked_1, NHugePages(500)); + // Releases the remaining using invalid intervals. + HugeLength unbacked_2 = cache_.ReleaseCachedPagesByDemand( + NHugePages(1000), SkipSubreleaseIntervals{}, /*hit_limit=*/false); + EXPECT_EQ(unbacked_2, NHugePages(500)); + std::string buffer(1024 * 1024, '\0'); + { + Printer printer(&*buffer.begin(), buffer.size()); + cache_.Print(printer); + } + buffer.resize(strlen(buffer.c_str())); + + EXPECT_THAT(buffer, testing::HasSubstr(R"( +HugeCache: 0 MiB fast unbacked, 2000 MiB periodic +)")); + EXPECT_THAT(buffer, testing::HasSubstr(R"( +HugeCache: Since startup, 1000 hugepages released, (500 hugepages due to reaching tcmalloc limit) +)")); + // The skip-subrelease mechanism is bypassed for both requests. + EXPECT_THAT(buffer, testing::HasSubstr(R"( +HugeCache: Since the start of the execution, 0 subreleases (0 pages) were skipped due to the sum of short-term (0s) fluctuations and long-term (0s) trends. +HugeCache: 0.0000% of decisions confirmed correct, 0 pending (0.0000% of pages, 0 pending). +HugeCache: Subrelease stats last 10 min: total 256000 pages subreleased (0 pages from partial allocs), 0 hugepages broken +)")); +} + +// Tests that we can increase the release target to a fraction +// (kFractionToReleaseFromCache) of HugeCache. This can happen regardless of the +// initial value of the target. 
+TEST_P(HugeCacheTest, ReleaseByDemandIncreaseTarget) { + if (!GetDemandBasedRelease()) { + GTEST_SKIP(); + } + EXPECT_CALL(mock_unback_, Unback(testing::_, testing::_)) + .WillRepeatedly(Return(true)); + bool released; + // (Current - 3 min) Max: 60 hps, Min: 50 hps. + HugeRange peak_1a = cache_.Get(NHugePages(50), &released); + HugeRange peak_1b = cache_.Get(NHugePages(10), &released); + Advance(absl::Minutes(1)); + + // (Current - 2 min) Max: 170 hps, Min: 70 hps. + HugeRange peak_2a = cache_.Get(NHugePages(100), &released); + HugeRange peak_2b = cache_.Get(NHugePages(10), &released); + Release(peak_2a); + Advance(absl::Minutes(1)); + + // (Current - 1 minute) Max: 20 hps, Min: 10 hps. + Release(peak_1a); + Release(peak_2b); + Advance(absl::Minutes(1)); + + // (Current) Max: 0 hps, Min: 0 hps. + Release(peak_1b); + EXPECT_EQ(cache_.size(), NHugePages(170)); + EXPECT_EQ(cache_.usage(), NHugePages(0)); + + // The past demand is 80 hps (short 10 hps + long 70 hps), and we can unback + // 34 hps (170 hps * kFractionToReleaseFromCache), more than the release + // target (0 hps). + HugeLength unbacked_1 = cache_.ReleaseCachedPagesByDemand( + NHugePages(0), + SkipSubreleaseIntervals{.short_interval = absl::Seconds(120), + .long_interval = absl::Seconds(180)}, + /*hit_limit=*/false); + EXPECT_EQ(unbacked_1, NHugePages(34)); + // Repeats the test using a non-zero target. + EXPECT_EQ(cache_.size(), NHugePages(136)); + HugeLength unbacked_2 = cache_.ReleaseCachedPagesByDemand( + NHugePages(10), + SkipSubreleaseIntervals{.short_interval = absl::Seconds(120), + .long_interval = absl::Seconds(180)}, + /*hit_limit=*/false); + EXPECT_EQ(unbacked_2, NHugePages(28)); + + // Tests that we always manage to protect the cache limit (10 hps) while + // increasing the target. First, force the cache close to the limit using a + // crafted target. + HugeLength unbacked_3 = cache_.ReleaseCachedPagesByDemand( + NHugePages(97), SkipSubreleaseIntervals{}, /*hit_limit=*/true); + EXPECT_EQ(unbacked_3, NHugePages(97)); + EXPECT_EQ(cache_.size(), NHugePages(11)); + // Then, ask for release using target zero. + HugeLength unbacked_4 = cache_.ReleaseCachedPagesByDemand( + NHugePages(0), SkipSubreleaseIntervals{}, /*hit_limit=*/true); + EXPECT_EQ(unbacked_4, NHugePages(1)); + EXPECT_EQ(cache_.size(), NHugePages(10)); + // Now the cache is at the limit. Checks if that can be protected. + HugeLength unbacked_5 = cache_.ReleaseCachedPagesByDemand( + NHugePages(0), SkipSubreleaseIntervals{}, /*hit_limit=*/true); + EXPECT_EQ(unbacked_5, NHugePages(0)); + + // Finally, show that we can release the limit if requested. There has been no + // demand in the past 10s so we can release the rest of the cache. + HugeLength unbacked_6 = cache_.ReleaseCachedPagesByDemand( + NHugePages(100), + SkipSubreleaseIntervals{.short_interval = absl::Seconds(10), + .long_interval = absl::Seconds(10)}, + /*hit_limit=*/false); + EXPECT_EQ(unbacked_6, NHugePages(10)); +} + +// Tests releasing zero pages when the cache size and demand are both zero. +TEST_P(HugeCacheTest, ReleaseByDemandReleaseZero) { + if (!GetDemandBasedRelease()) { + GTEST_SKIP(); + } + EXPECT_EQ(cache_.ReleaseCachedPagesByDemand( + NHugePages(0), + SkipSubreleaseIntervals{.short_interval = absl::Seconds(1), + .long_interval = absl::Seconds(1)}, + /*hit_limit=*/false), + NHugePages(0)); +} + +// Tests that releasing target is not affected if the demand history is empty. 
+TEST_P(HugeCacheTest, ReleaseByDemandNoHistory) { + if (!GetDemandBasedRelease()) { + GTEST_SKIP(); + } + EXPECT_CALL(mock_unback_, Unback(testing::_, testing::_)) + .WillRepeatedly(Return(true)); + // First we make sure that the cache is not empty. + bool released; + Release(cache_.Get(NHugePages(10), &released)); + EXPECT_EQ(cache_.size(), NHugePages(10)); + // Then we advance the time to make sure that the demand history is empty. + Advance(absl::Minutes(30)); + EXPECT_EQ(cache_.ReleaseCachedPagesByDemand( + NHugePages(10), + SkipSubreleaseIntervals{.short_interval = absl::Seconds(1), + .long_interval = absl::Seconds(1)}, + /*hit_limit=*/false), + NHugePages(10)); +} + +// Tests that the demand is capped by peak within the default interval (5 mins). +TEST_P(HugeCacheTest, ReleaseByDemandCappedByDemandPeak) { + if (!GetDemandBasedRelease()) { + GTEST_SKIP(); + } + EXPECT_CALL(mock_unback_, Unback(testing::_, testing::_)) + .WillRepeatedly(Return(true)); + // Generates a demand pattern that can cause the sum-of-peak issue. + bool released; + // The diff peak: 20 hps - 1 hps = 19 hps. + HugeRange diff_a = cache_.Get(NHugePages(1), &released); + HugeRange diff_b = cache_.Get(NHugePages(20), &released); + Release(diff_a); + Release(diff_b); + Advance(absl::Minutes(5)); + // The long-term demand peak: 15 hps. + HugeRange peak = cache_.Get(NHugePages(15), &released); + Advance(absl::Minutes(1)); + Release(peak); + EXPECT_EQ(cache_.size(), NHugePages(21)); + // Releases partial of the cache as the demand is capped by the 5-mins' peak + // (15 hps). + EXPECT_EQ(cache_.ReleaseCachedPagesByDemand( + NHugePages(100), + SkipSubreleaseIntervals{.short_interval = absl::Minutes(10), + .long_interval = absl::Minutes(10)}, + /*hit_limit=*/false), + NHugePages(6)); + // Releases the rest of the cache. + EXPECT_EQ(cache_.ReleaseCachedPagesByDemand(NHugePages(100), + SkipSubreleaseIntervals{}, + /*hit_limit=*/false), + NHugePages(15)); +} + +// Tests demand-based skip release. The test is a modified version of the +// FillerTest.SkipSubrelease test by removing parts designed particularly for +// subrelease. +TEST_P(HugeCacheTest, ReleaseByDemandSkipRelease) { + // This test is sensitive to the number of pages per hugepage, as we are + // printing raw stats. + if (!GetDemandBasedRelease() || (kPagesPerHugePage != Length(256))) { + GTEST_SKIP(); + } + + EXPECT_CALL(mock_unback_, Unback(testing::_, testing::_)) + .WillRepeatedly(Return(true)); + // First it generates a peak (the long-term demand peak) and waits for + // time_interval(a). Then, it generates a higher peak that contains the + // short-term fluctuation peak, and waits for time_interval(b). It then + // generates a trough in demand and asks to release. Finally, it waits for + // time_interval(c) to generate the highest peak which is used for evaluating + // release correctness. + const auto demand_pattern = + [&](absl::Duration a, absl::Duration b, absl::Duration c, + SkipSubreleaseIntervals intervals, bool expected_release) { + bool released; + // First peak: min_demand 10 hps , max_demand 15 hps, diff 10 hps. + HugeRange peak_1a = cache_.Get(NHugePages(10), &released); + HugeRange peak_1b = cache_.Get(NHugePages(5), &released); + Advance(a); + // Second peak: min_demand 0 hps, max_demand 20 hps, diff 20 hps. 
+ Release(peak_1a); + Release(peak_1b); + HugeRange peak_2a = cache_.Get(NHugePages(15), &released); + HugeRange peak_2b = cache_.Get(NHugePages(5), &released); + EXPECT_EQ(cache_.usage(), NHugePages(20)); + EXPECT_EQ(cache_.size(), NHugePages(0)); + Advance(b); + // Trough: min_demand 5 hps, max_demand 5 hps, diff 0 hps. + Release(peak_2a); + EXPECT_EQ(cache_.usage(), NHugePages(5)); + EXPECT_EQ(cache_.size(), NHugePages(15)); + // Release is capped by the cache size. + EXPECT_EQ(cache_.ReleaseCachedPagesByDemand(NHugePages(100), intervals, + /*hit_limit=*/false), + expected_release ? NHugePages(15) : NHugePages(0)); + Advance(c); + // Third peak: min_demand 25 hps, max_demand 30 hps, diff 5 hps. + // Note, skip-subrelease evaluates the correctness of skipped releases + // using the first demand update recorded in an epoch (25 hps for this + // case). + HugeRange peak_3a = cache_.Get(NHugePages(20), &released); + HugeRange peak_3b = cache_.Get(NHugePages(5), &released); + EXPECT_EQ(cache_.usage(), NHugePages(30)); + Release(peak_2b); + Release(peak_3a); + Release(peak_3b); + // If the previous release is skipped, the cache size is larger due to + // fragmentation. + EXPECT_EQ(cache_.ReleaseCachedPagesByDemand(NHugePages(100), + SkipSubreleaseIntervals{}, + /*hit_limit=*/false), + expected_release ? NHugePages(30) : NHugePages(40)); + Advance(absl::Minutes(30)); + }; + { + // Skip release feature is disabled if all intervals are zero. + SCOPED_TRACE("demand_pattern 1"); + demand_pattern(absl::Minutes(1), absl::Minutes(1), absl::Minutes(4), + SkipSubreleaseIntervals{}, /*expected_release=*/true); + } + { + // Uses short-term and long-term intervals (combined demand is 30 hps but + // capped by maximum demand in 10 mins, 20 hps), incorrectly skipped 15 hps. + SCOPED_TRACE("demand_pattern 2"); + demand_pattern(absl::Minutes(3), absl::Minutes(2), absl::Minutes(7), + SkipSubreleaseIntervals{.short_interval = absl::Minutes(3), + .long_interval = absl::Minutes(6)}, + /*expected_release=*/false); + } + { + // Uses short-term and long-term intervals (combined demand 5 hps), released + // all free hps. + SCOPED_TRACE("demand_pattern 3"); + demand_pattern(absl::Minutes(4), absl::Minutes(2), absl::Minutes(3), + SkipSubreleaseIntervals{.short_interval = absl::Minutes(1), + .long_interval = absl::Minutes(2)}, + /*expected_release=*/true); + } + { + // Uses only short-term interval (demand 20 hps), correctly skipped 15 hps. + SCOPED_TRACE("demand_pattern 4"); + demand_pattern(absl::Minutes(4), absl::Minutes(2), absl::Minutes(3), + SkipSubreleaseIntervals{.short_interval = absl::Minutes(3)}, + /*expected_release=*/false); + } + { + // Uses only long-term interval (demand 5 hps), released all free pages. + SCOPED_TRACE("demand_pattern 5"); + demand_pattern(absl::Minutes(4), absl::Minutes(2), absl::Minutes(3), + SkipSubreleaseIntervals{.long_interval = absl::Minutes(2)}, + /*expected_release=*/true); + } + // This captures a corner case: If we hit another peak immediately after a + // release decision (recorded in the same epoch), do not count this as + // a correct release decision. + { + SCOPED_TRACE("demand_pattern 6"); + demand_pattern(absl::Milliseconds(10), absl::Milliseconds(10), + absl::Milliseconds(10), + SkipSubreleaseIntervals{.short_interval = absl::Minutes(1), + .long_interval = absl::Minutes(2)}, + /*expected_release=*/false); + } + // Ensure that the tracker is updated. 
+ bool released; + HugeRange tiny = cache_.Get(NHugePages(1), &released); + Release(tiny); + std::string buffer(1024 * 1024, '\0'); + { + Printer printer(&*buffer.begin(), buffer.size()); + cache_.Print(printer); + } + buffer.resize(strlen(buffer.c_str())); + + EXPECT_THAT(buffer, testing::HasSubstr(R"( +HugeCache: Since the start of the execution, 3 subreleases (11520 pages) were skipped due to the sum of short-term (60s) fluctuations and long-term (120s) trends. +HugeCache: 33.3333% of decisions confirmed correct, 0 pending (33.3333% of pages, 0 pending). +HugeCache: Subrelease stats last 10 min: total 0 pages subreleased (0 pages from partial allocs), 0 hugepages broken +)")); +} + +// Tests the skipping decisions are reported correctly, particularly for the +// demand peaks used in correctness evaluation. +TEST_P(HugeCacheTest, ReleaseByDemandSkipReleaseReport) { + // This test is sensitive to the number of pages per hugepage, as we are + // printing raw stats. + if (!GetDemandBasedRelease() || (kPagesPerHugePage != Length(256))) { + GTEST_SKIP(); + } + EXPECT_CALL(mock_unback_, Unback(testing::_, testing::_)) + .WillRepeatedly(Return(true)); + + // Reports skip release using the recent demand peak (23 hps): it is + // smaller than the current capacity (33 hps) when 8 hps are skipped. + // The skipping is correct as the future demand is 25 hps. + bool released; + HugeRange peak_1a = cache_.Get(NHugePages(10), &released); + HugeRange peak_1b = cache_.Get(NHugePages(8), &released); + Advance(absl::Minutes(2)); + Release(peak_1a); + HugeRange peak_2a = cache_.Get(NHugePages(15), &released); + Release(peak_1b); + EXPECT_EQ(cache_.usage(), NHugePages(15)); + EXPECT_EQ(cache_.size(), NHugePages(18)); + EXPECT_EQ(cache_.ReleaseCachedPagesByDemand( + NHugePages(30), + SkipSubreleaseIntervals{.short_interval = absl::Minutes(3), + .long_interval = absl::Minutes(3)}, + /*hit_limit=*/false), + NHugePages(10)); + Advance(absl::Minutes(3)); + HugeRange peak_3a = cache_.Get(NHugePages(10), &released); + Release(peak_2a); + Release(peak_3a); + EXPECT_EQ(cache_.ReleaseCachedPagesByDemand(NHugePages(100), + SkipSubreleaseIntervals{}, + /*hit_limit=*/false), + NHugePages(33)); + Advance(absl::Minutes(30)); + + // Reports skip release using the current capacity (15 hps): it + // is smaller than the recent peak (20 hps) when 10 hps are skipped. They are + // correctly skipped as the future demand is 18 hps. 
+ HugeRange peak_4a = cache_.Get(NHugePages(10), &released); + HugeRange peak_4b = cache_.Get(NHugePages(10), &released); + Release(peak_4a); + EXPECT_EQ(cache_.ReleaseCachedPagesByDemand(NHugePages(10), + SkipSubreleaseIntervals{}, false), + NHugePages(10)); + Advance(absl::Minutes(2)); + HugeRange peak_5a = cache_.Get(NHugePages(5), &released); + Release(peak_4b); + EXPECT_EQ(cache_.ReleaseCachedPagesByDemand( + NHugePages(10), + SkipSubreleaseIntervals{.short_interval = absl::Minutes(3), + .long_interval = absl::Minutes(3)}, + /*hit_limit=*/false), + NHugePages(0)); + Advance(absl::Minutes(3)); + HugeRange peak_6a = cache_.Get(NHugePages(10), &released); + HugeRange peak_6b = cache_.Get(NHugePages(3), &released); + Release(peak_5a); + Release(peak_6a); + Release(peak_6b); + EXPECT_EQ(cache_.ReleaseCachedPagesByDemand(NHugePages(100), + SkipSubreleaseIntervals{}, + /*hit_limit=*/false), + NHugePages(18)); + Advance(absl::Minutes(30)); + + std::string buffer(1024 * 1024, '\0'); + { + Printer printer(&*buffer.begin(), buffer.size()); + cache_.Print(printer); + } + buffer.resize(strlen(buffer.c_str())); + + EXPECT_THAT(buffer, testing::HasSubstr(R"( +HugeCache: Since the start of the execution, 2 subreleases (4608 pages) were skipped due to the sum of short-term (180s) fluctuations and long-term (180s) trends. +HugeCache: 100.0000% of decisions confirmed correct, 0 pending (100.0000% of pages, 0 pending). +)")); +} + +TEST_P(HugeCacheTest, Usage) { bool released; auto r1 = cache_.Get(NHugePages(10), &released); @@ -483,7 +941,7 @@ TEST_F(HugeCacheTest, Usage) { auto r2 = cache_.Get(NHugePages(100), &released); EXPECT_EQ(NHugePages(110), cache_.usage()); - cache_.Release(r1); + Release(r1); EXPECT_EQ(NHugePages(100), cache_.usage()); // Pretend we unbacked this. @@ -558,6 +1016,16 @@ TEST_F(MinMaxTrackerTest, Works) { EXPECT_EQ(NHugePages(1), tracker.MinOverTime(kDuration)); } +INSTANTIATE_TEST_SUITE_P( + All, HugeCacheTest, + testing::Combine(testing::Values(absl::Seconds(1), absl::Seconds(30)), + testing::Bool()), + [](const testing::TestParamInfo info) { + return "Cachetime_" + absl::FormatDuration(std::get<0>(info.param)) + + "DemandBasedRelease_" + + (std::get<1>(info.param) ? 
"Enabled" : "Disabled"); + }); + } // namespace } // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.cc b/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.cc index e662456df6bb..57b45a425351 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.cc +++ b/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.cc @@ -14,69 +14,44 @@ #include "tcmalloc/huge_page_aware_allocator.h" -#include -#include +#include -#include - -#include "absl/base/internal/cycleclock.h" -#include "absl/base/internal/spinlock.h" -#include "absl/time/time.h" -#include "tcmalloc/common.h" -#include "tcmalloc/experiment.h" -#include "tcmalloc/experiment_config.h" -#include "tcmalloc/huge_allocator.h" +#include "absl/base/attributes.h" +#include "tcmalloc/arena.h" #include "tcmalloc/huge_pages.h" +#include "tcmalloc/huge_region.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/environment.h" #include "tcmalloc/internal/logging.h" -#include "tcmalloc/internal/optimization.h" +#include "tcmalloc/internal/memory_tag.h" #include "tcmalloc/pagemap.h" -#include "tcmalloc/parameters.h" +#include "tcmalloc/pages.h" #include "tcmalloc/span.h" #include "tcmalloc/static_vars.h" -#include "tcmalloc/stats.h" +#include "tcmalloc/system-alloc.h" GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { -bool decide_want_hpaa(); -ABSL_ATTRIBUTE_WEAK int default_want_hpaa(); ABSL_ATTRIBUTE_WEAK int default_subrelease(); -bool decide_subrelease() { - if (!decide_want_hpaa()) { - // Subrelease is off if HPAA is off. - return false; - } +namespace huge_page_allocator_internal { - const char *e = thread_safe_getenv("TCMALLOC_HPAA_CONTROL"); +bool decide_subrelease() { + const char* e = thread_safe_getenv("TCMALLOC_HPAA_CONTROL"); if (e) { switch (e[0]) { case '0': - if (kPageShift <= 12) { - return false; - } - - if (default_want_hpaa != nullptr) { - int default_hpaa = default_want_hpaa(); - if (default_hpaa < 0) { - return false; - } - } - - Log(kLog, __FILE__, __LINE__, - "Runtime opt-out from HPAA requires building with " - "//tcmalloc:want_no_hpaa." - ); + // If we're forcing HPAA on, we want to converge towards our default + // of subrelease on, rather than off (where it is moot without HPAA). break; case '1': return false; case '2': return true; default: - Crash(kCrash, __FILE__, __LINE__, "bad env var", e); - return false; + TC_BUG("bad env var '%s'", e); } } @@ -87,589 +62,91 @@ bool decide_subrelease() { } } - if (tcmalloc::IsExperimentActive(tcmalloc::Experiment::TCMALLOC_TEMERAIRE)) { - return false; - } - return true; } -FillerPartialRerelease decide_partial_rerelease() { - const char *e = thread_safe_getenv("TCMALLOC_PARTIAL_RELEASE_CONTROL"); - if (e) { - if (e[0] == '0') { - return FillerPartialRerelease::Return; - } - if (e[0] == '1') { - return FillerPartialRerelease::Retain; - } - Crash(kCrash, __FILE__, __LINE__, "bad env var", e); - } - - return FillerPartialRerelease::Retain; -} - -// Some notes: locking discipline here is a bit funny, because -// we want to *not* hold the pageheap lock while backing memory. - -// We have here a collection of slightly different allocators each -// optimized for slightly different purposes. This file has two main purposes: -// - pick the right one for a given allocation -// - provide enough data to figure out what we picked last time! 
- -HugePageAwareAllocator::HugePageAwareAllocator(MemoryTag tag) - : PageAllocatorInterface("HugePageAware", tag), - filler_(decide_partial_rerelease()), - alloc_( - [](MemoryTag tag) { - // TODO(ckennelly): Remove the template parameter. - switch (tag) { - case MemoryTag::kNormal: - return AllocAndReport; - case MemoryTag::kNormalP1: - return AllocAndReport; - case MemoryTag::kSampled: - return AllocAndReport; - default: - ASSUME(false); - __builtin_unreachable(); - } - }(tag), - MetaDataAlloc), - cache_(HugeCache{&alloc_, MetaDataAlloc, UnbackWithoutLock}) { - tracker_allocator_.Init(&Static::arena()); - region_allocator_.Init(&Static::arena()); -} - -HugePageAwareAllocator::FillerType::Tracker *HugePageAwareAllocator::GetTracker( - HugePage p) { - void *v = Static::pagemap().GetHugepage(p.first_page()); - FillerType::Tracker *pt = reinterpret_cast(v); - ASSERT(pt == nullptr || pt->location() == p); - return pt; -} - -void HugePageAwareAllocator::SetTracker( - HugePage p, HugePageAwareAllocator::FillerType::Tracker *pt) { - Static::pagemap().SetHugepage(p.first_page(), pt); -} - -PageId HugePageAwareAllocator::AllocAndContribute(HugePage p, Length n, - bool donated) { - CHECK_CONDITION(p.start_addr() != nullptr); - FillerType::Tracker *pt = tracker_allocator_.New(); - new (pt) FillerType::Tracker(p, absl::base_internal::CycleClock::Now()); - ASSERT(pt->longest_free_range() >= n); - PageId page = pt->Get(n).page; - ASSERT(page == p.first_page()); - SetTracker(p, pt); - filler_.Contribute(pt, donated); - return page; -} - -PageId HugePageAwareAllocator::RefillFiller(Length n, bool *from_released) { - HugeRange r = cache_.Get(NHugePages(1), from_released); - if (!r.valid()) return PageId{0}; - // This is duplicate to Finalize, but if we need to break up - // hugepages to get to our usage limit it would be very bad to break - // up what's left of r after we allocate from there--while r is - // mostly empty, clearly what's left in the filler is too fragmented - // to be very useful, and we would rather release those - // pages. Otherwise, we're nearly guaranteed to release r (if n - // isn't very large), and the next allocation will just repeat this - // process. - Static::page_allocator().ShrinkToUsageLimit(); - return AllocAndContribute(r.start(), n, /*donated=*/false); -} - -Span *HugePageAwareAllocator::Finalize(Length n, PageId page) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { - ASSERT(page != PageId{0}); - Span *ret = Span::New(page, n); - Static::pagemap().Set(page, ret); - ASSERT(!ret->sampled()); - info_.RecordAlloc(page, n); - Static::page_allocator().ShrinkToUsageLimit(); - return ret; -} - -// For anything <= half a huge page, we will unconditionally use the filler -// to pack it into a single page. If we need another page, that's fine. -Span *HugePageAwareAllocator::AllocSmall(Length n, bool *from_released) { - auto [pt, page] = filler_.TryGet(n); - if (ABSL_PREDICT_TRUE(pt != nullptr)) { - *from_released = false; - return Finalize(n, page); - } - - page = RefillFiller(n, from_released); - if (ABSL_PREDICT_FALSE(page == PageId{0})) { - return nullptr; - } - return Finalize(n, page); -} +extern "C" ABSL_ATTRIBUTE_WEAK bool +default_want_disable_huge_region_more_often(); -Span *HugePageAwareAllocator::AllocLarge(Length n, bool *from_released) { - // If it's an exact page multiple, just pull it from pages directly. 
- HugeLength hl = HLFromPages(n); - if (hl.in_pages() == n) { - return AllocRawHugepages(n, from_released); +bool use_huge_region_more_often() { + // Disable huge regions more often feature if built against an opt-out. + if (default_want_disable_huge_region_more_often != nullptr) { + return false; } - PageId page; - // If we fit in a single hugepage, try the Filler first. - if (n < kPagesPerHugePage) { - auto [pt, page] = filler_.TryGet(n); - if (ABSL_PREDICT_TRUE(pt != nullptr)) { - *from_released = false; - return Finalize(n, page); + // TODO(b/296281171): Remove this opt-out. + const char* e = + thread_safe_getenv("TCMALLOC_USE_HUGE_REGION_MORE_OFTEN_DISABLE"); + if (e) { + switch (e[0]) { + case '0': + return true; + case '1': + return false; + default: + TC_BUG("bad env var '%s'", e); } } - // If we're using regions in this binary (see below comment), is - // there currently available space there? - if (regions_.MaybeGet(n, &page, from_released)) { - return Finalize(n, page); - } - - // We have two choices here: allocate a new region or go to - // hugepages directly (hoping that slack will be filled by small - // allocation.) The second strategy is preferrable, as it's - // typically faster and usually more space efficient, but it's sometimes - // catastrophic. - // - // See https://github.com/google/tcmalloc/tree/master/docs/regions-are-not-optional.md - // - // So test directly if we're in the bad case--almost no binaries are. - // If not, just fall back to direct allocation (and hope we do hit that case!) - const Length slack = info_.slack(); - // Don't bother at all until the binary is reasonably sized - if (slack < HLFromBytes(64 * 1024 * 1024).in_pages()) { - return AllocRawHugepages(n, from_released); - } - - // In the vast majority of binaries, we have many small allocations which - // will nicely fill slack. (Fleetwide, the average ratio is 15:1; only - // a handful of binaries fall below 1:1.) - const Length small = info_.small(); - if (slack < small) { - return AllocRawHugepages(n, from_released); - } - - // We couldn't allocate a new region. They're oversized, so maybe we'd get - // lucky with a smaller request? - if (!AddRegion()) { - return AllocRawHugepages(n, from_released); - } - - CHECK_CONDITION(regions_.MaybeGet(n, &page, from_released)); - return Finalize(n, page); -} - -Span *HugePageAwareAllocator::AllocEnormous(Length n, bool *from_released) { - return AllocRawHugepages(n, from_released); -} - -Span *HugePageAwareAllocator::AllocRawHugepages(Length n, bool *from_released) { - HugeLength hl = HLFromPages(n); - - HugeRange r = cache_.Get(hl, from_released); - if (!r.valid()) return nullptr; - - // We now have a huge page range that covers our request. There - // might be some slack in it if n isn't a multiple of - // kPagesPerHugePage. Add the hugepage with slack to the filler, - // pretending the non-slack portion is a smaller allocation. 
- Length total = hl.in_pages(); - Length slack = total - n; - HugePage first = r.start(); - SetTracker(first, nullptr); - HugePage last = first + r.len() - NHugePages(1); - if (slack == Length(0)) { - SetTracker(last, nullptr); - return Finalize(total, r.start().first_page()); - } - - ++donated_huge_pages_; - - Length here = kPagesPerHugePage - slack; - ASSERT(here > Length(0)); - AllocAndContribute(last, here, /*donated=*/true); - return Finalize(n, r.start().first_page()); -} - -static void BackSpan(Span *span) { - SystemBack(span->start_address(), span->bytes_in_span()); -} - -// public -Span *HugePageAwareAllocator::New(Length n) { - CHECK_CONDITION(n > Length(0)); - bool from_released; - Span *s = LockAndAlloc(n, &from_released); - if (s) { - // Prefetch for writing, as we anticipate using the memory soon. - __builtin_prefetch(s->start_address(), 1, 3); - if (from_released) BackSpan(s); - } - ASSERT(!s || GetMemoryTag(s->start_address()) == tag_); - return s; -} - -Span *HugePageAwareAllocator::LockAndAlloc(Length n, bool *from_released) { - absl::base_internal::SpinLockHolder h(&pageheap_lock); - // Our policy depends on size. For small things, we will pack them - // into single hugepages. - if (n <= kPagesPerHugePage / 2) { - return AllocSmall(n, from_released); - } - - // For anything too big for the filler, we use either a direct hugepage - // allocation, or possibly the regions if we are worried about slack. - if (n <= HugeRegion::size().in_pages()) { - return AllocLarge(n, from_released); - } - - // In the worst case, we just fall back to directly allocating a run - // of hugepages. - return AllocEnormous(n, from_released); -} - -// public -Span *HugePageAwareAllocator::NewAligned(Length n, Length align) { - if (align <= Length(1)) { - return New(n); - } - - // we can do better than this, but... - // TODO(b/134690769): support higher align. - CHECK_CONDITION(align <= kPagesPerHugePage); - bool from_released; - Span *s; - { - absl::base_internal::SpinLockHolder h(&pageheap_lock); - s = AllocRawHugepages(n, &from_released); - } - if (s && from_released) BackSpan(s); - ASSERT(!s || GetMemoryTag(s->start_address()) == tag_); - return s; -} - -void HugePageAwareAllocator::DeleteFromHugepage(FillerType::Tracker *pt, - PageId p, Length n) { - if (ABSL_PREDICT_TRUE(filler_.Put(pt, p, n) == nullptr)) return; - if (pt->donated()) { - --donated_huge_pages_; - } - ReleaseHugepage(pt); -} - -bool HugePageAwareAllocator::AddRegion() { - HugeRange r = alloc_.Get(HugeRegion::size()); - if (!r.valid()) return false; - HugeRegion *region = region_allocator_.New(); - new (region) HugeRegion(r, SystemRelease); - regions_.Contribute(region); return true; } -void HugePageAwareAllocator::Delete(Span *span) { - ASSERT(!span || GetMemoryTag(span->start_address()) == tag_); - PageId p = span->first_page(); - HugePage hp = HugePageContaining(p); - Length n = span->num_pages(); - info_.RecordFree(p, n); - - Span::Delete(span); - - // The tricky part, as with so many allocators: where did we come from? - // There are several possibilities. - FillerType::Tracker *pt = GetTracker(hp); - // a) We got packed by the filler onto a single hugepage - return our - // allocation to that hugepage in the filler. - if (ABSL_PREDICT_TRUE(pt != nullptr)) { - ASSERT(hp == HugePageContaining(p + n - Length(1))); - DeleteFromHugepage(pt, p, n); - return; - } - - // b) We got put into a region, possibly crossing hugepages - - // return our allocation to the region. 
- if (regions_.MaybePut(p, n)) return; - - // c) we came straight from the HugeCache - return straight there. (We - // might have had slack put into the filler - if so, return that virtual - // allocation to the filler too!) - ASSERT(n >= kPagesPerHugePage); - HugeLength hl = HLFromPages(n); - HugePage last = hp + hl - NHugePages(1); - Length slack = hl.in_pages() - n; - if (slack == Length(0)) { - ASSERT(GetTracker(last) == nullptr); - } else { - pt = GetTracker(last); - CHECK_CONDITION(pt != nullptr); - // We put the slack into the filler (see AllocEnormous.) - // Handle this page separately as a virtual allocation - // onto the last hugepage. - PageId virt = last.first_page(); - Length virt_len = kPagesPerHugePage - slack; - pt = filler_.Put(pt, virt, virt_len); - // We may have used the slack, which would prevent us from returning - // the entire range now. If filler returned a Tracker, we are fully empty. - if (pt == nullptr) { - // Last page isn't empty -- pretend the range was shorter. - --hl; - } else { - // Last page was empty - but if we sub-released it, we still - // have to split it off and release it independently.) - if (pt->released()) { - --hl; - ReleaseHugepage(pt); - } else { - // Get rid of the tracker *object*, but not the *hugepage* - // (which is still part of our range.) We were able to reclaim the - // contributed slack. - --donated_huge_pages_; - SetTracker(pt->location(), nullptr); - tracker_allocator_.Delete(pt); - } - } - } - cache_.Release({hp, hl}); -} - -void HugePageAwareAllocator::ReleaseHugepage(FillerType::Tracker *pt) { - ASSERT(pt->used_pages() == Length(0)); - HugeRange r = {pt->location(), NHugePages(1)}; - SetTracker(pt->location(), nullptr); - - if (pt->released()) { - cache_.ReleaseUnbacked(r); - } else { - cache_.Release(r); - } - - tracker_allocator_.Delete(pt); -} - -// public -BackingStats HugePageAwareAllocator::stats() const { - BackingStats stats = alloc_.stats(); - const auto actual_system = stats.system_bytes; - stats += cache_.stats(); - stats += filler_.stats(); - stats += regions_.stats(); - // the "system" (total managed) byte count is wildly double counted, - // since it all comes from HugeAllocator but is then managed by - // cache/regions/filler. Adjust for that. - stats.system_bytes = actual_system; - return stats; -} - -// public -void HugePageAwareAllocator::GetSmallSpanStats(SmallSpanStats *result) { - GetSpanStats(result, nullptr, nullptr); -} - -// public -void HugePageAwareAllocator::GetLargeSpanStats(LargeSpanStats *result) { - GetSpanStats(nullptr, result, nullptr); +HugeRegionUsageOption huge_region_option() { + // By default, we use slack to determine when to use HugeRegion. When slack is + // greater than 64MB (to ignore small binaries), and greater than the number + // of small allocations, we allocate large allocations from HugeRegion. + // + // When huge-region-more-often feature is enabled, we use number of abandoned + // pages in addition to slack to make a decision. If the size of abandoned + // pages plus slack exceeds 64MB (to ignore small binaries), we use HugeRegion + // for large allocations. This results in using HugeRegions for all the large + // allocations once the size exceeds 64MB. + return use_huge_region_more_often() + ? 
HugeRegionUsageOption::kUseForAllLargeAllocs + : HugeRegionUsageOption::kDefault; } -void HugePageAwareAllocator::GetSpanStats(SmallSpanStats *small, - LargeSpanStats *large, - PageAgeHistograms *ages) { - if (small != nullptr) { - *small = SmallSpanStats(); - } - if (large != nullptr) { - *large = LargeSpanStats(); - } +Arena& StaticForwarder::arena() { return tc_globals.arena(); } - alloc_.AddSpanStats(small, large, ages); - filler_.AddSpanStats(small, large, ages); - regions_.AddSpanStats(small, large, ages); - cache_.AddSpanStats(small, large, ages); +void* StaticForwarder::GetHugepage(HugePage p) { + return tc_globals.pagemap().GetHugepage(p.first_page()); } -// public -Length HugePageAwareAllocator::ReleaseAtLeastNPages(Length num_pages) { - Length released; - released += cache_.ReleaseCachedPages(HLFromPages(num_pages)).in_pages(); - - // This is our long term plan but in current state will lead to insufficent - // THP coverage. It is however very useful to have the ability to turn this on - // for testing. - // TODO(b/134690769): make this work, remove the flag guard. - if (Parameters::hpaa_subrelease()) { - if (released < num_pages) { - released += filler_.ReleasePages( - num_pages - released, Parameters::filler_skip_subrelease_interval(), - /*hit_limit*/ false); - } - } - - // TODO(b/134690769): - // - perhaps release region? - // - refuse to release if we're too close to zero? - info_.RecordRelease(num_pages, released); - return released; -} +bool StaticForwarder::Ensure(Range r) { return tc_globals.pagemap().Ensure(r); } -static double BytesToMiB(size_t bytes) { - const double MiB = 1048576.0; - return bytes / MiB; +void StaticForwarder::Set(PageId page, Span* span) { + tc_globals.pagemap().Set(page, span); } -static void BreakdownStats(Printer *out, const BackingStats &s, - const char *label) { - out->printf("%s %6.1f MiB used, %6.1f MiB free, %6.1f MiB unmapped\n", label, - BytesToMiB(s.system_bytes - s.free_bytes - s.unmapped_bytes), - BytesToMiB(s.free_bytes), BytesToMiB(s.unmapped_bytes)); +void StaticForwarder::SetHugepage(HugePage p, void* pt) { + tc_globals.pagemap().SetHugepage(p.first_page(), pt); } -static void BreakdownStatsInPbtxt(PbtxtRegion *hpaa, const BackingStats &s, - const char *key) { - auto usage = hpaa->CreateSubRegion(key); - usage.PrintI64("used", s.system_bytes - s.free_bytes - s.unmapped_bytes); - usage.PrintI64("free", s.free_bytes); - usage.PrintI64("unmapped", s.unmapped_bytes); +void StaticForwarder::ShrinkToUsageLimit(Length n) { + tc_globals.page_allocator().ShrinkToUsageLimit(n); } -// public -void HugePageAwareAllocator::Print(Printer *out) { Print(out, true); } - -void HugePageAwareAllocator::Print(Printer *out, bool everything) { - SmallSpanStats small; - LargeSpanStats large; - BackingStats bstats; - PageAgeHistograms ages(absl::base_internal::CycleClock::Now()); - absl::base_internal::SpinLockHolder h(&pageheap_lock); - bstats = stats(); - GetSpanStats(&small, &large, &ages); - PrintStats("HugePageAware", out, bstats, small, large, everything); - out->printf( - "\nHuge page aware allocator components:\n" - "------------------------------------------------\n"); - out->printf("HugePageAware: breakdown of used / free / unmapped space:\n"); - - auto fstats = filler_.stats(); - BreakdownStats(out, fstats, "HugePageAware: filler"); - - auto rstats = regions_.stats(); - BreakdownStats(out, rstats, "HugePageAware: region"); - - auto cstats = cache_.stats(); - // Everything in the filler came from the cache - - // adjust the totals so we see the 
amount used by the mutator. - cstats.system_bytes -= fstats.system_bytes; - BreakdownStats(out, cstats, "HugePageAware: cache "); - - auto astats = alloc_.stats(); - // Everything in *all* components came from here - - // so again adjust the totals. - astats.system_bytes -= (fstats + rstats + cstats).system_bytes; - BreakdownStats(out, astats, "HugePageAware: alloc "); - out->printf("\n"); - - out->printf("HugePageAware: filler donations %zu\n", - donated_huge_pages_.raw_num()); - - // Component debug output - // Filler is by far the most important; print (some) of it - // unconditionally. - filler_.Print(out, everything); - out->printf("\n"); - if (everything) { - regions_.Print(out); - out->printf("\n"); - cache_.Print(out); - out->printf("\n"); - alloc_.Print(out); - out->printf("\n"); - - // Use statistics - info_.Print(out); - - // and age tracking. - ages.Print("HugePageAware", out); - } - - out->printf("PARAMETER hpaa_subrelease %d\n", - Parameters::hpaa_subrelease() ? 1 : 0); +Span* StaticForwarder::NewSpan(Range r) { + // TODO(b/134687001): Delete this when span_allocator moves. + return Span::New(r); } -void HugePageAwareAllocator::PrintInPbtxt(PbtxtRegion *region) { - SmallSpanStats small; - LargeSpanStats large; - PageAgeHistograms ages(absl::base_internal::CycleClock::Now()); - absl::base_internal::SpinLockHolder h(&pageheap_lock); - GetSpanStats(&small, &large, &ages); - PrintStatsInPbtxt(region, small, large, ages); - { - auto hpaa = region->CreateSubRegion("huge_page_allocator"); - hpaa.PrintBool("using_hpaa", true); - hpaa.PrintBool("using_hpaa_subrelease", Parameters::hpaa_subrelease()); - - // Fill HPAA Usage - auto fstats = filler_.stats(); - BreakdownStatsInPbtxt(&hpaa, fstats, "filler_usage"); - - auto rstats = regions_.stats(); - BreakdownStatsInPbtxt(&hpaa, rstats, "region_usage"); +void StaticForwarder::DeleteSpan(Span* span) { Span::Delete(span); } - auto cstats = cache_.stats(); - // Everything in the filler came from the cache - - // adjust the totals so we see the amount used by the mutator. - cstats.system_bytes -= fstats.system_bytes; - BreakdownStatsInPbtxt(&hpaa, cstats, "cache_usage"); - - auto astats = alloc_.stats(); - // Everything in *all* components came from here - - // so again adjust the totals. 
- astats.system_bytes -= (fstats + rstats + cstats).system_bytes; - BreakdownStatsInPbtxt(&hpaa, astats, "alloc_usage"); - - filler_.PrintInPbtxt(&hpaa); - regions_.PrintInPbtxt(&hpaa); - cache_.PrintInPbtxt(&hpaa); - alloc_.PrintInPbtxt(&hpaa); - - // Use statistics - info_.PrintInPbtxt(&hpaa, "hpaa_stat"); - - hpaa.PrintI64("filler_donated_huge_pages", donated_huge_pages_.raw_num()); - } -} - -template -void *HugePageAwareAllocator::AllocAndReport(size_t bytes, size_t *actual, - size_t align) { - void *p = SystemAlloc(bytes, actual, align, tag); - if (p == nullptr) return p; - const PageId page = PageIdContaining(p); - const Length page_len = BytesToLengthFloor(*actual); - Static::pagemap().Ensure(page, page_len); - return p; +AddressRange StaticForwarder::AllocatePages(size_t bytes, size_t align, + MemoryTag tag) { + return tc_globals.system_allocator().Allocate(bytes, align, tag); } -void *HugePageAwareAllocator::MetaDataAlloc(size_t bytes) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { - return Static::arena().Alloc(bytes); +void StaticForwarder::Back(Range r) { + tc_globals.system_allocator().Back(r.start_addr(), r.in_bytes()); } -Length HugePageAwareAllocator::ReleaseAtLeastNPagesBreakingHugepages(Length n) { - // We desparately need to release memory, and are willing to - // compromise on hugepage usage. That means releasing from the filler. - return filler_.ReleasePages(n, absl::ZeroDuration(), /*hit_limit*/ true); +bool StaticForwarder::ReleasePages(Range r) { + return tc_globals.system_allocator().Release(r.start_addr(), r.in_bytes()); } -void HugePageAwareAllocator::UnbackWithoutLock(void *start, size_t length) { - pageheap_lock.Unlock(); - SystemRelease(start, length); - pageheap_lock.Lock(); -} +} // namespace huge_page_allocator_internal } // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.h b/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.h index c36a1e515e15..ea2bb02d9dcf 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.h +++ b/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,18 +18,29 @@ #include +#include "absl/base/attributes.h" +#include "absl/base/internal/cycleclock.h" +#include "absl/base/optimization.h" #include "absl/base/thread_annotations.h" +#include "absl/time/time.h" #include "tcmalloc/arena.h" +#include "tcmalloc/central_freelist.h" #include "tcmalloc/common.h" #include "tcmalloc/huge_allocator.h" #include "tcmalloc/huge_cache.h" #include "tcmalloc/huge_page_filler.h" +#include "tcmalloc/huge_page_subrelease.h" #include "tcmalloc/huge_pages.h" #include "tcmalloc/huge_region.h" +#include "tcmalloc/internal/allocation_guard.h" #include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/prefetch.h" +#include "tcmalloc/metadata_allocator.h" +#include "tcmalloc/metadata_object_allocator.h" #include "tcmalloc/page_allocator_interface.h" -#include "tcmalloc/page_heap_allocator.h" +#include "tcmalloc/pages.h" +#include "tcmalloc/parameters.h" #include "tcmalloc/span.h" #include "tcmalloc/stats.h" #include "tcmalloc/system-alloc.h" @@ -36,31 +48,130 @@ GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { +namespace huge_page_allocator_internal { bool decide_subrelease(); -// An implementation of the PageAllocator interface 
that is hugepage-efficent. +HugeRegionUsageOption huge_region_option(); +bool use_huge_region_more_often(); + +class StaticForwarder { + public: + // Runtime parameters. This can change between calls. + static absl::Duration filler_skip_subrelease_short_interval() { + return Parameters::filler_skip_subrelease_short_interval(); + } + static absl::Duration filler_skip_subrelease_long_interval() { + return Parameters::filler_skip_subrelease_long_interval(); + } + static absl::Duration cache_demand_release_short_interval() { + return Parameters::cache_demand_release_short_interval(); + } + static absl::Duration cache_demand_release_long_interval() { + return Parameters::cache_demand_release_long_interval(); + } + + static bool release_partial_alloc_pages() { + return Parameters::release_partial_alloc_pages(); + } + + static bool huge_region_demand_based_release() { + return Parameters::huge_region_demand_based_release(); + } + + static bool huge_cache_demand_based_release() { + return Parameters::huge_cache_demand_based_release(); + } + + static bool hpaa_subrelease() { return Parameters::hpaa_subrelease(); } + + // Arena state. + static Arena& arena(); + + // PageAllocator state. + + // Check page heap memory limit. `n` indicates the size of the allocation + // currently being made, which will not be included in the sampled memory heap + // for realized fragmentation estimation. + static void ShrinkToUsageLimit(Length n) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // PageMap state. + static void* GetHugepage(HugePage p); + [[nodiscard]] static bool Ensure(Range r) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + static void Set(PageId page, Span* span); + static void SetHugepage(HugePage p, void* pt); + + // SpanAllocator state. + static Span* NewSpan(Range r) +#ifdef TCMALLOC_INTERNAL_LEGACY_LOCKING + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) +#else + ABSL_LOCKS_EXCLUDED(pageheap_lock) +#endif // TCMALLOC_INTERNAL_LEGACY_LOCKING + ABSL_ATTRIBUTE_RETURNS_NONNULL; + static void DeleteSpan(Span* span) +#ifdef TCMALLOC_INTERNAL_LEGACY_LOCKING + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) +#endif // TCMALLOC_INTERNAL_LEGACY_LOCKING + ABSL_ATTRIBUTE_NONNULL(); + + // SystemAlloc state. + [[nodiscard]] static AddressRange AllocatePages(size_t bytes, size_t align, + MemoryTag tag); + static void Back(Range r); + [[nodiscard]] static bool ReleasePages(Range r); +}; + +struct HugePageAwareAllocatorOptions { + MemoryTag tag; + HugeRegionUsageOption use_huge_region_more_often = huge_region_option(); + HugePageFillerDenseTrackerType dense_tracker_type = + Parameters::dense_trackers_sorted_on_spans_allocated() + ? HugePageFillerDenseTrackerType::kSpansAllocated + : HugePageFillerDenseTrackerType::kLongestFreeRangeAndChunks; + absl::Duration huge_cache_time = Parameters::huge_cache_release_time(); +}; + +// An implementation of the PageAllocator interface that is hugepage-efficient. // Attempts to pack allocations into full hugepages wherever possible, // and aggressively returns empty ones to the system. +// +// Some notes: locking discipline here is a bit funny, because +// we want to *not* hold the pageheap lock while backing memory. +// +// We have here a collection of slightly different allocators each +// optimized for slightly different purposes. This file has two main purposes: +// - pick the right one for a given allocation +// - provide enough data to figure out what we picked last time! 
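Given the options struct and the Forwarder seam above, constructing the allocator declared just below reduces to filling in HugePageAwareAllocatorOptions and picking a Forwarder type. A minimal sketch follows; it is not the real bootstrap path, and ExampleConstruction is a hypothetical helper used only for illustration.

// Illustrative only: the options-based constructor with the default
// StaticForwarder. The real initialization happens inside tcmalloc's
// bootstrap and has locking/arena requirements this sketch ignores.
void ExampleConstruction() {
  HugePageAwareAllocatorOptions options;
  options.tag = MemoryTag::kNormal;
  // use_huge_region_more_often, dense_tracker_type and huge_cache_time keep
  // their defaults, which come from huge_region_option() and Parameters.
  HugePageAwareAllocator<StaticForwarder> allocator(options);
  (void)allocator;
  // A test could instead instantiate HugePageAwareAllocator<FakeForwarder>
  // with a double exposing the same static interface as StaticForwarder
  // (FakeForwarder is hypothetical, not part of this patch).
}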
+ +template class HugePageAwareAllocator final : public PageAllocatorInterface { public: - explicit HugePageAwareAllocator(MemoryTag tag); + explicit HugePageAwareAllocator(const HugePageAwareAllocatorOptions& options); ~HugePageAwareAllocator() override = default; // Allocate a run of "n" pages. Returns zero if out of memory. // Caller should not pass "n == 0" -- instead, n should have // been rounded up already. - Span* New(Length n) ABSL_LOCKS_EXCLUDED(pageheap_lock) override; + Span* New(Length n, SpanAllocInfo span_alloc_info) + ABSL_LOCKS_EXCLUDED(pageheap_lock) override; // As New, but the returned span is aligned to a -page boundary. // must be a power of two. - Span* NewAligned(Length n, Length align) + Span* NewAligned(Length n, Length align, SpanAllocInfo span_alloc_info) ABSL_LOCKS_EXCLUDED(pageheap_lock) override; // Delete the span "[p, p+n-1]". // REQUIRES: span was returned by earlier call to New() and // has not yet been deleted. +#ifdef TCMALLOC_INTERNAL_LEGACY_LOCKING void Delete(Span* span) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override; +#endif // TCMALLOC_INTERNAL_LEGACY_LOCKING + + void Delete(AllocationState s) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override; BackingStats stats() const ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override; @@ -77,52 +188,164 @@ class HugePageAwareAllocator final : public PageAllocatorInterface { // may also be larger than num_pages since page_heap might decide to // release one large range instead of fragmenting it into two // smaller released and unreleased ranges. - Length ReleaseAtLeastNPages(Length num_pages) + Length ReleaseAtLeastNPages(Length num_pages, PageReleaseReason reason) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override; - Length ReleaseAtLeastNPagesBreakingHugepages(Length n) + Length ReleaseAtLeastNPagesBreakingHugepages(Length n, + PageReleaseReason reason) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + PageReleaseStats GetReleaseStats() const + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override; + // Prints stats about the page heap to *out. - void Print(Printer* out) ABSL_LOCKS_EXCLUDED(pageheap_lock) override; + void Print(Printer& out) ABSL_LOCKS_EXCLUDED(pageheap_lock) override; // Print stats to *out, excluding long/likely uninteresting things // unless is true. - void Print(Printer* out, bool everything) ABSL_LOCKS_EXCLUDED(pageheap_lock); + void Print(Printer& out, bool everything) ABSL_LOCKS_EXCLUDED(pageheap_lock); - void PrintInPbtxt(PbtxtRegion* region) + void PrintInPbtxt(PbtxtRegion& region) ABSL_LOCKS_EXCLUDED(pageheap_lock) override; + BackingStats FillerStats() const + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + return filler_.stats(); + } + + BackingStats RegionsStats() const + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + return regions_.stats(); + } + + BackingStats CacheStats() const ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + return cache_.stats(); + } + HugeLength DonatedHugePages() const ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { return donated_huge_pages_; } + HugeLength RegionsFreeBacked() const + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + return regions_.free_backed(); + } + + // Number of pages that have been retained on huge pages by donations that did + // not reassemble by the time the larger allocation was deallocated. 
+ Length AbandonedPages() const ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + return abandoned_pages_; + } + const HugeCache* cache() const { return &cache_; } + const HugeRegionSet& region() const + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + return regions_; + }; + + // IsValidSizeClass verifies size class parameters from the HPAA perspective. + static bool IsValidSizeClass(size_t size, size_t pages); + + Forwarder& forwarder() { return forwarder_; } + private: - typedef HugePageFiller> FillerType; - FillerType filler_; + static constexpr Length kSmallAllocPages = kPagesPerHugePage / 2; - // Calls SystemRelease, but with dropping of pageheap_lock around the call. - static void UnbackWithoutLock(void* start, size_t length) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + class Unback final : public MemoryModifyFunction { + public: + explicit Unback(HugePageAwareAllocator& hpaa ABSL_ATTRIBUTE_LIFETIME_BOUND) + : hpaa_(hpaa) {} + + [[nodiscard]] bool operator()(Range r) override + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { +#ifndef NDEBUG + pageheap_lock.AssertHeld(); +#endif // NDEBUG + return hpaa_.forwarder_.ReleasePages(r); + } + + public: + HugePageAwareAllocator& hpaa_; + }; + + class UnbackWithoutLock final : public MemoryModifyFunction { + public: + explicit UnbackWithoutLock( + HugePageAwareAllocator& hpaa ABSL_ATTRIBUTE_LIFETIME_BOUND) + : hpaa_(hpaa) {} + + [[nodiscard]] bool operator()(Range r) override + ABSL_NO_THREAD_SAFETY_ANALYSIS { +#ifndef NDEBUG + pageheap_lock.AssertHeld(); +#endif // NDEBUG + pageheap_lock.Unlock(); + bool ret = hpaa_.forwarder_.ReleasePages(r); + pageheap_lock.Lock(); + return ret; + } + + public: + HugePageAwareAllocator& hpaa_; + }; + + ABSL_ATTRIBUTE_NO_UNIQUE_ADDRESS Forwarder forwarder_; + + Unback unback_ ABSL_GUARDED_BY(pageheap_lock); + UnbackWithoutLock unback_without_lock_ ABSL_GUARDED_BY(pageheap_lock); + + typedef HugePageFiller FillerType; + FillerType filler_ ABSL_GUARDED_BY(pageheap_lock); + + class VirtualMemoryAllocator final : public VirtualAllocator { + public: + explicit VirtualMemoryAllocator( + HugePageAwareAllocator& hpaa ABSL_ATTRIBUTE_LIFETIME_BOUND) + : hpaa_(hpaa) {} + + [[nodiscard]] AddressRange operator()(size_t bytes, size_t align) override + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + return hpaa_.AllocAndReport(bytes, align); + } + + private: + HugePageAwareAllocator& hpaa_; + }; - HugeRegionSet regions_; + class ArenaMetadataAllocator final : public MetadataAllocator { + public: + explicit ArenaMetadataAllocator( + HugePageAwareAllocator& hpaa ABSL_ATTRIBUTE_LIFETIME_BOUND) + : hpaa_(hpaa) {} - PageHeapAllocator tracker_allocator_; - PageHeapAllocator region_allocator_; + [[nodiscard]] void* operator()(size_t bytes) override { + return hpaa_.forwarder_.arena().Alloc(bytes); + } + + public: + HugePageAwareAllocator& hpaa_; + }; + + HugeRegionSet regions_ ABSL_GUARDED_BY(pageheap_lock); + + MetadataObjectAllocator tracker_allocator_ + ABSL_GUARDED_BY(pageheap_lock); + MetadataObjectAllocator region_allocator_ + ABSL_GUARDED_BY(pageheap_lock); FillerType::Tracker* GetTracker(HugePage p); void SetTracker(HugePage p, FillerType::Tracker* pt); - template - static void* AllocAndReport(size_t bytes, size_t* actual, size_t align) + AddressRange AllocAndReport(size_t bytes, size_t align) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); - static void* MetaDataAlloc(size_t bytes); - HugeAllocator alloc_; - HugeCache cache_; + + VirtualMemoryAllocator vm_allocator_ ABSL_GUARDED_BY(pageheap_lock); + 
ArenaMetadataAllocator metadata_allocator_ ABSL_GUARDED_BY(pageheap_lock); + HugeAllocator alloc_ ABSL_GUARDED_BY(pageheap_lock); + HugeCache cache_ ABSL_GUARDED_BY(pageheap_lock); // donated_huge_pages_ measures the number of huge pages contributed to the // filler from left overs of large huge page allocations. When the large @@ -130,30 +353,50 @@ class HugePageAwareAllocator final : public PageAllocatorInterface { // fully reassemble the address range (that is, the partial hugepage did not // get stuck in the filler). HugeLength donated_huge_pages_ ABSL_GUARDED_BY(pageheap_lock); + // abandoned_pages_ tracks the number of pages contributed to the filler after + // a donating allocation is deallocated but the entire huge page has not been + // reassembled. + Length abandoned_pages_ ABSL_GUARDED_BY(pageheap_lock); - void GetSpanStats(SmallSpanStats* small, LargeSpanStats* large, - PageAgeHistograms* ages); + void GetSpanStats(SmallSpanStats* small, LargeSpanStats* large) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); - PageId RefillFiller(Length n, bool* from_released) + PageId RefillFiller(Length n, SpanAllocInfo span_alloc_info, + bool* from_released) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); // Allocate the first from p, and contribute the rest to the filler. If // "donated" is true, the contribution will be marked as coming from the // tail of a multi-hugepage alloc. Returns the allocated section. - PageId AllocAndContribute(HugePage p, Length n, bool donated) + PageId AllocAndContribute(HugePage p, Length n, SpanAllocInfo span_alloc_info, + bool donated) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); // Helpers for New(). - Span* LockAndAlloc(Length n, bool* from_released); +#ifdef TCMALLOC_INTERNAL_LEGACY_LOCKING + using FinalizeType = Span*; +#else // !TCMALLOC_INTERNAL_LEGACY_LOCKING + struct FinalizeType { + Range r; + bool donated = false; + }; +#endif // !TCMALLOC_INTERNAL_LEGACY_LOCKING + + FinalizeType LockAndAlloc(Length n, SpanAllocInfo span_alloc_info, + bool* from_released); - Span* AllocSmall(Length n, bool* from_released) + FinalizeType AllocSmall(Length n, SpanAllocInfo span_alloc_info, + bool* from_released) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); - Span* AllocLarge(Length n, bool* from_released) + FinalizeType AllocLarge(Length n, SpanAllocInfo span_alloc_info, + bool* from_released) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); - Span* AllocEnormous(Length n, bool* from_released) + FinalizeType AllocEnormous(Length n, SpanAllocInfo span_alloc_info, + bool* from_released) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); - Span* AllocRawHugepages(Length n, bool* from_released) + FinalizeType AllocRawHugepages(Length n, SpanAllocInfo span_alloc_info, + bool* from_released) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); bool AddRegion() ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); @@ -161,13 +404,808 @@ class HugePageAwareAllocator final : public PageAllocatorInterface { void ReleaseHugepage(FillerType::Tracker* pt) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); // Return an allocation from a single hugepage. - void DeleteFromHugepage(FillerType::Tracker* pt, PageId p, Length n) + void DeleteFromHugepage(FillerType::Tracker* pt, Range r, bool might_abandon) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); // Finish an allocation request - give it a span and mark it in the pagemap. - Span* Finalize(Length n, PageId page); + FinalizeType Finalize(Range r); + + Span* Spanify(FinalizeType f); + + // Whether this HPAA should use subrelease. 
This delegates to the appropriate + // parameter depending whether this is for the cold heap or another heap. + bool hpaa_subrelease() const; }; +template +inline HugePageAwareAllocator::HugePageAwareAllocator( + const HugePageAwareAllocatorOptions& options) + : PageAllocatorInterface("HugePageAware", options.tag), + unback_(*this), + unback_without_lock_(*this), + filler_(options.dense_tracker_type, unback_, unback_without_lock_), + regions_(options.use_huge_region_more_often), + tracker_allocator_(forwarder_.arena()), + region_allocator_(forwarder_.arena()), + vm_allocator_(*this), + metadata_allocator_(*this), + alloc_(vm_allocator_, metadata_allocator_), + cache_(HugeCache{&alloc_, metadata_allocator_, unback_without_lock_, + options.huge_cache_time}) {} + +template +inline HugePageAwareAllocator::FillerType::Tracker* +HugePageAwareAllocator::GetTracker(HugePage p) { + void* v = forwarder_.GetHugepage(p); + FillerType::Tracker* pt = reinterpret_cast(v); + TC_ASSERT(pt == nullptr || pt->location() == p); + return pt; +} + +template +inline void HugePageAwareAllocator::SetTracker( + HugePage p, HugePageAwareAllocator::FillerType::Tracker* pt) { + forwarder_.SetHugepage(p, pt); +} + +template +inline PageId HugePageAwareAllocator::AllocAndContribute( + HugePage p, Length n, SpanAllocInfo span_alloc_info, bool donated) { + TC_CHECK_NE(p.start_addr(), nullptr); + FillerType::Tracker* pt = tracker_allocator_.New( + p, donated, absl::base_internal::CycleClock::Now()); + TC_ASSERT_GE(pt->longest_free_range(), n); + TC_ASSERT_EQ(pt->was_donated(), donated); + // if the page was donated, we track its size so that we can potentially + // measure it in abandoned_count_ once this large allocation gets deallocated. + if (pt->was_donated()) { + pt->set_abandoned_count(n); + } + PageId page = pt->Get(n).page; + TC_ASSERT_EQ(page, p.first_page()); + SetTracker(p, pt); + filler_.Contribute(pt, donated, span_alloc_info); + TC_ASSERT_EQ(pt->was_donated(), donated); + return page; +} + +template +inline PageId HugePageAwareAllocator::RefillFiller( + Length n, SpanAllocInfo span_alloc_info, bool* from_released) { + HugeRange r = cache_.Get(NHugePages(1), from_released); + if (!r.valid()) return PageId{0}; + // This is duplicate to Finalize, but if we need to break up + // hugepages to get to our usage limit it would be very bad to break + // up what's left of r after we allocate from there--while r is + // mostly empty, clearly what's left in the filler is too fragmented + // to be very useful, and we would rather release those + // pages. Otherwise, we're nearly guaranteed to release r (if n + // isn't very large), and the next allocation will just repeat this + // process. + forwarder_.ShrinkToUsageLimit(n); + return AllocAndContribute(r.start(), n, span_alloc_info, /*donated=*/false); +} + +template +inline typename HugePageAwareAllocator::FinalizeType +HugePageAwareAllocator::Finalize(Range r) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + TC_ASSERT_NE(r.p, PageId{0}); + info_.RecordAlloc(r); + forwarder_.ShrinkToUsageLimit(r.n); +#ifdef TCMALLOC_INTERNAL_LEGACY_LOCKING + // TODO(b/175334169): Lift Span creation out of LockAndAlloc. + Span* ret = forwarder_.NewSpan(r); + forwarder_.Set(r.p, ret); + TC_ASSERT(!ret->sampled()); + return ret; +#else + return {r, false}; +#endif +} + +// For anything <= half a huge page, we will unconditionally use the filler +// to pack it into a single page. If we need another page, that's fine. 
+template +inline typename HugePageAwareAllocator::FinalizeType +HugePageAwareAllocator::AllocSmall(Length n, + SpanAllocInfo span_alloc_info, + bool* from_released) { + auto [pt, page, released] = filler_.TryGet(n, span_alloc_info); + *from_released = released; + if (ABSL_PREDICT_TRUE(pt != nullptr)) { + return Finalize(Range(page, n)); + } + + page = RefillFiller(n, span_alloc_info, from_released); + if (ABSL_PREDICT_FALSE(page == PageId{0})) { + return {}; + } + return Finalize(Range(page, n)); +} + +template +inline typename HugePageAwareAllocator::FinalizeType +HugePageAwareAllocator::AllocLarge(Length n, + SpanAllocInfo span_alloc_info, + bool* from_released) { + // If it's an exact page multiple, just pull it from pages directly. + HugeLength hl = HLFromPages(n); + if (hl.in_pages() == n) { + return AllocRawHugepages(n, span_alloc_info, from_released); + } + + PageId page; + // If we fit in a single hugepage, try the Filler.p. + if (n < kPagesPerHugePage) { + auto [pt, page, released] = filler_.TryGet(n, span_alloc_info); + *from_released = released; + if (ABSL_PREDICT_TRUE(pt != nullptr)) { + return Finalize(Range(page, n)); + } + } + + // If we're using regions in this binary (see below comment), is + // there currently available space there? + if (regions_.MaybeGet(n, &page, from_released)) { + return Finalize(Range(page, n)); + } + + // We have two choices here: allocate a new region or go to + // hugepages directly (hoping that slack will be filled by small + // allocation.) The second strategy is preferable, as it's + // typically faster and usually more space efficient, but it's sometimes + // catastrophic. + // + // See https://github.com/google/tcmalloc/tree/master/docs/regions-are-not-optional.md + // + // So test directly if we're in the bad case--almost no binaries are. + // If not, just fall back to direct allocation (and hope we do hit that case!) + const Length slack = info_.slack(); + const Length donated = + regions_.UseHugeRegionMoreOften() ? abandoned_pages_ + slack : slack; + // Don't bother at all until the binary is reasonably sized. + if (donated < HLFromBytes(64 * 1024 * 1024).in_pages()) { + return AllocRawHugepages(n, span_alloc_info, from_released); + } + + // In the vast majority of binaries, we have many small allocations which + // will nicely fill slack. (Fleetwide, the average ratio is 15:1; only + // a handful of binaries fall below 1:1.) + // + // If we enable an experiment that tries to use huge regions more frequently, + // we skip the check. + const Length small = info_.small(); + if (slack < small && !regions_.UseHugeRegionMoreOften()) { + return AllocRawHugepages(n, span_alloc_info, from_released); + } + + // We couldn't allocate a new region. They're oversized, so maybe we'd get + // lucky with a smaller request? 
+ if (!AddRegion()) { + return AllocRawHugepages(n, span_alloc_info, from_released); + } + + TC_CHECK(regions_.MaybeGet(n, &page, from_released)); + return Finalize(Range(page, n)); +} + +template +inline typename HugePageAwareAllocator::FinalizeType +HugePageAwareAllocator::AllocEnormous(Length n, + SpanAllocInfo span_alloc_info, + bool* from_released) { + return AllocRawHugepages(n, span_alloc_info, from_released); +} + +template +inline typename HugePageAwareAllocator::FinalizeType +HugePageAwareAllocator::AllocRawHugepages( + Length n, SpanAllocInfo span_alloc_info, bool* from_released) { + HugeLength hl = HLFromPages(n); + + HugeRange r = cache_.Get(hl, from_released); + if (!r.valid()) return {}; + + // We now have a huge page range that covers our request. There + // might be some slack in it if n isn't a multiple of + // kPagesPerHugePage. Add the hugepage with slack to the filler, + // pretending the non-slack portion is a smaller allocation. + Length total = hl.in_pages(); + Length slack = total - n; + HugePage first = r.start(); + SetTracker(first, nullptr); + HugePage last = first + r.len() - NHugePages(1); + if (slack == Length(0)) { + SetTracker(last, nullptr); + return Finalize(Range(r.start().first_page(), total)); + } + + ++donated_huge_pages_; + + Length here = kPagesPerHugePage - slack; + TC_ASSERT_GT(here, Length(0)); + AllocAndContribute(last, here, span_alloc_info, /*donated=*/true); + auto span = Finalize(Range(r.start().first_page(), n)); +#ifdef TCMALLOC_INTERNAL_LEGACY_LOCKING + span->set_donated(/*value=*/true); + return span; +#else + span.donated = true; + return span; +#endif +} + +// public +template +inline Span* HugePageAwareAllocator::New( + Length n, SpanAllocInfo span_alloc_info) { + TC_CHECK_GT(n, Length(0)); + bool from_released; + Span* s = Spanify(LockAndAlloc(n, span_alloc_info, &from_released)); + if (s) { + // Prefetch for writing, as we anticipate using the memory soon. + PrefetchW(s->start_address()); + if (from_released) { + forwarder_.Back(Range(s->first_page(), s->num_pages())); + } + } + TC_ASSERT(!s || GetMemoryTag(s->start_address()) == tag_); + return s; +} + +template +inline typename HugePageAwareAllocator::FinalizeType +HugePageAwareAllocator::LockAndAlloc(Length n, + SpanAllocInfo span_alloc_info, + bool* from_released) { + PageHeapSpinLockHolder l; + // Our policy depends on size. For small things, we will pack them + // into single hugepages. + if (n <= kSmallAllocPages) { + return AllocSmall(n, span_alloc_info, from_released); + } + + // For anything too big for the filler, we use either a direct hugepage + // allocation, or possibly the regions if we are worried about slack. + if (n <= HugeRegion::size().in_pages()) { + return AllocLarge(n, span_alloc_info, from_released); + } + + // In the worst case, we just fall back to directly allocating a run + // of hugepages. + return AllocEnormous(n, span_alloc_info, from_released); +} + +// public +template +inline Span* HugePageAwareAllocator::NewAligned( + Length n, Length align, SpanAllocInfo span_alloc_info) { + if (align <= Length(1)) { + return New(n, span_alloc_info); + } + + // we can do better than this, but... + // TODO(b/134690769): support higher align. 
+ TC_CHECK_LE(align, kPagesPerHugePage); + bool from_released; + FinalizeType f; + { + PageHeapSpinLockHolder l; + f = AllocRawHugepages(n, span_alloc_info, &from_released); + } + Span* s = Spanify(f); + if (s && from_released) { + forwarder_.Back(Range(s->first_page(), s->num_pages())); + } + + TC_ASSERT(!s || GetMemoryTag(s->start_address()) == tag_); + return s; +} + +template +inline Span* HugePageAwareAllocator::Spanify(FinalizeType f) { +#ifdef TCMALLOC_INTERNAL_LEGACY_LOCKING + return f; +#else + if (ABSL_PREDICT_FALSE(f.r.p == PageId{0})) { + return nullptr; + } + + Span* s = forwarder_.NewSpan(f.r); + forwarder_.Set(f.r.p, s); + TC_ASSERT(!s->sampled()); + s->set_donated(f.donated); + return s; +#endif +} + +template +inline void HugePageAwareAllocator::DeleteFromHugepage( + FillerType::Tracker* pt, Range r, bool might_abandon) { + if (ABSL_PREDICT_TRUE(filler_.Put(pt, r) == nullptr)) { + // If this allocation had resulted in a donation to the filler, we record + // these pages as abandoned. + if (ABSL_PREDICT_FALSE(might_abandon)) { + TC_ASSERT(pt->was_donated()); + abandoned_pages_ += pt->abandoned_count(); + pt->set_abandoned(true); + } + return; + } + if (pt->was_donated()) { + --donated_huge_pages_; + if (pt->abandoned()) { + abandoned_pages_ -= pt->abandoned_count(); + pt->set_abandoned(false); + } + } else { + TC_ASSERT_EQ(pt->abandoned_count(), Length(0)); + } + ReleaseHugepage(pt); +} + +template +inline bool HugePageAwareAllocator::AddRegion() { + HugeRange r = alloc_.Get(HugeRegion::size()); + if (!r.valid()) return false; + HugeRegion* region = region_allocator_.New(r, unback_); + regions_.Contribute(region); + return true; +} + +#ifdef TCMALLOC_INTERNAL_LEGACY_LOCKING +template +inline void HugePageAwareAllocator::Delete(Span* span) { + TC_ASSERT(!span || GetMemoryTag(span->start_address()) == tag_); + PageId p = span->first_page(); + Length n = span->num_pages(); + + bool donated = span->donated(); + forwarder_.DeleteSpan(span); + + Delete(AllocationState{Range{p, n}, donated}); +} +#endif // TCMALLOC_INTERNAL_LEGACY_LOCKING + +template +inline void HugePageAwareAllocator::Delete(AllocationState s) { + const PageId p = s.r.p; + const HugePage hp = HugePageContaining(p); + const Length n = s.r.n; + info_.RecordFree(Range(p, n)); + + // Clear the descriptor of the page so a second pass through the same page + // could trigger the check on `span != nullptr` in do_free_pages. + forwarder_.Set(p, nullptr); + + const bool might_abandon = s.donated; + + // The tricky part, as with so many allocators: where did we come from? + // There are several possibilities. + FillerType::Tracker* pt = GetTracker(hp); + // a) We got packed by the filler onto a single hugepage - return our + // allocation to that hugepage in the filler. + if (ABSL_PREDICT_TRUE(pt != nullptr)) { + TC_ASSERT_EQ(hp, HugePageContaining(p + n - Length(1))); + DeleteFromHugepage(pt, Range(p, n), might_abandon); + return; + } + + // b) We got put into a region, possibly crossing hugepages - + // return our allocation to the region. + if (regions_.MaybePut(Range(p, n))) return; + + // c) we came straight from the HugeCache - return straight there. (We + // might have had slack put into the filler - if so, return that virtual + // allocation to the filler too!) 
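The a)/b)/c) comments above describe a fixed ownership probe order on deallocation. A minimal sketch of just that ordering, with stand-in callables for the filler, region, and cache lookups (illustrative names, not the allocator's interfaces):

  #include <functional>

  enum class Owner { kFiller, kRegion, kHugeCache };

  Owner ResolveOwner(const std::function<bool()>& filler_has_tracker,
                     const std::function<bool()>& region_claims_range) {
    // a) Packed onto a single hugepage by the filler.
    if (filler_has_tracker()) return Owner::kFiller;
    // b) Placed in a HugeRegion, possibly crossing hugepages.
    if (region_claims_range()) return Owner::kRegion;
    // c) Came straight from the HugeCache; donated slack is reclaimed as well.
    return Owner::kHugeCache;
  }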
+ TC_ASSERT_GE(n, kPagesPerHugePage); + HugeLength hl = HLFromPages(n); + HugePage last = hp + hl - NHugePages(1); + Length slack = hl.in_pages() - n; + if (slack == Length(0)) { + TC_ASSERT_EQ(GetTracker(last), nullptr); + } else { + pt = GetTracker(last); + TC_CHECK_NE(pt, nullptr); + TC_ASSERT(pt->was_donated()); + // We put the slack into the filler (see AllocEnormous.) + // Handle this page separately as a virtual allocation + // onto the last hugepage. + PageId virt = last.first_page(); + Length virt_len = kPagesPerHugePage - slack; + // We may have used the slack, which would prevent us from returning + // the entire range now. If filler returned a Tracker, we are fully empty. + if (filler_.Put(pt, Range(virt, virt_len)) == nullptr) { + // Last page isn't empty -- pretend the range was shorter. + --hl; + + // Note that we abandoned virt_len pages with pt. These can be reused for + // other allocations, but this can contribute to excessive slack in the + // filler. + abandoned_pages_ += pt->abandoned_count(); + pt->set_abandoned(true); + } else { + // Last page was empty - but if we sub-released it, we still + // have to split it off and release it independently.) + // + // We were able to reclaim the donated slack. + --donated_huge_pages_; + TC_ASSERT(!pt->abandoned()); + + if (pt->released()) { + --hl; + ReleaseHugepage(pt); + } else { + // Get rid of the tracker *object*, but not the *hugepage* (which is + // still part of our range.) + SetTracker(pt->location(), nullptr); + tracker_allocator_.Delete(pt); + } + } + } + // We release in the background task instead (i.e., ReleaseAtLeastNPages()) if + // the demand-based release is enabled. + cache_.Release( + {hp, hl}, + /*demand_based_unback=*/forwarder_.huge_cache_demand_based_release()); +} + +template +inline void HugePageAwareAllocator::ReleaseHugepage( + FillerType::Tracker* pt) { + TC_ASSERT_EQ(pt->used_pages(), Length(0)); + HugeRange r = {pt->location(), NHugePages(1)}; + SetTracker(pt->location(), nullptr); + + if (pt->released()) { + cache_.ReleaseUnbacked(r); + } else { + // We release in the background task instead (i.e., ReleaseAtLeastNPages()) + // if the demand-based release is enabled. + cache_.Release( + r, + /*demand_based_unback=*/forwarder_.huge_cache_demand_based_release()); + } + + tracker_allocator_.Delete(pt); +} + +// public +template +inline BackingStats HugePageAwareAllocator::stats() const { + BackingStats stats = alloc_.stats(); + const auto actual_system = stats.system_bytes; + stats += cache_.stats(); + stats += filler_.stats(); + stats += regions_.stats(); + // the "system" (total managed) byte count is wildly double counted, + // since it all comes from HugeAllocator but is then managed by + // cache/regions/filler. Adjust for that. 
+ stats.system_bytes = actual_system; + return stats; +} + +// public +template +inline void HugePageAwareAllocator::GetSmallSpanStats( + SmallSpanStats* result) { + GetSpanStats(result, nullptr); +} + +// public +template +inline void HugePageAwareAllocator::GetLargeSpanStats( + LargeSpanStats* result) { + GetSpanStats(nullptr, result); +} + +template +inline void HugePageAwareAllocator::GetSpanStats( + SmallSpanStats* small, LargeSpanStats* large) { + if (small != nullptr) { + *small = SmallSpanStats(); + } + if (large != nullptr) { + *large = LargeSpanStats(); + } + + alloc_.AddSpanStats(small, large); + filler_.AddSpanStats(small, large); + regions_.AddSpanStats(small, large); + cache_.AddSpanStats(small, large); +} + +// public +template +inline Length HugePageAwareAllocator::ReleaseAtLeastNPages( + Length num_pages, PageReleaseReason reason) { + // We use demand-based release for the background release but not for the + // other cases (e.g., limit hit). We achieve this by configuring the intervals + // and hit_limit accordingly. + SkipSubreleaseIntervals cache_release_intervals; + if (reason == PageReleaseReason::kProcessBackgroundActions) { + cache_release_intervals.short_interval = + forwarder_.cache_demand_release_short_interval(); + cache_release_intervals.long_interval = + forwarder_.cache_demand_release_long_interval(); + } + bool hit_limit = (reason == PageReleaseReason::kSoftLimitExceeded || + reason == PageReleaseReason::kHardLimitExceeded); + Length released; + if (forwarder_.huge_cache_demand_based_release()) { + released += + cache_ + .ReleaseCachedPagesByDemand(HLFromPages(num_pages), + cache_release_intervals, hit_limit) + .in_pages(); + } else { + released += cache_.ReleaseCachedPages(HLFromPages(num_pages)).in_pages(); + } + + // Release all backed-but-free hugepages from HugeRegion. + // TODO(b/199203282): We release all the free hugepages from HugeRegions when + // the experiment is enabled. We can also explore releasing only a desired + // number of pages. + if (regions_.UseHugeRegionMoreOften()) { + if (forwarder_.huge_region_demand_based_release()) { + Length desired = released > num_pages ? Length(0) : num_pages - released; + released += regions_.ReleasePagesByPeakDemand( + desired, + SkipSubreleaseIntervals{ + .short_interval = + forwarder_.filler_skip_subrelease_short_interval(), + .long_interval = + forwarder_.filler_skip_subrelease_long_interval()}, + /*hit_limit*/ false); + } else { + released += regions_.ReleasePages(kFractionToReleaseFromRegion); + } + } + + // This is our long term plan but in current state will lead to insufficient + // THP coverage. It is however very useful to have the ability to turn this on + // for testing. + // TODO(b/134690769): make this work, remove the flag guard. 
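The reason-to-parameters mapping above (demand intervals only for background release, hit_limit only when a limit was exceeded) can be sketched standalone; the struct and nanosecond fields here are illustrative assumptions:

  #include <cstdint>

  enum class Reason { kReleaseMemoryToSystem, kBackground, kSoftLimit, kHardLimit };

  struct DemandReleaseConfig {
    int64_t short_interval_ns = 0;
    int64_t long_interval_ns = 0;
    bool hit_limit = false;
  };

  DemandReleaseConfig ConfigureCacheRelease(Reason reason, int64_t short_ns,
                                            int64_t long_ns) {
    DemandReleaseConfig cfg;
    // Only background releases consult recent demand; limit hits must release.
    if (reason == Reason::kBackground) {
      cfg.short_interval_ns = short_ns;
      cfg.long_interval_ns = long_ns;
    }
    cfg.hit_limit = (reason == Reason::kSoftLimit || reason == Reason::kHardLimit);
    return cfg;
  }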
+ if (hpaa_subrelease()) { + if (released < num_pages) { + released += filler_.ReleasePages( + num_pages - released, + SkipSubreleaseIntervals{ + .short_interval = + forwarder_.filler_skip_subrelease_short_interval(), + .long_interval = + forwarder_.filler_skip_subrelease_long_interval()}, + forwarder_.release_partial_alloc_pages(), + /*hit_limit*/ false); + } + } + + info_.RecordRelease(num_pages, released, reason); + return released; +} + +inline static double BytesToMiB(size_t bytes) { + const double MiB = 1048576.0; + return bytes / MiB; +} + +inline static void BreakdownStats(Printer& out, const BackingStats& s, + const char* label) { + out.printf("%s %6.1f MiB used, %6.1f MiB free, %6.1f MiB unmapped\n", label, + BytesToMiB(s.system_bytes - s.free_bytes - s.unmapped_bytes), + BytesToMiB(s.free_bytes), BytesToMiB(s.unmapped_bytes)); +} + +inline static void BreakdownStatsInPbtxt(PbtxtRegion& hpaa, + const BackingStats& s, + const char* key) { + auto usage = hpaa.CreateSubRegion(key); + usage.PrintI64("used", s.system_bytes - s.free_bytes - s.unmapped_bytes); + usage.PrintI64("free", s.free_bytes); + usage.PrintI64("unmapped", s.unmapped_bytes); +} + +// public +template +inline void HugePageAwareAllocator::Print(Printer& out) { + Print(out, true); +} + +template +inline void HugePageAwareAllocator::Print(Printer& out, + bool everything) { + SmallSpanStats small; + LargeSpanStats large; + BackingStats bstats; + PageHeapSpinLockHolder l; + bstats = stats(); + GetSpanStats(&small, &large); + PrintStats("HugePageAware", out, bstats, small, large, everything); + out.printf( + "\nHuge page aware allocator components:\n" + "------------------------------------------------\n"); + out.printf("HugePageAware: breakdown of used / free / unmapped space:\n"); + + auto fstats = filler_.stats(); + BreakdownStats(out, fstats, "HugePageAware: filler "); + + auto rstats = regions_.stats(); + BreakdownStats(out, rstats, "HugePageAware: region "); + + auto cstats = cache_.stats(); + // Everything in the filler came from the cache - + // adjust the totals so we see the amount used by the mutator. + cstats.system_bytes -= fstats.system_bytes; + BreakdownStats(out, cstats, "HugePageAware: cache "); + + auto astats = alloc_.stats(); + // Everything in *all* components came from here - + // so again adjust the totals. + astats.system_bytes -= (fstats + rstats + cstats).system_bytes; + BreakdownStats(out, astats, "HugePageAware: alloc "); + out.printf("\n"); + + out.printf( + "HugePageAware: filler donations %zu (%zu pages from abandoned " + "donations)\n", + donated_huge_pages_.raw_num(), abandoned_pages_.raw_num()); + + // Component debug output + // Filler is by far the most important; print (some) of it + // unconditionally. + filler_.Print(out, everything); + out.printf("\n"); + if (everything) { + regions_.Print(out); + out.printf("\n"); + cache_.Print(out); + alloc_.Print(out); + out.printf("\n"); + + // Use statistics + info_.Print(out); + } + + out.printf("PARAMETER use_huge_region_more_often %d\n", + regions_.UseHugeRegionMoreOften() ? 1 : 0); + out.printf("PARAMETER hpaa_subrelease %d\n", hpaa_subrelease() ? 
1 : 0); +} + +template +inline void HugePageAwareAllocator::PrintInPbtxt( + PbtxtRegion& region) { + SmallSpanStats small; + LargeSpanStats large; + PageHeapSpinLockHolder l; + GetSpanStats(&small, &large); + PrintStatsInPbtxt(region, small, large); + { + auto hpaa = region.CreateSubRegion("huge_page_allocator"); + hpaa.PrintBool("using_hpaa", true); + hpaa.PrintBool("using_hpaa_subrelease", hpaa_subrelease()); + hpaa.PrintBool("use_huge_region_more_often", + regions_.UseHugeRegionMoreOften()); + + // Fill HPAA Usage + auto fstats = filler_.stats(); + BreakdownStatsInPbtxt(hpaa, fstats, "filler_usage"); + + auto rstats = regions_.stats(); + BreakdownStatsInPbtxt(hpaa, rstats, "region_usage"); + + auto cstats = cache_.stats(); + // Everything in the filler came from the cache - + // adjust the totals so we see the amount used by the mutator. + cstats.system_bytes -= fstats.system_bytes; + BreakdownStatsInPbtxt(hpaa, cstats, "cache_usage"); + + auto astats = alloc_.stats(); + // Everything in *all* components came from here - + // so again adjust the totals. + astats.system_bytes -= (fstats + rstats + cstats).system_bytes; + + BreakdownStatsInPbtxt(hpaa, astats, "alloc_usage"); + + filler_.PrintInPbtxt(hpaa); + regions_.PrintInPbtxt(hpaa); + cache_.PrintInPbtxt(hpaa); + alloc_.PrintInPbtxt(hpaa); + + // Use statistics + info_.PrintInPbtxt(hpaa, "hpaa_stat"); + + hpaa.PrintI64("filler_donated_huge_pages", donated_huge_pages_.raw_num()); + hpaa.PrintI64("filler_abandoned_pages", abandoned_pages_.raw_num()); + } +} + +template +inline AddressRange HugePageAwareAllocator::AllocAndReport( + size_t bytes, size_t align) { + auto ret = forwarder_.AllocatePages(bytes, align, tag_); + if (ret.ptr == nullptr) return ret; + const PageId page = PageIdContaining(ret.ptr); + const Length page_len = BytesToLengthFloor(ret.bytes); + TC_CHECK(forwarder_.Ensure(Range(page, page_len)), + "Is something limiting virtual address space?"); + return ret; +} + +template +inline Length +HugePageAwareAllocator::ReleaseAtLeastNPagesBreakingHugepages( + Length n, PageReleaseReason reason) { + // We desperately need to release memory, and are willing to + // compromise on hugepage usage. That means releasing from the region and + // filler. + + Length released; + + if (forwarder_.huge_cache_demand_based_release()) { + released += cache_ + .ReleaseCachedPagesByDemand(HLFromPages(n), + SkipSubreleaseIntervals{}, + /*hit_limit=*/true) + .in_pages(); + } else { + released += cache_.ReleaseCachedPages(HLFromPages(n)).in_pages(); + } + + // We try to release as many free hugepages from HugeRegion as possible. + if (forwarder_.huge_region_demand_based_release()) { + released += regions_.ReleasePagesByPeakDemand( + n - released, SkipSubreleaseIntervals{}, /*hit_limit=*/true); + } else { + released += regions_.ReleasePages(/*release_fraction=*/1.0); + } + + if (released >= n) { + info_.RecordRelease(n, released, reason); + return released; + } + + released += filler_.ReleasePages(n - released, SkipSubreleaseIntervals{}, + /*release_partial_alloc_pages=*/false, + /*hit_limit=*/true); + + info_.RecordRelease(n, released, reason); + return released; +} + +template +inline PageReleaseStats HugePageAwareAllocator::GetReleaseStats() + const { + return info_.GetRecordedReleases(); +} + +template +bool HugePageAwareAllocator::IsValidSizeClass(size_t size, + size_t pages) { + // We assume that dense spans won't be donated. 
+ size_t objects = Length(pages).in_bytes() / size; + if (objects > central_freelist_internal::kFewObjectsAllocMaxLimit && + Length(pages) > kSmallAllocPages) { + return false; + } + return true; +} + +template +inline bool HugePageAwareAllocator::hpaa_subrelease() const { + if (tag_ == MemoryTag::kCold) { + return true; + } else { + return forwarder_.hpaa_subrelease(); + } +} + +} // namespace huge_page_allocator_internal + +using HugePageAwareAllocator = + huge_page_allocator_internal::HugePageAwareAllocator< + huge_page_allocator_internal::StaticForwarder>; + } // namespace tcmalloc_internal } // namespace tcmalloc GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator_fuzz.cc b/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator_fuzz.cc new file mode 100644 index 000000000000..60158b0ca67a --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator_fuzz.cc @@ -0,0 +1,550 @@ +// Copyright 2022 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "fuzztest/fuzztest.h" +#include "absl/base/attributes.h" +#include "absl/log/check.h" +#include "absl/time/time.h" +#include "tcmalloc/common.h" +#include "tcmalloc/huge_page_aware_allocator.h" +#include "tcmalloc/huge_page_filler.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/huge_region.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/mock_huge_page_static_forwarder.h" +#include "tcmalloc/pages.h" +#include "tcmalloc/sizemap.h" +#include "tcmalloc/span.h" +#include "tcmalloc/stats.h" + +namespace tcmalloc::tcmalloc_internal { +namespace { +using huge_page_allocator_internal::FakeStaticForwarder; +using huge_page_allocator_internal::HugePageAwareAllocator; +using huge_page_allocator_internal::HugePageAwareAllocatorOptions; + +class FakeStaticForwarderWithUnback : public FakeStaticForwarder { + public: + bool ReleasePages(Range r) { + pending_release_ += r.n; + release_callback_(); + pending_release_ -= r.n; + + return FakeStaticForwarder::ReleasePages(r); + } + + Length pending_release_; + std::function release_callback_; +}; + +void FuzzHPAA(const std::string& s) { + const char* data = s.data(); + size_t size = s.size(); + + if (size < 13 || size > 100000) { + // size < 13 for needing some entropy to initialize huge page aware + // allocator. + // + // size > 100000 for avoiding overly large inputs given we do extra + // checking. + return; + } + +#if ABSL_HAVE_ADDRESS_SANITIZER + // Since asan introduces runtime overhead, limit size of fuzz targets further. + if (size > 10000) { + return; + } +#endif + + // We interpret data as a small DSL for exploring the state space of + // HugePageAwareAllocator. + // + // [0] - Memory tag. + // [1] - HugeRegionsMode. + // [2] - HugeCache release time + // [3:4] - Reserved. + // [5] - Dense tracker type + // [6:12] - Reserved. 
+ // + // TODO(b/271282540): Convert these to strongly typed fuzztest parameters. + // + // Afterwards, we read 9 bytes at a time until the buffer is exhausted. + // [i + 0] - Specifies an operation to perform on the allocator + // [i + 1, i + 8] - Specifies an integer. We use this as a source of + // deterministic entropy to allow inputs to be replayed. + // For example, this input can provide a Length to + // allocate, or the index of the previous allocation to + // deallocate. + + constexpr MemoryTag kTagOptions[] = { + MemoryTag::kSampled, MemoryTag::kNormalP0, MemoryTag::kNormalP1, + MemoryTag::kNormal, MemoryTag::kCold}; + constexpr int kTagSize = sizeof(kTagOptions) / sizeof(MemoryTag); + static_assert(kTagSize > 0); + MemoryTag tag = kTagOptions[static_cast(data[0]) % kTagSize]; + // Use kNormalP1 memory tag only if we have more than one NUMA partitions. + tag = (kNumaPartitions == 1 && tag == MemoryTag::kNormalP1) + ? MemoryTag::kNormalP0 + : tag; + + const HugeRegionUsageOption huge_region_option = + static_cast(data[1]) >= 128 + ? HugeRegionUsageOption::kDefault + : HugeRegionUsageOption::kUseForAllLargeAllocs; + + const HugePageFillerDenseTrackerType dense_tracker_type = + static_cast(data[5]) >= 128 + ? HugePageFillerDenseTrackerType::kLongestFreeRangeAndChunks + : HugePageFillerDenseTrackerType::kSpansAllocated; + + const int32_t huge_cache_release_s = std::max(data[2], 1); + + // data[6:12] - Reserve additional bytes for any features we might want to add + // in the future. + data += 13; + size -= 13; + + // HugePageAwareAllocator can't be destroyed cleanly, so we store a pointer + // to one and construct in place. + void* p = + malloc(sizeof(HugePageAwareAllocator)); + HugePageAwareAllocatorOptions options; + options.tag = tag; + options.use_huge_region_more_often = huge_region_option; + options.huge_cache_time = absl::Seconds(huge_cache_release_s); + options.dense_tracker_type = dense_tracker_type; + HugePageAwareAllocator* allocator; + allocator = + new (p) HugePageAwareAllocator(options); + auto& forwarder = allocator->forwarder(); + + struct SpanInfo { + Span* span; + size_t objects_per_span; + }; + std::vector allocs; + Length allocated; + PageReleaseStats expected_stats; + + std::vector> reentrant; + std::string output; + output.resize(1 << 20); + + auto run_dsl = [&](const char* data, size_t size) { + for (size_t i = 0; i + 9 <= size; i += 9) { + const uint16_t op = data[i]; + uint64_t value; + memcpy(&value, &data[i + 1], sizeof(value)); + + switch (op & 0x7) { + case 0: { + // Aligned allocate. We divide up our random value by: + // + // value[0:15] - We choose a Length to allocate. + // value[16:31] - We select num_to_objects, i.e. the number of + // objects to allocate. + // value[32:47] - Alignment. + // value[48] - Should we use aligned allocate? + // value[49] - Is the span sparsely- or densely-accessed? + // value[63:50] - Reserved. + Length length(std::clamp(value & 0xFFFF, 1, + kPagesPerHugePage.raw_num() - 1)); + size_t num_objects = std::max((value >> 16) & 0xFFFF, 1); + size_t object_size = length.in_bytes() / num_objects; + const bool use_aligned = ((value >> 48) & 0x1) == 0; + const Length align( + use_aligned ? std::clamp((value >> 32) & 0xFFFF, 1, + kPagesPerHugePage.raw_num() - 1) + : 1); + + AccessDensityPrediction density = + ((value >> 49) & 0x1) == 0 ? AccessDensityPrediction::kSparse + : AccessDensityPrediction::kDense; + if (object_size > kMaxSize || align > Length(1)) { + // Truncate to a single object. 
+ num_objects = 1; + // TODO(b/283843066): Revisit this once we have fluid + // partitioning. + density = AccessDensityPrediction::kSparse; + } else if (!SizeMap::IsValidSizeClass(object_size, length.raw_num(), + kMinObjectsToMove)) { + // This is an invalid size class, so skip it. + break; + } + if (dense_tracker_type == + HugePageFillerDenseTrackerType::kSpansAllocated && + density == AccessDensityPrediction::kDense) { + length = Length(1); + } + + // Allocation is too big for filler if we try to allocate > + // kPagesPerHugePage / 2 run of pages. The allocations may go to + // HugeRegion and that might lead to donations with kSparse + // density. + if (length > kPagesPerHugePage / 2) { + density = AccessDensityPrediction::kSparse; + } + + Span* s; + SpanAllocInfo alloc_info = {.objects_per_span = num_objects, + .density = density}; + TC_CHECK( + dense_tracker_type == + HugePageFillerDenseTrackerType::kLongestFreeRangeAndChunks || + density == AccessDensityPrediction::kSparse || + length == Length(1)); + if (use_aligned) { + s = allocator->NewAligned(length, align, alloc_info); + } else { + s = allocator->New(length, alloc_info); + } + TC_CHECK_NE(s, nullptr); + TC_CHECK_GE(s->num_pages().raw_num(), length.raw_num()); + + allocs.push_back(SpanInfo{s, num_objects}); + + allocated += s->num_pages(); + break; + } + case 1: { + // Deallocate. We divide up our random value by: + // + // value - We choose index in allocs to deallocate a span. + + if (allocs.empty()) break; + + const size_t pos = value % allocs.size(); + std::swap(allocs[pos], allocs[allocs.size() - 1]); + + SpanInfo span_info = allocs[allocs.size() - 1]; + allocs.resize(allocs.size() - 1); + allocated -= span_info.span->num_pages(); + + { +#ifdef TCMALLOC_INTERNAL_LEGACY_LOCKING + PageHeapSpinLockHolder l; + allocator->Delete(span_info.span); +#else + PageAllocatorInterface::AllocationState a{ + Range(span_info.span->first_page(), + span_info.span->num_pages()), + span_info.span->donated(), + }; + allocator->forwarder().DeleteSpan(span_info.span); + PageHeapSpinLockHolder l; + allocator->Delete(a); +#endif // TCMALLOC_INTERNAL_LEGACY_LOCKING + } + break; + } + case 2: { + // Release pages. We divide up our random value by: + // + // value[7:0] - Choose number of pages to release. + // value[8:9] - Choose page release reason. + // value[63:9] - Reserved. 
+ Length desired(value & 0x00FF); + PageReleaseReason reason; + switch ((value >> 8) & 0x3) { + case 0: + reason = PageReleaseReason::kReleaseMemoryToSystem; + break; + case 1: + reason = PageReleaseReason::kProcessBackgroundActions; + break; + case 2: + reason = PageReleaseReason::kSoftLimitExceeded; + break; + case 3: + reason = PageReleaseReason::kHardLimitExceeded; + break; + } + Length released; + PageReleaseStats actual_stats; + { + PageHeapSpinLockHolder l; + released = allocator->ReleaseAtLeastNPages(desired, reason); + actual_stats = allocator->GetReleaseStats(); + } + expected_stats.total += released; + switch (reason) { + case PageReleaseReason::kReleaseMemoryToSystem: + expected_stats.release_memory_to_system += released; + break; + case PageReleaseReason::kProcessBackgroundActions: + expected_stats.process_background_actions += released; + break; + case PageReleaseReason::kSoftLimitExceeded: + expected_stats.soft_limit_exceeded += released; + break; + case PageReleaseReason::kHardLimitExceeded: + expected_stats.hard_limit_exceeded += released; + break; + } + TC_CHECK_EQ(actual_stats, expected_stats); + + break; + } + case 3: { + // Release pages by breaking hugepages. We divide up our random + // value by: + // + // value[15:0] - Choose number of pages to release. + // value[16] - Choose page release reason. SoftLimitExceeded if + // zero, HardLimitExceeded otherwise. + // value[63:17] - Reserved. + Length desired(value & 0xFFFF); + const PageReleaseReason reason = + ((value & (uint64_t{1} << 16)) == 0) + ? PageReleaseReason::kSoftLimitExceeded + : PageReleaseReason::kHardLimitExceeded; + Length released; + size_t releasable_bytes; + PageReleaseStats actual_stats; + { + PageHeapSpinLockHolder l; + releasable_bytes = allocator->FillerStats().free_bytes + + allocator->RegionsFreeBacked().in_bytes() + + allocator->CacheStats().free_bytes; + released = allocator->ReleaseAtLeastNPagesBreakingHugepages(desired, + reason); + actual_stats = allocator->GetReleaseStats(); + } + + if (forwarder.release_succeeds()) { + const size_t min_released = + std::min(desired.in_bytes(), releasable_bytes); + TC_CHECK_GE(released.in_bytes(), min_released); + } else { + // TODO(b/271282540): This is not strict equality due to + // HugePageFiller's unmapping_unaccounted_ state. Narrow this + // bound. + TC_CHECK_GE(released.in_bytes(), 0); + } + + expected_stats.total += released; + if (reason == PageReleaseReason::kSoftLimitExceeded) { + expected_stats.soft_limit_exceeded += released; + } else { + expected_stats.hard_limit_exceeded += released; + } + + TC_CHECK_EQ(actual_stats, expected_stats); + + break; + } + case 4: { + // Gather stats in pbtxt format. + // + // value is unused. + Printer p(&output[0], output.size()); + { + PbtxtRegion region(p, kTop); + allocator->PrintInPbtxt(region); + } + CHECK_LE(p.SpaceRequired(), output.size()); + break; + } + case 5: { + // Print stats. + // + // value[0]: Choose if we print everything. + // value[63:1]: Reserved. + Printer p(&output[0], output.size()); + bool everything = (value % 2 == 0); + allocator->Print(p, everything); + break; + } + case 6: { + // Gather and check stats. + // + // value is unused. + BackingStats stats; + { + PageHeapSpinLockHolder l; + stats = allocator->stats(); + } + uint64_t used_bytes = + stats.system_bytes - stats.free_bytes - stats.unmapped_bytes; + TC_CHECK_EQ(used_bytes, allocated.in_bytes() + + forwarder.pending_release_.in_bytes()); + break; + } + case 7: { + // Change a runtime parameter. 
+ // + // value[0:3] - Select parameter + // value[4:7] - Reserved + // value[8:63] - The value + const uint64_t actual_value = value >> 8; + switch (value & 0xF) { + case 0: + forwarder.set_filler_skip_subrelease_interval( + absl::Nanoseconds(actual_value)); + forwarder.set_filler_skip_subrelease_short_interval( + absl::ZeroDuration()); + forwarder.set_filler_skip_subrelease_long_interval( + absl::ZeroDuration()); + break; + case 1: + forwarder.set_filler_skip_subrelease_interval( + absl::ZeroDuration()); + forwarder.set_filler_skip_subrelease_short_interval( + absl::Nanoseconds(actual_value)); + break; + case 2: + forwarder.set_filler_skip_subrelease_interval( + absl::ZeroDuration()); + forwarder.set_filler_skip_subrelease_long_interval( + absl::Nanoseconds(actual_value)); + break; + case 3: + forwarder.set_release_partial_alloc_pages(actual_value & 0x1); + break; + case 4: + forwarder.set_hpaa_subrelease(actual_value & 0x1); + break; + case 5: + forwarder.set_release_succeeds(actual_value & 0x1); + break; + case 6: + forwarder.set_huge_region_demand_based_release(actual_value & + 0x1); + break; + case 7: { + // Not quite a runtime parameter: Interpret actual_value as a + // subprogram in our dsl. + size_t subprogram = std::min(size - i - 9, actual_value); + reentrant.emplace_back(data + i + 9, subprogram); + i += size; + break; + } + case 8: { + // Flips the settings used by demand-based release in HugeCache: + // actual_value[0] - release enabled + // actual_value[1:16] - interval_1 + // actual_value[17:32] - interval_2 + forwarder.set_huge_cache_demand_based_release(actual_value & 0x1); + if (forwarder.huge_cache_demand_based_release()) { + const uint64_t interval_1 = (actual_value >> 1) & 0xffff; + const uint64_t interval_2 = (actual_value >> 17) & 0xffff; + forwarder.set_cache_demand_release_long_interval( + interval_1 >= interval_2 ? absl::Nanoseconds(interval_1) + : absl::Nanoseconds(interval_2)); + forwarder.set_cache_demand_release_short_interval( + interval_1 >= interval_2 ? absl::Nanoseconds(interval_2) + : absl::Nanoseconds(interval_1)); + } + break; + } + } + break; + } + } + } + }; + + forwarder.release_callback_ = [&]() { + if (tcmalloc::tcmalloc_internal::pageheap_lock.IsHeld()) { + // This permits a slight degree of nondeterminism when linked against + // TCMalloc for the real memory allocator, as a background thread could + // also be holding the lock. Nevertheless, HPAA doesn't make it clear + // when we are releasing with/without the pageheap_lock. + // + // TODO(b/73749855): When all release paths unconditionally release the + // lock, remove this check and take the lock for an instant to ensure it + // can be taken. + return; + } + + if (reentrant.empty()) { + return; + } + + ABSL_CONST_INIT static int depth = 0; + if (depth >= 5) { + return; + } + + auto [data, size] = reentrant.back(); + reentrant.pop_back(); + + depth++; + run_dsl(data, size); + depth--; + }; + + run_dsl(data, size); + + // Stop recursing, since allocator->Delete below might cause us to "release" + // more pages to the system. + reentrant.clear(); + + // Clean up. 
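Each record consumed by the DSL above is nine bytes: an opcode byte followed by eight bytes of replayable entropy read with memcpy. A standalone sketch of the decoding, using a made-up record and the op-2 (release pages) bit layout documented above:

  #include <cstdint>
  #include <cstdio>
  #include <cstring>

  int main() {
    const unsigned char record[9] = {0x02, 0x40, 0x01, 0, 0, 0, 0, 0, 0};
    const unsigned op = record[0] & 0x7;             // operation selector, 0..7
    uint64_t value;
    std::memcpy(&value, &record[1], sizeof(value));  // host-endian entropy
    const uint64_t pages = value & 0x00FF;           // value[7:0]
    const uint64_t reason = (value >> 8) & 0x3;      // value[9:8]
    std::printf("op=%u pages=%llu reason=%llu\n", op,
                (unsigned long long)pages, (unsigned long long)reason);
  }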
+ const PageReleaseStats final_stats = [&] { + for (auto span_info : allocs) { + Span* span = span_info.span; + allocated -= span->num_pages(); +#ifdef TCMALLOC_INTERNAL_LEGACY_LOCKING + PageHeapSpinLockHolder l; + allocator->Delete(span_info.span); +#else + PageAllocatorInterface::AllocationState a{ + Range(span_info.span->first_page(), span_info.span->num_pages()), + span_info.span->donated(), + }; + allocator->forwarder().DeleteSpan(span_info.span); + PageHeapSpinLockHolder l; + allocator->Delete(a); +#endif // TCMALLOC_INTERNAL_LEGACY_LOCKING + } + + PageHeapSpinLockHolder l; + return allocator->GetReleaseStats(); + }(); + + TC_CHECK_EQ(allocated.in_bytes(), 0); + TC_CHECK_EQ(final_stats, expected_stats); + + free(allocator); +} + +FUZZ_TEST(HugePageAwareAllocatorTest, FuzzHPAA) + ; + +TEST(HugePageAwareAllocatorTest, FuzzHPAARegression) { + FuzzHPAA( + "\370n,,,\3708\304\320\327\311[" + "PXG\"Y\037\216\366\366b\216\340\375\332\362"); +} + +TEST(HugePageAwareAllocatorTest, FuzzHPAARegression2) { + FuzzHPAA( + "\376\006\366>\354{{{{{\347\242\2048:\204\177{{" + "9\376d\027\224\312\257\276\252\026?\013\010\010"); +} + +} // namespace +} // namespace tcmalloc::tcmalloc_internal diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator_test.cc b/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator_test.cc index 83ae930e44a6..247502d5b876 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator_test.cc @@ -14,75 +14,130 @@ #include "tcmalloc/huge_page_aware_allocator.h" -#include +#include +#include #include #include #include -#include +#include #include +#include #include #include -#include +#include #include #include // NOLINT(build/c++11) #include #include +#include "benchmark/benchmark.h" #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "absl/base/attributes.h" -#include "absl/base/const_init.h" #include "absl/base/internal/spinlock.h" -#include "absl/base/internal/sysinfo.h" +#include "absl/base/nullability.h" +#include "absl/base/optimization.h" +#include "absl/base/thread_annotations.h" #include "absl/container/flat_hash_map.h" -#include "absl/flags/flag.h" +#include "absl/meta/type_traits.h" +#include "absl/random/bit_gen_ref.h" +#include "absl/random/distributions.h" #include "absl/random/random.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" -#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" #include "absl/synchronization/barrier.h" -#include "absl/synchronization/mutex.h" +#include "absl/time/clock.h" #include "absl/time/time.h" -#include "benchmark/benchmark.h" #include "tcmalloc/common.h" #include "tcmalloc/huge_pages.h" +#include "tcmalloc/huge_region.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/page_size.h" #include "tcmalloc/malloc_extension.h" +#include "tcmalloc/mock_huge_page_static_forwarder.h" #include "tcmalloc/page_allocator_test_util.h" -#include "tcmalloc/parameters.h" +#include "tcmalloc/pages.h" #include "tcmalloc/span.h" -#include "tcmalloc/static_vars.h" #include "tcmalloc/stats.h" #include "tcmalloc/system-alloc.h" #include "tcmalloc/testing/thread_manager.h" -ABSL_FLAG(std::string, tracefile, "", "file to pull trace from"); -ABSL_FLAG(uint64_t, limit, 0, ""); -ABSL_FLAG(bool, always_check_usage, false, "enable expensive memory checks"); - namespace tcmalloc { namespace tcmalloc_internal { namespace { +using 
huge_page_allocator_internal::HugePageAwareAllocatorOptions; using testing::HasSubstr; -class HugePageAwareAllocatorTest : public ::testing::Test { +class HugePageAwareAllocatorTest + : public ::testing::TestWithParam { + class FakeStaticForwarderWithReleaseCheck + : public huge_page_allocator_internal::FakeStaticForwarder { + public: + [[nodiscard]] bool ReleasePages(Range r) { + const uintptr_t start = reinterpret_cast(r.p.start_addr()); + bool ret = + huge_page_allocator_internal::FakeStaticForwarder::ReleasePages(r); + // Try to acquire the lock. It is possible that we are holding + // pageheap_lock while calling ReleasePages, so it might result in a + // deadlock as RecordAllocation/RecordDeallocation may allocate. + // This makes the release check a best effort. + if (!lock_.TryLock()) return ret; + + // Remove from the list of allocations, if the address was previously + // allocated. + auto it = std::find(allocations_.begin(), allocations_.end(), start); + if (it != allocations_.end()) { + *it = allocations_.back(); + allocations_.pop_back(); + } + lock_.Unlock(); + return ret; + } + + void RecordAllocation(uintptr_t start_addr) { + absl::base_internal::SpinLockHolder h(&lock_); + allocations_.push_back(start_addr); + } + void RecordDeallocation(uintptr_t start_addr) { + absl::base_internal::SpinLockHolder h(&lock_); + // Make sure the address was previously allocated and wasn't removed from + // the list when it was released. + auto it = std::find(allocations_.begin(), allocations_.end(), start_addr); + TC_CHECK(it != allocations_.end()); + *it = allocations_.back(); + allocations_.pop_back(); + } + + private: + std::vector allocations_; + absl::base_internal::SpinLock lock_; + }; + using MockedHugePageAwareAllocator = + huge_page_allocator_internal::HugePageAwareAllocator< + FakeStaticForwarderWithReleaseCheck>; + protected: - HugePageAwareAllocatorTest() : rng_() { + HugePageAwareAllocatorTest() { before_ = MallocExtension::GetRegionFactory(); extra_ = new ExtraRegionFactory(before_); MallocExtension::SetRegionFactory(extra_); - // HugePageAwareAllocator can't be destroyed cleanly, so we store a pointer - // to one and construct in place. - void* p = malloc(sizeof(HugePageAwareAllocator)); - allocator_ = new (p) HugePageAwareAllocator(MemoryTag::kNormal); + // HugePageAwareAllocator can't be destroyed cleanly, so we store a + // pointer to one and construct in place. + void* p = malloc(sizeof(MockedHugePageAwareAllocator)); + HugePageAwareAllocatorOptions options; + options.tag = MemoryTag::kNormal; + // TODO(b/242550501): Parameterize other parts of the options. + options.use_huge_region_more_often = GetParam(); + allocator_ = new (p) MockedHugePageAwareAllocator(options); } ~HugePageAwareAllocatorTest() override { - CHECK_CONDITION(ids_.empty()); - CHECK_CONDITION(total_ == Length(0)); + TC_CHECK(ids_.empty()); + TC_CHECK_EQ(total_, Length(0)); // We end up leaking both the backing allocations and the metadata. // The backing allocations are unmapped--it's silly, but not // costing us muchin a 64-bit address space. @@ -90,12 +145,9 @@ class HugePageAwareAllocatorTest : public ::testing::Test { // It'd be very complicated to rebuild the allocator to support // teardown, so we just put up with it. 
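The allocation tracking above relies on an unordered swap-with-back-and-pop so that removal from the vector stays O(1). The same idiom in a minimal standalone form (uintptr_t keys, no locking):

  #include <algorithm>
  #include <cstdint>
  #include <vector>

  class LiveAllocationSet {
   public:
    void Record(uintptr_t start) { allocations_.push_back(start); }

    // Removes start if present, without preserving order; returns whether it
    // was tracked (a released range may already have been dropped).
    bool Erase(uintptr_t start) {
      auto it = std::find(allocations_.begin(), allocations_.end(), start);
      if (it == allocations_.end()) return false;
      *it = allocations_.back();
      allocations_.pop_back();
      return true;
    }

   private:
    std::vector<uintptr_t> allocations_;
  };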
{ - absl::base_internal::SpinLockHolder h(&pageheap_lock); + PageHeapSpinLockHolder l; auto stats = allocator_->stats(); - if (stats.free_bytes + stats.unmapped_bytes != stats.system_bytes) { - Crash(kCrash, __FILE__, __LINE__, stats.free_bytes, - stats.unmapped_bytes, "!=", stats.system_bytes); - } + TC_CHECK_EQ(stats.free_bytes + stats.unmapped_bytes, stats.system_bytes); } free(allocator_); @@ -108,7 +160,7 @@ class HugePageAwareAllocatorTest : public ::testing::Test { size_t actual_used_bytes = total_.in_bytes(); BackingStats stats; { - absl::base_internal::SpinLockHolder h2(&pageheap_lock); + PageHeapSpinLockHolder l; stats = allocator_->stats(); } uint64_t used_bytes = @@ -119,62 +171,101 @@ class HugePageAwareAllocatorTest : public ::testing::Test { uint64_t GetFreeBytes() { BackingStats stats; { - absl::base_internal::SpinLockHolder h2(&pageheap_lock); + PageHeapSpinLockHolder l; stats = allocator_->stats(); } return stats.free_bytes; } - Span* AllocatorNew(Length n) { return allocator_->New(n); } + Span* AllocatorNew(Length n, SpanAllocInfo span_alloc_info) { + Span* s = allocator_->New(n, span_alloc_info); + uintptr_t start = reinterpret_cast(s->start_address()); + allocator_->forwarder().RecordAllocation( + reinterpret_cast(start)); + return s; + } - void AllocatorDelete(Span* s) { - absl::base_internal::SpinLockHolder h(&pageheap_lock); + void AllocatorDelete(Span* s, size_t objects_per_span) { +#ifdef TCMALLOC_INTERNAL_LEGACY_LOCKING + PageHeapSpinLockHolder l; allocator_->Delete(s); +#else + uintptr_t start = reinterpret_cast(s->start_address()); + allocator_->forwarder().RecordDeallocation(start); + PageAllocatorInterface::AllocationState a{ + Range(s->first_page(), s->num_pages()), + s->donated(), + }; + allocator_->forwarder().DeleteSpan(s); + PageHeapSpinLockHolder l; + allocator_->Delete(a); +#endif // TCMALLOC_INTERNAL_LEGACY_LOCKING } - Span* New(Length n) { + Span* New(Length n, SpanAllocInfo span_alloc_info) { absl::base_internal::SpinLockHolder h(&lock_); - Span* span = AllocatorNew(n); - CHECK_CONDITION(span != nullptr); + Span* span = AllocatorNew(n, span_alloc_info); + TC_CHECK_NE(span, nullptr); EXPECT_GE(span->num_pages(), n); const size_t id = next_id_++; total_ += n; CheckStats(); // and distinct spans... - CHECK_CONDITION(ids_.insert({span, id}).second); + TC_CHECK(ids_.insert({span, id}).second); return span; } - void Delete(Span* span) { + void Delete(Span* span, size_t objects_per_span) { Length n = span->num_pages(); { absl::base_internal::SpinLockHolder h(&lock_); auto i = ids_.find(span); - CHECK_CONDITION(i != ids_.end()); + TC_CHECK(i != ids_.end()); const size_t id = i->second; ids_.erase(i); - AllocatorDelete(span); + AllocatorDelete(span, objects_per_span); total_ -= n; CheckStats(); } } // Mostly small things, some large ones. 
- Length RandomAllocSize() { - // TODO(b/128521238): scalable RNG - absl::base_internal::SpinLockHolder h(&lock_); - if (absl::Bernoulli(rng_, 1.0 / 1000)) { - Length n = - Length(1024) * (1 + absl::LogUniform(rng_, 0, (1 << 8) - 1)); - n += Length(absl::Uniform(rng_, 0, 1024)); - return n; + std::pair RandomAllocSize(absl::BitGenRef rng) { + Length n; + if (absl::Bernoulli(rng, 1.0 / 1000)) { + n = Length(1024) * (1 + absl::LogUniform(rng, 0, (1 << 8) - 1)); + n += Length(absl::Uniform(rng, 0, 1024)); + } else { + n = Length(1 + absl::LogUniform(rng, 0, (1 << 9) - 1)); } - return Length(1 + absl::LogUniform(rng_, 0, (1 << 9) - 1)); + // The condition used here ensures that if the allocated hugepage is donated + // to the HugePageFiller, then it is expected to be short lived. + size_t objects = (n <= kPagesPerHugePage / 2) + ? absl::Uniform(rng, 1, 256) + : absl::Uniform(rng, 1, 16); + + AccessDensityPrediction density = + (n <= kPagesPerHugePage / 2) + ? (absl::Bernoulli(rng, 1.0) ? AccessDensityPrediction::kSparse + : AccessDensityPrediction::kDense) + : AccessDensityPrediction::kSparse; + return {n, {objects, density}}; + } + + Length ReleasePages(Length k, PageReleaseReason reason) { + PageHeapSpinLockHolder l; + return allocator_->ReleaseAtLeastNPages(k, reason); + } + + Length ReleaseAtLeastNPagesBreakingHugepages(Length n, + PageReleaseReason reason) { + PageHeapSpinLockHolder l; + return allocator_->ReleaseAtLeastNPagesBreakingHugepages(n, reason); } - Length ReleasePages(Length k) { - absl::base_internal::SpinLockHolder h(&pageheap_lock); - return allocator_->ReleaseAtLeastNPages(k); + bool UseHugeRegionMoreOften() { + PageHeapSpinLockHolder l; + return allocator_->region().UseHugeRegionMoreOften(); } std::string Print() { @@ -182,63 +273,73 @@ class HugePageAwareAllocatorTest : public ::testing::Test { const size_t kSize = 1 << 20; ret.resize(kSize); Printer p(&ret[0], kSize); - allocator_->Print(&p); + allocator_->Print(p); ret.erase(p.SpaceRequired()); return ret; } - std::string PrintInPbTxt() { + std::string PrintInPbtxt() { std::string ret; const size_t kSize = 1 << 20; ret.resize(kSize); Printer p(&ret[0], kSize); { - PbtxtRegion region(&p, kNested, 0); - allocator_->PrintInPbtxt(®ion); + PbtxtRegion region(p, kNested); + allocator_->PrintInPbtxt(region); } ret.erase(p.SpaceRequired()); return ret; } - HugePageAwareAllocator* allocator_; + // TODO(b/242550501): Replace this with one templated with a different + // forwarder, as to facilitate mocks. 
+ MockedHugePageAwareAllocator* allocator_; ExtraRegionFactory* extra_; AddressRegionFactory* before_; - absl::BitGen rng_; absl::base_internal::SpinLock lock_; absl::flat_hash_map ids_; size_t next_id_{0}; Length total_; }; -TEST_F(HugePageAwareAllocatorTest, Fuzz) { - std::vector allocs; +struct SpanInfo { + Span* span; + SpanAllocInfo span_alloc_info; +}; + +TEST_P(HugePageAwareAllocatorTest, Fuzz) { + absl::BitGen rng; + std::vector allocs; for (int i = 0; i < 5000; ++i) { - Length n = RandomAllocSize(); - allocs.push_back(New(n)); + auto [n, span_alloc_info] = RandomAllocSize(rng); + Span* s = New(n, span_alloc_info); + allocs.push_back(SpanInfo{s, span_alloc_info}); } static const size_t kReps = 50 * 1000; for (int i = 0; i < kReps; ++i) { SCOPED_TRACE(absl::StrFormat("%d reps, %d pages", i, total_.raw_num())); - size_t index = absl::Uniform(rng_, 0, allocs.size()); - Span* old = allocs[index]; - Delete(old); - Length n = RandomAllocSize(); - allocs[index] = New(n); + size_t index = absl::Uniform(rng, 0, allocs.size()); + Span* old_span = allocs[index].span; + size_t objects_per_span = allocs[index].span_alloc_info.objects_per_span; + Delete(old_span, objects_per_span); + auto [n, span_alloc_info] = RandomAllocSize(rng); + allocs[index] = SpanInfo{New(n, span_alloc_info), span_alloc_info}; } for (auto s : allocs) { - Delete(s); + Delete(s.span, s.span_alloc_info.objects_per_span); } } // Prevent regression of the fragmentation problem that was reported in // b/63301358, reproduced in CL/161345659 and (partially) fixed in CL/161305971. -TEST_F(HugePageAwareAllocatorTest, JustUnderMultipleOfHugepages) { +TEST_P(HugePageAwareAllocatorTest, JustUnderMultipleOfHugepages) { + const SpanAllocInfo kSpanInfo = {1, AccessDensityPrediction::kSparse}; std::vector big_allocs, small_allocs; // Trigger creation of a hugepage with more than one allocation and plenty of // free space. - small_allocs.push_back(New(Length(1))); - small_allocs.push_back(New(Length(10))); + small_allocs.push_back(New(Length(1), kSpanInfo)); + small_allocs.push_back(New(Length(10), kSpanInfo)); // Limit iterations so that the huge page with the small allocs doesn't fill // up. size_t n_iter = (kPagesPerHugePage - Length(2)).raw_num(); @@ -246,22 +347,22 @@ TEST_F(HugePageAwareAllocatorTest, JustUnderMultipleOfHugepages) { n_iter = std::min((1 << 30) / (2 * kHugePageSize), n_iter); for (int i = 0; i < n_iter; ++i) { Length n = 2 * kPagesPerHugePage - Length(1); - big_allocs.push_back(New(n)); - small_allocs.push_back(New(Length(1))); + big_allocs.push_back(New(n, kSpanInfo)); + small_allocs.push_back(New(Length(1), kSpanInfo)); } for (auto* span : big_allocs) { - Delete(span); + Delete(span, kSpanInfo.objects_per_span); } // We should have one hugepage that's full of small allocations and a bunch // of empty hugepages. The HugeCache will keep some of the empty hugepages // backed so free space should drop to a small multiple of the huge page size. 
EXPECT_LE(GetFreeBytes(), 20 * kHugePageSize); for (auto* span : small_allocs) { - Delete(span); + Delete(span, kSpanInfo.objects_per_span); } } -TEST_F(HugePageAwareAllocatorTest, Multithreaded) { +TEST_P(HugePageAwareAllocatorTest, Multithreaded) { static const size_t kThreads = 16; std::vector threads; threads.reserve(kThreads); @@ -270,22 +371,23 @@ TEST_F(HugePageAwareAllocatorTest, Multithreaded) { for (int i = 0; i < kThreads; ++i) { threads.push_back(std::thread([this, &b1, &b2]() { absl::BitGen rng; - std::vector allocs; + std::vector allocs; for (int i = 0; i < 150; ++i) { - Length n = RandomAllocSize(); - allocs.push_back(New(n)); + auto [n, span_alloc_info] = RandomAllocSize(rng); + allocs.push_back(SpanInfo{New(n, span_alloc_info), span_alloc_info}); } b1.Block(); static const size_t kReps = 4 * 1000; for (int i = 0; i < kReps; ++i) { size_t index = absl::Uniform(rng, 0, allocs.size()); - Delete(allocs[index]); - Length n = RandomAllocSize(); - allocs[index] = New(n); + Delete(allocs[index].span, + allocs[index].span_alloc_info.objects_per_span); + auto [n, span_alloc_info] = RandomAllocSize(rng); + allocs[index] = SpanInfo{New(n, span_alloc_info), span_alloc_info}; } b2.Block(); for (auto s : allocs) { - Delete(s); + Delete(s.span, s.span_alloc_info.objects_per_span); } })); } @@ -295,137 +397,725 @@ TEST_F(HugePageAwareAllocatorTest, Multithreaded) { } } -TEST_F(HugePageAwareAllocatorTest, ReleasingLarge) { - // Ensure the HugeCache has some free items: - Delete(New(kPagesPerHugePage)); - ASSERT_LE(kPagesPerHugePage, ReleasePages(kPagesPerHugePage)); +TEST_P(HugePageAwareAllocatorTest, ReleasingLargeForUserRequestedRelease) { + // Tests that we can release when requested by the user, irrespective of + // whether the demand-based release is enabled or not. We do this by + // alternating the state of the demand-based release flag. + bool enabled = allocator_->forwarder().huge_cache_demand_based_release(); + constexpr int kNumIterations = 100; + const SpanAllocInfo kSpanInfo = {1, AccessDensityPrediction::kSparse}; + for (int i = 0; i < kNumIterations; ++i) { + // Ensure the HugeCache has some free items: + Delete(New(kPagesPerHugePage, kSpanInfo), kSpanInfo.objects_per_span); + EXPECT_EQ( + ReleasePages(kPagesPerHugePage, + /*reason=*/PageReleaseReason::kReleaseMemoryToSystem), + kPagesPerHugePage); + enabled = !enabled; + allocator_->forwarder().set_huge_cache_demand_based_release(enabled); + } +} + +TEST_P(HugePageAwareAllocatorTest, ReleasingLargeForBackgroundActions) { + // Tests that the background release will be impacted by the demand-based + // release: when enabled, it will not release any pages due to the recent + // demand. + bool enabled = allocator_->forwarder().huge_cache_demand_based_release(); + constexpr int kNumIterations = 100; + const SpanAllocInfo kSpanInfo = {1, AccessDensityPrediction::kSparse}; + for (int i = 0; i < kNumIterations; ++i) { + Delete(New(kPagesPerHugePage, kSpanInfo), kSpanInfo.objects_per_span); + // Demand-based release would think releasing is not a good idea, hence we + // need to force a release later. + EXPECT_EQ( + ReleasePages(kPagesPerHugePage, + /*reason=*/PageReleaseReason::kProcessBackgroundActions), + enabled ? 
Length(0) : kPagesPerHugePage); + if (enabled) { + EXPECT_EQ(ReleasePages(Length(1), + /*reason=*/PageReleaseReason::kSoftLimitExceeded), + kPagesPerHugePage); + } + enabled = !enabled; + allocator_->forwarder().set_huge_cache_demand_based_release(enabled); + } +} + +TEST_P(HugePageAwareAllocatorTest, ReleasingMemoryLimitHit) { + // Tests that we can release when the memory limit is hit, irrespective of + // whether the demand-based release is enabled or not. We test this by + // alternating the state of the demand-based release flag. + bool enabled = allocator_->forwarder().huge_cache_demand_based_release(); + constexpr int kNumIterations = 100; + const SpanAllocInfo kSpanInfo = {1, AccessDensityPrediction::kSparse}; + for (int i = 0; i < kNumIterations; ++i) { + Delete(New(kPagesPerHugePage, kSpanInfo), kSpanInfo.objects_per_span); + EXPECT_EQ(ReleaseAtLeastNPagesBreakingHugepages( + kPagesPerHugePage, + /*reason=*/PageReleaseReason::kSoftLimitExceeded), + kPagesPerHugePage); + enabled = !enabled; + allocator_->forwarder().set_huge_cache_demand_based_release(enabled); + } +} + +TEST_P(HugePageAwareAllocatorTest, + ReleasingLargeForBackgroundActionsWithZeroIntervals) { + // Tests that the configured intervals can be passed to HugeCache: release is + // not being impacted by demand-based release when the intervals are zero. + const bool old_enabled = + allocator_->forwarder().huge_cache_demand_based_release(); + allocator_->forwarder().set_huge_cache_demand_based_release(/*value=*/true); + const absl::Duration old_cache_short_interval = + allocator_->forwarder().cache_demand_release_short_interval(); + const absl::Duration old_cache_long_interval = + allocator_->forwarder().cache_demand_release_long_interval(); + allocator_->forwarder().set_cache_demand_release_short_interval( + absl::ZeroDuration()); + allocator_->forwarder().set_cache_demand_release_long_interval( + absl::ZeroDuration()); + const SpanAllocInfo kSpanInfo = {1, AccessDensityPrediction::kSparse}; + Delete(New(kPagesPerHugePage, kSpanInfo), kSpanInfo.objects_per_span); + // There is no history to reference so release all. 
+ EXPECT_EQ( + ReleasePages(kPagesPerHugePage, + /*reason=*/PageReleaseReason::kProcessBackgroundActions), + kPagesPerHugePage); + allocator_->forwarder().set_huge_cache_demand_based_release(old_enabled); + allocator_->forwarder().set_cache_demand_release_short_interval( + old_cache_short_interval); + allocator_->forwarder().set_cache_demand_release_long_interval( + old_cache_long_interval); } -TEST_F(HugePageAwareAllocatorTest, ReleasingSmall) { - const bool old_subrelease = Parameters::hpaa_subrelease(); - Parameters::set_hpaa_subrelease(true); +TEST_P(HugePageAwareAllocatorTest, ReleasingSmall) { + const bool old_subrelease = allocator_->forwarder().hpaa_subrelease(); + allocator_->forwarder().set_hpaa_subrelease(/*value=*/true); - const absl::Duration old_skip_subrelease = - Parameters::filler_skip_subrelease_interval(); - Parameters::set_filler_skip_subrelease_interval(absl::ZeroDuration()); + absl::Duration old_skip_subrelease_short_interval = + allocator_->forwarder().filler_skip_subrelease_short_interval(); + allocator_->forwarder().set_filler_skip_subrelease_short_interval( + absl::ZeroDuration()); + + absl::Duration old_skip_subrelease_long_interval = + allocator_->forwarder().filler_skip_subrelease_long_interval(); + allocator_->forwarder().set_filler_skip_subrelease_long_interval( + absl::ZeroDuration()); std::vector live, dead; static const size_t N = kPagesPerHugePage.raw_num() * 128; + const SpanAllocInfo kSpanInfo = {1, AccessDensityPrediction::kSparse}; for (int i = 0; i < N; ++i) { - Span* span = New(Length(1)); + Span* span = New(Length(1), kSpanInfo); ((i % 2 == 0) ? live : dead).push_back(span); } for (auto d : dead) { - Delete(d); + Delete(d, kSpanInfo.objects_per_span); } - EXPECT_EQ(kPagesPerHugePage / 2, ReleasePages(Length(1))); + EXPECT_EQ(kPagesPerHugePage / 2, + ReleasePages(Length(1), + /*reason=*/PageReleaseReason::kReleaseMemoryToSystem)); for (auto l : live) { - Delete(l); + Delete(l, kSpanInfo.objects_per_span); + } + + allocator_->forwarder().set_hpaa_subrelease(old_subrelease); + allocator_->forwarder().set_filler_skip_subrelease_short_interval( + old_skip_subrelease_short_interval); + allocator_->forwarder().set_filler_skip_subrelease_long_interval( + old_skip_subrelease_long_interval); +} + +TEST_P(HugePageAwareAllocatorTest, HardReleaseSmall) { + std::vector live, dead; + static const size_t N = kPagesPerHugePage.raw_num() * 128; + const SpanAllocInfo kSpanInfo = {1, AccessDensityPrediction::kSparse}; + for (int i = 0; i < N; ++i) { + Span* span = New(Length(1), kSpanInfo); + ((i % 2 == 0) ? live : dead).push_back(span); + } + + for (auto d : dead) { + Delete(d, kSpanInfo.objects_per_span); + } + + // Subrelease shouldn't release any pages by itself, but hard release using + // ReleaseAtLeastNPagesBreakingHugepages should release all the free pages. + EXPECT_EQ(ReleasePages(Length(1), + /*reason=*/PageReleaseReason::kReleaseMemoryToSystem), + Length(0)); + EXPECT_EQ(ReleaseAtLeastNPagesBreakingHugepages( + Length(1), /*reason=*/PageReleaseReason::kSoftLimitExceeded), + kPagesPerHugePage / 2); + + for (auto l : live) { + Delete(l, kSpanInfo.objects_per_span); + } +} + +TEST_P(HugePageAwareAllocatorTest, UseHugeRegion) { + // This test verifies that we use HugeRegion for large allocations as soon as + // the abandoned pages exceed 64MB, when we use abandoned count in addition to + // slack for determining when to use region. If we use slack for computation, + // this test should not trigger use of HugeRegion. 
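To see why the loop below needs only a modest number of iterations: assuming 256 pages per hugepage (8 KiB pages, 2 MiB hugepages), kSlack is 126 pages and kLargeSize is 130 pages, so roughly 64 freed large spans push the abandoned count past the 64 MiB threshold. A back-of-the-envelope check:

  #include <cstddef>
  #include <cstdio>

  int main() {
    constexpr size_t kPageBytes = 8 * 1024;                            // assumed
    constexpr size_t kPagesPerHugePageModel = 256;                     // assumed
    constexpr size_t kSlack = kPagesPerHugePageModel / 2 - 2;          // 126
    constexpr size_t kLargeSize = kPagesPerHugePageModel - kSlack;     // 130
    constexpr size_t kThresholdPages = 64 * 1024 * 1024 / kPageBytes;  // 8192
    const size_t iterations = (kThresholdPages + kLargeSize - 1) / kLargeSize;
    std::printf("pages abandoned per freed large span: %zu; iterations: %zu\n",
                kLargeSize, iterations);  // 130 and 64
  }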
+ static constexpr Length kSlack = kPagesPerHugePage / 2 - Length(2); + static constexpr Length kSmallSize = kSlack; + static constexpr Length kLargeSize = kPagesPerHugePage - kSlack; + const SpanAllocInfo kSpanInfo = {1, AccessDensityPrediction::kSparse}; + + Length slack; + Length small_pages; + HugeLength donated_huge_pages; + Length abandoned_pages; + size_t active_regions; + BackingStats region_stats; + + auto RefreshStats = [&]() { + PageHeapSpinLockHolder l; + slack = allocator_->info().slack(); + small_pages = allocator_->info().small(); + donated_huge_pages = allocator_->DonatedHugePages(); + abandoned_pages = allocator_->AbandonedPages(); + active_regions = allocator_->region().ActiveRegions(); + region_stats = allocator_->region().stats(); + }; + + std::vector small_spans; + std::vector large_spans; + const Length small_binary_size = HLFromBytes(64 * 1024 * 1024).in_pages(); + Length expected_abandoned; + Length expected_slack; + int huge_pages = 0; + + // We first allocate large objects such that expected abandoned pages (once we + // deallocate those large objects) exceed the 64MB threshold. We place small + // allocations on the donated pages so that the hugepages aren't released. + while (true) { + Span* large = New(kLargeSize, kSpanInfo); + Span* small = New(kSmallSize, kSpanInfo); + large_spans.emplace_back(large); + small_spans.emplace_back(small); + ++huge_pages; + expected_abandoned += kLargeSize; + expected_slack += kSlack; + + RefreshStats(); + EXPECT_EQ(abandoned_pages, Length(0)); + EXPECT_EQ(donated_huge_pages, NHugePages(huge_pages)); + EXPECT_EQ(slack, expected_slack); + EXPECT_EQ(active_regions, 0); + if (expected_abandoned >= small_binary_size) break; + } + + // Reset the abandoned count and start releasing huge allocations. We should + // start accumulating abandoned pages in filler. As we don't expect to trigger + // HugeRegion yet, the number of active regions should be zero throughout. + expected_abandoned = Length(0); + for (auto l : large_spans) { + Delete(l, kSpanInfo.objects_per_span); + expected_abandoned += kLargeSize; + expected_slack -= kSlack; + RefreshStats(); + EXPECT_EQ(abandoned_pages, expected_abandoned); + EXPECT_EQ(donated_huge_pages, NHugePages(huge_pages)); + EXPECT_EQ(slack, expected_slack); + EXPECT_EQ(active_regions, 0); + } + large_spans.clear(); + + RefreshStats(); + EXPECT_EQ(slack, Length(0)); + EXPECT_GE(abandoned_pages, small_binary_size); + + // At this point, we have exhausted the 64MB slack for the donated pages to + // the filler. A large allocation should trigger allocation from a huge + // region if we are using huge regions more often. If we are using slack for + // determining when to use region, we should allocate from filler and the + // number of donated pages should continue to grow. + // + // We allocate a slightly larger object than before (kLargeSize + Length(1)) + // to make sure that filler doesn't try to pack it on the pages we released + // due to deallocations in the previous step. 
+ static constexpr Length kSmallSize2 = kSmallSize - Length(1); + static constexpr Length kLargeSize2 = kLargeSize + Length(1); + + for (int i = 0; i < 100; ++i) { + Span* large = New(kLargeSize2, kSpanInfo); + Span* small = New(kSmallSize2, kSpanInfo); + large_spans.emplace_back(large); + small_spans.emplace_back(small); + RefreshStats(); + if (UseHugeRegionMoreOften()) { + EXPECT_EQ(abandoned_pages, expected_abandoned); + EXPECT_EQ(donated_huge_pages, NHugePages(huge_pages)); + EXPECT_EQ(active_regions, 1); + } else { + ASSERT_LT(slack, small_pages); + ++huge_pages; + EXPECT_EQ(abandoned_pages, expected_abandoned); + EXPECT_EQ(donated_huge_pages, NHugePages(huge_pages)); + EXPECT_EQ(active_regions, 0); + } + } + // Check stats to confirm that pages have been allocated from huge regions. + RefreshStats(); + size_t unmapped_bytes = region_stats.unmapped_bytes; + if (UseHugeRegionMoreOften()) { + EXPECT_GT(unmapped_bytes, 0); } - Parameters::set_hpaa_subrelease(old_subrelease); - Parameters::set_filler_skip_subrelease_interval(old_skip_subrelease); + // Deallocate large spans and make sure that HugeRegion does not unback that + // memory. This is because we do not unback objects during deallocation when a + // configuration to use huge region often is enabled. + for (auto l : large_spans) { + Delete(l, kSpanInfo.objects_per_span); + RefreshStats(); + EXPECT_EQ(region_stats.unmapped_bytes, unmapped_bytes); + } + + size_t backed_bytes = region_stats.system_bytes - region_stats.unmapped_bytes; + + // Release pages and make sure we release a few free-but-backed pages from + // huge region. As we release pages from HugeRegion gradually, first make sure + // that we do not release all the free pages. + if (UseHugeRegionMoreOften()) { + Length released; + { + PageHeapSpinLockHolder l; + released = allocator_->ReleaseAtLeastNPages( + Length(1), /*reason=*/PageReleaseReason::kReleaseMemoryToSystem); + } + EXPECT_GT(released.in_bytes(), 0); + EXPECT_LT(released.in_bytes(), backed_bytes); + RefreshStats(); + backed_bytes = region_stats.system_bytes - region_stats.unmapped_bytes; + + { + PageHeapSpinLockHolder l; + released = allocator_->ReleaseAtLeastNPages( + Length(1), /*reason=*/PageReleaseReason::kReleaseMemoryToSystem); + } + EXPECT_GT(released.in_bytes(), 0); + RefreshStats(); + backed_bytes = region_stats.system_bytes - region_stats.unmapped_bytes; + + Length backed_in_pages = LengthFromBytes(backed_bytes); + { + PageHeapSpinLockHolder l; + released = allocator_->ReleaseAtLeastNPagesBreakingHugepages( + backed_in_pages, /*reason=*/PageReleaseReason::kSoftLimitExceeded); + } + EXPECT_EQ(released, backed_in_pages); + RefreshStats(); + backed_bytes = region_stats.system_bytes - region_stats.unmapped_bytes; + EXPECT_EQ(backed_bytes, 0); + } + + for (auto s : small_spans) { + Delete(s, kSpanInfo.objects_per_span); + } } -TEST_F(HugePageAwareAllocatorTest, DonatedHugePages) { +TEST_P(HugePageAwareAllocatorTest, DonatedHugePages) { // This test verifies that we accurately measure the amount of RAM that we // donate to the huge page filler when making large allocations, including // those kept alive after we deallocate. 
   static constexpr Length kSlack = Length(2);
   static constexpr Length kLargeSize = 2 * kPagesPerHugePage - kSlack;
   static constexpr Length kSmallSize = Length(1);
+  const SpanAllocInfo kSpanInfo = {1, AccessDensityPrediction::kSparse};
 
-  Span* large1 = New(kLargeSize);
+  Span* large1 = New(kLargeSize, kSpanInfo);
 
   Length slack;
   HugeLength donated_huge_pages;
-  {
-    absl::base_internal::SpinLockHolder l(&pageheap_lock);
+  Length abandoned_pages;
+
+  auto RefreshStats = [&]() {
+    PageHeapSpinLockHolder l;
     slack = allocator_->info().slack();
     donated_huge_pages = allocator_->DonatedHugePages();
-  }
+    abandoned_pages = allocator_->AbandonedPages();
+  };
+  RefreshStats();
+
   EXPECT_EQ(slack, kSlack);
   EXPECT_EQ(donated_huge_pages, NHugePages(1));
-
+  EXPECT_EQ(abandoned_pages, Length(0));
   EXPECT_THAT(Print(), HasSubstr("filler donations 1"));
-  EXPECT_THAT(PrintInPbTxt(), HasSubstr("filler_donated_huge_pages: 1"));
+  EXPECT_THAT(PrintInPbtxt(), HasSubstr("filler_donated_huge_pages: 1"));
+  EXPECT_THAT(PrintInPbtxt(), HasSubstr("filler_abandoned_pages: 0"));
 
   // Make a small allocation and then free the large allocation. Slack should
   // fall, but we've kept alive our donation to the filler.
-  Span* small = New(kSmallSize);
-  Delete(large1);
-  {
-    absl::base_internal::SpinLockHolder l(&pageheap_lock);
-    slack = allocator_->info().slack();
-    donated_huge_pages = allocator_->DonatedHugePages();
-  }
+  Span* small = New(kSmallSize, kSpanInfo);
+  Delete(large1, kSpanInfo.objects_per_span);
+
+  RefreshStats();
+
   EXPECT_EQ(slack, Length(0));
   EXPECT_EQ(donated_huge_pages, NHugePages(1));
+  EXPECT_EQ(abandoned_pages, kPagesPerHugePage - kSlack);
 
-  EXPECT_THAT(Print(), HasSubstr("filler donations 1"));
-  EXPECT_THAT(PrintInPbTxt(), HasSubstr("filler_donated_huge_pages: 1"));
+  EXPECT_THAT(Print(), HasSubstr(absl::StrCat("filler donations 1")));
+  EXPECT_THAT(PrintInPbtxt(), HasSubstr("filler_donated_huge_pages: 1"));
+  EXPECT_THAT(PrintInPbtxt(),
+              HasSubstr(absl::StrCat("filler_abandoned_pages: ",
+                                     (kPagesPerHugePage - kSlack).raw_num())));
 
   // Make another large allocation. The number of donated huge pages should
   // continue to increase.
-  Span* large2 = New(kLargeSize);
-  {
-    absl::base_internal::SpinLockHolder l(&pageheap_lock);
-    slack = allocator_->info().slack();
-    donated_huge_pages = allocator_->DonatedHugePages();
-  }
+  Span* large2 = New(kLargeSize, kSpanInfo);
+
+  RefreshStats();
+
   EXPECT_EQ(slack, kSlack);
   EXPECT_EQ(donated_huge_pages, NHugePages(2));
+  EXPECT_EQ(abandoned_pages, kPagesPerHugePage - kSlack);
 
-  EXPECT_THAT(Print(), HasSubstr("filler donations 2"));
-  EXPECT_THAT(PrintInPbTxt(), HasSubstr("filler_donated_huge_pages: 2"));
+  EXPECT_THAT(Print(), HasSubstr(absl::StrCat("filler donations 2")));
+  EXPECT_THAT(PrintInPbtxt(), HasSubstr("filler_donated_huge_pages: 2"));
+  EXPECT_THAT(PrintInPbtxt(),
+              HasSubstr(absl::StrCat("filler_abandoned_pages: ",
+                                     (kPagesPerHugePage - kSlack).raw_num())));
+
+  // Deallocating the small allocation finally reduces the number of donations,
+  // as we were able to reassemble the huge page for large1.
+  Delete(small, kSpanInfo.objects_per_span);
+
+  RefreshStats();
 
-  // Deallocating the small allocation does not reduce the number of donations,
-  // as we were unable to reassemble the VSS for large1.
- Delete(small); - { - absl::base_internal::SpinLockHolder l(&pageheap_lock); - slack = allocator_->info().slack(); - donated_huge_pages = allocator_->DonatedHugePages(); - } EXPECT_EQ(slack, kSlack); - EXPECT_EQ(donated_huge_pages, NHugePages(2)); + EXPECT_EQ(donated_huge_pages, NHugePages(1)); + EXPECT_EQ(abandoned_pages, Length(0)); - EXPECT_THAT(Print(), HasSubstr("filler donations 2")); - EXPECT_THAT(PrintInPbTxt(), HasSubstr("filler_donated_huge_pages: 2")); + EXPECT_THAT(Print(), HasSubstr(absl::StrCat("filler donations 1"))); + EXPECT_THAT(PrintInPbtxt(), HasSubstr("filler_donated_huge_pages: 1")); + EXPECT_THAT(PrintInPbtxt(), HasSubstr("filler_abandoned_pages: 0")); // Deallocating everything should return slack to 0 and allow large2's // contiguous VSS to be reassembled. - Delete(large2); - { - absl::base_internal::SpinLockHolder l(&pageheap_lock); + Delete(large2, kSpanInfo.objects_per_span); + + RefreshStats(); + + EXPECT_EQ(slack, Length(0)); + EXPECT_EQ(donated_huge_pages, NHugePages(0)); + EXPECT_EQ(abandoned_pages, Length(0)); + + EXPECT_THAT(Print(), HasSubstr(absl::StrCat("filler donations 0"))); + EXPECT_THAT(PrintInPbtxt(), HasSubstr("filler_donated_huge_pages: 0")); + EXPECT_THAT(PrintInPbtxt(), HasSubstr("filler_abandoned_pages: 0")); +} + +TEST_P(HugePageAwareAllocatorTest, SmallDonations) { + // This test works with small donations (kHugePageSize/2,kHugePageSize]-bytes + // in size to check statistics. + static constexpr Length kSlack = Length(2); + static constexpr Length kLargeSize = kPagesPerHugePage - kSlack; + static constexpr Length kSmallSize = Length(1); + static constexpr Length kSmallSize2 = kSlack; + const SpanAllocInfo kSpanInfo = {1, AccessDensityPrediction::kSparse}; + + Span* large1 = New(kLargeSize, kSpanInfo); + Span* large2 = New(kLargeSize, kSpanInfo); + + Length slack; + HugeLength donated_huge_pages; + Length abandoned_pages; + + auto RefreshStats = [&]() { + PageHeapSpinLockHolder l; slack = allocator_->info().slack(); donated_huge_pages = allocator_->DonatedHugePages(); + abandoned_pages = allocator_->AbandonedPages(); + }; + RefreshStats(); + + EXPECT_EQ(slack, 2 * kSlack); + EXPECT_EQ(donated_huge_pages, NHugePages(2)); + EXPECT_EQ(abandoned_pages, Length(0)); + EXPECT_TRUE(large1->donated()); + EXPECT_TRUE(large2->donated()); + // HugePageAwareAllocatorTest.DonatedHugePages verifies Print works correctly + // for these stats. + + // Create two small allocations. They will be placed on different huge pages + // since kSmallSize+kSmallSize2 > kSlack for any single huge page. + Span* small1 = New(kSmallSize, kSpanInfo); + Span* small2 = New(kSmallSize2, kSpanInfo); + + RefreshStats(); + EXPECT_EQ(slack, 2 * kSlack); + EXPECT_EQ(donated_huge_pages, NHugePages(2)); + EXPECT_EQ(abandoned_pages, Length(0)); + EXPECT_FALSE(small1->donated()); + EXPECT_FALSE(small2->donated()); + + // To simplify the rest of the test, swap small1/small2 as required such that + // small1 is on the same huge page as large1, etc. This allows us to release + // 2 allocations from the same huge page. + if (HugePageContaining(large1->first_page()) != + HugePageContaining(small1->first_page())) { + std::swap(small1, small2); } + EXPECT_EQ(HugePageContaining(large1->first_page()), + HugePageContaining(small1->first_page())); + EXPECT_EQ(HugePageContaining(large2->first_page()), + HugePageContaining(small2->first_page())); + + // Release both allocations from one huge page. Donations should tick down + // and no pages should be considered abandoned. 
+  Delete(large1, kSpanInfo.objects_per_span);
+  Delete(small1, kSpanInfo.objects_per_span);
+
+  RefreshStats();
+  EXPECT_EQ(slack, kSlack);
+  EXPECT_EQ(donated_huge_pages, NHugePages(1));
+  EXPECT_EQ(abandoned_pages, Length(0));
+
+  // Delete the large allocation on the second huge page. Abandoned should tick
+  // up.
+  Delete(large2, kSpanInfo.objects_per_span);
+
+  RefreshStats();
   EXPECT_EQ(slack, Length(0));
   EXPECT_EQ(donated_huge_pages, NHugePages(1));
+  EXPECT_EQ(abandoned_pages, kLargeSize);
 
-  EXPECT_THAT(Print(), HasSubstr("filler donations 1"));
-  EXPECT_THAT(PrintInPbTxt(), HasSubstr("filler_donated_huge_pages: 1"));
+  // Reuse large2 and then deallocate it. Our abandoned count stats should not
+  // be double counted.
+  large2 = New(kLargeSize, kSpanInfo);
+
+  RefreshStats();
+  EXPECT_EQ(slack, kSlack);
+  EXPECT_EQ(donated_huge_pages, NHugePages(1));
+  EXPECT_EQ(abandoned_pages, kLargeSize);
+
+  Delete(large2, kSpanInfo.objects_per_span);
+  RefreshStats();
+  EXPECT_EQ(slack, Length(0));
+  EXPECT_EQ(donated_huge_pages, NHugePages(1));
+  EXPECT_EQ(abandoned_pages, kLargeSize);
+
+  // Cleanup
+  Delete(small2, kSpanInfo.objects_per_span);
+
+  RefreshStats();
+  EXPECT_EQ(slack, Length(0));
+  EXPECT_EQ(donated_huge_pages, NHugePages(0));
+  EXPECT_EQ(abandoned_pages, Length(0));
 }
 
-TEST_F(HugePageAwareAllocatorTest, PageMapInterference) {
+TEST_P(HugePageAwareAllocatorTest, LargeDonations) {
+  // A small allocation of size (kHugePageSize/2,kHugePageSize]-bytes can be
+  // considered not donated if it fills in a gap on an otherwise mostly free
+  // huge page that came from a donation.
+  static constexpr Length kSmallSize = kPagesPerHugePage - Length(1);
+  static constexpr Length kLargeSize = kPagesPerHugePage + Length(1);
+  const SpanAllocInfo kSpanInfo = {1, AccessDensityPrediction::kSparse};
+
+  // large donates kSmallSize pages to the filler.
+  Span* large = New(kLargeSize, kSpanInfo);
+  Length slack;
+  HugeLength donated_huge_pages;
+  Length abandoned_pages;
+
+  auto RefreshStats = [&]() {
+    PageHeapSpinLockHolder l;
+    slack = allocator_->info().slack();
+    donated_huge_pages = allocator_->DonatedHugePages();
+    abandoned_pages = allocator_->AbandonedPages();
+  };
+  RefreshStats();
+
+  EXPECT_EQ(slack, kSmallSize);
+  EXPECT_EQ(donated_huge_pages, NHugePages(1));
+  EXPECT_EQ(abandoned_pages, Length(0));
+  EXPECT_TRUE(large->donated());
+  // HugePageAwareAllocatorTest.DonatedHugePages verifies Print works correctly
+  // for these stats.
+
+  Span* small = New(kSmallSize, kSpanInfo);
+  RefreshStats();
+
+  // TODO(b/199203282): Current slack computation is unaware that this
+  // allocation is on a donated page. It assumes that kSmallSize allocation
+  // would also result in a slack. We would eliminate this once abandoned count
+  // subsumes slack computation.
+  EXPECT_EQ(slack, kSmallSize + Length(1));
+  EXPECT_EQ(donated_huge_pages, NHugePages(1));
+  EXPECT_EQ(abandoned_pages, Length(0));
+  EXPECT_FALSE(small->donated());
+
+  // small is on a donated hugepage. None of the stats should change when it is
+  // deallocated.
+  Delete(small, kSpanInfo.objects_per_span);
+  RefreshStats();
+  EXPECT_EQ(slack, kSmallSize);
+  EXPECT_EQ(donated_huge_pages, NHugePages(1));
+  EXPECT_EQ(abandoned_pages, Length(0));
+
+  // Cleanup. Deallocate large.
+ Delete(large, kSpanInfo.objects_per_span); + RefreshStats(); + EXPECT_EQ(slack, Length(0)); + EXPECT_EQ(donated_huge_pages, NHugePages(0)); + EXPECT_EQ(abandoned_pages, Length(0)); +} + +TEST_P(HugePageAwareAllocatorTest, TailDonation) { + // This test makes sure that we account for tail donations alone in the + // abandoned pages. + static constexpr Length kSmallSize = Length(1); + static constexpr Length kSlack = kPagesPerHugePage - Length(1); + static constexpr Length kLargeSize = 2 * kPagesPerHugePage - kSlack; + const SpanAllocInfo kSpanInfo = {1, AccessDensityPrediction::kSparse}; + + // large donates kSlack to the filler. + Span* large = New(kLargeSize, kSpanInfo); + Length slack; + HugeLength donated_huge_pages; + Length abandoned_pages; + + auto RefreshStats = [&]() { + PageHeapSpinLockHolder l; + slack = allocator_->info().slack(); + donated_huge_pages = allocator_->DonatedHugePages(); + abandoned_pages = allocator_->AbandonedPages(); + }; + RefreshStats(); + + EXPECT_EQ(slack, kSlack); + EXPECT_EQ(donated_huge_pages, NHugePages(1)); + EXPECT_EQ(abandoned_pages, Length(0)); + EXPECT_TRUE(large->donated()); + + // We should allocate small on the donated page. + Span* small = New(kSmallSize, kSpanInfo); + RefreshStats(); + EXPECT_EQ(slack, kSlack); + EXPECT_EQ(donated_huge_pages, NHugePages(1)); + EXPECT_EQ(abandoned_pages, Length(0)); + EXPECT_FALSE(small->donated()); + + // When we deallocate large, abandoned count should only account for the + // abandoned pages from the tail huge page. + Delete(large, kSpanInfo.objects_per_span); + RefreshStats(); + EXPECT_EQ(slack, Length(0)); + EXPECT_EQ(donated_huge_pages, NHugePages(1)); + EXPECT_EQ(abandoned_pages, Length(1)); + + // small is on a donated hugepage. Cleanup. + Delete(small, kSpanInfo.objects_per_span); + RefreshStats(); + EXPECT_EQ(slack, Length(0)); + EXPECT_EQ(donated_huge_pages, NHugePages(0)); + EXPECT_EQ(abandoned_pages, Length(0)); + + // large donates kSlack to the filler. + large = New(kLargeSize, kSpanInfo); + RefreshStats(); + EXPECT_EQ(slack, kSlack); + EXPECT_EQ(donated_huge_pages, NHugePages(1)); + EXPECT_EQ(abandoned_pages, Length(0)); + EXPECT_TRUE(large->donated()); + + // We should allocate small on the donated page. + small = New(kSmallSize, kSpanInfo); + RefreshStats(); + EXPECT_EQ(slack, kSlack); + EXPECT_EQ(donated_huge_pages, NHugePages(1)); + EXPECT_EQ(abandoned_pages, Length(0)); + + // If we delete small first, abandoned_pages should not tick up. + Delete(small, kSpanInfo.objects_per_span); + RefreshStats(); + EXPECT_EQ(slack, kSlack); + EXPECT_EQ(donated_huge_pages, NHugePages(1)); + EXPECT_EQ(abandoned_pages, Length(0)); + + // Deallocating large. Cleanup. All stats should reset to zero. + Delete(large, kSpanInfo.objects_per_span); + RefreshStats(); + EXPECT_EQ(slack, Length(0)); + EXPECT_EQ(donated_huge_pages, NHugePages(0)); + EXPECT_EQ(abandoned_pages, Length(0)); +} + +TEST_P(HugePageAwareAllocatorTest, NotDonated) { + // A small allocation of size (kHugePageSize/2,kHugePageSize]-bytes can be + // considered not donated if it filled in a gap on an otherwise mostly free + // huge page. 
+ static constexpr Length kSmallSize = Length(1); + static constexpr Length kLargeSize = kPagesPerHugePage - kSmallSize; + const SpanAllocInfo kSpanInfo = {1, AccessDensityPrediction::kSparse}; + + Span* small = New(kSmallSize, kSpanInfo); + + Length slack; + HugeLength donated_huge_pages; + Length abandoned_pages; + + auto RefreshStats = [&]() { + PageHeapSpinLockHolder l; + slack = allocator_->info().slack(); + donated_huge_pages = allocator_->DonatedHugePages(); + abandoned_pages = allocator_->AbandonedPages(); + }; + RefreshStats(); + + EXPECT_EQ(slack, Length(0)); + EXPECT_EQ(donated_huge_pages, NHugePages(0)); + EXPECT_EQ(abandoned_pages, Length(0)); + EXPECT_FALSE(small->donated()); + + // We should allocate large on the free huge page. That is, this allocation + // should not cause any donations to filler. + Span* large = New(kLargeSize, kSpanInfo); + + RefreshStats(); + // large contributes slack, but isn't donated. + EXPECT_EQ(slack, kSmallSize); + EXPECT_EQ(donated_huge_pages, NHugePages(0)); + EXPECT_EQ(abandoned_pages, Length(0)); + EXPECT_FALSE(large->donated()); + + Delete(large, kSpanInfo.objects_per_span); + RefreshStats(); + // large contributes slack, but isn't donated. + EXPECT_EQ(slack, Length(0)); + EXPECT_EQ(donated_huge_pages, NHugePages(0)); + EXPECT_EQ(abandoned_pages, Length(0)); + + // Cleanup. + Delete(small, kSpanInfo.objects_per_span); + RefreshStats(); + EXPECT_EQ(slack, Length(0)); + EXPECT_EQ(donated_huge_pages, NHugePages(0)); + EXPECT_EQ(abandoned_pages, Length(0)); +} + +TEST_P(HugePageAwareAllocatorTest, PageMapInterference) { // This test manipulates the test HugePageAwareAllocator while making // allocations/deallocations that interact with the real PageAllocator. The // two share a global PageMap. // // If this test begins failing, the two are likely conflicting by violating // invariants in the PageMap. + const SpanAllocInfo kSpanInfo = {1, AccessDensityPrediction::kSparse}; std::vector allocs; for (int i : {10, 20, 30}) { auto n = Length(i << 7); - allocs.push_back(New(n)); + allocs.push_back(New(n, kSpanInfo)); } for (auto* a : allocs) { - Delete(a); + Delete(a, kSpanInfo.objects_per_span); } allocs.clear(); @@ -433,21 +1123,22 @@ TEST_F(HugePageAwareAllocatorTest, PageMapInterference) { // Do the same, but allocate something on the real page heap. for (int i : {10, 20, 30}) { auto n = Length(i << 7); - allocs.push_back(New(n)); + allocs.push_back(New(n, kSpanInfo)); ::operator delete(::operator new(1 << 20)); } for (auto* a : allocs) { - Delete(a); + Delete(a, kSpanInfo.objects_per_span); } } -TEST_F(HugePageAwareAllocatorTest, LargeSmall) { +TEST_P(HugePageAwareAllocatorTest, LargeSmall) { const int kIters = 2000; const Length kSmallPages = Length(1); // Large block must be larger than 1 huge page. const Length kLargePages = 2 * kPagesPerHugePage - kSmallPages; + const SpanAllocInfo kSpanInfo = {1, AccessDensityPrediction::kSparse}; std::vector small_allocs; // Repeatedly allocate large and small allocations that fit into a multiple of @@ -456,25 +1147,25 @@ TEST_F(HugePageAwareAllocatorTest, LargeSmall) { // without bound, keeping many huge pages alive because of the small // allocations. 
for (int i = 0; i < kIters; i++) { - Span* large = New(kLargePages); + Span* large = New(kLargePages, kSpanInfo); ASSERT_NE(large, nullptr); - Span* small = New(kSmallPages); + Span* small = New(kSmallPages, kSpanInfo); ASSERT_NE(small, nullptr); small_allocs.push_back(small); - Delete(large); + Delete(large, kSpanInfo.objects_per_span); } BackingStats stats; { - absl::base_internal::SpinLockHolder h(&pageheap_lock); + PageHeapSpinLockHolder l; stats = allocator_->stats(); } constexpr size_t kBufferSize = 1024 * 1024; char buffer[kBufferSize]; Printer printer(buffer, kBufferSize); - allocator_->Print(&printer); + allocator_->Print(printer); // Verify that we have less free memory than we allocated in total. We have // to account for bytes tied up in the cache. EXPECT_LE(stats.free_bytes - allocator_->cache()->size().in_bytes(), @@ -482,89 +1173,103 @@ TEST_F(HugePageAwareAllocatorTest, LargeSmall) { << buffer; for (Span* small : small_allocs) { - Delete(small); + Delete(small, kSpanInfo.objects_per_span); } } // Tests an edge case in hugepage donation behavior. -TEST_F(HugePageAwareAllocatorTest, DonatedPageLists) { +TEST_P(HugePageAwareAllocatorTest, DonatedPageLists) { const Length kSmallPages = Length(1); // Large block must be larger than 1 huge page. const Length kLargePages = 2 * kPagesPerHugePage - 2 * kSmallPages; + const SpanAllocInfo kSpanInfo = {1, AccessDensityPrediction::kSparse}; - Span* large = New(kLargePages); + Span* large = New(kLargePages, kSpanInfo); ASSERT_NE(large, nullptr); // Allocating small1 moves the backing huge page off of the donated pages // list. - Span* small1 = New(kSmallPages); + Span* small1 = New(kSmallPages, kSpanInfo); ASSERT_NE(small1, nullptr); // This delete needs to have put the origin PageTracker back onto the right // free list. - Delete(small1); + Delete(small1, kSpanInfo.objects_per_span); // This otherwise fails. - Span* small2 = New(kSmallPages); + Span* small2 = New(kSmallPages, kSpanInfo); ASSERT_NE(small2, nullptr); - Delete(small2); + Delete(small2, kSpanInfo.objects_per_span); // Clean up. - Delete(large); + Delete(large, kSpanInfo.objects_per_span); } -TEST_F(HugePageAwareAllocatorTest, DonationAccounting) { +TEST_P(HugePageAwareAllocatorTest, DonationAccounting) { const Length kSmallPages = Length(2); const Length kOneHugePageDonation = kPagesPerHugePage - kSmallPages; const Length kMultipleHugePagesDonation = 3 * kPagesPerHugePage - kSmallPages; + const SpanAllocInfo kSpanInfo = {1, AccessDensityPrediction::kSparse}; // Each of these allocations should count as one donation, but only if they // are actually being reused. - Span* large = New(kOneHugePageDonation); + Span* large = New(kOneHugePageDonation, kSpanInfo); ASSERT_NE(large, nullptr); // This allocation ensures that the donation is not counted. - Span* small = New(kSmallPages); + Span* small = New(kSmallPages, kSpanInfo); ASSERT_NE(small, nullptr); - Span* large2 = New(kMultipleHugePagesDonation); + Span* large2 = New(kMultipleHugePagesDonation, kSpanInfo); ASSERT_NE(large2, nullptr); // This allocation ensures that the donation is not counted. - Span* small2 = New(kSmallPages); + Span* small2 = New(kSmallPages, kSpanInfo); ASSERT_NE(small2, nullptr); - Span* large3 = New(kOneHugePageDonation); + Span* large3 = New(kOneHugePageDonation, kSpanInfo); ASSERT_NE(large3, nullptr); - Span* large4 = New(kMultipleHugePagesDonation); + Span* large4 = New(kMultipleHugePagesDonation, kSpanInfo); ASSERT_NE(large4, nullptr); + HugeLength donated; + // Check donation count. 
+ { + PageHeapSpinLockHolder l; + donated = allocator_->DonatedHugePages(); + } + EXPECT_EQ(donated, NHugePages(4)); + // Clean up. - Delete(large); - Delete(large2); - Delete(large3); - Delete(large4); - Delete(small); - Delete(small2); + Delete(large, kSpanInfo.objects_per_span); + Delete(large2, kSpanInfo.objects_per_span); + Delete(large3, kSpanInfo.objects_per_span); + Delete(large4, kSpanInfo.objects_per_span); + Delete(small, kSpanInfo.objects_per_span); + Delete(small2, kSpanInfo.objects_per_span); // Check donation count. - absl::base_internal::SpinLockHolder h(&pageheap_lock); - CHECK_CONDITION(NHugePages(2) == allocator_->DonatedHugePages()); + { + PageHeapSpinLockHolder l; + donated = allocator_->DonatedHugePages(); + } + EXPECT_EQ(donated, NHugePages(0)); } // We'd like to test OOM behavior but this, err, OOMs. :) // (Usable manually in controlled environments. -TEST_F(HugePageAwareAllocatorTest, DISABLED_OOM) { +TEST_P(HugePageAwareAllocatorTest, DISABLED_OOM) { std::vector objs; auto n = Length(1); + const SpanAllocInfo kSpanInfo = {1, AccessDensityPrediction::kSparse}; while (true) { - Span* s = New(n); + Span* s = New(n, kSpanInfo); if (!s) break; objs.push_back(s); n *= 2; } for (auto s : objs) { - Delete(s); + Delete(s, kSpanInfo.objects_per_span); } } @@ -573,7 +1278,7 @@ struct MemoryBytes { uint64_t phys; }; -int64_t pagesize = getpagesize(); +int64_t pagesize = GetPageSize(); static size_t BytesInCore(void* p, size_t len) { static const size_t kBufSize = 1024; @@ -584,9 +1289,7 @@ static size_t BytesInCore(void* p, size_t len) { // We call mincore in bounded size chunks (though typically one // chunk will cover an entire request.) const size_t chunk_len = std::min(kChunk, len); - if (mincore(p, chunk_len, buf) != 0) { - Crash(kCrash, __FILE__, __LINE__, "mincore failed, errno", errno); - } + TC_CHECK_EQ(0, mincore(p, chunk_len, buf), "errno=%d", errno); const size_t lim = chunk_len / pagesize; for (size_t i = 0; i < lim; ++i) { if (buf[i] & 1) resident += pagesize; @@ -653,54 +1356,10 @@ void TouchTHP(Span* s) { // and without the validation class StatTest : public testing::Test { protected: - StatTest() : rng_() {} - - class RegionFactory; + StatTest() = default; - class Region : public AddressRegion { + class Forwarder : public huge_page_allocator_internal::StaticForwarder { public: - Region(AddressRegion* underlying, RegionFactory* factory) - : underlying_(underlying), factory_(factory) {} - - std::pair Alloc(size_t size, size_t alignment) override { - std::pair ret = underlying_->Alloc(size, alignment); - if (!ret.first) return {nullptr, 0}; - - // we only support so many allocations here for simplicity - CHECK_CONDITION(factory_->n_ < factory_->kNumAllocs); - // Anything coming from the test allocator will request full - // alignment. Metadata allocations will not. Since we can't - // control the backing of metadata allocations, elide them. - // TODO(b/128521238): this is not a good way to do this. 
- if (alignment >= kHugePageSize) { - factory_->allocs_[factory_->n_] = ret; - factory_->n_++; - } - return ret; - } - - private: - AddressRegion* underlying_; - RegionFactory* factory_; - }; - - class RegionFactory : public AddressRegionFactory { - public: - explicit RegionFactory(AddressRegionFactory* underlying) - : underlying_(underlying), n_(0) {} - - AddressRegion* Create(void* start, size_t size, UsageHint hint) override { - AddressRegion* underlying_region = underlying_->Create(start, size, hint); - CHECK_CONDITION(underlying_region); - void* region_space = MallocInternal(sizeof(Region)); - CHECK_CONDITION(region_space); - return new (region_space) Region(underlying_region, this); - } - - size_t GetStats(absl::Span buffer) override { - return underlying_->GetStats(buffer); - } - MemoryBytes Memory() { MemoryBytes b = {0, 0}; for (int i = 0; i < n_; ++i) { @@ -713,62 +1372,69 @@ class StatTest : public testing::Test { return b; } - AddressRegionFactory* underlying() const { return underlying_; } + // Provide hooked versions of AllocatePages + AddressRange AllocatePages(size_t bytes, size_t align, MemoryTag tag) { + auto& underlying = *static_cast(this); + auto range = underlying.AllocatePages(bytes, align, tag); - private: - friend class Region; - AddressRegionFactory* underlying_; + // we only support so many allocations here for simplicity + TC_CHECK_LT(n_, kNumAllocs); + if (tag != MemoryTag::kMetadata) { + allocs_[n_] = {range.ptr, range.bytes}; + n_++; + } + return range; + } + + private: static constexpr size_t kNumAllocs = 1000; - size_t n_; - std::pair allocs_[kNumAllocs]; + size_t n_ = 0; + std::pair allocs_[kNumAllocs] = {}; }; + using HookedAllocator = + huge_page_allocator_internal::HugePageAwareAllocator; + // Carefully get memory usage without touching anything. - MemoryBytes GetSystemBytes() { return replacement_region_factory_.Memory(); } + MemoryBytes GetSystemBytes() { return alloc_->forwarder().Memory(); } // This is essentially a test case set up, but run manually - // we can't guarantee gunit won't malloc between. void PrepTest() { - memset(buf, 0, sizeof(buf)); - MallocExtension::ReleaseMemoryToSystem(std::numeric_limits::max()); - SetRegionFactory(&replacement_region_factory_); - alloc = new (buf) HugePageAwareAllocator(MemoryTag::kNormal); + memset(buf_, 0, sizeof(buf_)); + alloc_ = new (buf_) + HookedAllocator(HugePageAwareAllocatorOptions{MemoryTag::kNormal}); } - ~StatTest() override { - SetRegionFactory(replacement_region_factory_.underlying()); - } + ~StatTest() override = default; BackingStats Stats() { - absl::base_internal::SpinLockHolder h(&pageheap_lock); - BackingStats stats = alloc->stats(); + PageHeapSpinLockHolder l; + BackingStats stats = alloc_->stats(); return stats; } // Use bigger allocs here to ensure growth: - Length RandomAllocSize() { + Length RandomAllocSize(absl::BitGenRef rng) { // Since we touch all of the pages, try to avoid OOM'ing by limiting the // number of big allocations. 
const Length kMaxBigAllocs = Length(4096); - if (big_allocs_ < kMaxBigAllocs && absl::Bernoulli(rng_, 1.0 / 50)) { + if (big_allocs_ < kMaxBigAllocs && absl::Bernoulli(rng, 1.0 / 50)) { auto n = - Length(1024 * (1 + absl::LogUniform(rng_, 0, (1 << 9) - 1))); - n += Length(absl::Uniform(rng_, 0, 1024)); + Length(1024 * (1 + absl::LogUniform(rng, 0, (1 << 9) - 1))); + n += Length(absl::Uniform(rng, 0, 1024)); big_allocs_ += n; return n; } - return Length(1 + absl::LogUniform(rng_, 0, (1 << 10) - 1)); + return Length(1 + absl::LogUniform(rng, 0, (1 << 10) - 1)); } - Span* Alloc(Length n) { - Span* span = alloc->New(n); + Span* Alloc(Length n, SpanAllocInfo span_info) { + Span* span = alloc_->New(n, span_info); TouchTHP(span); - if (n > span->num_pages()) { - Crash(kCrash, __FILE__, __LINE__, n.raw_num(), - "not <=", span->num_pages().raw_num()); - } + TC_CHECK_LE(n, span->num_pages()); n = span->num_pages(); if (n > longest_) longest_ = n; total_ += n; @@ -776,13 +1442,21 @@ class StatTest : public testing::Test { return span; } - void Free(Span* s) { + void Free(Span* s, SpanAllocInfo span_info) { Length n = s->num_pages(); total_ -= n; - { - absl::base_internal::SpinLockHolder h(&pageheap_lock); - alloc->Delete(s); - } +#ifdef TCMALLOC_INTERNAL_LEGACY_LOCKING + PageHeapSpinLockHolder l; + alloc_->Delete(s); +#else + PageAllocatorInterface::AllocationState a{ + Range(s->first_page(), s->num_pages()), + s->donated(), + }; + alloc_->forwarder().DeleteSpan(s); + PageHeapSpinLockHolder l; + alloc_->Delete(a); +#endif // TCMALLOC_INTERNAL_LEGACY_LOCKING } void CheckStats() { @@ -791,9 +1465,9 @@ class StatTest : public testing::Test { SmallSpanStats small; LargeSpanStats large; { - absl::base_internal::SpinLockHolder h(&pageheap_lock); - alloc->GetSmallSpanStats(&small); - alloc->GetLargeSpanStats(&large); + PageHeapSpinLockHolder l; + alloc_->GetSmallSpanStats(&small); + alloc_->GetLargeSpanStats(&large); } size_t span_stats_free_bytes = 0, span_stats_released_bytes = 0; @@ -805,30 +1479,19 @@ class StatTest : public testing::Test { span_stats_free_bytes += large.normal_pages.in_bytes(); span_stats_released_bytes += large.returned_pages.in_bytes(); -#ifndef __ppc__ const size_t alloced_bytes = total_.in_bytes(); -#endif ASSERT_EQ(here.virt, stats.system_bytes); -#ifndef __ppc__ const size_t actual_unmapped = here.virt - here.phys; -#endif - // TODO(b/122551676): On PPC, our release granularity may be smaller than - // the system page size, so we may not actually unmap memory that we expect. - // Pending using the return value of madvise, relax this constraint. 
-#ifndef __ppc__ ASSERT_EQ(actual_unmapped, stats.unmapped_bytes); ASSERT_EQ(here.phys, stats.free_bytes + alloced_bytes); ASSERT_EQ(alloced_bytes, stats.system_bytes - stats.free_bytes - stats.unmapped_bytes); -#endif ASSERT_EQ(stats.free_bytes, span_stats_free_bytes); ASSERT_EQ(stats.unmapped_bytes, span_stats_released_bytes); } - char buf[sizeof(HugePageAwareAllocator)]; - HugePageAwareAllocator* alloc; - RegionFactory replacement_region_factory_{GetRegionFactory()}; - absl::BitGen rng_; + char buf_[sizeof(HookedAllocator)]; + HookedAllocator* alloc_; Length total_; Length longest_; @@ -838,52 +1501,53 @@ class StatTest : public testing::Test { TEST_F(StatTest, Basic) { static const size_t kNumAllocs = 500; + const SpanAllocInfo kSpanInfo = {1, AccessDensityPrediction::kSparse}; + absl::BitGen rng; Span* allocs[kNumAllocs]; - const bool always_check_usage = absl::GetFlag(FLAGS_always_check_usage); - PrepTest(); // DO NOT MALLOC ANYTHING BELOW THIS LINE! WE'RE TRYING TO CAREFULLY COUNT // ALLOCATIONS. // (note we can't stop background threads, but hopefully they're idle enough.) for (int i = 0; i < kNumAllocs; ++i) { - Length k = RandomAllocSize(); - allocs[i] = Alloc(k); + Length k = RandomAllocSize(rng); + allocs[i] = Alloc(k, kSpanInfo); // stats are expensive, don't always check - if (i % 10 != 0 && !always_check_usage) continue; + if (i % 10 != 0) continue; CheckStats(); } static const size_t kReps = 1000; for (int i = 0; i < kReps; ++i) { - size_t index = absl::Uniform(rng_, 0, kNumAllocs); + size_t index = absl::Uniform(rng, 0, kNumAllocs); - Free(allocs[index]); - Length k = RandomAllocSize(); - allocs[index] = Alloc(k); + Free(allocs[index], kSpanInfo); + Length k = RandomAllocSize(rng); + allocs[index] = Alloc(k, kSpanInfo); - if (absl::Bernoulli(rng_, 1.0 / 3)) { - Length pages(absl::LogUniform(rng_, 0, (1 << 10) - 1) + 1); - absl::base_internal::SpinLockHolder h(&pageheap_lock); - alloc->ReleaseAtLeastNPages(pages); + if (absl::Bernoulli(rng, 1.0 / 3)) { + Length pages(absl::LogUniform(rng, 0, (1 << 10) - 1) + 1); + PageHeapSpinLockHolder l; + alloc_->ReleaseAtLeastNPages( + pages, /*reason=*/PageReleaseReason::kReleaseMemoryToSystem); } // stats are expensive, don't always check - if (i % 10 != 0 && !always_check_usage) continue; + if (i % 10 != 0) continue; CheckStats(); } for (int i = 0; i < kNumAllocs; ++i) { - Free(allocs[i]); - if (i % 10 != 0 && !always_check_usage) continue; + Free(allocs[i], kSpanInfo); + if (i % 10 != 0) continue; CheckStats(); } { CheckStats(); pageheap_lock.Lock(); - auto final_stats = alloc->stats(); + auto final_stats = alloc_->stats(); pageheap_lock.Unlock(); ASSERT_EQ(final_stats.free_bytes + final_stats.unmapped_bytes, final_stats.system_bytes); @@ -892,9 +1556,10 @@ TEST_F(StatTest, Basic) { // test over, malloc all you like } -TEST_F(HugePageAwareAllocatorTest, ParallelRelease) { +TEST_P(HugePageAwareAllocatorTest, ParallelRelease) { ThreadManager threads; constexpr int kThreads = 10; + const SpanAllocInfo kSpanInfo = {1, AccessDensityPrediction::kSparse}; struct ABSL_CACHELINE_ALIGNED Metadata { absl::BitGen rng; @@ -908,7 +1573,8 @@ TEST_F(HugePageAwareAllocatorTest, ParallelRelease) { Metadata& m = metadata[thread_id]; if (thread_id == 0) { - ReleasePages(Length(absl::Uniform(m.rng, 1, 1 << 10))); + ReleasePages(Length(absl::Uniform(m.rng, 1, 1 << 10)), + /*reason=*/PageReleaseReason::kReleaseMemoryToSystem); return; } else if (thread_id == 1) { benchmark::DoNotOptimize(Print()); @@ -916,15 +1582,9 @@ TEST_F(HugePageAwareAllocatorTest, 
ParallelRelease) { } if (absl::Bernoulli(m.rng, 0.6) || m.spans.empty()) { - Span* s = AllocatorNew(Length(absl::LogUniform(m.rng, 1, 1 << 10))); - CHECK_CONDITION(s != nullptr); - - // Touch the contents of the buffer. We later use it to verify we are the - // only thread manipulating the Span, for example, if another thread - // madvise DONTNEED'd the contents and zero'd them. - const uintptr_t key = reinterpret_cast(s) ^ thread_id; - *reinterpret_cast(s->start_address()) = key; - + Span* s = + AllocatorNew(Length(absl::LogUniform(m.rng, 1, 1 << 10)), kSpanInfo); + TC_CHECK_NE(s, nullptr); m.spans.push_back(s); } else { size_t index = absl::Uniform(m.rng, 0, m.spans.size()); @@ -934,10 +1594,7 @@ TEST_F(HugePageAwareAllocatorTest, ParallelRelease) { m.spans[index] = back; m.spans.pop_back(); - const uintptr_t key = reinterpret_cast(s) ^ thread_id; - EXPECT_EQ(*reinterpret_cast(s->start_address()), key); - - AllocatorDelete(s); + AllocatorDelete(s, kSpanInfo.objects_per_span); } }); @@ -947,11 +1604,345 @@ TEST_F(HugePageAwareAllocatorTest, ParallelRelease) { for (auto& m : metadata) { for (Span* s : m.spans) { - AllocatorDelete(s); + AllocatorDelete(s, kSpanInfo.objects_per_span); } } } +INSTANTIATE_TEST_SUITE_P( + All, HugePageAwareAllocatorTest, + testing::Values(HugeRegionUsageOption::kDefault, + HugeRegionUsageOption::kUseForAllLargeAllocs)); + +// This is set to ensure that .in_bytes() doesn't overflow 64-bit size_t. +inline constexpr Length kMaxLength = + Length(std::numeric_limits::max()); + +using FakeHugePageAwareAllocator = + huge_page_allocator_internal::HugePageAwareAllocator< + huge_page_allocator_internal::FakeStaticForwarder>; +struct SpanDeleter { + explicit SpanDeleter(absl::Nonnull allocator) + : allocator(*allocator) {} + + void operator()(Span* s) ABSL_LOCKS_EXCLUDED(pageheap_lock) { +#ifdef TCMALLOC_INTERNAL_LEGACY_LOCKING + PageHeapSpinLockHolder l; + allocator.Delete(s); +#else + PageAllocatorInterface::AllocationState a{ + Range(s->first_page(), s->num_pages()), + s->donated(), + }; + allocator.forwarder().DeleteSpan(s); + PageHeapSpinLockHolder l; + allocator.Delete(a); +#endif // TCMALLOC_INTERNAL_LEGACY_LOCKING + } + + FakeHugePageAwareAllocator& allocator; +}; + +using SpanUniquePtr = std::unique_ptr; + +class GetReleaseStatsTest : public testing::Test { + public: + void SetUp() override { + // Use SetUp instead of a constructor so that we can make assertions. 
+ MallocExtension::SetRegionFactory(&factory_); + + allocator_ = new (allocator_storage_.data()) + FakeHugePageAwareAllocator({.tag = MemoryTag::kNormal}); + + allocator_->forwarder().set_hpaa_subrelease(/*value=*/false); + allocator_->forwarder().set_huge_cache_demand_based_release( + /*value=*/false); + allocator_->forwarder().set_huge_region_demand_based_release( + /*value=*/false); + allocator_->forwarder().set_filler_skip_subrelease_interval( + absl::ZeroDuration()); + allocator_->forwarder().set_filler_skip_subrelease_short_interval( + absl::ZeroDuration()); + allocator_->forwarder().set_filler_skip_subrelease_long_interval( + absl::ZeroDuration()); + + ASSERT_EQ(ReleaseAtLeastNPagesBreakingHugepages( + kMaxLength, + /*reason=*/PageReleaseReason::kReleaseMemoryToSystem), + Length(0)); + + ASSERT_EQ(GetReleaseStats(), PageReleaseStats{}); + } + + ~GetReleaseStatsTest() override { + ReleaseAtLeastNPagesBreakingHugepages( + Length(std::numeric_limits::max()), + /*reason=*/PageReleaseReason::kReleaseMemoryToSystem); + + MallocExtension::SetRegionFactory(previous_factory_); + }; + + FakeHugePageAwareAllocator& allocator() { return *allocator_; } + + PageReleaseStats GetReleaseStats() ABSL_LOCKS_EXCLUDED(pageheap_lock) { + const PageHeapSpinLockHolder l; + return allocator().GetReleaseStats(); + } + + SpanUniquePtr New(Length n, AccessDensityPrediction density = + AccessDensityPrediction::kDense) + ABSL_LOCKS_EXCLUDED(pageheap_lock) { + return {allocator().New(n, {.objects_per_span = 1, .density = density}), + SpanDeleter(&allocator())}; + } + + Length ReleaseAtLeastNPages(Length n, PageReleaseReason reason) + ABSL_LOCKS_EXCLUDED(pageheap_lock) { + const PageHeapSpinLockHolder l; + return allocator().ReleaseAtLeastNPages(n, reason); + } + + Length ReleaseAtLeastNPagesBreakingHugepages(Length n, + PageReleaseReason reason) + ABSL_LOCKS_EXCLUDED(pageheap_lock) { + const PageHeapSpinLockHolder l; + return allocator().ReleaseAtLeastNPagesBreakingHugepages(n, reason); + } + + protected: + AddressRegionFactory* const previous_factory_ = + MallocExtension::GetRegionFactory(); + + ExtraRegionFactory factory_{previous_factory_}; + + alignas(FakeHugePageAwareAllocator) std::array< + unsigned char, sizeof(FakeHugePageAwareAllocator)> allocator_storage_; + FakeHugePageAwareAllocator* allocator_; +}; + +TEST_F(GetReleaseStatsTest, GetReleaseStats) { + SpanUniquePtr huge_page = New(kPagesPerHugePage); + ASSERT_TRUE(huge_page != nullptr); + EXPECT_EQ(GetReleaseStats(), PageReleaseStats{}); + + EXPECT_EQ(ReleaseAtLeastNPages( + kMaxLength, + /*reason=*/PageReleaseReason::kReleaseMemoryToSystem), + Length(0)); + EXPECT_EQ(GetReleaseStats(), PageReleaseStats{}); + + huge_page.reset(); + EXPECT_EQ(GetReleaseStats(), PageReleaseStats{}); + + EXPECT_EQ(ReleaseAtLeastNPages( + kMaxLength, + /*reason=*/PageReleaseReason::kReleaseMemoryToSystem), + kPagesPerHugePage); + EXPECT_EQ(GetReleaseStats(), + (PageReleaseStats{ + .total = kPagesPerHugePage, + .release_memory_to_system = kPagesPerHugePage, + })); +} + +TEST_F(GetReleaseStatsTest, ReasonsTrackedSeparately) { + SpanUniquePtr release_memory_to_system = New(kPagesPerHugePage); + ASSERT_TRUE(release_memory_to_system != nullptr); + + SpanUniquePtr process_background_actions = New(kPagesPerHugePage); + ASSERT_TRUE(process_background_actions != nullptr); + + SpanUniquePtr soft_limit_exceeded = New(kPagesPerHugePage); + ASSERT_TRUE(soft_limit_exceeded != nullptr); + + SpanUniquePtr hard_limit_exceeded = New(kPagesPerHugePage); + ASSERT_TRUE(hard_limit_exceeded != 
nullptr); + + EXPECT_EQ( + ReleaseAtLeastNPages( + kMaxLength, /*reason=*/PageReleaseReason::kReleaseMemoryToSystem), + Length(0)); + + release_memory_to_system.reset(); + EXPECT_EQ( + ReleaseAtLeastNPages( + kMaxLength, /*reason=*/PageReleaseReason::kReleaseMemoryToSystem), + kPagesPerHugePage); + EXPECT_EQ(GetReleaseStats(), + (PageReleaseStats{ + .total = kPagesPerHugePage, + .release_memory_to_system = kPagesPerHugePage, + })); + + process_background_actions.reset(); + EXPECT_EQ( + ReleaseAtLeastNPages( + kMaxLength, /*reason=*/PageReleaseReason::kProcessBackgroundActions), + kPagesPerHugePage); + EXPECT_EQ(GetReleaseStats(), + (PageReleaseStats{ + .total = kPagesPerHugePage * 2, + .release_memory_to_system = kPagesPerHugePage, + .process_background_actions = kPagesPerHugePage, + })); + + soft_limit_exceeded.reset(); + EXPECT_EQ(ReleaseAtLeastNPages( + kMaxLength, /*reason=*/PageReleaseReason::kSoftLimitExceeded), + kPagesPerHugePage); + EXPECT_EQ(GetReleaseStats(), + (PageReleaseStats{ + .total = kPagesPerHugePage * 3, + .release_memory_to_system = kPagesPerHugePage, + .process_background_actions = kPagesPerHugePage, + .soft_limit_exceeded = kPagesPerHugePage, + })); + + hard_limit_exceeded.reset(); + EXPECT_EQ(ReleaseAtLeastNPages( + kMaxLength, /*reason=*/PageReleaseReason::kHardLimitExceeded), + kPagesPerHugePage); + EXPECT_EQ(GetReleaseStats(), + (PageReleaseStats{ + .total = kPagesPerHugePage * 4, + .release_memory_to_system = kPagesPerHugePage, + .process_background_actions = kPagesPerHugePage, + .soft_limit_exceeded = kPagesPerHugePage, + .hard_limit_exceeded = kPagesPerHugePage, + })); +} + +TEST_F(GetReleaseStatsTest, + ReleaseSinglePageAfterBreakingHugepagesRequiresBreakingAgain) { + SpanUniquePtr page = New(Length(1)); + ASSERT_TRUE(page != nullptr); + EXPECT_EQ(GetReleaseStats(), PageReleaseStats{}); + + // We should have to break a hugepage apart to release the other pages. 
+ EXPECT_EQ(ReleaseAtLeastNPages( + kMaxLength, + /*reason=*/PageReleaseReason::kReleaseMemoryToSystem), + Length(0)); + EXPECT_EQ(GetReleaseStats(), (PageReleaseStats{})); + + EXPECT_EQ(ReleaseAtLeastNPagesBreakingHugepages( + kMaxLength, + /*reason=*/PageReleaseReason::kSoftLimitExceeded), + kPagesPerHugePage - Length(1)); + EXPECT_EQ(GetReleaseStats(), + (PageReleaseStats{ + .total = kPagesPerHugePage - Length(1), + .soft_limit_exceeded = kPagesPerHugePage - Length(1), + })); + + page.reset(); + EXPECT_EQ(ReleaseAtLeastNPages( + kMaxLength, + /*reason=*/PageReleaseReason::kReleaseMemoryToSystem), + Length(0)); + EXPECT_EQ(GetReleaseStats(), + (PageReleaseStats{ + .total = kPagesPerHugePage - Length(1), + .soft_limit_exceeded = kPagesPerHugePage - Length(1), + })); + + EXPECT_EQ(ReleaseAtLeastNPagesBreakingHugepages( + kMaxLength, + /*reason=*/PageReleaseReason::kHardLimitExceeded), + Length(1)); + EXPECT_EQ(GetReleaseStats(), + (PageReleaseStats{ + .total = kPagesPerHugePage, + .soft_limit_exceeded = kPagesPerHugePage - Length(1), + .hard_limit_exceeded = Length(1), + })); +} + +TEST_F(GetReleaseStatsTest, + ReleaseAfterNewDeleteSinglePageDoesNotRequireBreakingHugepages) { + SpanUniquePtr page = New(Length(1)); + ASSERT_TRUE(page != nullptr); + page.reset(); + + EXPECT_EQ(ReleaseAtLeastNPages( + kMaxLength, + /*reason=*/PageReleaseReason::kProcessBackgroundActions), + kPagesPerHugePage); + EXPECT_EQ(GetReleaseStats(), + (PageReleaseStats{ + .total = kPagesPerHugePage, + .process_background_actions = kPagesPerHugePage, + })); +} + +TEST_F(GetReleaseStatsTest, ReleaseAfterPartialReleaseContinuesTrackingStats) { + SpanUniquePtr two_hugepages = New(kPagesPerHugePage * 2); + ASSERT_TRUE(two_hugepages != nullptr); + two_hugepages.reset(); + + EXPECT_EQ(ReleaseAtLeastNPages( + kPagesPerHugePage, + /*reason=*/PageReleaseReason::kReleaseMemoryToSystem), + kPagesPerHugePage); + EXPECT_EQ(GetReleaseStats(), + (PageReleaseStats{ + .total = kPagesPerHugePage, + .release_memory_to_system = kPagesPerHugePage, + })); + + EXPECT_EQ(ReleaseAtLeastNPages( + kMaxLength, + /*reason=*/PageReleaseReason::kProcessBackgroundActions), + kPagesPerHugePage); + EXPECT_EQ(GetReleaseStats(), + (PageReleaseStats{ + .total = kPagesPerHugePage * 2, + .release_memory_to_system = kPagesPerHugePage, + .process_background_actions = kPagesPerHugePage, + })); +} + +TEST_F(GetReleaseStatsTest, b339535705) { + std::vector v; + size_t system_bytes; + do { + // Allocate until we trigger the huge regions. 
+ v.push_back(New(kPagesPerHugePage * 2 + Length(1), + AccessDensityPrediction::kSparse)); + + PageHeapSpinLockHolder l; + system_bytes = allocator_->RegionsStats().system_bytes; + } while (system_bytes < kHugePageSize); + + EXPECT_FALSE(v.empty()); + + v.push_back( + New(kPagesPerHugePage * 2 + Length(1), AccessDensityPrediction::kSparse)); + v.push_back( + New(kPagesPerHugePage * 2 + Length(1), AccessDensityPrediction::kSparse)); + + v.pop_back(); + v.pop_back(); + + BackingStats stats; + { + PageHeapSpinLockHolder l; + stats = allocator_->RegionsStats(); + } + + EXPECT_GE(stats.system_bytes, kHugePageSize); + EXPECT_GE(stats.free_bytes, kHugePageSize); + + Length released = ReleaseAtLeastNPagesBreakingHugepages( + kPagesPerHugePage, /*reason=*/PageReleaseReason::kSoftLimitExceeded); + EXPECT_GE(released, kPagesPerHugePage); + EXPECT_EQ(GetReleaseStats(), (PageReleaseStats{ + .total = released, + .soft_limit_exceeded = released, + .hard_limit_exceeded = Length(0), + })); +} + } // namespace } // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_page_filler.h b/contrib/libs/tcmalloc/tcmalloc/huge_page_filler.h index 2f72b438818c..288baa0a986d 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_page_filler.h +++ b/contrib/libs/tcmalloc/tcmalloc/huge_page_filler.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -19,19 +20,31 @@ #include #include +#include #include #include "absl/algorithm/container.h" +#include "absl/base/attributes.h" #include "absl/base/internal/cycleclock.h" +#include "absl/base/optimization.h" +#include "absl/base/thread_annotations.h" +#include "absl/strings/string_view.h" #include "absl/time/time.h" +#include "absl/types/span.h" #include "tcmalloc/common.h" -#include "tcmalloc/huge_allocator.h" +#include "tcmalloc/hinted_tracker_lists.h" #include "tcmalloc/huge_cache.h" +#include "tcmalloc/huge_page_subrelease.h" #include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/clock.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/linked_list.h" +#include "tcmalloc/internal/logging.h" #include "tcmalloc/internal/optimization.h" +#include "tcmalloc/internal/pageflags.h" #include "tcmalloc/internal/range_tracker.h" #include "tcmalloc/internal/timeseries_tracker.h" +#include "tcmalloc/pages.h" #include "tcmalloc/span.h" #include "tcmalloc/stats.h" @@ -39,607 +52,24 @@ GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { -// This and the following classes implement the adaptive hugepage subrelease -// mechanism and realized fragmentation metric described in "Adaptive Hugepage -// Subrelease for Non-moving Memory Allocators in Warehouse-Scale Computers" -// (ISMM 2021). - -// Tracks correctness of skipped subrelease decisions over time. -template -class SkippedSubreleaseCorrectnessTracker { - public: - struct SkippedSubreleaseDecision { - Length pages; // Number of pages we skipped subreleasing. - size_t count; // Number of times we skipped a subrelease. 
- - SkippedSubreleaseDecision() : pages(0), count(0) {} - explicit SkippedSubreleaseDecision(Length pages) : pages(pages), count(1) {} - explicit SkippedSubreleaseDecision(Length pages, size_t count) - : pages(pages), count(count) {} - - SkippedSubreleaseDecision& operator+=(SkippedSubreleaseDecision rhs) { - pages += rhs.pages; - count += rhs.count; - return *this; - } - - static SkippedSubreleaseDecision Zero() { - return SkippedSubreleaseDecision(); - } - }; - - explicit constexpr SkippedSubreleaseCorrectnessTracker(Clock clock, - absl::Duration w) - : window_(w), - epoch_length_(window_ / kEpochs), - last_confirmed_peak_(0), - tracker_(clock, w) {} - - // Not copyable or movable - SkippedSubreleaseCorrectnessTracker( - const SkippedSubreleaseCorrectnessTracker&) = delete; - SkippedSubreleaseCorrectnessTracker& operator=( - const SkippedSubreleaseCorrectnessTracker&) = delete; - - void ReportSkippedSubreleasePages( - Length skipped_pages, Length peak_pages, - absl::Duration expected_time_until_next_peak) { - total_skipped_ += SkippedSubreleaseDecision(skipped_pages); - pending_skipped_ += SkippedSubreleaseDecision(skipped_pages); - - SkippedSubreleaseUpdate update; - update.decision = SkippedSubreleaseDecision(skipped_pages); - update.num_pages_at_decision = peak_pages; - update.correctness_interval_epochs = - expected_time_until_next_peak / epoch_length_; - tracker_.Report(update); - } - - void ReportUpdatedPeak(Length current_peak) { - // Record this peak for the current epoch (so we don't double-count correct - // predictions later) and advance the tracker. - SkippedSubreleaseUpdate update; - update.confirmed_peak = current_peak; - if (tracker_.Report(update)) { - // Also keep track of the largest peak we have confirmed this epoch. - last_confirmed_peak_ = Length(0); - } - - // Recompute currently pending decisions. - pending_skipped_ = SkippedSubreleaseDecision::Zero(); - - Length largest_peak_already_confirmed = last_confirmed_peak_; - - tracker_.IterBackwards( - [&](size_t offset, int64_t ts, const SkippedSubreleaseEntry& e) { - // Do not clear any decisions in the current epoch. - if (offset == 0) { - return; - } - - if (e.decisions.count > 0 && - e.max_num_pages_at_decision > largest_peak_already_confirmed && - offset <= e.correctness_interval_epochs) { - if (e.max_num_pages_at_decision <= current_peak) { - // We can confirm a subrelease decision as correct and it had not - // been confirmed correct by an earlier peak yet. - correctly_skipped_ += e.decisions; - } else { - pending_skipped_ += e.decisions; - } - } - - // Did we clear any earlier decisions based on a peak in this epoch? - // Keep track of the peak, so we do not clear them again. - largest_peak_already_confirmed = - std::max(largest_peak_already_confirmed, e.max_confirmed_peak); - }, - -1); - - last_confirmed_peak_ = std::max(last_confirmed_peak_, current_peak); - } - - inline SkippedSubreleaseDecision total_skipped() const { - return total_skipped_; - } - - inline SkippedSubreleaseDecision correctly_skipped() const { - return correctly_skipped_; - } - - inline SkippedSubreleaseDecision pending_skipped() const { - return pending_skipped_; - } - - private: - struct SkippedSubreleaseUpdate { - // A subrelease decision that was made at this time step: How much did we - // decide not to release? - SkippedSubreleaseDecision decision; - - // What does our future demand have to be for this to be correct? If there - // were multiple subrelease decisions in the same epoch, use the max. 
- Length num_pages_at_decision; - - // How long from the time of the decision do we have before the decision - // will be determined incorrect? - int64_t correctness_interval_epochs = 0; - - // At this time step, we confirmed a demand peak at this level, which means - // all subrelease decisions in earlier time steps that had peak_demand_pages - // <= this confirmed_peak were confirmed correct and don't need to be - // considered again in the future. - Length confirmed_peak; - }; - - struct SkippedSubreleaseEntry { - SkippedSubreleaseDecision decisions = SkippedSubreleaseDecision::Zero(); - Length max_num_pages_at_decision; - int64_t correctness_interval_epochs = 0; - Length max_confirmed_peak; - - static SkippedSubreleaseEntry Nil() { return SkippedSubreleaseEntry(); } - - void Report(SkippedSubreleaseUpdate e) { - decisions += e.decision; - correctness_interval_epochs = - std::max(correctness_interval_epochs, e.correctness_interval_epochs); - max_num_pages_at_decision = - std::max(max_num_pages_at_decision, e.num_pages_at_decision); - max_confirmed_peak = std::max(max_confirmed_peak, e.confirmed_peak); - } - }; - - const absl::Duration window_; - const absl::Duration epoch_length_; - - // The largest peak we processed this epoch. This is required to avoid us - // double-counting correctly predicted decisions. - Length last_confirmed_peak_; - - SkippedSubreleaseDecision total_skipped_; - SkippedSubreleaseDecision correctly_skipped_; - SkippedSubreleaseDecision pending_skipped_; - - TimeSeriesTracker - tracker_; -}; - -struct SubreleaseStats { - Length total_pages_subreleased; // cumulative since startup - Length num_pages_subreleased; - HugeLength total_hugepages_broken{NHugePages(0)}; // cumulative since startup - HugeLength num_hugepages_broken{NHugePages(0)}; - - bool is_limit_hit = false; - // Keep these limit-related stats cumulative since startup only - Length total_pages_subreleased_due_to_limit; - HugeLength total_hugepages_broken_due_to_limit{NHugePages(0)}; - - void reset() { - total_pages_subreleased += num_pages_subreleased; - total_hugepages_broken += num_hugepages_broken; - num_pages_subreleased = Length(0); - num_hugepages_broken = NHugePages(0); - } - - // Must be called at the beginning of each subrelease request - void set_limit_hit(bool value) { is_limit_hit = value; } - - // This only has a well-defined meaning within ReleaseCandidates where - // set_limit_hit() has been called earlier. Do not use anywhere else. - bool limit_hit() { return is_limit_hit; } -}; - -// Track filler statistics over a time window. 
-template -class FillerStatsTracker { - public: - enum Type { kRegular, kDonated, kPartialReleased, kReleased, kNumTypes }; - - struct FillerStats { - Length num_pages; - Length free_pages; - Length unmapped_pages; - Length used_pages_in_subreleased_huge_pages; - HugeLength huge_pages[kNumTypes]; - Length num_pages_subreleased; - HugeLength num_hugepages_broken = NHugePages(0); - - HugeLength total_huge_pages() const { - HugeLength total_huge_pages; - for (int i = 0; i < kNumTypes; i++) { - total_huge_pages += huge_pages[i]; - } - return total_huge_pages; - } - }; - - struct NumberOfFreePages { - Length free; - Length free_backed; - }; - - explicit constexpr FillerStatsTracker(Clock clock, absl::Duration w, - absl::Duration summary_interval) - : summary_interval_(summary_interval), - window_(w), - epoch_length_(window_ / kEpochs), - tracker_(clock, w), - skipped_subrelease_correctness_(clock, w) {} - - // Not copyable or movable - FillerStatsTracker(const FillerStatsTracker&) = delete; - FillerStatsTracker& operator=(const FillerStatsTracker&) = delete; - - void Report(const FillerStats stats) { - if (ABSL_PREDICT_FALSE(tracker_.Report(stats))) { - if (ABSL_PREDICT_FALSE(pending_skipped().count > 0)) { - // Consider the peak within the just completed epoch to confirm the - // correctness of any recent subrelease decisions. - skipped_subrelease_correctness_.ReportUpdatedPeak(std::max( - stats.num_pages, - tracker_.GetEpochAtOffset(1).stats[kStatsAtMaxDemand].num_pages)); - } - } - } - - void Print(Printer* out) const; - void PrintInPbtxt(PbtxtRegion* hpaa) const; - - // Calculates recent peaks for skipping subrelease decisions. If our allocated - // memory is below the demand peak within the last peak_interval, we stop - // subreleasing. If our demand is going above that peak again within another - // peak_interval, we report that we made the correct decision. - FillerStats GetRecentPeak(absl::Duration peak_interval) { - last_peak_interval_ = peak_interval; - FillerStats recent_peak; - Length max_demand_pages; - - int64_t num_epochs = peak_interval / epoch_length_; - tracker_.IterBackwards( - [&](size_t offset, int64_t ts, const FillerStatsEntry& e) { - if (!e.empty()) { - // Identify the maximum number of demand pages we have seen within - // the time interval. - if (e.stats[kStatsAtMaxDemand].num_pages > max_demand_pages) { - recent_peak = e.stats[kStatsAtMaxDemand]; - max_demand_pages = recent_peak.num_pages; - } - } - }, - num_epochs); - - return recent_peak; - } - - void ReportSkippedSubreleasePages( - Length pages, Length peak_pages, - absl::Duration expected_time_until_next_peak) { - if (pages == Length(0)) { - return; - } - - skipped_subrelease_correctness_.ReportSkippedSubreleasePages( - pages, peak_pages, expected_time_until_next_peak); - } - - inline typename SkippedSubreleaseCorrectnessTracker< - kEpochs>::SkippedSubreleaseDecision - total_skipped() const { - return skipped_subrelease_correctness_.total_skipped(); - } - - inline typename SkippedSubreleaseCorrectnessTracker< - kEpochs>::SkippedSubreleaseDecision - correctly_skipped() const { - return skipped_subrelease_correctness_.correctly_skipped(); - } - - inline typename SkippedSubreleaseCorrectnessTracker< - kEpochs>::SkippedSubreleaseDecision - pending_skipped() const { - return skipped_subrelease_correctness_.pending_skipped(); - } - - // Returns the minimum number of free pages throughout the tracker period. 
- // The first value of the pair is the number of all free pages, the second - // value contains only the backed ones. - NumberOfFreePages min_free_pages(absl::Duration w) const { - NumberOfFreePages mins; - mins.free = Length::max(); - mins.free_backed = Length::max(); - - int64_t num_epochs = std::clamp(w / epoch_length_, int64_t{0}, - static_cast(kEpochs)); - - tracker_.IterBackwards( - [&](size_t offset, int64_t ts, const FillerStatsEntry& e) { - if (!e.empty()) { - mins.free = std::min(mins.free, e.min_free_pages); - mins.free_backed = - std::min(mins.free_backed, e.min_free_backed_pages); - } - }, - num_epochs); - mins.free = (mins.free == Length::max()) ? Length(0) : mins.free; - mins.free_backed = - (mins.free_backed == Length::max()) ? Length(0) : mins.free_backed; - return mins; - } - - private: - // We collect filler statistics at four "interesting points" within each time - // step: at min/max demand of pages and at min/max use of hugepages. This - // allows us to approximate the envelope of the different metrics. - enum StatsType { - kStatsAtMinDemand, - kStatsAtMaxDemand, - kStatsAtMinHugePages, - kStatsAtMaxHugePages, - kNumStatsTypes - }; - - struct FillerStatsEntry { - // Collect filler stats at "interesting points" (minimum/maximum page demand - // and at minimum/maximum usage of huge pages). - FillerStats stats[kNumStatsTypes] = {}; - static constexpr Length kDefaultValue = Length::max(); - Length min_free_pages = kDefaultValue; - Length min_free_backed_pages = kDefaultValue; - Length num_pages_subreleased; - HugeLength num_hugepages_broken = NHugePages(0); - - static FillerStatsEntry Nil() { return FillerStatsEntry(); } - - void Report(FillerStats e) { - if (empty()) { - for (int i = 0; i < kNumStatsTypes; i++) { - stats[i] = e; - } - } - - if (e.num_pages < stats[kStatsAtMinDemand].num_pages) { - stats[kStatsAtMinDemand] = e; - } - - if (e.num_pages > stats[kStatsAtMaxDemand].num_pages) { - stats[kStatsAtMaxDemand] = e; - } - - if (e.total_huge_pages() < - stats[kStatsAtMinHugePages].total_huge_pages()) { - stats[kStatsAtMinHugePages] = e; - } - - if (e.total_huge_pages() > - stats[kStatsAtMaxHugePages].total_huge_pages()) { - stats[kStatsAtMaxHugePages] = e; - } - - min_free_pages = - std::min(min_free_pages, e.free_pages + e.unmapped_pages); - min_free_backed_pages = std::min(min_free_backed_pages, e.free_pages); - - // Subrelease stats - num_pages_subreleased += e.num_pages_subreleased; - num_hugepages_broken += e.num_hugepages_broken; - } - - bool empty() const { return min_free_pages == kDefaultValue; } - }; - - // The tracker reports pages that have been free for at least this interval, - // as well as peaks within this interval. - const absl::Duration summary_interval_; - - const absl::Duration window_; - const absl::Duration epoch_length_; - - TimeSeriesTracker tracker_; - SkippedSubreleaseCorrectnessTracker skipped_subrelease_correctness_; - - // Records the last peak_interval value, for reporting and debugging only. 
- absl::Duration last_peak_interval_; -}; - -// Evaluate a/b, avoiding division by zero -inline double safe_div(double a, double b) { - if (b == 0) { - return 0.; - } else { - return a / b; - } -} - -inline double safe_div(Length a, Length b) { - return safe_div(a.raw_num(), b.raw_num()); -} - -template -void FillerStatsTracker::Print(Printer* out) const { - NumberOfFreePages free_pages = min_free_pages(summary_interval_); - out->printf("HugePageFiller: time series over %d min interval\n\n", - absl::ToInt64Minutes(summary_interval_)); - - // Realized fragmentation is equivalent to backed minimum free pages over a - // 5-min interval. It is printed for convenience but not included in pbtxt. - out->printf("HugePageFiller: realized fragmentation: %.1f MiB\n", - free_pages.free_backed.in_mib()); - out->printf("HugePageFiller: minimum free pages: %zu (%zu backed)\n", - free_pages.free.raw_num(), free_pages.free_backed.raw_num()); - - FillerStatsEntry at_peak_demand; - FillerStatsEntry at_peak_hps; - - tracker_.IterBackwards( - [&](size_t offset, int64_t ts, const FillerStatsEntry& e) { - if (!e.empty()) { - if (at_peak_demand.empty() || - at_peak_demand.stats[kStatsAtMaxDemand].num_pages < - e.stats[kStatsAtMaxDemand].num_pages) { - at_peak_demand = e; - } - - if (at_peak_hps.empty() || - at_peak_hps.stats[kStatsAtMaxHugePages].total_huge_pages() < - e.stats[kStatsAtMaxHugePages].total_huge_pages()) { - at_peak_hps = e; - } - } - }, - summary_interval_ / epoch_length_); - - out->printf( - "HugePageFiller: at peak demand: %zu pages (and %zu free, %zu unmapped)\n" - "HugePageFiller: at peak demand: %zu hps (%zu regular, %zu donated, " - "%zu partial, %zu released)\n", - at_peak_demand.stats[kStatsAtMaxDemand].num_pages.raw_num(), - at_peak_demand.stats[kStatsAtMaxDemand].free_pages.raw_num(), - at_peak_demand.stats[kStatsAtMaxDemand].unmapped_pages.raw_num(), - at_peak_demand.stats[kStatsAtMaxDemand].total_huge_pages().raw_num(), - at_peak_demand.stats[kStatsAtMaxDemand].huge_pages[kRegular].raw_num(), - at_peak_demand.stats[kStatsAtMaxDemand].huge_pages[kDonated].raw_num(), - at_peak_demand.stats[kStatsAtMaxDemand] - .huge_pages[kPartialReleased] - .raw_num(), - at_peak_demand.stats[kStatsAtMaxDemand].huge_pages[kReleased].raw_num()); - - out->printf( - "HugePageFiller: at peak hps: %zu pages (and %zu free, %zu unmapped)\n" - "HugePageFiller: at peak hps: %zu hps (%zu regular, %zu donated, " - "%zu partial, %zu released)\n", - at_peak_hps.stats[kStatsAtMaxDemand].num_pages.raw_num(), - at_peak_hps.stats[kStatsAtMaxDemand].free_pages.raw_num(), - at_peak_hps.stats[kStatsAtMaxDemand].unmapped_pages.raw_num(), - at_peak_hps.stats[kStatsAtMaxDemand].total_huge_pages().raw_num(), - at_peak_hps.stats[kStatsAtMaxDemand].huge_pages[kRegular].raw_num(), - at_peak_hps.stats[kStatsAtMaxDemand].huge_pages[kDonated].raw_num(), - at_peak_hps.stats[kStatsAtMaxDemand] - .huge_pages[kPartialReleased] - .raw_num(), - at_peak_hps.stats[kStatsAtMaxDemand].huge_pages[kReleased].raw_num()); - - out->printf( - "\nHugePageFiller: Since the start of the execution, %zu subreleases (%zu" - " pages) were skipped due to recent (%llds) peaks.\n", - total_skipped().count, total_skipped().pages.raw_num(), - static_cast(absl::ToInt64Seconds(last_peak_interval_))); - - Length skipped_pages = total_skipped().pages - pending_skipped().pages; - double correctly_skipped_pages_percentage = - safe_div(100.0 * correctly_skipped().pages, skipped_pages); - - size_t skipped_count = total_skipped().count - pending_skipped().count; - double 
correctly_skipped_count_percentage = - safe_div(100.0 * correctly_skipped().count, skipped_count); - - out->printf( - "HugePageFiller: %.4f%% of decisions confirmed correct, %zu " - "pending (%.4f%% of pages, %zu pending).\n", - correctly_skipped_count_percentage, pending_skipped().count, - correctly_skipped_pages_percentage, pending_skipped().pages.raw_num()); - - // Print subrelease stats - Length total_subreleased; - HugeLength total_broken = NHugePages(0); - tracker_.Iter( - [&](size_t offset, int64_t ts, const FillerStatsEntry& e) { - total_subreleased += e.num_pages_subreleased; - total_broken += e.num_hugepages_broken; - }, - tracker_.kSkipEmptyEntries); - out->printf( - "HugePageFiller: Subrelease stats last %d min: total " - "%zu pages subreleased, %zu hugepages broken\n", - static_cast(absl::ToInt64Minutes(window_)), - total_subreleased.raw_num(), total_broken.raw_num()); -} - -template -void FillerStatsTracker::PrintInPbtxt(PbtxtRegion* hpaa) const { - { - auto skip_subrelease = hpaa->CreateSubRegion("filler_skipped_subrelease"); - skip_subrelease.PrintI64("skipped_subrelease_interval_ms", - absl::ToInt64Milliseconds(last_peak_interval_)); - skip_subrelease.PrintI64("skipped_subrelease_pages", - total_skipped().pages.raw_num()); - skip_subrelease.PrintI64("correctly_skipped_subrelease_pages", - correctly_skipped().pages.raw_num()); - skip_subrelease.PrintI64("pending_skipped_subrelease_pages", - pending_skipped().pages.raw_num()); - skip_subrelease.PrintI64("skipped_subrelease_count", total_skipped().count); - skip_subrelease.PrintI64("correctly_skipped_subrelease_count", - correctly_skipped().count); - skip_subrelease.PrintI64("pending_skipped_subrelease_count", - pending_skipped().count); - } - - auto filler_stats = hpaa->CreateSubRegion("filler_stats_timeseries"); - filler_stats.PrintI64("window_ms", absl::ToInt64Milliseconds(epoch_length_)); - filler_stats.PrintI64("epochs", kEpochs); - - NumberOfFreePages free_pages = min_free_pages(summary_interval_); - filler_stats.PrintI64("min_free_pages_interval_ms", - absl::ToInt64Milliseconds(summary_interval_)); - filler_stats.PrintI64("min_free_pages", free_pages.free.raw_num()); - filler_stats.PrintI64("min_free_backed_pages", - free_pages.free_backed.raw_num()); - - static const char* labels[kNumStatsTypes] = { - "at_minimum_demand", "at_maximum_demand", "at_minimum_huge_pages", - "at_maximum_huge_pages"}; - - tracker_.Iter( - [&](size_t offset, int64_t ts, const FillerStatsEntry& e) { - auto region = filler_stats.CreateSubRegion("measurements"); - region.PrintI64("epoch", offset); - region.PrintI64("timestamp_ms", - absl::ToInt64Milliseconds(absl::Nanoseconds(ts))); - region.PrintI64("min_free_pages", e.min_free_pages.raw_num()); - region.PrintI64("min_free_backed_pages", - e.min_free_backed_pages.raw_num()); - region.PrintI64("num_pages_subreleased", - e.num_pages_subreleased.raw_num()); - region.PrintI64("num_hugepages_broken", - e.num_hugepages_broken.raw_num()); - for (int i = 0; i < kNumStatsTypes; i++) { - auto m = region.CreateSubRegion(labels[i]); - FillerStats stats = e.stats[i]; - m.PrintI64("num_pages", stats.num_pages.raw_num()); - m.PrintI64("regular_huge_pages", - stats.huge_pages[kRegular].raw_num()); - m.PrintI64("donated_huge_pages", - stats.huge_pages[kDonated].raw_num()); - m.PrintI64("partial_released_huge_pages", - stats.huge_pages[kPartialReleased].raw_num()); - m.PrintI64("released_huge_pages", - stats.huge_pages[kReleased].raw_num()); - m.PrintI64("used_pages_in_subreleased_huge_pages", - 
stats.used_pages_in_subreleased_huge_pages.raw_num()); - } - }, - tracker_.kSkipEmptyEntries); -} - // PageTracker keeps track of the allocation status of every page in a HugePage. // It allows allocation and deallocation of a contiguous run of pages. // // Its mutating methods are annotated as requiring the pageheap_lock, in order // to support unlocking the page heap lock in a dynamic annotation-friendly way. -template -class PageTracker : public TList>::Elem { +class PageTracker : public TList::Elem { public: - static void UnbackImpl(void* p, size_t size) { Unback(p, size); } - - constexpr PageTracker(HugePage p, uint64_t when) + PageTracker(HugePage p, bool was_donated, uint64_t now) : location_(p), released_count_(0), + abandoned_count_(0), donated_(false), + was_donated_(was_donated), + was_released_(false), + abandoned_(false), unbroken_(true), + alloctime_(now), free_{} { - init_when(when); - #ifndef __ppc64__ #if defined(__GNUC__) #pragma GCC diagnostic push @@ -653,29 +83,21 @@ class PageTracker : public TList>::Elem { // On PPC64, kHugePageSize / kPageSize is typically ~2K (16MB / 8KB), // requiring 512 bytes for representing free_. While its cache line size is // larger, the entirety of free_ will not fit on two cache lines. - static_assert( - offsetof(PageTracker, location_) + sizeof(location_) <= - 2 * ABSL_CACHELINE_SIZE, - "location_ should fall within the first two cachelines of " - "PageTracker."); - static_assert(offsetof(PageTracker, when_numerator_) + - sizeof(when_numerator_) <= - 2 * ABSL_CACHELINE_SIZE, - "when_numerator_ should fall within the first two cachelines " - "of PageTracker."); - static_assert(offsetof(PageTracker, when_denominator_) + - sizeof(when_denominator_) <= + static_assert(offsetof(PageTracker, location_) + sizeof(location_) <= 2 * ABSL_CACHELINE_SIZE, - "when_denominator_ should fall within the first two " - "cachelines of PageTracker."); + "location_ should fall within the first two cachelines of " + "PageTracker."); static_assert( - offsetof(PageTracker, donated_) + sizeof(donated_) <= + offsetof(PageTracker, donated_) + sizeof(donated_) <= 2 * ABSL_CACHELINE_SIZE, "donated_ should fall within the first two cachelines of PageTracker."); static_assert( - offsetof(PageTracker, free_) + sizeof(free_) <= - 2 * ABSL_CACHELINE_SIZE, + offsetof(PageTracker, free_) + sizeof(free_) <= 2 * ABSL_CACHELINE_SIZE, "free_ should fall within the first two cachelines of PageTracker."); + static_assert(offsetof(PageTracker, alloctime_) + sizeof(alloctime_) <= + 2 * ABSL_CACHELINE_SIZE, + "alloctime_ should fall within the first two cachelines of " + "PageTracker."); #if defined(__GNUC__) #pragma GCC diagnostic pop #endif @@ -693,8 +115,8 @@ class PageTracker : public TList>::Elem { // [i, i+n) in previously_unbacked. PageAllocation Get(Length n) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); - // REQUIRES: p was the result of a previous call to Get(n) - void Put(PageId p, Length n) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + // REQUIRES: r was the result of a previous call to Get(n) + void Put(Range r) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); // Returns true if any unused pages have been returned-to-system. bool released() const { return released_count_ > 0; } @@ -703,16 +125,41 @@ class PageTracker : public TList>::Elem { // Only up-to-date when the tracker is on a TrackerList in the Filler; // otherwise the value is meaningless. bool donated() const { return donated_; } + // Set/reset the donated flag. 
The donated status is lost, for instance, // when further allocations are made on the tracker. void set_donated(bool status) { donated_ = status; } + // Tracks whether the page was given to the filler in the donated state. It + // is not cleared by the filler, allowing the HugePageAwareAllocator to track + // memory persistently donated to the filler. + bool was_donated() const { return was_donated_; } + + bool was_released() const { return was_released_; } + void set_was_released(bool status) { was_released_ = status; } + + // Tracks whether the page, previously donated to the filler, was abandoned. + // When a large allocation is deallocated but the huge page is not + // reassembled, the pages are abandoned to the filler for future allocations. + bool abandoned() const { return abandoned_; } + void set_abandoned(bool status) { abandoned_ = status; } + // Tracks how many pages were provided when the originating allocation of a + // donated page was deallocated but other allocations were in use. + // + // Requires was_donated(). + Length abandoned_count() const { return Length(abandoned_count_); } + void set_abandoned_count(Length count) { + TC_ASSERT(was_donated_); + abandoned_count_ = count.raw_num(); + } + // These statistics help us measure the fragmentation of a hugepage and // the desirability of allocating from this hugepage. Length longest_free_range() const { return Length(free_.longest_free()); } size_t nallocs() const { return free_.allocs(); } Length used_pages() const { return Length(free_.used()); } Length released_pages() const { return Length(released_count_); } + double alloctime() const { return alloctime_; } Length free_pages() const; bool empty() const; @@ -723,56 +170,32 @@ class PageTracker : public TList>::Elem { // Return all unused pages to the system, mark future frees to do same. // Returns the count of pages unbacked. - Length ReleaseFree() ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); - - // Return this allocation to the system, if policy warrants it. - // - // As of 3/2020 our policy is to rerelease: Once we break a hugepage by - // returning a fraction of it, we return *anything* unused. This simplifies - // tracking. - // - // TODO(b/141550014): Make retaining the default/sole policy. - void MaybeRelease(PageId p, Length n) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { - if (released_count_ == 0) { - return; - } - - // Mark pages as released. - Length index = p - location_.first_page(); - ASSERT(released_by_page_.CountBits(index.raw_num(), n.raw_num()) == 0); - released_by_page_.SetRange(index.raw_num(), n.raw_num()); - released_count_ += n.raw_num(); - ASSERT(released_by_page_.CountBits(0, kPagesPerHugePage.raw_num()) == - released_count_); - - // TODO(b/122551676): If release fails, we should not SetRange above. - ReleasePagesWithoutLock(p, n); - } + Length ReleaseFree(MemoryModifyFunction& unback) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); - void AddSpanStats(SmallSpanStats* small, LargeSpanStats* large, - PageAgeHistograms* ages) const; + void AddSpanStats(SmallSpanStats* small, LargeSpanStats* large) const; + bool HasDenseSpans() const { return has_dense_spans_; } + void SetHasDenseSpans() { has_dense_spans_ = true; } private: - void init_when(uint64_t w) { - const Length before = Length(free_.total_free()); - when_numerator_ = w * before.raw_num(); - when_denominator_ = before.raw_num(); - } - HugePage location_; - // We keep track of an average time weighted by Length::raw_num.
In order to - // avoid doing division on fast path, store the numerator and denominator and - // only do the division when we need the average. - uint64_t when_numerator_; - uint64_t when_denominator_; // Cached value of released_by_page_.CountBits(0, kPagesPerHugePages) // // TODO(b/151663108): Logically, this is guarded by pageheap_lock. uint16_t released_count_; + uint16_t abandoned_count_; bool donated_; + bool was_donated_; + bool was_released_; + // Tracks whether we accounted for the abandoned state of the page. When a + // large allocation is deallocated but the huge page can not be reassembled, + // we measure the number of pages abandoned to the filler. To make sure that + // we do not double-count any future deallocations, we maintain a state and + // reset it once we measure those pages in abandoned_count_. + bool abandoned_; bool unbroken_; + double alloctime_; RangeTracker free_; // Bitmap of pages based on them being released to the OS. @@ -792,38 +215,44 @@ class PageTracker : public TList>::Elem { std::numeric_limits::max(), "nallocs must be able to support kPagesPerHugePage!"); - void ReleasePages(PageId p, Length n) { - void* ptr = p.start_addr(); - size_t byte_len = n.in_bytes(); - Unback(ptr, byte_len); - unbroken_ = false; - } - - void ReleasePagesWithoutLock(PageId p, Length n) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { - pageheap_lock.Unlock(); + bool has_dense_spans_ = false; - void* ptr = p.start_addr(); - size_t byte_len = n.in_bytes(); - Unback(ptr, byte_len); - - pageheap_lock.Lock(); - unbroken_ = false; + [[nodiscard]] bool ReleasePages(Range r, MemoryModifyFunction& unback) { + bool success = unback(r); + if (ABSL_PREDICT_TRUE(success)) { + unbroken_ = false; + } + return success; } }; -enum class FillerPartialRerelease : bool { - // Once we break a hugepage by returning a fraction of it, we return - // *anything* unused. This simplifies tracking. - // - // As of 2/2020, this is the default behavior. - Return, - // When releasing a page onto an already-released huge page, retain the page - // rather than releasing it back to the OS. This can reduce minor page - // faults for hot pages. - // - // TODO(b/141550014, b/122551676): Make this the default behavior. - Retain, +// Records number of hugepages in different types of allocs. +// +// We use an additional element in the array to record the total sum of pages +// in kSparse and kDense allocs. +struct HugePageFillerStats { + // Number of hugepages in fully-released alloc. + HugeLength n_fully_released[AccessDensityPrediction::kPredictionCounts + 1]; + // Number of hugepages in partially-released alloc. + HugeLength n_partial_released[AccessDensityPrediction::kPredictionCounts + 1]; + // Total hugepages that are either in fully- or partially-released allocs. + HugeLength n_released[AccessDensityPrediction::kPredictionCounts + 1]; + // Total hugepages in the filler of a particular object count. + HugeLength n_total[AccessDensityPrediction::kPredictionCounts + 1]; + // Total hugepages that have been fully allocated. + HugeLength n_full[AccessDensityPrediction::kPredictionCounts + 1]; + // Number of hugepages in partially allocated (but not released) allocs. + HugeLength n_partial[AccessDensityPrediction::kPredictionCounts + 1]; +}; + +enum class HugePageFillerDenseTrackerType : bool { + // Hugepages sorted on longest free range and chunk index. This is currently + // the default. + kLongestFreeRangeAndChunks, + // Hugepages sorted only on number of spans allocated. 
As we allocate + // single-page many-object spans, we do not sort hugepages on longest free + // range when this configuration is used. + kSpansAllocated, }; // This tracks a set of unfilled hugepages, and fulfills allocations @@ -832,14 +261,21 @@ enum class FillerPartialRerelease : bool { template class HugePageFiller { public: - explicit HugePageFiller(FillerPartialRerelease partial_rerelease); - HugePageFiller(FillerPartialRerelease partial_rerelease, Clock clock); + explicit HugePageFiller( + HugePageFillerDenseTrackerType dense_tracker_type, + MemoryModifyFunction& unback ABSL_ATTRIBUTE_LIFETIME_BOUND, + MemoryModifyFunction& unback_without_lock ABSL_ATTRIBUTE_LIFETIME_BOUND); + HugePageFiller(Clock clock, HugePageFillerDenseTrackerType dense_tracker_type, + MemoryModifyFunction& unback ABSL_ATTRIBUTE_LIFETIME_BOUND, + MemoryModifyFunction& unback_without_lock + ABSL_ATTRIBUTE_LIFETIME_BOUND); typedef TrackerType Tracker; struct TryGetResult { TrackerType* pt; PageId page; + bool from_released; }; // Our API is simple, but note that it does not include an unconditional @@ -847,36 +283,77 @@ class HugePageFiller { // needed. This simplifies using it in a few different contexts (and improves // the testing story - no dependencies.) // + // n is the number of TCMalloc pages to be allocated. num_objects is the + // number of individual objects that would be allocated on these n pages. + // // On failure, returns nullptr/PageId{0}. - TryGetResult TryGet(Length n) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + TryGetResult TryGet(Length n, SpanAllocInfo span_alloc_info) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); - // Marks [p, p + n) as usable by new allocations into *pt; returns pt - // if that hugepage is now empty (nullptr otherwise.) + // Marks r as usable by new allocations into *pt; returns pt if that hugepage + // is now empty (nullptr otherwise.) + // // REQUIRES: pt is owned by this object (has been Contribute()), and - // {pt, p, n} was the result of a previous TryGet. - TrackerType* Put(TrackerType* pt, PageId p, Length n) + // {pt, Range{p, n}} was the result of a previous TryGet. + TrackerType* Put(TrackerType* pt, Range r) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); // Contributes a tracker to the filler. If "donated," then the tracker is // marked as having come from the tail of a multi-hugepage allocation, which // causes it to be treated slightly differently. 
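An illustrative sketch, not part of this patch, of how a caller might consume a TryGet()-style result that now carries a from_released flag and a nullptr tracker on failure, falling back to contributing a freshly backed hugepage. MiniTracker, MiniFiller, and the fixed page count of 512 are hypothetical stand-ins for the real PageTracker/HugePageFiller types.

#include <cassert>
#include <cstdio>

namespace sketch {

struct MiniTracker {
  int free_pages = 512;  // stand-in for one hugepage worth of small pages
};

struct TryGetResult {
  MiniTracker* pt = nullptr;
  int first_page = 0;
  bool from_released = false;  // pages came from a (partially) released hugepage
};

class MiniFiller {
 public:
  TryGetResult TryGet(int n) {
    if (current_ == nullptr || current_->free_pages < n) {
      return {};  // nullptr/0 signals "no space"; the caller must back a new hugepage
    }
    int page = 512 - current_->free_pages;
    current_->free_pages -= n;
    return {current_, page, /*from_released=*/false};
  }
  void Contribute(MiniTracker* pt) { current_ = pt; }

 private:
  MiniTracker* current_ = nullptr;
};

}  // namespace sketch

int main() {
  sketch::MiniFiller filler;
  sketch::MiniTracker hugepage;
  auto res = filler.TryGet(8);
  if (res.pt == nullptr) {
    // The filler never backs memory itself; the caller contributes a tracker.
    filler.Contribute(&hugepage);
    res = filler.TryGet(8);
  }
  assert(res.pt != nullptr);
  std::printf("first_page=%d from_released=%d\n", res.first_page, res.from_released);
  return 0;
}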
- void Contribute(TrackerType* pt, bool donated); + void Contribute(TrackerType* pt, bool donated, SpanAllocInfo span_alloc_info); HugeLength size() const { return size_; } // Useful statistics - Length pages_allocated() const { return allocated_; } - Length used_pages() const { return allocated_; } + Length pages_allocated(AccessDensityPrediction type) const { + TC_ASSERT_LT(type, AccessDensityPrediction::kPredictionCounts); + return pages_allocated_[type]; + } + Length pages_allocated() const { + return pages_allocated_[AccessDensityPrediction::kSparse] + + pages_allocated_[AccessDensityPrediction::kDense]; + } + Length used_pages() const { return pages_allocated(); } Length unmapped_pages() const { return unmapped_; } Length free_pages() const; - Length used_pages_in_released() const { return n_used_released_; } + Length used_pages_in_released() const { + TC_ASSERT_LE(n_used_released_[AccessDensityPrediction::kSparse], + regular_alloc_released_[AccessDensityPrediction::kSparse] + .size() + .in_pages()); + TC_ASSERT_LE(n_used_released_[AccessDensityPrediction::kDense], + regular_alloc_released_[AccessDensityPrediction::kDense] + .size() + .in_pages()); + return n_used_released_[AccessDensityPrediction::kDense] + + n_used_released_[AccessDensityPrediction::kSparse]; + } Length used_pages_in_partial_released() const { - return n_used_partial_released_; + TC_ASSERT_LE( + n_used_partial_released_[AccessDensityPrediction::kSparse], + regular_alloc_partial_released_[AccessDensityPrediction::kSparse] + .size() + .in_pages()); + TC_ASSERT_LE( + n_used_partial_released_[AccessDensityPrediction::kDense], + regular_alloc_partial_released_[AccessDensityPrediction::kDense] + .size() + .in_pages()); + return n_used_partial_released_[AccessDensityPrediction::kDense] + + n_used_partial_released_[AccessDensityPrediction::kSparse]; } Length used_pages_in_any_subreleased() const { - return n_used_released_ + n_used_partial_released_; + return used_pages_in_released() + used_pages_in_partial_released(); } + HugeLength previously_released_huge_pages() const { + return n_was_released_[AccessDensityPrediction::kDense] + + n_was_released_[AccessDensityPrediction::kSparse]; + } + + Length FreePagesInPartialAllocs() const; + // Fraction of used pages that are on non-released hugepages and // thus could be backed by kernel hugepages. (Of course, we can't // guarantee that the kernel had available 2-mib regions of physical @@ -885,148 +362,119 @@ class HugePageFiller { double hugepage_frac() const; // Returns the amount of memory to release if all remaining options of - // releasing memory involve subreleasing pages. + // releasing memory involve subreleasing pages. Provided intervals are used + // for making skip subrelease decisions. Length GetDesiredSubreleasePages(Length desired, Length total_released, - absl::Duration peak_interval) + SkipSubreleaseIntervals intervals) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); // Tries to release desired pages by iteratively releasing from the emptiest - // possible hugepage and releasing its free memory to the system. Return the - // number of pages actually released. - Length ReleasePages(Length desired, - absl::Duration skip_subrelease_after_peaks_interval, - bool hit_limit) + // possible hugepage and releasing its free memory to the system. If + // release_partial_alloc_pages is enabled, it also releases all the free + // pages from the partial allocs. Note that the number of pages released may + // be greater than the desired number of pages. 
+ // Returns the number of pages actually released. The releasing target can be + // reduced by skip subrelease which is disabled if all intervals are zero. + static constexpr double kPartialAllocPagesRelease = 0.1; + Length ReleasePages(Length desired, SkipSubreleaseIntervals intervals, + bool release_partial_alloc_pages, bool hit_limit) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + // Number of candidate hugepages selected in each iteration for releasing + // their free memory. + static constexpr size_t kCandidatesForReleasingMemory = + kPagesPerHugePage.raw_num(); - void AddSpanStats(SmallSpanStats* small, LargeSpanStats* large, - PageAgeHistograms* ages) const; + void AddSpanStats(SmallSpanStats* small, LargeSpanStats* large) const; BackingStats stats() const; SubreleaseStats subrelease_stats() const { return subrelease_stats_; } - void Print(Printer* out, bool everything) const; - void PrintInPbtxt(PbtxtRegion* hpaa) const; - private: - typedef TList TrackerList; + HugePageFillerStats GetStats() const; + void Print(Printer& out, bool everything); + void PrintInPbtxt(PbtxtRegion& hpaa) const; + + template + void ForEachHugePage(const F& func) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + private: // This class wraps an array of N TrackerLists and a Bitmap storing which // elements are non-empty. template - class HintedTrackerLists { + class PageTrackerLists : public HintedTrackerLists { public: - HintedTrackerLists() : nonempty_{}, size_(NHugePages(0)) {} - - // Removes a TrackerType from the first non-empty freelist with index at - // least n and returns it. Returns nullptr if there is none. - TrackerType* GetLeast(const size_t n) { - ASSERT(n < N); - size_t i = nonempty_.FindSet(n); - if (i == N) { - return nullptr; - } - ASSERT(!lists_[i].empty()); - TrackerType* pt = lists_[i].first(); - if (lists_[i].remove(pt)) { - nonempty_.ClearBit(i); - } - --size_; - return pt; + HugeLength size() const { + return NHugePages(HintedTrackerLists::size()); } - void Add(TrackerType* pt, const size_t i) { - ASSERT(i < N); - ASSERT(pt != nullptr); - lists_[i].prepend(pt); - nonempty_.SetBit(i); - ++size_; - } - void Remove(TrackerType* pt, const size_t i) { - ASSERT(i < N); - ASSERT(pt != nullptr); - if (lists_[i].remove(pt)) { - nonempty_.ClearBit(i); - } - --size_; - } - const TrackerList& operator[](const size_t n) const { - ASSERT(n < N); - return lists_[n]; - } - HugeLength size() const { return size_; } - bool empty() const { return size().raw_num() == 0; } - // Runs a functor on all HugePages in the TrackerLists. - // This method is const but the Functor gets passed a non-const pointer. - // This quirk is inherited from TrackerList. - template - void Iter(const Functor& func, size_t start) const { - size_t i = nonempty_.FindSet(start); - while (i < N) { - auto& list = lists_[i]; - ASSERT(!list.empty()); - for (TrackerType* pt : list) { - func(pt); - } - i++; - if (i < N) i = nonempty_.FindSet(i); - } - } - - private: - TrackerList lists_[N]; - Bitmap nonempty_; - HugeLength size_; }; SubreleaseStats subrelease_stats_; // We group hugepages first by longest-free (as a measure of fragmentation), - // then into 8 chunks inside there by desirability of allocation. + // then into kChunks chunks inside there by desirability of + // allocation. static constexpr size_t kChunks = 8; // Which chunk should this hugepage be in? - // This returns the largest possible value kChunks-1 iff pt has a single - // allocation. 
- size_t IndexFor(TrackerType* pt); + // This returns the largest possible value kChunks - 1 iff + // pt has a single allocation. + size_t IndexFor(TrackerType* pt) const; // Returns index for regular_alloc_. - static size_t ListFor(Length longest, size_t chunk); + size_t ListFor(Length longest, size_t chunk, AccessDensityPrediction density, + size_t nallocs) const; static constexpr size_t kNumLists = kPagesPerHugePage.raw_num() * kChunks; - HintedTrackerLists regular_alloc_; - HintedTrackerLists donated_alloc_; + // List of hugepages from which no pages have been released to the OS. + PageTrackerLists + regular_alloc_[AccessDensityPrediction::kPredictionCounts]; + PageTrackerLists donated_alloc_; // Partially released ones that we are trying to release. // - // When FillerPartialRerelease == Return: - // regular_alloc_partial_released_ is empty and n_used_partial_released_ is - // 0. - // - // When FillerPartialRerelease == Retain: - // regular_alloc_partial_released_ contains huge pages that are partially - // allocated, partially free, and partially returned to the OS. - // n_used_partial_released_ is the number of pages which have been allocated - // of the set. + // regular_alloc_partial_released_ contains huge pages that are partially + // allocated, partially free, and partially returned to the OS. // // regular_alloc_released_: This list contains huge pages whose pages are // either allocated or returned to the OS. There are no pages that are free, - // but not returned to the OS. n_used_released_ contains the number of - // pages in those huge pages that are not free (i.e., allocated). - Length n_used_partial_released_; - Length n_used_released_; - HintedTrackerLists regular_alloc_partial_released_; - HintedTrackerLists regular_alloc_released_; - - // RemoveFromFillerList pt from the appropriate HintedTrackerList. + // but not returned to the OS. + PageTrackerLists regular_alloc_partial_released_ + [AccessDensityPrediction::kPredictionCounts]; + PageTrackerLists + regular_alloc_released_[AccessDensityPrediction::kPredictionCounts]; + // n_used_released_ contains the number of pages in huge pages that are not + // free (i.e., allocated). Only the hugepages in regular_alloc_released_ are + // considered. + Length n_used_released_[AccessDensityPrediction::kPredictionCounts]; + + HugeLength n_was_released_[AccessDensityPrediction::kPredictionCounts]; + // n_used_partial_released_ is the number of pages which have been allocated + // from the hugepages in the set regular_alloc_partial_released. + Length n_used_partial_released_[AccessDensityPrediction::kPredictionCounts]; + const HugePageFillerDenseTrackerType dense_tracker_type_; + + // RemoveFromFillerList pt from the appropriate PageTrackerList. void RemoveFromFillerList(TrackerType* pt); - // Put pt in the appropriate HintedTrackerList. + // Put pt in the appropriate PageTrackerList. void AddToFillerList(TrackerType* pt); // Like AddToFillerList(), but for use when donating from the tail of a // multi-hugepage allocation. void DonateToFillerList(TrackerType* pt); + void PrintAllocStatsInPbtxt(absl::string_view field, PbtxtRegion& hpaa, + const HugePageFillerStats& stats, + AccessDensityPrediction count) const; // CompareForSubrelease identifies the worse candidate for subrelease, between // the choice of huge pages a and b. 
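A small self-contained illustration, not part of the patch, of the ordering the comparison defined just below induces when release candidates are sorted: fewer used pages sort first, and among equally used hugepages the one without dense spans sorts first. MiniTracker and PreferForRelease are hypothetical stand-ins, not the production types.

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

namespace sketch {

// Simplified stand-in for a page tracker: only the two fields this ordering
// looks at.
struct MiniTracker {
  std::size_t used_pages;
  bool has_dense_spans;
};

// Order first by used pages (ascending); among equally used hugepages, prefer
// the one without dense spans.
bool PreferForRelease(const MiniTracker* a, const MiniTracker* b) {
  if (a->used_pages != b->used_pages) return a->used_pages < b->used_pages;
  if (a->has_dense_spans) return false;
  return b->has_dense_spans;
}

}  // namespace sketch

int main() {
  using sketch::MiniTracker;
  MiniTracker sparse_light{/*used_pages=*/4, /*has_dense_spans=*/false};
  MiniTracker dense_light{/*used_pages=*/4, /*has_dense_spans=*/true};
  MiniTracker nearly_full{/*used_pages=*/200, /*has_dense_spans=*/false};

  std::vector<MiniTracker*> order = {&nearly_full, &dense_light, &sparse_light};
  std::sort(order.begin(), order.end(), sketch::PreferForRelease);

  // The lightly used, sparse-only hugepage sorts first; the nearly full one last.
  assert(order[0] == &sparse_light);
  assert(order[1] == &dense_light);
  assert(order[2] == &nearly_full);
  return 0;
}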
- static bool CompareForSubrelease(TrackerType* a, TrackerType* b) { - ASSERT(a != nullptr); - ASSERT(b != nullptr); - - return a->used_pages() < b->used_pages(); + static bool CompareForSubrelease(const TrackerType* a, const TrackerType* b) { + TC_ASSERT_NE(a, nullptr); + TC_ASSERT_NE(b, nullptr); + + if (a->used_pages() < b->used_pages()) return true; + if (a->used_pages() > b->used_pages()) return false; + // If 'a' has dense spans, then we do not prefer to release from 'a' + // compared to 'b'. + if (a->HasDenseSpans()) return false; + // We know 'a' does not have dense spans. If 'b' has dense spans, then we + // prefer to release from 'a'. Otherwise, we do not prefer either. + return b->HasDenseSpans(); } // SelectCandidates identifies the candidates.size() best candidates in the @@ -1037,38 +485,37 @@ class HugePageFiller { template static int SelectCandidates(absl::Span candidates, int current_candidates, - const HintedTrackerLists& tracker_list, + const PageTrackerLists& tracker_list, size_t tracker_start); // Release desired pages from the page trackers in candidates. Returns the // number of pages released. - Length ReleaseCandidates(absl::Span candidates, Length desired) + Length ReleaseCandidates(absl::Span candidates, Length target) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); HugeLength size_; - Length allocated_; + Length pages_allocated_[AccessDensityPrediction::kPredictionCounts]; Length unmapped_; // How much have we eagerly unmapped (in already released hugepages), but // not reported to ReleasePages calls? Length unmapping_unaccounted_; - FillerPartialRerelease partial_rerelease_; - // Functionality related to time series tracking. void UpdateFillerStatsTracker(); - using StatsTrackerType = FillerStatsTracker<600>; + using StatsTrackerType = SubreleaseStatsTracker<600>; StatsTrackerType fillerstats_tracker_; + Clock clock_; + // TODO(b/73749855): Remove remaining uses of unback_. 
+ MemoryModifyFunction& unback_; + MemoryModifyFunction& unback_without_lock_; }; -template -inline typename PageTracker::PageAllocation PageTracker::Get( - Length n) { +inline typename PageTracker::PageAllocation PageTracker::Get(Length n) { size_t index = free_.FindAndMark(n.raw_num()); - ASSERT(released_by_page_.CountBits(0, kPagesPerHugePage.raw_num()) == - released_count_); + TC_ASSERT_EQ(released_by_page_.CountBits(), released_count_); size_t unbacked = 0; // If release_count_ == 0, CountBits will return 0 and ClearRange will be a @@ -1079,27 +526,21 @@ inline typename PageTracker::PageAllocation PageTracker::Get( if (ABSL_PREDICT_FALSE(released_count_ > 0)) { unbacked = released_by_page_.CountBits(index, n.raw_num()); released_by_page_.ClearRange(index, n.raw_num()); - ASSERT(released_count_ >= unbacked); + TC_ASSERT_GE(released_count_, unbacked); released_count_ -= unbacked; } - ASSERT(released_by_page_.CountBits(0, kPagesPerHugePage.raw_num()) == - released_count_); + TC_ASSERT_EQ(released_by_page_.CountBits(), released_count_); return PageAllocation{location_.first_page() + Length(index), Length(unbacked)}; } -template -inline void PageTracker::Put(PageId p, Length n) { - Length index = p - location_.first_page(); - free_.Unmark(index.raw_num(), n.raw_num()); - - when_numerator_ += n.raw_num() * absl::base_internal::CycleClock::Now(); - when_denominator_ += n.raw_num(); +inline void PageTracker::Put(Range r) { + Length index = r.p - location_.first_page(); + free_.Unmark(index.raw_num(), r.n.raw_num()); } -template -inline Length PageTracker::ReleaseFree() { +inline Length PageTracker::ReleaseFree(MemoryModifyFunction& unback) { size_t count = 0; size_t index = 0; size_t n; @@ -1124,16 +565,16 @@ inline Length PageTracker::ReleaseFree() { // In debug builds, verify [free_index, end) is backed. size_t length = end - free_index; - ASSERT(released_by_page_.CountBits(free_index, length) == 0); - // Mark pages as released. Amortize the update to release_count_. - released_by_page_.SetRange(free_index, length); - + TC_ASSERT_EQ(released_by_page_.CountBits(free_index, length), 0); PageId p = location_.first_page() + Length(free_index); - // TODO(b/122551676): If release fails, we should not SetRange above. - ReleasePages(p, Length(length)); + + if (ABSL_PREDICT_TRUE(ReleasePages(Range(p, Length(length)), unback))) { + // Mark pages as released. Amortize the update to release_count_. + released_by_page_.SetRange(free_index, length); + count += length; + } index = end; - count += length; } else { // [index, index+n) did not have an overlapping range in free_, move to // the next backed range of pages. @@ -1142,21 +583,15 @@ inline Length PageTracker::ReleaseFree() { } released_count_ += count; - ASSERT(Length(released_count_) <= kPagesPerHugePage); - ASSERT(released_by_page_.CountBits(0, kPagesPerHugePage.raw_num()) == - released_count_); - init_when(absl::base_internal::CycleClock::Now()); + TC_ASSERT_LE(Length(released_count_), kPagesPerHugePage); + TC_ASSERT_EQ(released_by_page_.CountBits(), released_count_); return Length(count); } -template -inline void PageTracker::AddSpanStats(SmallSpanStats* small, - LargeSpanStats* large, - PageAgeHistograms* ages) const { +inline void PageTracker::AddSpanStats(SmallSpanStats* small, + LargeSpanStats* large) const { size_t index = 0, n; - uint64_t w = when_denominator_ == 0 ? 
when_numerator_ - : when_numerator_ / when_denominator_; while (free_.NextFreeRange(index, &index, &n)) { bool is_released = released_by_page_.GetBit(index); // Find the last bit in the run with the same state (set or cleared) as @@ -1169,7 +604,7 @@ inline void PageTracker::AddSpanStats(SmallSpanStats* small, : released_by_page_.FindSet(index + 1); } n = std::min(end - index, n); - ASSERT(n > 0); + TC_ASSERT_GT(n, 0); if (n < kMaxPages.raw_num()) { if (small != nullptr) { @@ -1190,43 +625,44 @@ inline void PageTracker::AddSpanStats(SmallSpanStats* small, } } - if (ages) { - ages->RecordRange(Length(n), is_released, w); - } index += n; } } -template -inline bool PageTracker::empty() const { - return free_.used() == 0; -} +inline bool PageTracker::empty() const { return free_.used() == 0; } -template -inline Length PageTracker::free_pages() const { +inline Length PageTracker::free_pages() const { return kPagesPerHugePage - used_pages(); } template inline HugePageFiller::HugePageFiller( - FillerPartialRerelease partial_rerelease) - : HugePageFiller( - partial_rerelease, - Clock{.now = absl::base_internal::CycleClock::Now, - .freq = absl::base_internal::CycleClock::Frequency}) {} + HugePageFillerDenseTrackerType dense_tracker_type, + MemoryModifyFunction& unback, MemoryModifyFunction& unback_without_lock) + : HugePageFiller(Clock{.now = absl::base_internal::CycleClock::Now, + .freq = absl::base_internal::CycleClock::Frequency}, + dense_tracker_type, unback, unback_without_lock) {} // For testing with mock clock template inline HugePageFiller::HugePageFiller( - FillerPartialRerelease partial_rerelease, Clock clock) - : size_(NHugePages(0)), - partial_rerelease_(partial_rerelease), - fillerstats_tracker_(clock, absl::Minutes(10), absl::Minutes(5)) {} + Clock clock, HugePageFillerDenseTrackerType dense_tracker_type, + MemoryModifyFunction& unback, MemoryModifyFunction& unback_without_lock) + : dense_tracker_type_(dense_tracker_type), + size_(NHugePages(0)), + fillerstats_tracker_(clock, absl::Minutes(10), absl::Minutes(5)), + clock_(clock), + unback_(unback), + unback_without_lock_(unback_without_lock) {} template inline typename HugePageFiller::TryGetResult -HugePageFiller::TryGet(Length n) { - ASSERT(n > Length(0)); +HugePageFiller::TryGet(Length n, SpanAllocInfo span_alloc_info) { + TC_ASSERT_GT(n, Length(0)); + TC_ASSERT(dense_tracker_type_ == + HugePageFillerDenseTrackerType::kLongestFreeRangeAndChunks || + span_alloc_info.density == AccessDensityPrediction::kSparse || + n == Length(1)); // How do we choose which hugepage to allocate from (among those with // a free range of at least n?) Our goal is to be as space-efficient @@ -1240,7 +676,7 @@ HugePageFiller::TryGet(Length n) { // return them to the OS.) // // In practice, avoiding fragmentation is by far more important: - // space usage can explode if we don't jealously guard large free ranges. + // space usage can explode if we don't zealously guard large free ranges. // // Our primary measure of fragmentation of a hugepage by a proxy measure: the // longest free range it contains. If this is short, any free space is @@ -1280,7 +716,7 @@ HugePageFiller::TryGet(Length n) { // store each group in a TrackerList. All freshly-donated groups are stored // in a "donated" array and the groups with (possibly prior) small allocs are // stored in a "regular" array. 
Each of these arrays is encapsulated in a - // HintedTrackerLists object, which stores the array together with a bitmap to + // PageTrackerLists object, which stores the array together with a bitmap to // quickly find non-empty lists. The lists are ordered to satisfy the // following two useful properties: // @@ -1290,116 +726,120 @@ HugePageFiller::TryGet(Length n) { // for allocation. // // So all we have to do is find the first nonempty freelist in the regular - // HintedTrackerList that *could* support our allocation, and it will be our - // best choice. If there is none we repeat with the donated HintedTrackerList. + // PageTrackerList that *could* support our allocation, and it will be our + // best choice. If there is none we repeat with the donated PageTrackerList. ASSUME(n < kPagesPerHugePage); TrackerType* pt; bool was_released = false; + const AccessDensityPrediction type = span_alloc_info.density; do { - pt = regular_alloc_.GetLeast(ListFor(n, 0)); - if (pt) { - ASSERT(!pt->donated()); - break; - } - pt = donated_alloc_.GetLeast(n.raw_num()); + pt = regular_alloc_[type].GetLeast( + ListFor(n, 0, type, kPagesPerHugePage.raw_num() - 1)); if (pt) { + TC_ASSERT(!pt->donated()); break; } - if (partial_rerelease_ == FillerPartialRerelease::Retain) { - pt = regular_alloc_partial_released_.GetLeast(ListFor(n, 0)); + if (ABSL_PREDICT_TRUE(type == AccessDensityPrediction::kSparse)) { + pt = donated_alloc_.GetLeast(n.raw_num()); if (pt) { - ASSERT(!pt->donated()); - was_released = true; - ASSERT(n_used_partial_released_ >= pt->used_pages()); - n_used_partial_released_ -= pt->used_pages(); break; } } - pt = regular_alloc_released_.GetLeast(ListFor(n, 0)); + pt = regular_alloc_partial_released_[type].GetLeast( + ListFor(n, 0, type, kPagesPerHugePage.raw_num() - 1)); + if (pt) { + TC_ASSERT(!pt->donated()); + was_released = true; + TC_ASSERT_GE(n_used_partial_released_[type], pt->used_pages()); + n_used_partial_released_[type] -= pt->used_pages(); + break; + } + pt = regular_alloc_released_[type].GetLeast( + ListFor(n, 0, type, kPagesPerHugePage.raw_num() - 1)); if (pt) { - ASSERT(!pt->donated()); + TC_ASSERT(!pt->donated()); was_released = true; - ASSERT(n_used_released_ >= pt->used_pages()); - n_used_released_ -= pt->used_pages(); + TC_ASSERT_GE(n_used_released_[type], pt->used_pages()); + n_used_released_[type] -= pt->used_pages(); break; } - return {nullptr, PageId{0}}; + return {nullptr, PageId{0}, false}; } while (false); ASSUME(pt != nullptr); - ASSERT(pt->longest_free_range() >= n); + TC_ASSERT_GE(pt->longest_free_range(), n); + // type == AccessDensityPrediction::kDense => pt->HasDenseSpans(). This + // also verifies we do not end up with a donated pt on the kDense path. + TC_ASSERT(type == AccessDensityPrediction::kSparse || pt->HasDenseSpans()); const auto page_allocation = pt->Get(n); AddToFillerList(pt); - allocated_ += n; + pages_allocated_[type] += n; - ASSERT(was_released || page_allocation.previously_unbacked == Length(0)); - (void)was_released; - ASSERT(unmapped_ >= page_allocation.previously_unbacked); + // If it was in a released state earlier, and is about to be full again, + // record that the state has been toggled back and update the stat counter. 
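A stripped-down sketch of the "array of lists plus a bitmap of non-empty lists" structure described in the comment above, showing how a GetLeast()-style query finds the first non-empty list at or after a given index. MiniTracker, MiniHintedLists, and the list count of 8 are illustrative assumptions, not the real PageTrackerLists.

#include <bitset>
#include <cassert>
#include <cstddef>
#include <list>

namespace sketch {

constexpr std::size_t kLists = 8;

struct MiniTracker {
  std::size_t longest_free;
};

// An array of lists plus a bitmap of which lists are non-empty, so the
// "first non-empty list at or after index i" query skips empty lists cheaply.
class MiniHintedLists {
 public:
  void Add(MiniTracker* t, std::size_t index) {
    lists_[index].push_front(t);
    nonempty_.set(index);
  }
  // Returns a tracker from the first non-empty list with index >= start,
  // or nullptr if there is none.
  MiniTracker* GetLeast(std::size_t start) {
    for (std::size_t i = start; i < kLists; ++i) {
      if (!nonempty_.test(i)) continue;
      MiniTracker* t = lists_[i].front();
      lists_[i].pop_front();
      if (lists_[i].empty()) nonempty_.reset(i);
      return t;
    }
    return nullptr;
  }

 private:
  std::list<MiniTracker*> lists_[kLists];
  std::bitset<kLists> nonempty_;
};

}  // namespace sketch

int main() {
  sketch::MiniHintedLists lists;
  sketch::MiniTracker a{/*longest_free=*/2};
  sketch::MiniTracker b{/*longest_free=*/5};
  lists.Add(&a, a.longest_free);
  lists.Add(&b, b.longest_free);
  // A request needing a free range of at least 3 skips list 2 and finds b.
  assert(lists.GetLeast(3) == &b);
  assert(lists.GetLeast(3) == nullptr);  // list 5 is empty again
  return 0;
}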
+ if (was_released && !pt->released() && !pt->was_released()) { + pt->set_was_released(/*status=*/true); + ++n_was_released_[type]; + } + TC_ASSERT(was_released || page_allocation.previously_unbacked == Length(0)); + TC_ASSERT_GE(unmapped_, page_allocation.previously_unbacked); unmapped_ -= page_allocation.previously_unbacked; // We're being used for an allocation, so we are no longer considered // donated by this point. - ASSERT(!pt->donated()); + TC_ASSERT(!pt->donated()); UpdateFillerStatsTracker(); - return {pt, page_allocation.page}; + return {pt, page_allocation.page, was_released}; } -// Marks [p, p + n) as usable by new allocations into *pt; returns pt -// if that hugepage is now empty (nullptr otherwise.) -// REQUIRES: pt is owned by this object (has been Contribute()), and -// {pt, p, n} was the result of a previous TryGet. +// Marks r as usable by new allocations into *pt; returns pt if that hugepage is +// now empty (nullptr otherwise.) +// +// REQUIRES: pt is owned by this object (has been Contribute()), and {pt, +// Range(p, n)} was the result of a previous TryGet. template -inline TrackerType* HugePageFiller::Put(TrackerType* pt, PageId p, - Length n) { - // Consider releasing [p, p+n). We do this here: - // * To unback the memory before we mark it as free. When partially - // unbacking, we release the pageheap_lock. Another thread could see the - // "free" memory and begin using it before we retake the lock. - // * To maintain maintain the invariant that - // pt->released() => regular_alloc_released_.size() > 0 || - // regular_alloc_partial_released_.size() > 0 - // We do this before removing pt from our lists, since another thread may - // encounter our post-RemoveFromFillerList() update to - // regular_alloc_released_.size() and regular_alloc_partial_released_.size() - // while encountering pt. - if (partial_rerelease_ == FillerPartialRerelease::Return) { - pt->MaybeRelease(p, n); - } - +inline TrackerType* HugePageFiller::Put(TrackerType* pt, Range r) { RemoveFromFillerList(pt); - - pt->Put(p, n); - - allocated_ -= n; - if (partial_rerelease_ == FillerPartialRerelease::Return && pt->released()) { - unmapped_ += n; - unmapping_unaccounted_ += n; + pt->Put(r); + if (pt->HasDenseSpans()) { + TC_ASSERT_GE(pages_allocated_[AccessDensityPrediction::kDense], r.n); + pages_allocated_[AccessDensityPrediction::kDense] -= r.n; + } else { + TC_ASSERT_GE(pages_allocated_[AccessDensityPrediction::kSparse], r.n); + pages_allocated_[AccessDensityPrediction::kSparse] -= r.n; } if (pt->longest_free_range() == kPagesPerHugePage) { + TC_ASSERT_EQ(pt->nallocs(), 0); --size_; if (pt->released()) { const Length free_pages = pt->free_pages(); const Length released_pages = pt->released_pages(); - ASSERT(free_pages >= released_pages); - ASSERT(unmapped_ >= released_pages); + TC_ASSERT_GE(free_pages, released_pages); + TC_ASSERT_GE(unmapped_, released_pages); unmapped_ -= released_pages; if (free_pages > released_pages) { - // We should only see a difference between free pages and released pages - // when we retain returned pages. - ASSERT(partial_rerelease_ == FillerPartialRerelease::Retain); - // pt is partially released. As the rest of the hugepage-aware // allocator works in terms of whole hugepages, we need to release the // rest of the hugepage. This simplifies subsequent accounting by // allowing us to work with hugepage-granularity, rather than needing to // retain pt's state indefinitely. 
- pageheap_lock.Unlock(); - TrackerType::UnbackImpl(pt->location().start_addr(), kHugePageSize); - pageheap_lock.Lock(); + bool success = + unback_without_lock_(HugeRange(pt->location(), NHugePages(1))); + + if (ABSL_PREDICT_TRUE(success)) { + unmapping_unaccounted_ += free_pages - released_pages; + } + } + } - unmapping_unaccounted_ += free_pages - released_pages; + if (pt->was_released()) { + pt->set_was_released(/*status=*/false); + if (pt->HasDenseSpans()) { + --n_was_released_[AccessDensityPrediction::kDense]; + } else { + --n_was_released_[AccessDensityPrediction::kSparse]; } } @@ -1412,17 +852,25 @@ inline TrackerType* HugePageFiller::Put(TrackerType* pt, PageId p, } template -inline void HugePageFiller::Contribute(TrackerType* pt, - bool donated) { +inline void HugePageFiller::Contribute( + TrackerType* pt, bool donated, SpanAllocInfo span_alloc_info) { // A contributed huge page should not yet be subreleased. - ASSERT(pt->released_pages() == Length(0)); + TC_ASSERT_EQ(pt->released_pages(), Length(0)); + + const AccessDensityPrediction type = span_alloc_info.density; - allocated_ += pt->used_pages(); + pages_allocated_[type] += pt->used_pages(); + TC_ASSERT(!(type == AccessDensityPrediction::kDense && donated)); if (donated) { + TC_ASSERT(pt->was_donated()); DonateToFillerList(pt); } else { + if (type == AccessDensityPrediction::kDense) { + pt->SetHasDenseSpans(); + } AddToFillerList(pt); } + ++size_; UpdateFillerStatsTracker(); } @@ -1431,15 +879,18 @@ template template inline int HugePageFiller::SelectCandidates( absl::Span candidates, int current_candidates, - const HintedTrackerLists& tracker_list, size_t tracker_start) { - auto PushCandidate = [&](TrackerType* pt) { + const PageTrackerLists& tracker_list, size_t tracker_start) { + auto PushCandidate = [&](TrackerType& pt) GOOGLE_MALLOC_SECTION { + TC_ASSERT_GT(pt.free_pages(), Length(0)); + TC_ASSERT_GT(pt.free_pages(), pt.released_pages()); + // If we have few candidates, we can avoid creating a heap. // // In ReleaseCandidates(), we unconditionally sort the list and linearly // iterate through it--rather than pop_heap repeatedly--so we only need the // heap for creating a bounded-size priority queue. if (current_candidates < candidates.size()) { - candidates[current_candidates] = pt; + candidates[current_candidates] = &pt; current_candidates++; if (current_candidates == candidates.size()) { @@ -1450,14 +901,14 @@ inline int HugePageFiller::SelectCandidates( } // Consider popping the worst candidate from our list. - if (CompareForSubrelease(candidates[0], pt)) { + if (CompareForSubrelease(candidates[0], &pt)) { // pt is worse than the current worst. return; } std::pop_heap(candidates.begin(), candidates.begin() + current_candidates, CompareForSubrelease); - candidates[current_candidates - 1] = pt; + candidates[current_candidates - 1] = &pt; std::push_heap(candidates.begin(), candidates.begin() + current_candidates, CompareForSubrelease); }; @@ -1479,11 +930,18 @@ inline Length HugePageFiller::ReleaseCandidates( #endif for (int i = 0; i < candidates.size() && total_released < target; i++) { TrackerType* best = candidates[i]; - ASSERT(best != nullptr); + TC_ASSERT_NE(best, nullptr); + + // Verify that we have pages that we can release. + TC_ASSERT_NE(best->free_pages(), Length(0)); + // TODO(b/73749855): This assertion may need to be relaxed if we release + // the pageheap_lock here. A candidate could change state with another + // thread while we have the lock released for another candidate. 
+ TC_ASSERT_GT(best->free_pages(), best->released_pages()); #ifndef NDEBUG // Double check that our sorting criteria were applied correctly. - ASSERT(last <= best->used_pages()); + TC_ASSERT_LE(last, best->used_pages()); last = best->used_pages(); #endif @@ -1491,11 +949,22 @@ inline Length HugePageFiller::ReleaseCandidates( ++total_broken; } RemoveFromFillerList(best); - Length ret = best->ReleaseFree(); + Length ret = best->ReleaseFree(unback_); unmapped_ += ret; - ASSERT(unmapped_ >= best->released_pages()); + TC_ASSERT_GE(unmapped_, best->released_pages()); total_released += ret; AddToFillerList(best); + // If the candidate we just released from previously had was_released set, + // clear it. was_released is tracked only for pages that aren't in + // released state. + if (best->was_released() && best->released()) { + best->set_was_released(/*status=*/false); + if (best->HasDenseSpans()) { + --n_was_released_[AccessDensityPrediction::kDense]; + } else { + --n_was_released_[AccessDensityPrediction::kSparse]; + } + } } subrelease_stats_.num_pages_subreleased += total_released; @@ -1510,43 +979,70 @@ inline Length HugePageFiller::ReleaseCandidates( return total_released; } +template +inline Length HugePageFiller::FreePagesInPartialAllocs() const { + return regular_alloc_partial_released_[AccessDensityPrediction::kSparse] + .size() + .in_pages() + + regular_alloc_partial_released_[AccessDensityPrediction::kDense] + .size() + .in_pages() + + regular_alloc_released_[AccessDensityPrediction::kSparse] + .size() + .in_pages() + + regular_alloc_released_[AccessDensityPrediction::kDense] + .size() + .in_pages() - + used_pages_in_any_subreleased() - unmapped_pages(); +} + template inline Length HugePageFiller::GetDesiredSubreleasePages( - Length desired, Length total_released, absl::Duration peak_interval) { - // Don't subrelease pages if it wouldn't push you under the latest peak. - // This is a bit subtle: We want the current *mapped* pages not to be below - // the recent *demand* peak, i.e., if we have a large amount of free memory - // right now but demand is below a recent peak, we still want to subrelease. - ASSERT(total_released < desired); - - if (peak_interval == absl::ZeroDuration()) { + Length desired, Length total_released, SkipSubreleaseIntervals intervals) { + // Don't subrelease pages if it would push you under the sum of short-term + // demand fluctuation peak and long-term demand trend. This is a bit subtle: + // We want the current *mapped* pages not to be below the recent *demand* + // requirement, i.e., if we have a large amount of free memory right now but + // demand is below the requirement, we still want to subrelease. 
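A worked sketch of the skip-subrelease idea described in the comment above (and implemented just below), using plain integers in place of Length and made-up numbers: the release target is capped so that mapped pages do not drop below the recent demand requirement. The function name and the simplifications are mine, not the library's.

#include <algorithm>
#include <cassert>
#include <cstdint>

// Returns the (possibly reduced) release target: never release so much that
// currently mapped pages would fall under required_pages. All quantities are
// page counts; the real code also reports the skipped pages, which is omitted
// here.
int64_t DesiredAfterSkipSubrelease(int64_t desired, int64_t total_released,
                                   int64_t current_pages,
                                   int64_t required_pages) {
  if (required_pages == 0) return desired;
  int64_t new_desired = required_pages >= current_pages
                            ? total_released
                            : total_released + (current_pages - required_pages);
  return std::min(desired, new_desired);
}

int main() {
  // Demand requirement 900 pages, 1000 pages currently mapped: even though the
  // caller asked to release 300, only 100 can go without dipping below 900.
  assert(DesiredAfterSkipSubrelease(/*desired=*/300, /*total_released=*/0,
                                    /*current_pages=*/1000,
                                    /*required_pages=*/900) == 100);
  // If the requirement already exceeds what is mapped, nothing beyond what was
  // already released should be targeted.
  assert(DesiredAfterSkipSubrelease(300, 0, 800, 900) == 0);
  return 0;
}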
+ TC_ASSERT_LT(total_released, desired); + if (!intervals.SkipSubreleaseEnabled()) { return desired; } - UpdateFillerStatsTracker(); - Length demand_at_peak = - fillerstats_tracker_.GetRecentPeak(peak_interval).num_pages; + Length required_pages; + required_pages = fillerstats_tracker_.GetRecentDemand( + intervals.short_interval, intervals.long_interval); + Length current_pages = used_pages() + free_pages(); - if (demand_at_peak != Length(0)) { + if (required_pages != Length(0)) { Length new_desired; - if (demand_at_peak >= current_pages) { + if (required_pages >= current_pages) { new_desired = total_released; } else { - new_desired = total_released + (current_pages - demand_at_peak); + new_desired = total_released + (current_pages - required_pages); } if (new_desired >= desired) { return desired; } - - // Report the amount of memory that we didn't release due to this - // mechanism, but never more than free_pages, since we would not have - // been able to release that much memory with or without this mechanism - // (i.e., reporting more would be confusing). - Length skipped_pages = std::min(free_pages(), (desired - new_desired)); + // Remaining target amount to release after applying skip subrelease. Note: + // the remaining target should always be smaller or equal to the number of + // free pages according to the mechanism (recent peak is always larger or + // equal to current used_pages), however, we still calculate allowed release + // using the minimum of the two to avoid relying on that assumption. + Length releasable_pages = + std::min(free_pages(), (new_desired - total_released)); + // Reports the amount of memory that we didn't release due to this + // mechanism, but never more than skipped free pages. In other words, + // skipped_pages is zero if all free pages are allowed to be released by + // this mechanism. Note, only free pages in the smaller of the two + // (current_pages and required_pages) are skipped, the rest are allowed to + // be subreleased. + Length skipped_pages = + std::min((free_pages() - releasable_pages), (desired - new_desired)); fillerstats_tracker_.ReportSkippedSubreleasePages( - skipped_pages, current_pages, peak_interval); + skipped_pages, std::min(current_pages, required_pages)); return new_desired; } @@ -1554,14 +1050,31 @@ inline Length HugePageFiller::GetDesiredSubreleasePages( } // Tries to release desired pages by iteratively releasing from the emptiest -// possible hugepage and releasing its free memory to the system. Return the +// possible hugepage and releasing its free memory to the system. Return the // number of pages actually released. template inline Length HugePageFiller::ReleasePages( - Length desired, absl::Duration skip_subrelease_after_peaks_interval, - bool hit_limit) { + Length desired, SkipSubreleaseIntervals intervals, + bool release_partial_alloc_pages, bool hit_limit) { Length total_released; + // If the feature to release all free pages in partially-released allocs is + // enabled, we increase the desired number of pages below to the total number + // of releasable pages in partially-released allocs. We disable this feature + // for cases when hit_limit is set to true (i.e. when memory limit is hit). 
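A small sketch of the "release a fraction of the free pages stranded in partially-released allocs" adjustment described above and implemented just below. The 0.1 fraction mirrors kPartialAllocPagesRelease from the surrounding code; the function name, the plain size_t page counts, and the folding of the feature flag into a single hit_limit parameter are simplifying assumptions.

#include <algorithm>
#include <cassert>
#include <cstddef>

constexpr double kPartialAllocPagesReleaseSketch = 0.1;

// Bumps the release target so that at least a fixed fraction of the free pages
// held by partially-released hugepages is targeted, unless the caller already
// asked for more. Skipped entirely when the memory limit was hit.
std::size_t BoostDesired(std::size_t desired,
                         std::size_t free_pages_in_partial_allocs,
                         bool hit_limit) {
  if (hit_limit) return desired;
  std::size_t from_partial_allocs = static_cast<std::size_t>(
      kPartialAllocPagesReleaseSketch * free_pages_in_partial_allocs);
  return std::max(desired, from_partial_allocs);
}

int main() {
  // 2000 free pages sit on partially-released hugepages; 10% of them (200)
  // exceeds the caller's request of 50, so the target grows to 200.
  assert(BoostDesired(/*desired=*/50, /*free_pages_in_partial_allocs=*/2000,
                      /*hit_limit=*/false) == 200);
  assert(BoostDesired(50, 2000, /*hit_limit=*/true) == 50);
  return 0;
}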
+ const bool release_all_from_partial_allocs = + release_partial_alloc_pages && !hit_limit; + if (ABSL_PREDICT_FALSE(release_all_from_partial_allocs)) { + // If we have fewer than desired number of free pages in partial allocs, we + // would try to release pages from full allocs as well (after we include + // unaccounted unmapped pages and release from partial allocs). Else, we aim + // to release up to the total number of free pages in partially-released + // allocs. + size_t from_partial_allocs = + kPartialAllocPagesRelease * FreePagesInPartialAllocs().raw_num(); + desired = std::max(desired, Length(from_partial_allocs)); + } + // We also do eager release, once we've called this at least once: // claim credit for anything that gets done. if (unmapping_unaccounted_.raw_num() > 0) { @@ -1570,17 +1083,19 @@ inline Length HugePageFiller::ReleasePages( Length n = unmapping_unaccounted_; unmapping_unaccounted_ = Length(0); subrelease_stats_.num_pages_subreleased += n; - - if (n >= desired) { - return n; - } - total_released += n; } - if (skip_subrelease_after_peaks_interval != absl::ZeroDuration()) { - desired = GetDesiredSubreleasePages(desired, total_released, - skip_subrelease_after_peaks_interval); + if (total_released >= desired) { + return total_released; + } + + // Only reduce desired if skip subrelease is on. + // + // Additionally, if we hit the limit, we should not be applying skip + // subrelease. OOM may be imminent. + if (intervals.SkipSubreleaseEnabled() && !hit_limit) { + desired = GetDesiredSubreleasePages(desired, total_released, intervals); if (desired <= total_released) { return total_released; } @@ -1591,37 +1106,51 @@ inline Length HugePageFiller::ReleasePages( // Optimize for releasing up to a huge page worth of small pages (scattered // over many parts of the filler). Since we hold pageheap_lock, we cannot // allocate here. - constexpr size_t kCandidates = kPagesPerHugePage.raw_num(); - using CandidateArray = std::array; - - if (partial_rerelease_ == FillerPartialRerelease::Retain) { - while (total_released < desired) { - CandidateArray candidates; - // We can skip the first kChunks lists as they are known to be 100% full. - // (Those lists are likely to be long.) - // - // We do not examine the regular_alloc_released_ lists, as only contain - // completely released pages. - int n_candidates = - SelectCandidates(absl::MakeSpan(candidates), 0, - regular_alloc_partial_released_, kChunks); - - Length released = - ReleaseCandidates(absl::MakeSpan(candidates.data(), n_candidates), - desired - total_released); - if (released == Length(0)) { - break; - } - total_released += released; + using CandidateArray = + std::array; + + while (total_released < desired) { + CandidateArray candidates; + // We can skip the first kChunks lists as they are known + // to be 100% full. (Those lists are likely to be long.) + // + // We do not examine the regular_alloc_released_ lists, as only contain + // completely released pages. 
+ int n_candidates = SelectCandidates( + absl::MakeSpan(candidates), 0, + regular_alloc_partial_released_[AccessDensityPrediction::kSparse], + kChunks); + n_candidates = SelectCandidates( + absl::MakeSpan(candidates), n_candidates, + regular_alloc_partial_released_[AccessDensityPrediction::kDense], + kChunks); + + Length released = + ReleaseCandidates(absl::MakeSpan(candidates.data(), n_candidates), + desired - total_released); + subrelease_stats_.num_partial_alloc_pages_subreleased += released; + if (released == Length(0)) { + break; } + total_released += released; } // Only consider breaking up a hugepage if there are no partially released // pages. while (total_released < desired) { CandidateArray candidates; - int n_candidates = SelectCandidates(absl::MakeSpan(candidates), 0, - regular_alloc_, kChunks); + // TODO(b/199203282): revisit the order in which allocs are searched for + // release candidates. + // + // We select candidate hugepages from few_objects_alloc_ first as we expect + // hugepages in this alloc to become free earlier than those in other + // allocs. + int n_candidates = SelectCandidates( + absl::MakeSpan(candidates), /*current_candidates=*/0, + regular_alloc_[AccessDensityPrediction::kSparse], kChunks); + n_candidates = SelectCandidates( + absl::MakeSpan(candidates), n_candidates, + regular_alloc_[AccessDensityPrediction::kDense], kChunks); // TODO(b/138864853): Perhaps remove donated_alloc_ from here, it's not a // great candidate for partial release. n_candidates = SelectCandidates(absl::MakeSpan(candidates), n_candidates, @@ -1641,22 +1170,17 @@ inline Length HugePageFiller::ReleasePages( template inline void HugePageFiller::AddSpanStats( - SmallSpanStats* small, LargeSpanStats* large, - PageAgeHistograms* ages) const { - auto loop = [&](const TrackerType* pt) { - pt->AddSpanStats(small, large, ages); - }; - // We can skip the first kChunks lists as they are known to be 100% full. - regular_alloc_.Iter(loop, kChunks); + SmallSpanStats* small, LargeSpanStats* large) const { + auto loop = [&](const TrackerType& pt) { pt.AddSpanStats(small, large); }; + // We can skip the first kChunks lists as they are known to be + // 100% full. donated_alloc_.Iter(loop, 0); - - if (partial_rerelease_ == FillerPartialRerelease::Retain) { - regular_alloc_partial_released_.Iter(loop, 0); - } else { - ASSERT(regular_alloc_partial_released_.empty()); - ASSERT(n_used_partial_released_ == Length(0)); + for (const AccessDensityPrediction type : + {AccessDensityPrediction::kDense, AccessDensityPrediction::kSparse}) { + regular_alloc_[type].Iter(loop, kChunks); + regular_alloc_partial_released_[type].Iter(loop, 0); + regular_alloc_released_[type].Iter(loop, 0); } - regular_alloc_released_.Iter(loop, 0); } template @@ -1675,114 +1199,280 @@ namespace huge_page_filler_internal { // (mostly) even buckets in the middle. class UsageInfo { public: - enum Type { kRegular, kDonated, kPartialReleased, kReleased, kNumTypes }; + enum Type { + kSparseRegular, + kDenseRegular, + kDonated, + kSparsePartialReleased, + kDensePartialReleased, + kSparseReleased, + kDenseReleased, + kNumTypes + }; UsageInfo() { size_t i; - for (i = 0; i <= 4 && i < kPagesPerHugePage.raw_num(); ++i) { + for (i = 0; i <= kBucketsAtBounds && i < kPagesPerHugePage.raw_num(); ++i) { bucket_bounds_[buckets_size_] = i; buckets_size_++; } - if (i < kPagesPerHugePage.raw_num() - 4) { + // Histograms should have kBucketsAtBounds buckets at the start and at the + // end. 
Additionally kPagesPerHugePage - kBucketsAtBounds must not + // underflow. Hence the assert below. + static_assert(kPagesPerHugePage.raw_num() >= kBucketsAtBounds); + if (i < kPagesPerHugePage.raw_num() - kBucketsAtBounds) { // Because kPagesPerHugePage is a power of two, it must be at least 16 - // to get inside this "if" - either i=5 and kPagesPerHugePage=8 and - // the test fails, or kPagesPerHugePage <= 4 and the test fails. - ASSERT(kPagesPerHugePage >= Length(16)); + // to get inside this "if". The test fails if either (i=5 and + // kPagesPerHugePage=8), or kPagesPerHugePage <= kBucketsAtBounds. + TC_ASSERT_GE(kPagesPerHugePage, Length(16)); constexpr int step = kPagesPerHugePage.raw_num() / 16; // We want to move in "step"-sized increments, aligned every "step". // So first we have to round i up to the nearest step boundary. This // logic takes advantage of step being a power of two, so step-1 is // all ones in the low-order bits. i = ((i - 1) | (step - 1)) + 1; - for (; i < kPagesPerHugePage.raw_num() - 4; i += step) { + for (; i < kPagesPerHugePage.raw_num() - kBucketsAtBounds; i += step) { bucket_bounds_[buckets_size_] = i; buckets_size_++; } - i = kPagesPerHugePage.raw_num() - 4; + i = kPagesPerHugePage.raw_num() - kBucketsAtBounds; } for (; i < kPagesPerHugePage.raw_num(); ++i) { bucket_bounds_[buckets_size_] = i; buckets_size_++; } - CHECK_CONDITION(buckets_size_ <= kBucketCapacity); + + lifetime_bucket_bounds_[0] = 0; + lifetime_bucket_bounds_[1] = 1; + for (int i = 2; i <= kLifetimeBuckets; ++i) { + lifetime_bucket_bounds_[i] = lifetime_bucket_bounds_[i - 1] * 10; + } + TC_CHECK_LE(buckets_size_, kBucketCapacity); + } + + template + bool IsHugepageBacked(const TrackerType& tracker, PageFlags& pageflags) { + void* addr = tracker.location().start_addr(); + // TODO(b/28093874): Investigate if pageflags may be queried without + // pageheap_lock. + const bool is_hugepage_backed = pageflags.IsHugepageBacked(addr); + return is_hugepage_backed; + } + + // Reports the number of pages that were previously released, but later became + // full and are hugepage backed. + size_t HugepageBackedPreviouslyReleased() { + return hugepage_backed_previously_released_; } template - void Record(const TrackerType* pt, Type which) { - const Length free = kPagesPerHugePage - pt->used_pages(); - const Length lf = pt->longest_free_range(); - const size_t nalloc = pt->nallocs(); + void Record(const TrackerType& pt, PageFlags& pageflags, Type which, + double clock_now, double clock_frequency) { + TC_ASSERT_LT(which, kNumTypes); + const Length free = kPagesPerHugePage - pt.used_pages(); + const Length lf = pt.longest_free_range(); + const size_t nalloc = pt.nallocs(); // This is a little annoying as our buckets *have* to differ; // nalloc is in [1,256], free_pages and longest_free are in [0, 255]. 
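A small sketch of the lifetime bucketing configured in the constructor above: with kLifetimeBuckets == 8 (defined later in this class) the millisecond boundaries are 0, 1, 10, ..., 1,000,000. The helper below is an illustrative stand-in, not the class method.

#include <algorithm>
#include <cstdint>
#include <iterator>

// Stand-in for UsageInfo::LifetimeBucketNum(): upper_bound over the
// log10-spaced boundaries, minus one, gives the bucket index.
int LifetimeBucketSketch(int64_t duration_ms) {
  static constexpr int64_t kBounds[] = {0,     1,      10,     100,
                                        1000,  10000,  100000, 1000000};
  const int64_t* it =
      std::upper_bound(std::begin(kBounds), std::end(kBounds), duration_ms);
  return static_cast<int>(it - std::begin(kBounds)) - 1;
}
// e.g. LifetimeBucketSketch(0) == 0, LifetimeBucketSketch(5) == 1,
//      LifetimeBucketSketch(250) == 3.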
free_page_histo_[which][BucketNum(free.raw_num())]++; longest_free_histo_[which][BucketNum(lf.raw_num())]++; nalloc_histo_[which][BucketNum(nalloc - 1)]++; + + const double elapsed = std::max(clock_now - pt.alloctime(), 0); + const absl::Duration lifetime = + absl::Milliseconds(elapsed * 1000 / clock_frequency); + ++lifetime_histo_[which][LifetimeBucketNum(lifetime)]; + + if (lifetime >= kLongLivedLifetime) { + ++long_lived_hps_histo_[which][BucketNum(nalloc - 1)]; + } + + if (free >= kLowOccupancyNumFreePages) { + ++low_occupancy_lifetime_histo_[which][LifetimeBucketNum(lifetime)]; + } + + if (which == kSparseRegular) { + nalloc_free_page_histo_[BucketNum(nalloc - 1)] + [BucketNum(free.raw_num())]++; + } + + if (IsHugepageBacked(pt, pageflags)) { + ++hugepage_backed_[which]; + if (pt.was_released()) { + ++hugepage_backed_previously_released_; + } + } + ++total_pages_[which]; } - void Print(Printer* out) { - PrintHisto(out, free_page_histo_[kRegular], - "# of regular hps with a<= # of free pages (i); + PrintHisto(out, free_page_histo_[type], type, + "hps with a<= # of free pages (i); + if (type == kDonated) continue; + PrintHisto(out, longest_free_histo_[type], type, + "hps with a<= longest free range (i); + if (type == kDonated) continue; + PrintHisto(out, nalloc_histo_[type], type, + "hps with a<= # of allocations (i); + PrintLifetimeHisto(out, lifetime_histo_[type], type, + "hps with lifetime a <= # hps < b"); + } + + out.printf( + "\nHugePageFiller: # of hps with >= %3zu free pages, with different " + "lifetimes.", + kLowOccupancyNumFreePages.raw_num()); + for (int i = 0; i < kNumTypes; ++i) { + const Type type = static_cast(i); + PrintLifetimeHisto(out, low_occupancy_lifetime_histo_[type], type, + "hps with lifetime a <= # hps < b"); + } + + out.printf("\nHugePageFiller: # of hps with lifetime >= %3zu ms.", + absl::ToInt64Milliseconds(kLongLivedLifetime)); + for (int i = 0; i < kNumTypes; ++i) { + const Type type = static_cast(i); + PrintHisto(out, long_lived_hps_histo_[type], type, + "hps with a <= # of allocations < b", 0); + } + + for (int i = 0; i < kNumTypes; ++i) { + const Type type = static_cast(i); + out.printf( + "\nHugePageFiller: %zu of %s pages hugepage backed out of %zu.", + hugepage_backed_[type], TypeToStr(type), total_pages_[type]); + } + out.printf("\n"); } - void Print(PbtxtRegion* hpaa) { - static constexpr absl::string_view kTrackerTypes[kNumTypes] = { - "REGULAR", "DONATED", "PARTIAL", "RELEASED"}; + void Print(PbtxtRegion& hpaa) { for (int i = 0; i < kNumTypes; ++i) { - PbtxtRegion scoped = hpaa->CreateSubRegion("filler_tracker"); - scoped.PrintRaw("type", kTrackerTypes[i]); - PrintHisto(&scoped, free_page_histo_[i], "free_pages_histogram", 0); - PrintHisto(&scoped, longest_free_histo_[i], - "longest_free_range_histogram", 0); - PrintHisto(&scoped, nalloc_histo_[i], "allocations_histogram", 1); + const Type type = static_cast(i); + PbtxtRegion scoped = hpaa.CreateSubRegion("filler_tracker"); + scoped.PrintRaw("type", AllocType(type)); + scoped.PrintRaw("objects", ObjectType(type)); + PrintHisto(scoped, free_page_histo_[i], "free_pages_histogram", 0); + PrintHisto(scoped, longest_free_histo_[i], "longest_free_range_histogram", + 0); + PrintHisto(scoped, nalloc_histo_[i], "allocations_histogram", 1); + PrintLifetimeHisto(scoped, lifetime_histo_[i], "lifetime_histogram"); + PrintLifetimeHisto(scoped, low_occupancy_lifetime_histo_[i], + "low_occupancy_lifetime_histogram"); + PrintHisto(scoped, long_lived_hps_histo_[i], + "long_lived_hugepages_histogram", 0); + if 
(type == kSparseRegular) { + for (int j = 0; j < buckets_size_; ++j) { + if (nalloc_histo_[i][j] == 0) continue; + PbtxtRegion twodhist = + hpaa.CreateSubRegion("allocations_free_pages_histogram"); + twodhist.PrintI64("lower_bound", bucket_bounds_[j] + 1); + twodhist.PrintI64("upper_bound", (j == buckets_size_ - 1 + ? bucket_bounds_[j] + : bucket_bounds_[j + 1] - 1) + + 1); + PrintHisto(twodhist, nalloc_free_page_histo_[j], "entry", 0); + } + } + scoped.PrintI64("total_pages", total_pages_[type]); + scoped.PrintI64("num_pages_hugepage_backed", hugepage_backed_[type]); } } private: - // Maximum of 4 buckets at the start and end, and 16 in the middle. - static constexpr size_t kBucketCapacity = 4 + 16 + 4; + // Maximum number of buckets at the start and end. + static constexpr size_t kBucketsAtBounds = 8; + static constexpr size_t kLifetimeBuckets = 8; + // Threshold for a page to be long-lived, as a lifetime in milliseconds, for + // telemetry purposes only. + static constexpr absl::Duration kLongLivedLifetime = + absl::Milliseconds(100000); + // Threshold for a hugepage considered to have a low occupancy, for logging + // lifetime telemetry only. + static constexpr Length kLowOccupancyNumFreePages = + Length(kPagesPerHugePage.raw_num() - (kPagesPerHugePage.raw_num() >> 3)); + // 16 buckets in the middle. + static constexpr size_t kBucketCapacity = + kBucketsAtBounds + 16 + kBucketsAtBounds; using Histo = size_t[kBucketCapacity]; + using LifetimeHisto = size_t[kLifetimeBuckets]; int BucketNum(size_t page) { auto it = std::upper_bound(bucket_bounds_, bucket_bounds_ + buckets_size_, page); - CHECK_CONDITION(it != bucket_bounds_); + TC_CHECK_NE(it, bucket_bounds_); return it - bucket_bounds_ - 1; } - void PrintHisto(Printer* out, Histo h, const char blurb[], size_t offset) { - out->printf("\nHugePageFiller: %s", blurb); + int LifetimeBucketNum(absl::Duration duration) { + int64_t duration_ms = absl::ToInt64Milliseconds(duration); + auto it = std::upper_bound(lifetime_bucket_bounds_, + lifetime_bucket_bounds_ + kLifetimeBuckets, + duration_ms); + TC_CHECK_NE(it, lifetime_bucket_bounds_); + return it - lifetime_bucket_bounds_ - 1; + } + + void PrintHisto(Printer& out, Histo h, Type type, absl::string_view blurb, + size_t offset) { + out.printf("\nHugePageFiller: # of %s %s", TypeToStr(type), blurb); for (size_t i = 0; i < buckets_size_; ++i) { if (i % 6 == 0) { - out->printf("\nHugePageFiller:"); + out.printf("\nHugePageFiller:"); + } + out.printf(" <%3zu<=%6zu", bucket_bounds_[i] + offset, h[i]); + } + out.printf("\n"); + } + + void PrintLifetimeHisto(Printer& out, Histo h, Type type, + absl::string_view blurb) { + out.printf("\nHugePageFiller: # of %s %s", TypeToStr(type), blurb); + for (size_t i = 0; i < kLifetimeBuckets; ++i) { + if (i % 6 == 0) { + out.printf("\nHugePageFiller:"); } - out->printf(" <%3zu<=%6zu", bucket_bounds_[i] + offset, h[i]); + out.printf(" < %3zu ms <= %6zu", lifetime_bucket_bounds_[i], h[i]); } - out->printf("\n"); + out.printf("\n"); } - void PrintHisto(PbtxtRegion* hpaa, Histo h, const char key[], size_t offset) { + void PrintHisto(PbtxtRegion& hpaa, Histo h, absl::string_view key, + size_t offset) { for (size_t i = 0; i < buckets_size_; ++i) { - auto hist = hpaa->CreateSubRegion(key); + if (h[i] == 0) continue; + auto hist = hpaa.CreateSubRegion(key); hist.PrintI64("lower_bound", bucket_bounds_[i] + offset); hist.PrintI64("upper_bound", (i == buckets_size_ - 1 ? 
bucket_bounds_[i] @@ -1792,68 +1482,225 @@ class UsageInfo { } } + void PrintLifetimeHisto(PbtxtRegion& hpaa, Histo h, absl::string_view key) { + for (size_t i = 0; i < kLifetimeBuckets; ++i) { + if (h[i] == 0) continue; + auto hist = hpaa.CreateSubRegion(key); + hist.PrintI64("lower_bound", lifetime_bucket_bounds_[i]); + hist.PrintI64("upper_bound", (i == kLifetimeBuckets - 1 + ? lifetime_bucket_bounds_[i] + : lifetime_bucket_bounds_[i + 1])); + hist.PrintI64("value", h[i]); + } + } + + absl::string_view TypeToStr(Type type) const { + TC_ASSERT_LT(type, kNumTypes); + switch (type) { + case kSparseRegular: + return "sparsely-accessed regular"; + case kDenseRegular: + return "densely-accessed regular"; + case kDonated: + return "donated"; + case kSparsePartialReleased: + return "sparsely-accessed partial released"; + case kDensePartialReleased: + return "densely-accessed partial released"; + case kSparseReleased: + return "sparsely-accessed released"; + case kDenseReleased: + return "densely-accessed released"; + default: + TC_BUG("bad type %v", type); + } + } + + absl::string_view AllocType(Type type) const { + TC_ASSERT_LT(type, kNumTypes); + switch (type) { + case kSparseRegular: + case kDenseRegular: + return "REGULAR"; + case kDonated: + return "DONATED"; + case kSparsePartialReleased: + case kDensePartialReleased: + return "PARTIAL"; + case kSparseReleased: + case kDenseReleased: + return "RELEASED"; + default: + TC_BUG("bad type %v", type); + } + } + + absl::string_view ObjectType(Type type) const { + TC_ASSERT_LT(type, kNumTypes); + switch (type) { + case kSparseRegular: + case kDonated: + case kSparsePartialReleased: + case kSparseReleased: + return "SPARSELY_ACCESSED"; + case kDenseRegular: + case kDensePartialReleased: + case kDenseReleased: + return "DENSELY_ACCESSED"; + default: + TC_BUG("bad type %v", type); + } + } + // Arrays, because they are split per alloc type. Histo free_page_histo_[kNumTypes]{}; Histo longest_free_histo_[kNumTypes]{}; Histo nalloc_histo_[kNumTypes]{}; + LifetimeHisto lifetime_histo_[kNumTypes]{}; + Histo long_lived_hps_histo_[kNumTypes]{}; + LifetimeHisto low_occupancy_lifetime_histo_[kNumTypes]{}; + // TODO(b/282993806): drop nalloc_free_page_histo_ after experiment is done. + // Two dimensional histogram. The outer histogram is indexed using the number + // of allocations. The nested histogram is indexed using the number of free + // pages. + // + // Unlike the histograms above, which have separate histograms for each type, + // the histogram below has data only for kSparseRegular hugepages. This is + // being done to reduce the amount of space required. + Histo nalloc_free_page_histo_[kBucketCapacity]{}; size_t bucket_bounds_[kBucketCapacity]; + size_t lifetime_bucket_bounds_[kBucketCapacity]; + size_t hugepage_backed_[kNumTypes] = {0}; + size_t total_pages_[kNumTypes] = {0}; + size_t hugepage_backed_previously_released_ = 0; int buckets_size_ = 0; }; } // namespace huge_page_filler_internal template -inline void HugePageFiller::Print(Printer* out, - bool everything) const { - out->printf("HugePageFiller: densely pack small requests into hugepages\n"); - - HugeLength nrel = - regular_alloc_released_.size() + regular_alloc_partial_released_.size(); - HugeLength nfull = NHugePages(0); - - // note kChunks, not kNumLists here--we're iterating *full* lists. +inline HugePageFillerStats HugePageFiller::GetStats() const { + HugePageFillerStats stats; + // Note kChunks, not kNumLists here--we're iterating *full* lists. 
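An illustrative stand-in for the list indexing used by ListFor() (shown further down in this diff) under the default longest-free-range ordering; kChunksForSketch is a placeholder, not the real kChunks, and the dense spans-allocated ordering uses a different formula.

#include <cstddef>

// The flat free-list index is longest_free_range * kChunks + chunk, so
// indices 0..kChunks-1 hold exactly the hugepages with no free range left,
// which is why the loop below only walks the first kChunks lists when
// counting full hugepages.
constexpr size_t kChunksForSketch = 8;  // stand-in value
constexpr size_t ListIndexSketch(size_t longest_free_range, size_t chunk) {
  return longest_free_range * kChunksForSketch + chunk;
}
static_assert(ListIndexSketch(/*longest_free_range=*/0, /*chunk=*/3) == 3);
static_assert(ListIndexSketch(/*longest_free_range=*/1, /*chunk=*/0) ==
              kChunksForSketch);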
for (size_t chunk = 0; chunk < kChunks; ++chunk) { - nfull += NHugePages( - regular_alloc_[ListFor(/*longest=*/Length(0), chunk)].length()); + stats.n_full[AccessDensityPrediction::kSparse] += NHugePages( + regular_alloc_[AccessDensityPrediction::kSparse] + [ListFor(/*longest=*/Length(0), chunk, + AccessDensityPrediction::kSparse, /*nallocs=*/0)] + .length()); + stats.n_full[AccessDensityPrediction::kDense] += + NHugePages(regular_alloc_[AccessDensityPrediction::kDense] + [ListFor(/*longest=*/Length(0), chunk, + AccessDensityPrediction::kDense, + kPagesPerHugePage.raw_num())] + .length()); } + stats.n_full[AccessDensityPrediction::kPredictionCounts] = + stats.n_full[AccessDensityPrediction::kSparse] + + stats.n_full[AccessDensityPrediction::kDense]; + + // We only use donated allocs for allocating sparse pages. + stats.n_total[AccessDensityPrediction::kSparse] = donated_alloc_.size(); + for (const AccessDensityPrediction count : + {AccessDensityPrediction::kSparse, AccessDensityPrediction::kDense}) { + stats.n_fully_released[count] = regular_alloc_released_[count].size(); + stats.n_partial_released[count] = + regular_alloc_partial_released_[count].size(); + stats.n_released[count] = + stats.n_fully_released[count] + stats.n_partial_released[count]; + stats.n_total[count] += + stats.n_released[count] + regular_alloc_[count].size(); + stats.n_partial[count] = + stats.n_total[count] - stats.n_released[count] - stats.n_full[count]; + } + + // Collect total stats that is the sum of both kSparse and kDense allocs. + stats.n_fully_released[AccessDensityPrediction::kPredictionCounts] = + stats.n_fully_released[AccessDensityPrediction::kSparse] + + stats.n_fully_released[AccessDensityPrediction::kDense]; + stats.n_partial_released[AccessDensityPrediction::kPredictionCounts] = + stats.n_partial_released[AccessDensityPrediction::kSparse] + + stats.n_partial_released[AccessDensityPrediction::kDense]; + stats.n_released[AccessDensityPrediction::kPredictionCounts] = + stats.n_released[AccessDensityPrediction::kSparse] + + stats.n_released[AccessDensityPrediction::kDense]; + + stats.n_total[AccessDensityPrediction::kPredictionCounts] = size(); + stats.n_partial[AccessDensityPrediction::kPredictionCounts] = + size() - stats.n_released[AccessDensityPrediction::kPredictionCounts] - + stats.n_full[AccessDensityPrediction::kPredictionCounts]; + return stats; +} + +template +inline void HugePageFiller::Print(Printer& out, bool everything) { + out.printf("HugePageFiller: densely pack small requests into hugepages\n"); + const HugePageFillerStats stats = GetStats(); + // A donated alloc full list is impossible because it would have never been // donated in the first place. (It's an even hugepage.) - ASSERT(donated_alloc_[0].empty()); + TC_ASSERT(donated_alloc_[0].empty()); // Evaluate a/b, avoiding division by zero const auto safe_div = [](Length a, Length b) { return b == Length(0) ? 0. 
: static_cast(a.raw_num()) / static_cast(b.raw_num()); }; - const HugeLength n_partial = size() - nrel - nfull; - const HugeLength n_nonfull = - n_partial + regular_alloc_partial_released_.size(); - out->printf( - "HugePageFiller: %zu total, %zu full, %zu partial, %zu released " + out.printf( + "HugePageFiller: Overall, %zu total, %zu full, %zu partial, %zu released " "(%zu partially), 0 quarantined\n", - size().raw_num(), nfull.raw_num(), n_partial.raw_num(), nrel.raw_num(), - regular_alloc_partial_released_.size().raw_num()); - out->printf("HugePageFiller: %zu pages free in %zu hugepages, %.4f free\n", - free_pages().raw_num(), size().raw_num(), - safe_div(free_pages(), size().in_pages())); + size().raw_num(), + stats.n_full[AccessDensityPrediction::kPredictionCounts].raw_num(), + stats.n_partial[AccessDensityPrediction::kPredictionCounts].raw_num(), + stats.n_released[AccessDensityPrediction::kPredictionCounts].raw_num(), + stats.n_partial_released[AccessDensityPrediction::kPredictionCounts] + .raw_num()); + + out.printf( + "HugePageFiller: those with sparsely-accessed spans, %zu total, " + "%zu full, %zu partial, %zu released (%zu partially), 0 quarantined\n", + stats.n_total[AccessDensityPrediction::kSparse].raw_num(), + stats.n_full[AccessDensityPrediction::kSparse].raw_num(), + stats.n_partial[AccessDensityPrediction::kSparse].raw_num(), + stats.n_released[AccessDensityPrediction::kSparse].raw_num(), + stats.n_partial_released[AccessDensityPrediction::kSparse].raw_num()); + + out.printf( + "HugePageFiller: those with densely-accessed spans, %zu total, " + "%zu full, %zu partial, %zu released (%zu partially), 0 quarantined\n", + stats.n_total[AccessDensityPrediction::kDense].raw_num(), + stats.n_full[AccessDensityPrediction::kDense].raw_num(), + stats.n_partial[AccessDensityPrediction::kDense].raw_num(), + stats.n_released[AccessDensityPrediction::kDense].raw_num(), + stats.n_partial_released[AccessDensityPrediction::kDense].raw_num()); + + out.printf("HugePageFiller: %zu pages free in %zu hugepages, %.4f free\n", + free_pages().raw_num(), size().raw_num(), + safe_div(free_pages(), size().in_pages())); - ASSERT(free_pages() <= n_nonfull.in_pages()); - out->printf("HugePageFiller: among non-fulls, %.4f free\n", - safe_div(free_pages(), n_nonfull.in_pages())); + const HugeLength n_nonfull = + stats.n_partial[AccessDensityPrediction::kPredictionCounts] + + stats.n_partial_released[AccessDensityPrediction::kPredictionCounts]; + TC_ASSERT_LE(free_pages(), n_nonfull.in_pages()); + out.printf("HugePageFiller: among non-fulls, %.4f free\n", + safe_div(free_pages(), n_nonfull.in_pages())); - out->printf( + out.printf( "HugePageFiller: %zu used pages in subreleased hugepages (%zu of them in " "partially released)\n", used_pages_in_any_subreleased().raw_num(), used_pages_in_partial_released().raw_num()); - out->printf( + out.printf( "HugePageFiller: %zu hugepages partially released, %.4f released\n", - nrel.raw_num(), safe_div(unmapped_pages(), nrel.in_pages())); - out->printf("HugePageFiller: %.4f of used pages hugepageable\n", - hugepage_frac()); + stats.n_released[AccessDensityPrediction::kPredictionCounts].raw_num(), + safe_div(unmapped_pages(), + stats.n_released[AccessDensityPrediction::kPredictionCounts] + .in_pages())); + out.printf("HugePageFiller: %.4f of used pages hugepageable\n", + hugepage_frac()); // Subrelease - out->printf( + out.printf( "HugePageFiller: Since startup, %zu pages subreleased, %zu hugepages " "broken, (%zu pages, %zu hugepages due to reaching tcmalloc limit)\n", 
subrelease_stats_.total_pages_subreleased.raw_num(), @@ -1866,130 +1713,225 @@ inline void HugePageFiller::Print(Printer* out, // Compute some histograms of fullness. using huge_page_filler_internal::UsageInfo; UsageInfo usage; - regular_alloc_.Iter( - [&](const TrackerType* pt) { usage.Record(pt, UsageInfo::kRegular); }, 0); + const double now = clock_.now(); + const double frequency = clock_.freq(); + PageFlags pageflags; donated_alloc_.Iter( - [&](const TrackerType* pt) { usage.Record(pt, UsageInfo::kDonated); }, 0); - if (partial_rerelease_ == FillerPartialRerelease::Retain) { - regular_alloc_partial_released_.Iter( - [&](const TrackerType* pt) { - usage.Record(pt, UsageInfo::kPartialReleased); - }, - 0); - } else { - ASSERT(regular_alloc_partial_released_.empty()); - ASSERT(n_used_partial_released_.raw_num() == 0); - } - regular_alloc_released_.Iter( - [&](const TrackerType* pt) { usage.Record(pt, UsageInfo::kReleased); }, + [&](const TrackerType& pt) { + usage.Record(pt, pageflags, UsageInfo::kDonated, now, frequency); + }, 0); + regular_alloc_[AccessDensityPrediction::kSparse].Iter( + [&](const TrackerType& pt) { + usage.Record(pt, pageflags, UsageInfo::kSparseRegular, now, frequency); + }, + 0); + regular_alloc_[AccessDensityPrediction::kDense].Iter( + [&](const TrackerType& pt) { + usage.Record(pt, pageflags, UsageInfo::kDenseRegular, now, frequency); + }, + 0); + regular_alloc_partial_released_[AccessDensityPrediction::kSparse].Iter( + [&](const TrackerType& pt) { + usage.Record(pt, pageflags, UsageInfo::kSparsePartialReleased, now, + frequency); + }, + 0); + regular_alloc_partial_released_[AccessDensityPrediction::kDense].Iter( + [&](const TrackerType& pt) { + usage.Record(pt, pageflags, UsageInfo::kDensePartialReleased, now, + frequency); + }, + 0); + regular_alloc_released_[AccessDensityPrediction::kSparse].Iter( + [&](const TrackerType& pt) { + usage.Record(pt, pageflags, UsageInfo::kSparseReleased, now, frequency); + }, + 0); + regular_alloc_released_[AccessDensityPrediction::kDense].Iter( + [&](const TrackerType& pt) { + usage.Record(pt, pageflags, UsageInfo::kDenseReleased, now, frequency); + }, + 0); + + out.printf( + "HugePageFiller: %zu hugepages became full after being previously " + "released, " + "out of which %zu pages are hugepage backed.\n", + previously_released_huge_pages().raw_num(), + usage.HugepageBackedPreviouslyReleased()); - out->printf("\n"); - out->printf("HugePageFiller: fullness histograms\n"); + out.printf("\n"); + out.printf("HugePageFiller: fullness histograms\n"); usage.Print(out); - out->printf("\n"); - fillerstats_tracker_.Print(out); + out.printf("\n"); + fillerstats_tracker_.Print(out, "HugePageFiller"); } template -inline void HugePageFiller::PrintInPbtxt(PbtxtRegion* hpaa) const { - HugeLength nrel = - regular_alloc_released_.size() + regular_alloc_partial_released_.size(); - HugeLength nfull = NHugePages(0); +inline void HugePageFiller::PrintAllocStatsInPbtxt( + absl::string_view field, PbtxtRegion& hpaa, + const HugePageFillerStats& stats, AccessDensityPrediction count) const { + TC_ASSERT_LT(count, AccessDensityPrediction::kPredictionCounts); + PbtxtRegion alloc_region = hpaa.CreateSubRegion(field); + alloc_region.PrintI64("full_huge_pages", stats.n_full[count].raw_num()); + alloc_region.PrintI64("partial_huge_pages", stats.n_partial[count].raw_num()); + alloc_region.PrintI64("released_huge_pages", + stats.n_released[count].raw_num()); + alloc_region.PrintI64("partially_released_huge_pages", + stats.n_partial_released[count].raw_num()); +} 
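A worked example, with made-up hugepage counts, of the identities GetStats() above maintains and that PrintAllocStatsInPbtxt() reports per density; it only restates the arithmetic already in the code.

#include <cassert>

int main() {
  // Hypothetical sparse-density hugepage counts.
  const int n_regular = 10;          // hugepages in regular_alloc_
  const int n_donated = 2;           // hugepages in donated_alloc_
  const int n_fully_released = 3;    // regular_alloc_released_
  const int n_partial_released = 1;  // regular_alloc_partial_released_
  const int n_full = 6;              // regular hugepages with no free range

  const int n_released = n_fully_released + n_partial_released;
  const int n_total = n_regular + n_donated + n_released;
  const int n_partial = n_total - n_released - n_full;

  assert(n_released == 4);
  assert(n_total == 16);
  assert(n_partial == 6);  // includes the donated hugepages
  return 0;
}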
+ +template +inline void HugePageFiller::PrintInPbtxt(PbtxtRegion& hpaa) const { + const HugePageFillerStats stats = GetStats(); - // note kChunks, not kNumLists here--we're iterating *full* lists. - for (size_t chunk = 0; chunk < kChunks; ++chunk) { - nfull += NHugePages( - regular_alloc_[ListFor(/*longest=*/Length(0), chunk)].length()); - } // A donated alloc full list is impossible because it would have never been // donated in the first place. (It's an even hugepage.) - ASSERT(donated_alloc_[0].empty()); + TC_ASSERT(donated_alloc_[0].empty()); // Evaluate a/b, avoiding division by zero const auto safe_div = [](Length a, Length b) { - return b == Length(0) ? 0 + return b == Length(0) ? 0. : static_cast(a.raw_num()) / static_cast(b.raw_num()); }; - const HugeLength n_partial = size() - nrel - nfull; - hpaa->PrintI64("filler_full_huge_pages", nfull.raw_num()); - hpaa->PrintI64("filler_partial_huge_pages", n_partial.raw_num()); - hpaa->PrintI64("filler_released_huge_pages", nrel.raw_num()); - hpaa->PrintI64("filler_partially_released_huge_pages", - regular_alloc_partial_released_.size().raw_num()); - hpaa->PrintI64("filler_free_pages", free_pages().raw_num()); - hpaa->PrintI64("filler_used_pages_in_subreleased", - used_pages_in_any_subreleased().raw_num()); - hpaa->PrintI64("filler_used_pages_in_partial_released", - used_pages_in_partial_released().raw_num()); - hpaa->PrintI64( + + hpaa.PrintI64( + "filler_full_huge_pages", + stats.n_full[AccessDensityPrediction::kPredictionCounts].raw_num()); + hpaa.PrintI64( + "filler_partial_huge_pages", + stats.n_partial[AccessDensityPrediction::kPredictionCounts].raw_num()); + hpaa.PrintI64( + "filler_released_huge_pages", + stats.n_released[AccessDensityPrediction::kPredictionCounts].raw_num()); + hpaa.PrintI64( + "filler_partially_released_huge_pages", + stats.n_partial_released[AccessDensityPrediction::kPredictionCounts] + .raw_num()); + + PrintAllocStatsInPbtxt("filler_sparsely_accessed_alloc_stats", hpaa, stats, + AccessDensityPrediction::kSparse); + PrintAllocStatsInPbtxt("filler_densely_accessed_alloc_stats", hpaa, stats, + AccessDensityPrediction::kDense); + + hpaa.PrintI64("filler_free_pages", free_pages().raw_num()); + hpaa.PrintI64("filler_used_pages_in_subreleased", + used_pages_in_any_subreleased().raw_num()); + hpaa.PrintI64("filler_used_pages_in_partial_released", + used_pages_in_partial_released().raw_num()); + hpaa.PrintI64( "filler_unmapped_bytes", - static_cast(nrel.raw_num() * - safe_div(unmapped_pages(), nrel.in_pages()))); - hpaa->PrintI64( + static_cast( + stats.n_released[AccessDensityPrediction::kPredictionCounts] + .raw_num() * + safe_div(unmapped_pages(), + stats.n_released[AccessDensityPrediction::kPredictionCounts] + .in_pages()))); + hpaa.PrintI64( "filler_hugepageable_used_bytes", - static_cast(hugepage_frac() * - static_cast(allocated_.in_bytes()))); - hpaa->PrintI64("filler_num_pages_subreleased", - subrelease_stats_.total_pages_subreleased.raw_num()); - hpaa->PrintI64("filler_num_hugepages_broken", - subrelease_stats_.total_hugepages_broken.raw_num()); - hpaa->PrintI64( + static_cast( + hugepage_frac() * + static_cast( + pages_allocated_[AccessDensityPrediction::kSparse].in_bytes() + + pages_allocated_[AccessDensityPrediction::kDense].in_bytes()))); + hpaa.PrintI64("filler_previously_released_huge_pages", + previously_released_huge_pages().raw_num()); + hpaa.PrintI64("filler_num_pages_subreleased", + subrelease_stats_.total_pages_subreleased.raw_num()); + hpaa.PrintI64("filler_num_hugepages_broken", + 
subrelease_stats_.total_hugepages_broken.raw_num()); + hpaa.PrintI64( "filler_num_pages_subreleased_due_to_limit", subrelease_stats_.total_pages_subreleased_due_to_limit.raw_num()); - hpaa->PrintI64( + hpaa.PrintI64( "filler_num_hugepages_broken_due_to_limit", subrelease_stats_.total_hugepages_broken_due_to_limit.raw_num()); // Compute some histograms of fullness. using huge_page_filler_internal::UsageInfo; UsageInfo usage; - regular_alloc_.Iter( - [&](const TrackerType* pt) { usage.Record(pt, UsageInfo::kRegular); }, 0); + const double now = clock_.now(); + const double frequency = clock_.freq(); + PageFlags pageflags; donated_alloc_.Iter( - [&](const TrackerType* pt) { usage.Record(pt, UsageInfo::kDonated); }, 0); - if (partial_rerelease_ == FillerPartialRerelease::Retain) { - regular_alloc_partial_released_.Iter( - [&](const TrackerType* pt) { - usage.Record(pt, UsageInfo::kPartialReleased); - }, - 0); - } else { - ASSERT(regular_alloc_partial_released_.empty()); - ASSERT(n_used_partial_released_ == Length(0)); - } - regular_alloc_released_.Iter( - [&](const TrackerType* pt) { usage.Record(pt, UsageInfo::kReleased); }, + [&](const TrackerType& pt) { + usage.Record(pt, pageflags, UsageInfo::kDonated, now, frequency); + }, + 0); + regular_alloc_[AccessDensityPrediction::kSparse].Iter( + [&](const TrackerType& pt) { + usage.Record(pt, pageflags, UsageInfo::kSparseRegular, now, frequency); + }, + 0); + regular_alloc_[AccessDensityPrediction::kDense].Iter( + [&](const TrackerType& pt) { + usage.Record(pt, pageflags, UsageInfo::kDenseRegular, now, frequency); + }, + 0); + regular_alloc_partial_released_[AccessDensityPrediction::kSparse].Iter( + [&](const TrackerType& pt) { + usage.Record(pt, pageflags, UsageInfo::kSparsePartialReleased, now, + frequency); + }, + 0); + regular_alloc_partial_released_[AccessDensityPrediction::kDense].Iter( + [&](const TrackerType& pt) { + usage.Record(pt, pageflags, UsageInfo::kDensePartialReleased, now, + frequency); + }, + 0); + regular_alloc_released_[AccessDensityPrediction::kSparse].Iter( + [&](const TrackerType& pt) { + usage.Record(pt, pageflags, UsageInfo::kSparseReleased, now, frequency); + }, + 0); + regular_alloc_released_[AccessDensityPrediction::kDense].Iter( + [&](const TrackerType& pt) { + usage.Record(pt, pageflags, UsageInfo::kDenseReleased, now, frequency); + }, 0); + hpaa.PrintI64("filler_previously_released_backed_huge_pages", + usage.HugepageBackedPreviouslyReleased()); usage.Print(hpaa); - - fillerstats_tracker_.PrintInPbtxt(hpaa); + fillerstats_tracker_.PrintSubreleaseStatsInPbtxt(hpaa, + "filler_skipped_subrelease"); + fillerstats_tracker_.PrintTimeseriesStatsInPbtxt(hpaa, + "filler_stats_timeseries"); } template inline void HugePageFiller::UpdateFillerStatsTracker() { - StatsTrackerType::FillerStats stats; - stats.num_pages = allocated_; + StatsTrackerType::SubreleaseStats stats; + stats.num_pages = pages_allocated(); stats.free_pages = free_pages(); stats.unmapped_pages = unmapped_pages(); stats.used_pages_in_subreleased_huge_pages = - n_used_partial_released_ + n_used_released_; - stats.huge_pages[StatsTrackerType::kRegular] = regular_alloc_.size(); + n_used_released_[AccessDensityPrediction::kDense] + + n_used_released_[AccessDensityPrediction::kSparse] + + n_used_partial_released_[AccessDensityPrediction::kDense] + + n_used_partial_released_[AccessDensityPrediction::kSparse]; stats.huge_pages[StatsTrackerType::kDonated] = donated_alloc_.size(); - stats.huge_pages[StatsTrackerType::kPartialReleased] = - 
regular_alloc_partial_released_.size(); - stats.huge_pages[StatsTrackerType::kReleased] = - regular_alloc_released_.size(); + for (const AccessDensityPrediction type : + {AccessDensityPrediction::kDense, AccessDensityPrediction::kSparse}) { + stats.huge_pages[StatsTrackerType::kRegular] += regular_alloc_[type].size(); + stats.huge_pages[StatsTrackerType::kPartialReleased] += + regular_alloc_partial_released_[type].size(); + stats.huge_pages[StatsTrackerType::kReleased] += + regular_alloc_released_[type].size(); + } stats.num_pages_subreleased = subrelease_stats_.num_pages_subreleased; + stats.num_partial_alloc_pages_subreleased = + subrelease_stats_.num_partial_alloc_pages_subreleased; stats.num_hugepages_broken = subrelease_stats_.num_hugepages_broken; fillerstats_tracker_.Report(stats); subrelease_stats_.reset(); } template -inline size_t HugePageFiller::IndexFor(TrackerType* pt) { - ASSERT(!pt->empty()); +inline size_t HugePageFiller::IndexFor(TrackerType* pt) const { + TC_ASSERT(!pt->empty()); // Prefer to allocate from hugepages with many allocations already present; // spaced logarithmically. const size_t na = pt->nallocs(); @@ -1998,52 +1940,69 @@ inline size_t HugePageFiller::IndexFor(TrackerType* pt) { const size_t neg_ceil_log = __builtin_clzl(2 * na - 1); // We want the same spread as neg_ceil_log, but spread over [0, - // kChunks) (clamped at the left edge) instead of [0, 64). So subtract off - // the difference (computed by forcing na=1 to kChunks - 1.) + // kChunks) (clamped at the left edge) instead of [0, 64). So + // subtract off the difference (computed by forcing na=1 to + // kChunks - 1.) const size_t kOffset = __builtin_clzl(1) - (kChunks - 1); const size_t i = std::max(neg_ceil_log, kOffset) - kOffset; - ASSERT(i < kChunks); + TC_ASSERT_LT(i, kChunks); return i; } template -inline size_t HugePageFiller::ListFor(const Length longest, - const size_t chunk) { - ASSERT(chunk < kChunks); - ASSERT(longest < kPagesPerHugePage); - return longest.raw_num() * kChunks + chunk; +inline size_t HugePageFiller::ListFor( + const Length longest, const size_t chunk, + const AccessDensityPrediction density, size_t nallocs) const { + TC_ASSERT_LT(chunk, kChunks); + if (ABSL_PREDICT_TRUE( + density == AccessDensityPrediction::kSparse || + dense_tracker_type_ == + HugePageFillerDenseTrackerType::kLongestFreeRangeAndChunks)) { + TC_ASSERT_LT(longest, kPagesPerHugePage); + return longest.raw_num() * kChunks + chunk; + } + TC_ASSERT(density == AccessDensityPrediction::kDense); + TC_ASSERT(dense_tracker_type_ == + HugePageFillerDenseTrackerType::kSpansAllocated); + TC_ASSERT_LE(nallocs, kPagesPerHugePage.raw_num()); + // For the dense tracker with hugepages sorted on allocs, the hugepages are + // placed only in lists that are multiples of kChunks. The in-between lists + // are empty. + return (kPagesPerHugePage.raw_num() - nallocs) * kChunks + chunk; } template inline void HugePageFiller::RemoveFromFillerList(TrackerType* pt) { Length longest = pt->longest_free_range(); - ASSERT(longest < kPagesPerHugePage); + TC_ASSERT_LT(longest, kPagesPerHugePage); if (pt->donated()) { donated_alloc_.Remove(pt, longest.raw_num()); + return; + } + + const AccessDensityPrediction type = pt->HasDenseSpans() + ? 
AccessDensityPrediction::kDense + : AccessDensityPrediction::kSparse; + size_t i = ListFor(longest, IndexFor(pt), type, pt->nallocs()); + + if (!pt->released()) { + regular_alloc_[type].Remove(pt, i); + } else if (pt->free_pages() <= pt->released_pages()) { + regular_alloc_released_[type].Remove(pt, i); + TC_ASSERT_GE(n_used_released_[type], pt->used_pages()); + n_used_released_[type] -= pt->used_pages(); } else { - size_t chunk = IndexFor(pt); - size_t i = ListFor(longest, chunk); - if (!pt->released()) { - regular_alloc_.Remove(pt, i); - } else if (partial_rerelease_ == FillerPartialRerelease::Return || - pt->free_pages() <= pt->released_pages()) { - regular_alloc_released_.Remove(pt, i); - ASSERT(n_used_released_ >= pt->used_pages()); - n_used_released_ -= pt->used_pages(); - } else { - regular_alloc_partial_released_.Remove(pt, i); - ASSERT(n_used_partial_released_ >= pt->used_pages()); - n_used_partial_released_ -= pt->used_pages(); - } + regular_alloc_partial_released_[type].Remove(pt, i); + TC_ASSERT_GE(n_used_partial_released_[type], pt->used_pages()); + n_used_partial_released_[type] -= pt->used_pages(); } } template inline void HugePageFiller::AddToFillerList(TrackerType* pt) { - size_t chunk = IndexFor(pt); Length longest = pt->longest_free_range(); - ASSERT(longest < kPagesPerHugePage); + TC_ASSERT_LT(longest, kPagesPerHugePage); // Once a donated alloc is used in any way, it degenerates into being a // regular alloc. This allows the algorithm to keep using it (we had to be @@ -2051,27 +2010,29 @@ inline void HugePageFiller::AddToFillerList(TrackerType* pt) { // donated allocs. pt->set_donated(false); - size_t i = ListFor(longest, chunk); + const AccessDensityPrediction type = pt->HasDenseSpans() + ? AccessDensityPrediction::kDense + : AccessDensityPrediction::kSparse; + size_t i = ListFor(longest, IndexFor(pt), type, pt->nallocs()); + if (!pt->released()) { - regular_alloc_.Add(pt, i); - } else if (partial_rerelease_ == FillerPartialRerelease::Return || - pt->free_pages() == pt->released_pages()) { - regular_alloc_released_.Add(pt, i); - n_used_released_ += pt->used_pages(); + regular_alloc_[type].Add(pt, i); + } else if (pt->free_pages() <= pt->released_pages()) { + regular_alloc_released_[type].Add(pt, i); + n_used_released_[type] += pt->used_pages(); } else { - ASSERT(partial_rerelease_ == FillerPartialRerelease::Retain); - regular_alloc_partial_released_.Add(pt, i); - n_used_partial_released_ += pt->used_pages(); + regular_alloc_partial_released_[type].Add(pt, i); + n_used_partial_released_[type] += pt->used_pages(); } } template inline void HugePageFiller::DonateToFillerList(TrackerType* pt) { Length longest = pt->longest_free_range(); - ASSERT(longest < kPagesPerHugePage); + TC_ASSERT_LT(longest, kPagesPerHugePage); // We should never be donating already-released trackers! - ASSERT(!pt->released()); + TC_ASSERT(!pt->released()); pt->set_donated(true); donated_alloc_.Add(pt, longest.raw_num()); @@ -2082,24 +2043,33 @@ inline double HugePageFiller::hugepage_frac() const { // How many of our used pages are on non-huge pages? Since // everything on a released hugepage is either used or released, // just the difference: - const Length nrel = regular_alloc_released_.size().in_pages(); const Length used = used_pages(); - const Length unmapped = unmapped_pages(); - ASSERT(n_used_partial_released_ <= - regular_alloc_partial_released_.size().in_pages()); - const Length used_on_rel = (nrel >= unmapped ? 
nrel - unmapped : Length(0)) + - n_used_partial_released_; - ASSERT(used >= used_on_rel); + const Length used_on_rel = used_pages_in_any_subreleased(); + TC_ASSERT_GE(used, used_on_rel); const Length used_on_huge = used - used_on_rel; const Length denom = used > Length(0) ? used : Length(1); const double ret = static_cast(used_on_huge.raw_num()) / denom.raw_num(); - ASSERT(ret >= 0); - ASSERT(ret <= 1); + TC_ASSERT_GE(ret, 0); + TC_ASSERT_LE(ret, 1); return std::clamp(ret, 0, 1); } +template +template +void HugePageFiller::ForEachHugePage(const F& func) { + donated_alloc_.Iter(func, 0); + regular_alloc_[AccessDensityPrediction::kSparse].Iter(func, 0); + regular_alloc_[AccessDensityPrediction::kDense].Iter(func, 0); + regular_alloc_partial_released_[AccessDensityPrediction::kSparse].Iter(func, + 0); + regular_alloc_partial_released_[AccessDensityPrediction::kDense].Iter(func, + 0); + regular_alloc_released_[AccessDensityPrediction::kSparse].Iter(func, 0); + regular_alloc_released_[AccessDensityPrediction::kDense].Iter(func, 0); +} + // Helper for stat functions. template inline Length HugePageFiller::free_pages() const { diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_page_filler_fuzz.cc b/contrib/libs/tcmalloc/tcmalloc/huge_page_filler_fuzz.cc new file mode 100644 index 000000000000..9c72b140acba --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/huge_page_filler_fuzz.cc @@ -0,0 +1,450 @@ +// Copyright 2022 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include + +#include "fuzztest/fuzztest.h" +#include "absl/base/attributes.h" +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" +#include "absl/log/check.h" +#include "absl/time/time.h" +#include "tcmalloc/common.h" +#include "tcmalloc/huge_cache.h" +#include "tcmalloc/huge_page_filler.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/allocation_guard.h" +#include "tcmalloc/internal/clock.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/pages.h" +#include "tcmalloc/span.h" +#include "tcmalloc/stats.h" + +namespace tcmalloc::tcmalloc_internal { +namespace { + +// As we read the fuzzer input, we update these variables to control global +// state. 
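A minimal sketch of the fake-clock injection the fuzzer relies on via the globals declared just below; SketchClock and the names here are illustrative stand-ins, not tcmalloc's real Clock type.

#include <cstdint>

// Same shape as the clock the filler is constructed with: two function
// pointers that the test can point at a manually advanced time source, so an
// "advance clock" op moves time forward deterministically.
struct SketchClock {
  int64_t (*now)();
  double (*freq)();
};

int64_t g_fake_now = 0;                // advanced by the fuzzer's clock op
int64_t FakeNow() { return g_fake_now; }
double FakeFreq() { return 1 << 10; }  // 1024 ticks per second, as below

SketchClock MakeFakeClock() { return {FakeNow, FakeFreq}; }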
+int64_t fake_clock = 0; +bool unback_success = true; + +int64_t mock_clock() { return fake_clock; } + +double freq() { return 1 << 10; } + +absl::flat_hash_set& ReleasedPages() { + static auto* set = new absl::flat_hash_set(); + return *set; +} + +class MockUnback final : public MemoryModifyFunction { + public: + [[nodiscard]] bool operator()(Range r) override { + if (!unback_success) { + return false; + } + + absl::flat_hash_set& released_set = ReleasedPages(); + + PageId end = r.p + r.n; + for (; r.p != end; ++r.p) { + released_set.insert(r.p); + } + + return true; + } +}; + +void FuzzFiller(const std::string& s) { + const char* data = s.data(); + size_t size = s.size(); + + // TODO(b/271282540): Strongly type these parameters from fuzztest. + constexpr int kInitBytes = 3; + if (size <= kInitBytes || size > 100000) { + // size <= kInitBytes for needing some entropy to initialize the filler + // with. + // + // size > 100000 for avoiding overly large inputs given we do extra + // checking. + return; + } + + // Reset global state. + MockUnback unback; + fake_clock = 0; + unback_success = true; + absl::flat_hash_set& released_set = ReleasedPages(); + released_set.clear(); + // To avoid reentrancy during unback, reserve space in released_set. + // We have at most size/5 allocations, for at most kPagesPerHugePage pages + // each, that we can track the released status of. + // + // TODO(b/73749855): Releasing the pageheap_lock during ReleaseFree will + // eliminate the need for this. + released_set.reserve(kPagesPerHugePage.raw_num() * size / 5); + + // We interpret data as a small DSL for exploring the state space of + // HugePageFiller. + // + // [0] - used for choosing dense tracker type. + // [1] - (available) + // [2] - (available) + // + // Afterwards, we read 5 bytes at a time until the buffer is exhausted. + // [i + 0] - Specifies an operation to perform on the filler (allocate, + // deallocate, release memory, gather stats, etc.) + // [i + 1, i + 4] - Specifies an integer. We use this as a source of + // deterministic entropy to allow inputs to be replayed. + // For example, this input can provide a Length to + // allocate, or the index of the previous allocation to + // deallocate. + const HugePageFillerDenseTrackerType dense_tracker_type = + static_cast(data[0]) >= 128 + ? HugePageFillerDenseTrackerType::kLongestFreeRangeAndChunks + : HugePageFillerDenseTrackerType::kSpansAllocated; + data += kInitBytes; + size -= kInitBytes; + + HugePageFiller filler(Clock{.now = mock_clock, .freq = freq}, + dense_tracker_type, unback, unback); + + std::vector trackers; + absl::flat_hash_map> allocs; + + // Running counter to allocate pseudo-random addresses + size_t next_hugepage = 1; + + for (size_t i = 0; i + 5 <= size; i += 5) { + const uint8_t op = data[i]; + uint32_t value; + memcpy(&value, &data[i + 1], sizeof(value)); + + switch (op & 0xF) { + case 0: { + // Allocate. We divide up our random value by: + // + // value[0:15] - We choose a Length to allocate. + // value[16:31] - We select num_to_objects. + Length n(std::clamp(value & 0xFFFF, 1, + kPagesPerHugePage.raw_num() - 1)); + AccessDensityPrediction density; + const uint32_t lval = (value >> 16); + // Choose many objects if the last bit is 1. + if (lval & 1) { + density = AccessDensityPrediction::kDense; + } else { + // We need to choose few objects, so only top four bits are used. + density = AccessDensityPrediction::kSparse; + } + size_t num_objects = std::max(lval >> 1, 1); + + // Truncate to single object for larger allocations. 
This ensures that + // we always allocate few-object spans from donations. + if (n > kPagesPerHugePage / 2) { + num_objects = 1; + density = AccessDensityPrediction::kSparse; + } + if (dense_tracker_type == + HugePageFillerDenseTrackerType::kSpansAllocated && + density == AccessDensityPrediction::kDense) { + n = Length(1); + } + + SpanAllocInfo alloc_info = {.objects_per_span = num_objects, + .density = density}; + absl::flat_hash_set& released_set = ReleasedPages(); + + CHECK_EQ(filler.size().raw_num(), trackers.size()); + CHECK_EQ(filler.unmapped_pages().raw_num(), released_set.size()); + + HugePageFiller::TryGetResult result; + { + PageHeapSpinLockHolder l; + result = filler.TryGet(n, alloc_info); + } + + if (result.pt == nullptr) { + // Failed to allocate. Create a new huge page. + // + // Donated pages do not necessarily have to have a particular size, + // since this may be (kPagesPerHugePage/2,kPagesPerHugePage) in size + // *or* the tail of an allocation >PagesPerHugePage. + // + // Since small objects are likely to be found, we model those tail + // donations separately. + const bool donated = n > kPagesPerHugePage / 2; + result.pt = new PageTracker(HugePage{.pn = next_hugepage}, donated, + fake_clock); + next_hugepage++; + { + PageHeapSpinLockHolder l; + + result.page = result.pt->Get(n).page; + filler.Contribute(result.pt, donated, alloc_info); + } + + trackers.push_back(result.pt); + } + + // We have now successfully allocated. Record the alloc and clear any + // released bits. + for (PageId p = result.page, end = p + n; p != end; ++p) { + released_set.erase(p); + } + + allocs[result.pt].push_back({result.page, n}); + + CHECK_EQ(filler.size().raw_num(), trackers.size()); + CHECK_EQ(filler.unmapped_pages().raw_num(), released_set.size()); + + break; + } + case 1: { + // Deallocate. + // + // value[0:15] - Index of the huge page (from trackers) to select + // value[16:31] - Index of the allocation (on pt) to select + if (trackers.empty()) { + break; + } + + const size_t lo = std::min(value & 0xFFFF, trackers.size() - 1); + PageTracker* pt = trackers[lo]; + + CHECK(!allocs[pt].empty()); + const size_t hi = std::min(value >> 16, allocs[pt].size() - 1); + Range alloc = allocs[pt][hi]; + + // Remove the allocation. + std::swap(allocs[pt][hi], allocs[pt].back()); + allocs[pt].resize(allocs[pt].size() - 1); + bool last_alloc = allocs[pt].empty(); + if (last_alloc) { + allocs.erase(pt); + std::swap(trackers[lo], trackers.back()); + trackers.resize(trackers.size() - 1); + } + + PageTracker* ret; + { + PageHeapSpinLockHolder l; + ret = filler.Put(pt, alloc); + } + CHECK_EQ(ret != nullptr, last_alloc); + absl::flat_hash_set& released_set = ReleasedPages(); + if (ret) { + // Clear released_set, since the page has become free. 
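// Once the last allocation is gone, Put() hands the tracker back and the
// whole hugepage leaves the filler, so any of its pages recorded by the mock
// unback are forgotten here; otherwise the
// filler.unmapped_pages() == released_set.size() check below would fail.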
+ HugePage hp = ret->location(); + for (PageId p = hp.first_page(), + end = hp.first_page() + kPagesPerHugePage; + p != end; ++p) { + released_set.erase(p); + } + delete ret; + } + + CHECK_EQ(filler.size().raw_num(), trackers.size()); + CHECK_EQ(filler.unmapped_pages().raw_num(), released_set.size()); + + break; + } + case 2: { + // Release + // + // value[0] - Whether are trying to apply TCMalloc's memory limits + // value[1] - reserved + // If using peak interval: + // value[2:9] - Peak interval for skip subrelease + // value[10:31]- Number of pages to try to release + // If not using peak interval: + // value[2:9] - Short interval for skip subrelease + // value[10:17]- Long interval for skip subrelease + // value[18:29]- Number of pages to try to release + // value[30] - Whether we release all free pages from partial allocs. + // value[31] - Reserved. + bool hit_limit = value & 0x1; + SkipSubreleaseIntervals skip_subrelease_intervals; + uint32_t short_interval_s = (value >> 2) & 0xFF; + uint32_t long_interval_s = (value >> 10) & 0xFF; + if (short_interval_s > long_interval_s) { + std::swap(short_interval_s, long_interval_s); + } + skip_subrelease_intervals.short_interval = + absl::Seconds(short_interval_s); + skip_subrelease_intervals.long_interval = + absl::Seconds(long_interval_s); + value >>= 18; + Length desired(value & 0xFFF); + const bool release_partial_allocs = (value >> 12) & 0x1; + size_t to_release_from_partial_allocs; + + Length released; + { + PageHeapSpinLockHolder l; + to_release_from_partial_allocs = + HugePageFiller::kPartialAllocPagesRelease * + filler.FreePagesInPartialAllocs().raw_num(); + released = filler.ReleasePages(desired, skip_subrelease_intervals, + release_partial_allocs, hit_limit); + } + + // We should be able to release all the free pages in partial allocs if + // skip-subrelease is disabled. + if (release_partial_allocs && !hit_limit && + !skip_subrelease_intervals.SkipSubreleaseEnabled() && + unback_success) { + CHECK_GE(released.raw_num(), to_release_from_partial_allocs); + } + break; + } + case 3: { + // Advance clock + // + // value[0:31] - Advances clock by this amount in arbitrary units. + fake_clock += value; + break; + } + case 4: { + // Toggle unback, simulating madvise potentially failing or succeeding. + // + // value is unused. + unback_success = !unback_success; + break; + } + case 5: { + // Gather stats + // + // value is unused. + std::string s; + s.resize(1 << 20); + Printer p(&s[0], s.size()); + PageHeapSpinLockHolder l; + filler.Print(p, true); + break; + } + case 6: { + // Model a tail from a larger allocation. The tail can have any size + // [1,kPagesPerHugePage). + // + // value[0:15] - We choose a Length to allocate. + // value[16:31] - Unused. + const Length n(std::clamp(value & 0xFFFF, 1, + kPagesPerHugePage.raw_num() - 1)); + absl::flat_hash_set& released_set = ReleasedPages(); + + auto* pt = new PageTracker(HugePage{.pn = next_hugepage}, + /*was_donated=*/true, fake_clock); + next_hugepage++; + PageId start; + { + PageHeapSpinLockHolder l; + + start = pt->Get(n).page; + filler.Contribute(pt, /*donated=*/true, + {1, AccessDensityPrediction::kSparse}); + } + + trackers.push_back(pt); + + // We have now successfully allocated. Record the alloc and clear any + // released bits. 
+ for (PageId p = start, end = p + n; p != end; ++p) { + released_set.erase(p); + } + + allocs[pt].push_back({start, n}); + + CHECK_EQ(filler.size().raw_num(), trackers.size()); + CHECK_EQ(filler.unmapped_pages().raw_num(), released_set.size()); + break; + } + case 7: { + // Memory limit hit. Release. + // + // value[0:15]- Number of pages to try to release + Length desired(value & 0xFF); + + Length released; + const Length free = filler.free_pages(); + { + PageHeapSpinLockHolder l; + released = filler.ReleasePages(desired, SkipSubreleaseIntervals{}, + /*release_partial_alloc_pages=*/false, + /*hit_limit=*/true); + } + const Length expected = + unback_success ? std::min(free, desired) : Length(0); + CHECK_GE(released.raw_num(), expected.raw_num()); + break; + } + case 8: { + // Gather stats in pbtxt format. + // + // value is unused. + std::string s; + s.resize(1 << 20); + Printer p(&s[0], s.size()); + { + PbtxtRegion region(p, kTop); + PageHeapSpinLockHolder l; + filler.PrintInPbtxt(region); + } + + CHECK_LE(p.SpaceRequired(), s.size()); + s.resize(p.SpaceRequired()); + break; + } + case 9: { + // Gather span stats. + // + // value populates ages' now argument. + SmallSpanStats small; + LargeSpanStats large; + filler.AddSpanStats(&small, &large); + break; + } + } + } + + // Shut down, confirm filler is empty. + CHECK_EQ(ReleasedPages().size(), filler.unmapped_pages().raw_num()); + for (auto& [pt, v] : allocs) { + for (size_t i = 0, n = v.size(); i < n; ++i) { + auto alloc = v[i]; + PageTracker* ret; + { + PageHeapSpinLockHolder l; + ret = filler.Put(pt, alloc); + } + CHECK_EQ(ret != nullptr, i + 1 == n); + } + + delete pt; + } + + CHECK(filler.size() == NHugePages(0)); +} + +FUZZ_TEST(HugePageFillerTest, FuzzFiller) + ; + +} // namespace +} // namespace tcmalloc::tcmalloc_internal diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_page_filler_test.cc b/contrib/libs/tcmalloc/tcmalloc/huge_page_filler_test.cc index 9879d41d7991..c98fadfd1c7a 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_page_filler_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/huge_page_filler_test.cc @@ -14,42 +14,47 @@ #include "tcmalloc/huge_page_filler.h" +#include #include #include -#include +#include #include -#include #include -#include #include #include #include // NOLINT(build/c++11) +#include #include #include #include "gmock/gmock.h" #include "gtest/gtest.h" #include "absl/algorithm/container.h" +#include "absl/base/attributes.h" +#include "absl/base/internal/cycleclock.h" #include "absl/base/internal/sysinfo.h" -#include "absl/base/thread_annotations.h" -#include "absl/container/flat_hash_map.h" +#include "absl/base/macros.h" #include "absl/container/flat_hash_set.h" #include "absl/flags/flag.h" #include "absl/memory/memory.h" -#include "absl/random/bernoulli_distribution.h" #include "absl/random/random.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" +#include "absl/strings/string_view.h" #include "absl/synchronization/blocking_counter.h" #include "absl/synchronization/mutex.h" #include "absl/time/clock.h" #include "absl/time/time.h" -#include "benchmark/benchmark.h" #include "tcmalloc/common.h" +#include "tcmalloc/huge_cache.h" +#include "tcmalloc/huge_page_subrelease.h" #include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/clock.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" #include "tcmalloc/pages.h" +#include "tcmalloc/span.h" #include "tcmalloc/stats.h" using tcmalloc::tcmalloc_internal::Length; @@ -57,12 +62,6 @@ using 
tcmalloc::tcmalloc_internal::Length; ABSL_FLAG(Length, page_tracker_defrag_lim, Length(32), "Max allocation size for defrag test"); -ABSL_FLAG(Length, frag_req_limit, Length(32), - "request size limit for frag test"); -ABSL_FLAG(Length, frag_size, Length(512 * 1024), - "target number of pages for frag test"); -ABSL_FLAG(uint64_t, frag_iters, 10 * 1000 * 1000, "iterations for frag test"); - ABSL_FLAG(double, release_until, 0.01, "fraction of used we target in pageheap"); ABSL_FLAG(uint64_t, bytes, 1024 * 1024 * 1024, "baseline usage"); @@ -160,13 +159,19 @@ class PageTrackerTest : public testing::Test { PageTrackerTest() : // an unlikely magic page huge_(HugePageContaining(reinterpret_cast(0x1abcde200000))), - tracker_(huge_, absl::base_internal::CycleClock::Now()) {} + tracker_(huge_, + /*was_donated=*/false, + absl::base_internal::CycleClock::Now()) {} ~PageTrackerTest() override { mock_.VerifyAndClear(); } struct PAlloc { PageId p; Length n; + SpanAllocInfo span_alloc_info; + + PAlloc(PageId pp, Length nn, SpanAllocInfo s) + : p(pp), n(nn), span_alloc_info(s) {} }; void Mark(PAlloc a, size_t mark) { @@ -179,17 +184,21 @@ class PageTrackerTest : public testing::Test { } } - class MockUnbackInterface { + class MockUnbackInterface final : public MemoryModifyFunction { public: - void Unback(void* p, size_t len) { - CHECK_CONDITION(actual_index_ < kMaxCalls); - actual_[actual_index_] = {p, len}; + [[nodiscard]] bool operator()(Range r) override { + TC_CHECK_LT(actual_index_, ABSL_ARRAYSIZE(actual_)); + actual_[actual_index_].r = r; + TC_CHECK_LT(actual_index_, ABSL_ARRAYSIZE(expected_)); + // Assume expected calls occur and use those return values. + const bool success = expected_[actual_index_].success; ++actual_index_; + return success; } - void Expect(void* p, size_t len) { - CHECK_CONDITION(expected_index_ < kMaxCalls); - expected_[expected_index_] = {p, len}; + void Expect(PageId p, Length len, bool success) { + TC_CHECK_LT(expected_index_, kMaxCalls); + expected_[expected_index_] = {Range(p, len), success}; ++expected_index_; } @@ -197,8 +206,8 @@ class PageTrackerTest : public testing::Test { EXPECT_EQ(expected_index_, actual_index_); for (size_t i = 0, n = std::min(expected_index_, actual_index_); i < n; ++i) { - EXPECT_EQ(expected_[i].ptr, actual_[i].ptr); - EXPECT_EQ(expected_[i].len, actual_[i].len); + EXPECT_EQ(expected_[i].r.p, actual_[i].r.p); + EXPECT_EQ(expected_[i].r.n, actual_[i].r.n); } expected_index_ = 0; actual_index_ = 0; @@ -206,8 +215,8 @@ class PageTrackerTest : public testing::Test { private: struct CallArgs { - void* ptr{nullptr}; - size_t len{0}; + Range r; + bool success = true; }; static constexpr size_t kMaxCalls = 10; @@ -217,12 +226,8 @@ class PageTrackerTest : public testing::Test { size_t actual_index_{0}; }; - static void MockUnback(void* p, size_t len); - - typedef PageTracker TestPageTracker; - // strict because release calls should only happen when we ask - static MockUnbackInterface mock_; + MockUnbackInterface mock_; void Check(PAlloc a, size_t mark) { EXPECT_LE(huge_.first_page(), a.p); @@ -230,55 +235,44 @@ class PageTrackerTest : public testing::Test { size_t end = index + a.n.raw_num(); EXPECT_LE(end, kPagesPerHugePage.raw_num()); for (; index < end; ++index) { - EXPECT_EQ(mark, marks_[index]); + EXPECT_EQ(marks_[index], mark); } } size_t marks_[kPagesPerHugePage.raw_num()]; HugePage huge_; - TestPageTracker tracker_; + PageTracker tracker_; - void ExpectPages(PAlloc a) { - void* ptr = a.p.start_addr(); - size_t bytes = a.n.in_bytes(); - 
mock_.Expect(ptr, bytes); + void ExpectPages(PAlloc a, bool success = true) { + mock_.Expect(a.p, a.n, success); } - PAlloc Get(Length n) { - absl::base_internal::SpinLockHolder l(&pageheap_lock); + PAlloc Get(Length n, SpanAllocInfo span_alloc_info) { + PageHeapSpinLockHolder l; PageId p = tracker_.Get(n).page; - return {p, n}; + return {p, n, span_alloc_info}; } void Put(PAlloc a) { - absl::base_internal::SpinLockHolder l(&pageheap_lock); - tracker_.Put(a.p, a.n); + PageHeapSpinLockHolder l; + tracker_.Put(Range(a.p, a.n)); } Length ReleaseFree() { - absl::base_internal::SpinLockHolder l(&pageheap_lock); - return tracker_.ReleaseFree(); - } - - void MaybeRelease(PAlloc a) { - absl::base_internal::SpinLockHolder l(&pageheap_lock); - tracker_.MaybeRelease(a.p, a.n); + PageHeapSpinLockHolder l; + return tracker_.ReleaseFree(mock_); } }; -void PageTrackerTest::MockUnback(void* p, size_t len) { mock_.Unback(p, len); } - -PageTrackerTest::MockUnbackInterface PageTrackerTest::mock_; - TEST_F(PageTrackerTest, AllocSane) { Length free = kPagesPerHugePage; auto n = Length(1); std::vector allocs; // This should work without fragmentation. while (n <= free) { - ASSERT_LE(n, tracker_.longest_free_range()); - EXPECT_EQ(kPagesPerHugePage - free, tracker_.used_pages()); - EXPECT_EQ(free, tracker_.free_pages()); - PAlloc a = Get(n); + ASSERT_GE(tracker_.longest_free_range(), n); + EXPECT_EQ(tracker_.used_pages(), kPagesPerHugePage - free); + EXPECT_EQ(tracker_.free_pages(), free); + PAlloc a = Get(n, {1, AccessDensityPrediction::kSparse}); Mark(a, n.raw_num()); allocs.push_back(a); free -= n; @@ -293,38 +287,36 @@ TEST_F(PageTrackerTest, AllocSane) { TEST_F(PageTrackerTest, ReleasingReturn) { static const Length kAllocSize = kPagesPerHugePage / 4; - PAlloc a1 = Get(kAllocSize - Length(3)); - PAlloc a2 = Get(kAllocSize); - PAlloc a3 = Get(kAllocSize + Length(1)); - PAlloc a4 = Get(kAllocSize + Length(2)); + SpanAllocInfo info = {1, AccessDensityPrediction::kSparse}; + PAlloc a1 = Get(kAllocSize - Length(3), info); + PAlloc a2 = Get(kAllocSize, info); + PAlloc a3 = Get(kAllocSize + Length(1), info); + PAlloc a4 = Get(kAllocSize + Length(2), info); Put(a2); Put(a4); // We now have a hugepage that looks like [alloced] [free] [alloced] [free]. // The free parts should be released when we mark the hugepage as such, // but not the allocated parts. - ExpectPages(a2); - ExpectPages(a4); + ExpectPages(a2, /*success=*/true); + ExpectPages(a4, /*success=*/true); ReleaseFree(); mock_.VerifyAndClear(); - // Now we return the other parts, and they *should* get released. 
- ExpectPages(a1); - ExpectPages(a3); + EXPECT_EQ(tracker_.released_pages(), a2.n + a4.n); + EXPECT_EQ(tracker_.free_pages(), a2.n + a4.n); - MaybeRelease(a1); Put(a1); - - MaybeRelease(a3); Put(a3); } TEST_F(PageTrackerTest, ReleasingRetain) { static const Length kAllocSize = kPagesPerHugePage / 4; - PAlloc a1 = Get(kAllocSize - Length(3)); - PAlloc a2 = Get(kAllocSize); - PAlloc a3 = Get(kAllocSize + Length(1)); - PAlloc a4 = Get(kAllocSize + Length(2)); + SpanAllocInfo info = {1, AccessDensityPrediction::kSparse}; + PAlloc a1 = Get(kAllocSize - Length(3), info); + PAlloc a2 = Get(kAllocSize, info); + PAlloc a3 = Get(kAllocSize + Length(1), info); + PAlloc a4 = Get(kAllocSize + Length(2), info); Put(a2); Put(a4); @@ -349,9 +341,49 @@ TEST_F(PageTrackerTest, ReleasingRetain) { mock_.VerifyAndClear(); } +TEST_F(PageTrackerTest, ReleasingRetainFailure) { + static const Length kAllocSize = kPagesPerHugePage / 4; + SpanAllocInfo info = {1, AccessDensityPrediction::kSparse}; + PAlloc a1 = Get(kAllocSize - Length(3), info); + PAlloc a2 = Get(kAllocSize, info); + PAlloc a3 = Get(kAllocSize + Length(1), info); + PAlloc a4 = Get(kAllocSize + Length(2), info); + + Put(a2); + Put(a4); + // We now have a hugepage that looks like [alloced] [free] [alloced] [free]. + // The free parts should be released when we mark the hugepage as such if + // successful, but not the allocated parts. + ExpectPages(a2, /*success=*/true); + ExpectPages(a4, /*success=*/false); + ReleaseFree(); + mock_.VerifyAndClear(); + + EXPECT_EQ(tracker_.released_pages(), a2.n); + EXPECT_EQ(tracker_.free_pages(), a2.n + a4.n); + + // Now we return the other parts, and they shouldn't get released. + Put(a1); + Put(a3); + + mock_.VerifyAndClear(); + + // But they will if we ReleaseFree. We attempt to coalesce the deallocation + // of a3/a4. + ExpectPages(a1, /*success=*/true); + ExpectPages(PAlloc{std::min(a3.p, a4.p), a3.n + a4.n, info}, + /*success=*/false); + ReleaseFree(); + mock_.VerifyAndClear(); + + EXPECT_EQ(tracker_.released_pages(), a1.n + a2.n); + EXPECT_EQ(tracker_.free_pages(), a1.n + a2.n + a3.n + a4.n); +} + TEST_F(PageTrackerTest, Defrag) { absl::BitGen rng; const Length N = absl::GetFlag(FLAGS_page_tracker_defrag_lim); + SpanAllocInfo info = {1, AccessDensityPrediction::kSparse}; auto dist = EmpiricalDistribution(N); std::vector allocs; @@ -362,7 +394,7 @@ TEST_F(PageTrackerTest, Defrag) { do { n = Length(dist(rng)); } while (n > tracker_.longest_free_range()); - PAlloc a = Get(n); + PAlloc a = Get(n, info); (absl::Bernoulli(rng, 1.0 / 2) ? allocs : doomed).push_back(a); } @@ -405,7 +437,7 @@ TEST_F(PageTrackerTest, Defrag) { do { n = Length(dist(rng)); } while (n > tracker_.longest_free_range()); - allocs.push_back(Get(n)); + allocs.push_back(Get(n, info)); } } @@ -424,9 +456,6 @@ TEST_F(PageTrackerTest, Defrag) { // We'd like to prety consistently rely on (75% of the time) reasonable // defragmentation (50% of space is fully usable...) // ...but we currently can't hit that mark consistently. - // The situation is worse on ppc with larger huge pages: - // pass rate for test is ~50% at 0.20. Reducing from 0.2 to 0.07. - // TODO(b/127466107) figure out a better solution. 
EXPECT_GE(p25, 0.07); } @@ -439,12 +468,12 @@ TEST_F(PageTrackerTest, Defrag) { printf("Longest free quantiles:\n"); printf("p10: %zu p25: %zu p50: %zu p75: %zu p90: %zu\n", p10.raw_num(), p25.raw_num(), p50.raw_num(), p75.raw_num(), p90.raw_num()); - // Similarly, we'd really like for there usually (p25) to be a space + // Similarly, we'd really like for there usually (p50) to be a space // for a large allocation (N - note that we've cooked the books so that // the page tracker is going to be something like half empty (ish) and N // is small, so that should be doable.) // ...but, of course, it isn't. - EXPECT_GE(p25, Length(4)); + EXPECT_GE(p50, Length(4)); } for (auto a : allocs) { @@ -454,14 +483,13 @@ TEST_F(PageTrackerTest, Defrag) { TEST_F(PageTrackerTest, Stats) { struct Helper { - static void Stat(const TestPageTracker& tracker, + static void Stat(const PageTracker& tracker, std::vector* small_backed, - std::vector* small_unbacked, LargeSpanStats* large, - double* avg_age_backed, double* avg_age_unbacked) { + std::vector* small_unbacked, + LargeSpanStats* large) { SmallSpanStats small; *large = LargeSpanStats(); - PageAgeHistograms ages(absl::base_internal::CycleClock::Now()); - tracker.AddSpanStats(&small, large, &ages); + tracker.AddSpanStats(&small, large); small_backed->clear(); small_unbacked->clear(); for (auto i = Length(0); i < kMaxPages; ++i) { @@ -473,99 +501,77 @@ TEST_F(PageTrackerTest, Stats) { small_unbacked->push_back(i); } } - - *avg_age_backed = ages.GetTotalHistogram(false)->avg_age(); - *avg_age_unbacked = ages.GetTotalHistogram(true)->avg_age(); } }; LargeSpanStats large; std::vector small_backed, small_unbacked; - double avg_age_backed, avg_age_unbacked; - const PageId p = Get(kPagesPerHugePage).p; + SpanAllocInfo info1 = {kPagesPerHugePage.raw_num(), + AccessDensityPrediction::kDense}; + const PageId p = Get(kPagesPerHugePage, info1).p; const PageId end = p + kPagesPerHugePage; PageId next = p; - Put({next, kMaxPages + Length(1)}); + Length n = kMaxPages + Length(1); + SpanAllocInfo info2 = {n.raw_num(), AccessDensityPrediction::kDense}; + Put({next, n, info2}); next += kMaxPages + Length(1); - absl::SleepFor(absl::Milliseconds(10)); - Helper::Stat(tracker_, &small_backed, &small_unbacked, &large, - &avg_age_backed, &avg_age_unbacked); + Helper::Stat(tracker_, &small_backed, &small_unbacked, &large); EXPECT_THAT(small_backed, testing::ElementsAre()); EXPECT_THAT(small_unbacked, testing::ElementsAre()); - EXPECT_EQ(1, large.spans); - EXPECT_EQ(kMaxPages + Length(1), large.normal_pages); - EXPECT_EQ(Length(0), large.returned_pages); - EXPECT_LE(0.01, avg_age_backed); + EXPECT_EQ(large.spans, 1); + EXPECT_EQ(large.normal_pages, kMaxPages + Length(1)); + EXPECT_EQ(large.returned_pages, Length(0)); ++next; - Put({next, Length(1)}); + SpanAllocInfo info3 = {1, AccessDensityPrediction::kSparse}; + Put({next, Length(1), info3}); next += Length(1); - absl::SleepFor(absl::Milliseconds(20)); - Helper::Stat(tracker_, &small_backed, &small_unbacked, &large, - &avg_age_backed, &avg_age_unbacked); + Helper::Stat(tracker_, &small_backed, &small_unbacked, &large); EXPECT_THAT(small_backed, testing::ElementsAre(Length(1))); EXPECT_THAT(small_unbacked, testing::ElementsAre()); - EXPECT_EQ(1, large.spans); - EXPECT_EQ(kMaxPages + Length(1), large.normal_pages); - EXPECT_EQ(Length(0), large.returned_pages); - EXPECT_LE(((kMaxPages + Length(1)).raw_num() * 0.03 + 1 * 0.02) / - (kMaxPages + Length(2)).raw_num(), - avg_age_backed); - EXPECT_EQ(0, avg_age_unbacked); + 
EXPECT_EQ(large.spans, 1); + EXPECT_EQ(large.normal_pages, kMaxPages + Length(1)); + EXPECT_EQ(large.returned_pages, Length(0)); ++next; - Put({next, Length(2)}); + SpanAllocInfo info4 = {2, AccessDensityPrediction::kSparse}; + Put({next, Length(2), info4}); next += Length(2); - absl::SleepFor(absl::Milliseconds(30)); - Helper::Stat(tracker_, &small_backed, &small_unbacked, &large, - &avg_age_backed, &avg_age_unbacked); + Helper::Stat(tracker_, &small_backed, &small_unbacked, &large); EXPECT_THAT(small_backed, testing::ElementsAre(Length(1), Length(2))); EXPECT_THAT(small_unbacked, testing::ElementsAre()); - EXPECT_EQ(1, large.spans); - EXPECT_EQ(kMaxPages + Length(1), large.normal_pages); - EXPECT_EQ(Length(0), large.returned_pages); - EXPECT_LE(((kMaxPages + Length(1)).raw_num() * 0.06 + 1 * 0.05 + 2 * 0.03) / - (kMaxPages + Length(4)).raw_num(), - avg_age_backed); - EXPECT_EQ(0, avg_age_unbacked); + EXPECT_EQ(large.spans, 1); + EXPECT_EQ(large.normal_pages, kMaxPages + Length(1)); + EXPECT_EQ(large.returned_pages, Length(0)); ++next; - Put({next, Length(3)}); + SpanAllocInfo info5 = {3, AccessDensityPrediction::kSparse}; + Put({next, Length(3), info5}); next += Length(3); ASSERT_LE(next, end); - absl::SleepFor(absl::Milliseconds(40)); - Helper::Stat(tracker_, &small_backed, &small_unbacked, &large, - &avg_age_backed, &avg_age_unbacked); + Helper::Stat(tracker_, &small_backed, &small_unbacked, &large); EXPECT_THAT(small_backed, testing::ElementsAre(Length(1), Length(2), Length(3))); EXPECT_THAT(small_unbacked, testing::ElementsAre()); - EXPECT_EQ(1, large.spans); - EXPECT_EQ(kMaxPages + Length(1), large.normal_pages); - EXPECT_EQ(Length(0), large.returned_pages); - EXPECT_LE(((kMaxPages + Length(1)).raw_num() * 0.10 + 1 * 0.09 + 2 * 0.07 + - 3 * 0.04) / - (kMaxPages + Length(7)).raw_num(), - avg_age_backed); - EXPECT_EQ(0, avg_age_unbacked); - - ExpectPages({p, kMaxPages + Length(1)}); - ExpectPages({p + kMaxPages + Length(2), Length(1)}); - ExpectPages({p + kMaxPages + Length(4), Length(2)}); - ExpectPages({p + kMaxPages + Length(7), Length(3)}); + EXPECT_EQ(large.spans, 1); + EXPECT_EQ(large.normal_pages, kMaxPages + Length(1)); + EXPECT_EQ(large.returned_pages, Length(0)); + + n = kMaxPages + Length(1); + ExpectPages({p, n, info2}); + ExpectPages({p + kMaxPages + Length(2), Length(1), info3}); + ExpectPages({p + kMaxPages + Length(4), Length(2), info4}); + ExpectPages({p + kMaxPages + Length(7), Length(3), info5}); EXPECT_EQ(kMaxPages + Length(7), ReleaseFree()); - absl::SleepFor(absl::Milliseconds(100)); - Helper::Stat(tracker_, &small_backed, &small_unbacked, &large, - &avg_age_backed, &avg_age_unbacked); + Helper::Stat(tracker_, &small_backed, &small_unbacked, &large); EXPECT_THAT(small_backed, testing::ElementsAre()); EXPECT_THAT(small_unbacked, testing::ElementsAre(Length(1), Length(2), Length(3))); - EXPECT_EQ(1, large.spans); - EXPECT_EQ(Length(0), large.normal_pages); - EXPECT_EQ(kMaxPages + Length(1), large.returned_pages); - EXPECT_EQ(0, avg_age_backed); - EXPECT_LE(0.1, avg_age_unbacked); + EXPECT_EQ(large.spans, 1); + EXPECT_EQ(large.normal_pages, Length(0)); + EXPECT_EQ(large.returned_pages, kMaxPages + Length(1)); } TEST_F(PageTrackerTest, b151915873) { @@ -585,8 +591,9 @@ TEST_F(PageTrackerTest, b151915873) { std::vector allocs; allocs.reserve(kPagesPerHugePage.raw_num()); + SpanAllocInfo info = {1, AccessDensityPrediction::kSparse}; for (int i = 0; i < kPagesPerHugePage.raw_num(); i++) { - allocs.push_back(Get(Length(1))); + allocs.push_back(Get(Length(1), info)); } 
std::sort(allocs.begin(), allocs.end(), @@ -599,9 +606,8 @@ TEST_F(PageTrackerTest, b151915873) { SmallSpanStats small; LargeSpanStats large; - PageAgeHistograms ages(absl::base_internal::CycleClock::Now()); - tracker_.AddSpanStats(&small, &large, &ages); + tracker_.AddSpanStats(&small, &large); EXPECT_EQ(small.normal_length[1], 1); EXPECT_THAT(0, @@ -609,33 +615,35 @@ TEST_F(PageTrackerTest, b151915873) { &small.normal_length[kMaxPages.raw_num()])); } -class BlockingUnback { +class BlockingUnback final : public MemoryModifyFunction { public: - static void Unback(void* p, size_t len) { + constexpr BlockingUnback() = default; + + [[nodiscard]] bool operator()(Range r) override { if (!mu_) { - return; + return success_; } - if (counter) { - counter->DecrementCount(); + if (counter_) { + counter_->DecrementCount(); } mu_->Lock(); mu_->Unlock(); + return success_; } - static void set_lock(absl::Mutex* mu) { mu_ = mu; } - - static absl::BlockingCounter* counter; + absl::BlockingCounter* counter_ = nullptr; + bool success_ = true; private: static thread_local absl::Mutex* mu_; }; thread_local absl::Mutex* BlockingUnback::mu_ = nullptr; -absl::BlockingCounter* BlockingUnback::counter = nullptr; -class FillerTest : public testing::TestWithParam { +class FillerTest : public testing::TestWithParam< + std::tuple> { protected: // Allow tests to modify the clock used by the cache. static int64_t FakeClock() { return clock_; } @@ -647,12 +655,6 @@ class FillerTest : public testing::TestWithParam { } static void ResetClock() { clock_ = 1234; } - static void Unback(void* p, size_t len) {} - - // Our templating approach lets us directly override certain functions - // and have mocks without virtualization. It's a bit funky but works. - typedef PageTracker FakeTracker; - // We have backing of one word per (normal-sized) page for our "hugepages". std::vector backing_; // This is space efficient enough that we won't bother recycling pages. @@ -660,7 +662,7 @@ class FillerTest : public testing::TestWithParam { intptr_t i = backing_.size(); backing_.resize(i + kPagesPerHugePage.raw_num()); intptr_t addr = i << kPageShift; - CHECK_CONDITION(addr % kHugePageSize == 0); + TC_CHECK_EQ(addr % kHugePageSize, 0); return HugePageContaining(reinterpret_cast(addr)); } @@ -678,21 +680,30 @@ class FillerTest : public testing::TestWithParam { } } - HugePageFiller filler_; + HugePageFiller filler_; + BlockingUnback blocking_unback_; FillerTest() - : filler_(GetParam(), - Clock{.now = FakeClock, .freq = GetFakeClockFrequency}) { + : filler_(Clock{.now = FakeClock, .freq = GetFakeClockFrequency}, + /*dense_tracker_type=*/std::get<0>(GetParam()), + blocking_unback_, blocking_unback_), + dense_tracker_sorted_on_allocs_( + std::get<0>(GetParam()) == + HugePageFillerDenseTrackerType::kSpansAllocated) { ResetClock(); + // Reset success state + blocking_unback_.success_ = true; } - ~FillerTest() override { EXPECT_EQ(NHugePages(0), filler_.size()); } + ~FillerTest() override { EXPECT_EQ(filler_.size(), NHugePages(0)); } struct PAlloc { - FakeTracker* pt; + PageTracker* pt; PageId p; Length n; size_t mark; + SpanAllocInfo span_alloc_info; + bool from_released; }; void Mark(const PAlloc& alloc) { MarkRange(alloc.p, alloc.n, alloc.mark); } @@ -705,36 +716,176 @@ class FillerTest : public testing::TestWithParam { Length total_allocated_{0}; absl::InsecureBitGen gen_; + // We usually choose the number of objects per span at random, but in tests + // where the output is hardcoded, we disable randomization through the + // variable below. 
+ bool randomize_density_ = true; + bool dense_tracker_sorted_on_allocs_ = false; void CheckStats() { - EXPECT_EQ(hp_contained_, filler_.size()); + EXPECT_EQ(filler_.size(), hp_contained_); auto stats = filler_.stats(); const uint64_t freelist_bytes = stats.free_bytes + stats.unmapped_bytes; const uint64_t used_bytes = stats.system_bytes - freelist_bytes; - EXPECT_EQ(total_allocated_.in_bytes(), used_bytes); - EXPECT_EQ((hp_contained_.in_pages() - total_allocated_).in_bytes(), - freelist_bytes); + EXPECT_EQ(used_bytes, total_allocated_.in_bytes()); + EXPECT_EQ(freelist_bytes, + (hp_contained_.in_pages() - total_allocated_).in_bytes()); + } + + PAlloc AllocateWithSpanAllocInfo(Length n, SpanAllocInfo span_alloc_info, + bool donated = false) { + TC_CHECK_LE(n, kPagesPerHugePage); + PAlloc ret = AllocateRaw(n, span_alloc_info, donated); + ret.n = n; + Mark(ret); + CheckStats(); + return ret; + } + + std::vector AllocateVectorWithSpanAllocInfo( + Length n, SpanAllocInfo span_alloc_info, bool donated = false) { + TC_CHECK_LE(n, kPagesPerHugePage); + Length t(0); + std::vector ret; + Length alloc_len = + (dense_tracker_sorted_on_allocs_ && + span_alloc_info.density == AccessDensityPrediction::kDense) + ? Length(1) + : n; + while (t < n) { + ret.push_back(AllocateRaw(alloc_len, span_alloc_info, donated)); + ret.back().n = alloc_len; + Mark(ret.back()); + CheckStats(); + t += alloc_len; + } + return ret; + } + + std::vector AllocateVector(Length n, bool donated = false) { + std::vector ret; + size_t objects = + randomize_density_ ? (1 << absl::Uniform(gen_, 0, 8)) : 1; + AccessDensityPrediction density = + randomize_density_ + ? (absl::Bernoulli(gen_, 0.5) ? AccessDensityPrediction::kSparse + : AccessDensityPrediction::kDense) + : AccessDensityPrediction::kSparse; + + SpanAllocInfo info = {.objects_per_span = objects, .density = density}; + Length alloc_len = (dense_tracker_sorted_on_allocs_ && + density == AccessDensityPrediction::kDense) + ? Length(1) + : n; + Length total_len(0); + while (total_len < n) { + ret.push_back(AllocateRaw(alloc_len, info, donated)); + ret.back().n = alloc_len; + Mark(ret.back()); + CheckStats(); + total_len += alloc_len; + } + return ret; + } + + PAlloc Allocate(Length n, bool donated = false) { + TC_CHECK_LE(n, kPagesPerHugePage); + PAlloc ret; + size_t objects = + randomize_density_ ? (1 << absl::Uniform(gen_, 0, 8)) : 1; + + AccessDensityPrediction density = + randomize_density_ + ? (absl::Bernoulli(gen_, 0.5) ? AccessDensityPrediction::kSparse + : AccessDensityPrediction::kDense) + : AccessDensityPrediction::kSparse; + SpanAllocInfo info = {.objects_per_span = objects, .density = density}; + ret = AllocateRaw(n, info, donated); + ret.n = n; + Mark(ret); + CheckStats(); + return ret; + } + + bool AllReleased(const std::vector& pv) const { + for (const auto& p : pv) { + if (!p.pt->released()) return false; + } + return true; + } + + // Returns true iff the filler returned an empty hugepage + bool Delete(const PAlloc& p) { + Check(p); + bool r = DeleteRaw(p); + CheckStats(); + return r; + } + + // Return true iff the final Delete() call returns true. 
+ bool DeleteVector(const std::vector& pv) { + bool ret = false; + for (const auto& p : pv) { + ret = Delete(p); + } + return ret; + } + + Length ReleasePages(Length desired, SkipSubreleaseIntervals intervals = {}) { + PageHeapSpinLockHolder l; + return filler_.ReleasePages(desired, intervals, + /*release_partial_alloc_pages=*/false, + /*hit_limit=*/false); + } + + Length ReleasePartialPages(Length desired, + SkipSubreleaseIntervals intervals = {}) { + PageHeapSpinLockHolder l; + return filler_.ReleasePages(desired, intervals, + /*release_partial_alloc_pages=*/true, + /*hit_limit=*/false); + } + + Length HardReleasePages(Length desired) { + PageHeapSpinLockHolder l; + return filler_.ReleasePages(desired, SkipSubreleaseIntervals{}, + /*release_partial_alloc_pages=*/false, + /*hit_limit=*/true); } - PAlloc AllocateRaw(Length n, bool donated = false) { + + // Generates an "interesting" pattern of allocations that highlights all the + // various features of our stats. + std::vector GenerateInterestingAllocs(); + + // Tests fragmentation + void FragmentationTest(); + + private: + PAlloc AllocateRaw(Length n, SpanAllocInfo span_alloc_info, bool donated) { EXPECT_LT(n, kPagesPerHugePage); + // Densely-accessed spans are not allocated from donated hugepages. So + // assert that we do not test such a situation. + EXPECT_TRUE(!donated || + span_alloc_info.density == AccessDensityPrediction::kSparse); PAlloc ret; ret.n = n; ret.pt = nullptr; ret.mark = ++next_mark_; + ret.span_alloc_info = span_alloc_info; if (!donated) { // Donated means always create a new hugepage - absl::base_internal::SpinLockHolder l(&pageheap_lock); - auto [pt, page] = filler_.TryGet(n); + PageHeapSpinLockHolder l; + auto [pt, page, from_released] = filler_.TryGet(n, span_alloc_info); ret.pt = pt; ret.p = page; + ret.from_released = from_released; } if (ret.pt == nullptr) { - ret.pt = - new FakeTracker(GetBacking(), absl::base_internal::CycleClock::Now()); + ret.pt = new PageTracker(GetBacking(), donated, clock_); { - absl::base_internal::SpinLockHolder l(&pageheap_lock); + PageHeapSpinLockHolder l; ret.p = ret.pt->Get(n).page; } - filler_.Contribute(ret.pt, donated); + filler_.Contribute(ret.pt, donated, span_alloc_info); ++hp_contained_; } @@ -742,25 +893,16 @@ class FillerTest : public testing::TestWithParam { return ret; } - PAlloc Allocate(Length n, bool donated = false) { - CHECK_CONDITION(n <= kPagesPerHugePage); - PAlloc ret = AllocateRaw(n, donated); - ret.n = n; - Mark(ret); - CheckStats(); - return ret; - } - // Returns true iff the filler returned an empty hugepage. 
bool DeleteRaw(const PAlloc& p) { - FakeTracker* pt; + PageTracker* pt; { - absl::base_internal::SpinLockHolder l(&pageheap_lock); - pt = filler_.Put(p.pt, p.p, p.n); + PageHeapSpinLockHolder l; + pt = filler_.Put(p.pt, Range(p.p, p.n)); } total_allocated_ -= p.n; if (pt != nullptr) { - EXPECT_EQ(kPagesPerHugePage, pt->longest_free_range()); + EXPECT_EQ(pt->longest_free_range(), kPagesPerHugePage); EXPECT_TRUE(pt->empty()); --hp_contained_; delete pt; @@ -770,29 +912,6 @@ class FillerTest : public testing::TestWithParam { return false; } - // Returns true iff the filler returned an empty hugepage - bool Delete(const PAlloc& p) { - Check(p); - bool r = DeleteRaw(p); - CheckStats(); - return r; - } - - Length ReleasePages(Length desired, absl::Duration d = absl::ZeroDuration()) { - absl::base_internal::SpinLockHolder l(&pageheap_lock); - return filler_.ReleasePages(desired, d, false); - } - - Length HardReleasePages(Length desired) { - absl::base_internal::SpinLockHolder l(&pageheap_lock); - return filler_.ReleasePages(desired, absl::ZeroDuration(), true); - } - - // Generates an "interesting" pattern of allocations that highlights all the - // various features of our stats. - std::vector GenerateInterestingAllocs(); - - private: static int64_t clock_; }; @@ -800,23 +919,24 @@ int64_t FillerTest::clock_{1234}; TEST_P(FillerTest, Density) { absl::BitGen rng; - // Start with a really annoying setup: some hugepages half - // empty (randomly) + // Start with a really annoying setup: some hugepages half empty (randomly) std::vector allocs; std::vector doomed_allocs; static const HugeLength kNumHugePages = NHugePages(64); for (auto i = Length(0); i < kNumHugePages.in_pages(); ++i) { - ASSERT_EQ(i, filler_.pages_allocated()); + ASSERT_EQ(filler_.pages_allocated(), i); + PAlloc p = Allocate(Length(1)); if (absl::Bernoulli(rng, 1.0 / 2)) { - allocs.push_back(Allocate(Length(1))); + allocs.push_back(p); } else { - doomed_allocs.push_back(Allocate(Length(1))); + doomed_allocs.push_back(p); } } for (auto d : doomed_allocs) { Delete(d); } - EXPECT_EQ(kNumHugePages, filler_.size()); + EXPECT_LE(filler_.size(), kNumHugePages + NHugePages(1)); + EXPECT_GE(filler_.size(), kNumHugePages); // We want a good chance of touching ~every allocation. size_t n = allocs.size(); // Now, randomly add and delete to the allocations. @@ -827,60 +947,335 @@ TEST_P(FillerTest, Density) { for (int i = 0; i < n; ++i) { Delete(allocs[i]); allocs[i] = Allocate(Length(1)); - ASSERT_EQ(Length(n), filler_.pages_allocated()); + ASSERT_EQ(filler_.pages_allocated(), Length(n)); } } - EXPECT_GE(allocs.size() / kPagesPerHugePage.raw_num() + 1, + EXPECT_GE(allocs.size() / kPagesPerHugePage.raw_num() + 3, filler_.size().raw_num()); // clean up, check for failures for (auto a : allocs) { Delete(a); - ASSERT_EQ(Length(--n), filler_.pages_allocated()); + ASSERT_EQ(filler_.pages_allocated(), Length(--n)); } } -TEST_P(FillerTest, Release) { +// This test makes sure that we continue releasing from regular (non-partial) +// allocs when we enable a feature to release all free pages from partial +// allocs. +TEST_P(FillerTest, ReleaseFromFullAllocs) { + const Length kAlloc = kPagesPerHugePage / 2; + // Maintain the object count for the second allocation so that the alloc list + // remains the same for the two allocations. 
+ std::vector p1 = AllocateVector(kAlloc - Length(1)); + ASSERT_TRUE(!p1.empty()); + std::vector p2 = AllocateVectorWithSpanAllocInfo( + kAlloc + Length(1), p1.front().span_alloc_info); + + std::vector p3 = AllocateVector(kAlloc - Length(2)); + ASSERT_TRUE(!p3.empty()); + std::vector p4 = AllocateVectorWithSpanAllocInfo( + kAlloc + Length(2), p3.front().span_alloc_info); + // We have two hugepages, both full: nothing to release. + ASSERT_EQ(ReleasePartialPages(kMaxValidPages), Length(0)); + DeleteVector(p1); + DeleteVector(p3); + // Now we should see the p1 hugepage - emptier - released. + ASSERT_EQ(ReleasePartialPages(kAlloc - Length(1)), kAlloc - Length(1)); + EXPECT_EQ(filler_.unmapped_pages(), kAlloc - Length(1)); + ASSERT_TRUE(AllReleased(p1)); + ASSERT_FALSE(AllReleased(p3)); + for (const auto& pa : p3) { + ASSERT_FALSE(pa.from_released); + } + + // Check subrelease stats. + SubreleaseStats subrelease = filler_.subrelease_stats(); + EXPECT_EQ(subrelease.num_pages_subreleased, kAlloc - Length(1)); + EXPECT_EQ(subrelease.num_partial_alloc_pages_subreleased, Length(0)); + + // We expect to reuse both p1.pt and p3.pt. + std::vector p5 = AllocateVectorWithSpanAllocInfo( + kAlloc - Length(1), p1.front().span_alloc_info); + for (const auto& pa : p5) { + if (dense_tracker_sorted_on_allocs_) { + ASSERT_TRUE(pa.pt == p1.front().pt || pa.pt == p3.front().pt); + } else { + ASSERT_EQ(pa.pt, p1.front().pt); + ASSERT_TRUE(pa.from_released); + } + } + + DeleteVector(p2); + DeleteVector(p4); + ASSERT_TRUE(DeleteVector(p5)); +} + +// This test makes sure that we release all the free pages from partial allocs +// even when we request fewer pages to release. It also confirms that we +// continue to release desired number of pages from the full allocs even when +// release_partial_alloc_pages option is enabled. +TEST_P(FillerTest, ReleaseFreePagesInPartialAllocs) { + if (std::get<0>(GetParam()) == + HugePageFillerDenseTrackerType::kSpansAllocated) { + GTEST_SKIP() << "Skipping test for kSpansAllocated"; + } static const Length kAlloc = kPagesPerHugePage / 2; - PAlloc p1 = Allocate(kAlloc - Length(1)); - PAlloc p2 = Allocate(kAlloc + Length(1)); + static const Length kL1 = kAlloc - Length(1); + static const Length kL2 = kAlloc + Length(1); + + static const Length kL3 = kAlloc - Length(1); + static const Length kL4 = kAlloc + Length(1); + PAlloc p1 = Allocate(kL1); + PAlloc p2 = AllocateWithSpanAllocInfo(kL2, p1.span_alloc_info); + PAlloc p3 = Allocate(kL3); + PAlloc p4 = AllocateWithSpanAllocInfo(kL4, p3.span_alloc_info); + + // As there are no free pages, we shouldn't be able to release anything. + EXPECT_EQ(ReleasePartialPages(kMaxValidPages), Length(0)); + + Delete(p2); + Delete(p4); + + // Check subrelease stats. + SubreleaseStats subrelease = filler_.subrelease_stats(); + EXPECT_EQ(subrelease.num_pages_subreleased, Length(0)); + EXPECT_EQ(subrelease.num_partial_alloc_pages_subreleased, Length(0)); + + // As we do not have any pages in partially-released lists, we should continue + // to release the requested number of pages. + EXPECT_EQ(filler_.used_pages_in_partial_released(), Length(0)); + EXPECT_EQ(ReleasePartialPages(kL2), kL2); + EXPECT_EQ(ReleasePartialPages(kL4), kL4); + + // Check subrelease stats. + subrelease = filler_.subrelease_stats(); + EXPECT_EQ(subrelease.num_pages_subreleased, kL2 + kL4); + EXPECT_EQ(subrelease.num_partial_alloc_pages_subreleased, Length(0)); + // Now we allocate more. 
+ static const Length kL5 = kL2 - Length(2); + static const Length kL6 = kL4 - Length(2); + PAlloc p5 = AllocateWithSpanAllocInfo(kL5, p1.span_alloc_info); + PAlloc p6 = AllocateWithSpanAllocInfo(kL6, p3.span_alloc_info); + EXPECT_EQ(filler_.used_pages_in_released(), kL1 + kL3 + kL5 + kL6); + EXPECT_EQ(filler_.used_pages_in_partial_released(), Length(0)); + + Delete(p5); + Delete(p6); + + // We have some free pages in partially-released allocs now. + EXPECT_EQ(filler_.used_pages_in_partial_released(), kL1 + kL3); + // Because we gradually release free pages from partially-released allocs, we + // shouldn't be able to release all the k5+k6 free pages at once. + EXPECT_EQ(ReleasePartialPages(kL5), kL5); + EXPECT_EQ(ReleasePartialPages(kL6), kL6); + + // Check subrelease stats. + subrelease = filler_.subrelease_stats(); + EXPECT_EQ(subrelease.num_pages_subreleased, kL5 + kL6); + EXPECT_EQ(subrelease.num_partial_alloc_pages_subreleased, kL5 + kL6); - PAlloc p3 = Allocate(kAlloc - Length(2)); - PAlloc p4 = Allocate(kAlloc + Length(2)); - // We have two hugepages, both full: nothing to release. - ASSERT_EQ(Length(0), ReleasePages(kMaxValidPages)); Delete(p1); Delete(p3); +} + +TEST_P(FillerTest, ReleaseFreePagesInPartialAllocs_SpansAllocated) { + if (std::get<0>(GetParam()) != + HugePageFillerDenseTrackerType::kSpansAllocated) { + GTEST_SKIP() << "Skipping test since !kSpansAllocated"; + } + randomize_density_ = false; + SpanAllocInfo info = {kPagesPerHugePage.raw_num(), + AccessDensityPrediction::kDense}; + static const Length kAlloc = kPagesPerHugePage / 2; + static const Length kL1 = kAlloc - Length(1); + static const Length kL2 = kAlloc + Length(1); + + static const Length kL3 = kAlloc - Length(1); + static const Length kL4 = kAlloc + Length(1); + std::vector p1 = AllocateVectorWithSpanAllocInfo(kL1, info); + ASSERT_TRUE(!p1.empty()); + std::vector p2 = + AllocateVectorWithSpanAllocInfo(kL2, p1.front().span_alloc_info); + std::vector p3 = AllocateVectorWithSpanAllocInfo(kL3, info); + ASSERT_TRUE(!p3.empty()); + std::vector p4 = + AllocateVectorWithSpanAllocInfo(kL4, p3.front().span_alloc_info); + + // As there are no free pages, we shouldn't be able to release anything. + EXPECT_EQ(ReleasePartialPages(kMaxValidPages), Length(0)); + + DeleteVector(p2); + DeleteVector(p4); + + // Check subrelease stats. + SubreleaseStats subrelease = filler_.subrelease_stats(); + EXPECT_EQ(subrelease.num_pages_subreleased, Length(0)); + EXPECT_EQ(subrelease.num_partial_alloc_pages_subreleased, Length(0)); + + // As we do not have any pages in partially-released lists, we should continue + // to release the requested number of pages. + EXPECT_EQ(filler_.used_pages_in_partial_released(), Length(0)); + EXPECT_EQ(ReleasePartialPages(kL2), kL2); + EXPECT_EQ(ReleasePartialPages(kL4), kL4); + + // Check subrelease stats. + subrelease = filler_.subrelease_stats(); + EXPECT_EQ(subrelease.num_pages_subreleased, kL2 + kL4); + EXPECT_EQ(subrelease.num_partial_alloc_pages_subreleased, Length(0)); + // Now we allocate more. + static const Length kL5 = kL2 - Length(2); + static const Length kL6 = kL4 - Length(2); + std::vector p5 = + AllocateVectorWithSpanAllocInfo(kL5, p1.front().span_alloc_info); + std::vector p6 = + AllocateVectorWithSpanAllocInfo(kL6, p3.front().span_alloc_info); + EXPECT_EQ(filler_.used_pages_in_released(), kL3 + kL6 - Length(2)); + EXPECT_EQ(filler_.used_pages_in_partial_released(), Length(0)); + + DeleteVector(p5); + DeleteVector(p6); + + // We have some free pages in partially-released allocs now. 
+ EXPECT_EQ(filler_.used_pages_in_partial_released(), kL3); + // Because we gradually release free pages from partially-released allocs, we + // should be able to release all the k5+k6 free pages when the dense tracker + // is sorted on spans allocated. + static const Length kLReleased5 = ReleasePartialPages(kL5); + static const Length kLReleased6 = ReleasePartialPages(kL6); + EXPECT_TRUE(kLReleased5 == kL5 + kL6 && kLReleased6 == Length(0)); + + // Check subrelease stats. + subrelease = filler_.subrelease_stats(); + EXPECT_EQ(subrelease.num_pages_subreleased, kL5 + kL6); + EXPECT_EQ(subrelease.num_partial_alloc_pages_subreleased, kL6 - Length(2)); + + DeleteVector(p1); + DeleteVector(p3); +} + +TEST_P(FillerTest, AccountingForUsedPartialReleased) { + static const Length kAlloc = kPagesPerHugePage / 2; + static const Length kL1 = kAlloc + Length(3); + static const Length kL2 = kAlloc + Length(5); + std::vector p1 = AllocateVector(kL1); + ASSERT_TRUE(!p1.empty()); + std::vector p2 = AllocateVector(kL2); + ASSERT_TRUE(!p2.empty()); + // We have two hugepages. They maybe both partially allocated, or one of them + // is fully allocated and the other partially when the hugepages in the dense + // tracker are sorted on spans allocated. + ASSERT_EQ(ReleasePages(kMaxValidPages), + kPagesPerHugePage - kL1 + kPagesPerHugePage - kL2); + ASSERT_TRUE(filler_.used_pages_in_released() == kL1 + kL2 || + // When the hugepages in the dense tracker are sorted on spans and + // the two allocations above are both for dense spans. + filler_.used_pages_in_released() == + kL1 + kL2 - kPagesPerHugePage); + // Now we allocate more. + static const Length kL3 = kAlloc - Length(4); + static const Length kL4 = kAlloc - Length(7); + // Maintain the object count as above so that same alloc lists are used for + // the following two allocations. + std::vector p3 = + AllocateVectorWithSpanAllocInfo(kL3, p1.front().span_alloc_info); + std::vector p4 = + AllocateVectorWithSpanAllocInfo(kL4, p2.front().span_alloc_info); + EXPECT_TRUE(filler_.used_pages_in_released() == kL1 + kL2 + kL3 + kL4 || + filler_.used_pages_in_released() == + kL1 + kL2 + kL3 + kL4 - kPagesPerHugePage); + DeleteVector(p3); + DeleteVector(p4); + EXPECT_TRUE(filler_.used_pages_in_partial_released() == kL1 + kL2 || + // When the hugepages in the dense tracker are sorted on spans and + // the two allocations above are both for dense spans. + filler_.used_pages_in_partial_released() == + kL1 + kL2 - kPagesPerHugePage); + EXPECT_EQ(filler_.used_pages_in_released(), Length(0)); + DeleteVector(p1); + DeleteVector(p2); +} + +TEST_P(FillerTest, Release) { + static const Length kAlloc = kPagesPerHugePage / 2; + // Maintain the object count for the second allocation so that the alloc + // list + // remains the same for the two allocations. + std::vector p1 = AllocateVector(kAlloc - Length(1)); + ASSERT_TRUE(!p1.empty()); + std::vector p2 = AllocateVectorWithSpanAllocInfo( + kAlloc + Length(1), p1.front().span_alloc_info); + + std::vector p3 = AllocateVector(kAlloc - Length(2)); + ASSERT_TRUE(!p3.empty()); + std::vector p4 = AllocateVectorWithSpanAllocInfo( + kAlloc + Length(2), p3.front().span_alloc_info); + // We have two hugepages, both full: nothing to release. + ASSERT_EQ(ReleasePages(kMaxValidPages), Length(0)); + DeleteVector(p1); + DeleteVector(p3); // Now we should see the p1 hugepage - emptier - released. 
- ASSERT_EQ(kAlloc - Length(1), ReleasePages(kAlloc - Length(1))); - EXPECT_EQ(kAlloc - Length(1), filler_.unmapped_pages()); - ASSERT_TRUE(p1.pt->released()); - ASSERT_FALSE(p3.pt->released()); + ASSERT_EQ(ReleasePages(kAlloc - Length(1)), kAlloc - Length(1)); + EXPECT_EQ(filler_.unmapped_pages(), kAlloc - Length(1)); + EXPECT_EQ(filler_.previously_released_huge_pages(), NHugePages(0)); + ASSERT_TRUE(AllReleased(p1)); + for (const auto& pa : p1) { + ASSERT_FALSE(pa.from_released); + } + ASSERT_FALSE(AllReleased(p3)); + for (const auto& pa : p3) { + ASSERT_FALSE(pa.from_released); + } // We expect to reuse p1.pt. - PAlloc p5 = Allocate(kAlloc - Length(1)); - ASSERT_TRUE(p1.pt == p5.pt || p3.pt == p5.pt); + std::vector p5 = AllocateVectorWithSpanAllocInfo( + kAlloc - Length(1), p1.front().span_alloc_info); + const bool dense_tracker_and_sorted_on_allocs = + (std::get<0>(GetParam()) == + HugePageFillerDenseTrackerType::kSpansAllocated); + if (dense_tracker_and_sorted_on_allocs) { + ASSERT_TRUE(p1.front().pt == p5.front().pt || + p3.front().pt == p5.front().pt); + } else { + EXPECT_EQ(filler_.previously_released_huge_pages(), NHugePages(1)); + ASSERT_TRUE(p1.front().pt == p5.front().pt); + } - Delete(p2); - Delete(p4); - Delete(p5); + DeleteVector(p2); + DeleteVector(p4); + DeleteVector(p5); + EXPECT_EQ(filler_.previously_released_huge_pages(), NHugePages(0)); +} + +TEST_P(FillerTest, ReleaseZero) { + // Trying to release no pages should not crash. + EXPECT_EQ( + ReleasePages(Length(0), + SkipSubreleaseIntervals{.short_interval = absl::Seconds(1), + .long_interval = absl::Seconds(1)}), + Length(0)); } -TEST_P(FillerTest, Fragmentation) { +void FillerTest::FragmentationTest() { + constexpr Length kRequestLimit = Length(32); + constexpr Length kSizeLimit = Length(512 * 1024); + constexpr size_t kReps = 1000; + absl::BitGen rng; - auto dist = EmpiricalDistribution(absl::GetFlag(FLAGS_frag_req_limit)); + auto dist = EmpiricalDistribution(kRequestLimit); - std::vector allocs; + std::vector> allocs; + std::vector lengths; Length total; - while (total < absl::GetFlag(FLAGS_frag_size)) { + while (total < kSizeLimit) { auto n = Length(dist(rng)); total += n; - allocs.push_back(AllocateRaw(n)); + allocs.push_back(AllocateVector(n)); + lengths.push_back(n); } double max_slack = 0.0; - const size_t kReps = absl::GetFlag(FLAGS_frag_iters); for (size_t i = 0; i < kReps; ++i) { auto stats = filler_.stats(); double slack = static_cast(stats.free_bytes) / stats.system_bytes; @@ -893,23 +1288,28 @@ TEST_P(FillerTest, Fragmentation) { if (absl::Bernoulli(rng, 1.0 / 2)) { size_t index = absl::Uniform(rng, 0, allocs.size()); std::swap(allocs[index], allocs.back()); - DeleteRaw(allocs.back()); - total -= allocs.back().n; + std::swap(lengths[index], lengths.back()); + DeleteVector(allocs.back()); + total -= lengths.back(); allocs.pop_back(); + lengths.pop_back(); } else { auto n = Length(dist(rng)); - allocs.push_back(AllocateRaw(n)); + allocs.push_back(AllocateVector(n)); + lengths.push_back(n); total += n; } } - EXPECT_LE(max_slack, 0.05); + EXPECT_LE(max_slack, 0.06); for (auto a : allocs) { - DeleteRaw(a); + DeleteVector(a); } } +TEST_P(FillerTest, Fragmentation) { FragmentationTest(); } + TEST_P(FillerTest, PrintFreeRatio) { // This test is sensitive to the number of pages per hugepage, as we are // printing raw stats. 
@@ -917,63 +1317,60 @@ TEST_P(FillerTest, PrintFreeRatio) { GTEST_SKIP(); } + // We prevent randomly choosing the number of objects per span since this + // test has hardcoded output which will change if the objects per span are + // chosen at random. + randomize_density_ = false; + // Allocate two huge pages, release one, verify that we do not get an invalid // (>1.) ratio of free : non-fulls. // First huge page - PAlloc a1 = Allocate(kPagesPerHugePage / 2); - PAlloc a2 = Allocate(kPagesPerHugePage / 2); + std::vector a1 = AllocateVector(kPagesPerHugePage / 2); + ASSERT_TRUE(!a1.empty()); + std::vector a2 = AllocateVectorWithSpanAllocInfo( + kPagesPerHugePage / 2, a1.front().span_alloc_info); // Second huge page constexpr Length kQ = kPagesPerHugePage / 4; - PAlloc a3 = Allocate(kQ); - PAlloc a4 = Allocate(kQ); - PAlloc a5 = Allocate(kQ); - PAlloc a6 = Allocate(kQ); - - Delete(a6); + std::vector a3 = AllocateVector(kQ); + ASSERT_TRUE(!a3.empty()); + std::vector a4 = + AllocateVectorWithSpanAllocInfo(kQ, a3.front().span_alloc_info); + std::vector a5 = + AllocateVectorWithSpanAllocInfo(kQ, a3.front().span_alloc_info); + std::vector a6 = + AllocateVectorWithSpanAllocInfo(kQ, a3.front().span_alloc_info); + DeleteVector(a6); ReleasePages(kQ); - - Delete(a5); - + DeleteVector(a5); std::string buffer(1024 * 1024, '\0'); { + PageHeapSpinLockHolder l; Printer printer(&*buffer.begin(), buffer.size()); - filler_.Print(&printer, /*everything=*/true); + filler_.Print(printer, /*everything=*/true); buffer.erase(printer.SpaceRequired()); } - if (GetParam() == FillerPartialRerelease::Retain) { - EXPECT_THAT( - buffer, - testing::StartsWith( - R"(HugePageFiller: densely pack small requests into hugepages -HugePageFiller: 2 total, 1 full, 0 partial, 1 released (1 partially), 0 quarantined + EXPECT_THAT(buffer, + testing::StartsWith( + R"(HugePageFiller: densely pack small requests into hugepages +HugePageFiller: Overall, 2 total, 1 full, 0 partial, 1 released (1 partially), 0 quarantined +HugePageFiller: those with sparsely-accessed spans, 2 total, 1 full, 0 partial, 1 released (1 partially), 0 quarantined +HugePageFiller: those with densely-accessed spans, 0 total, 0 full, 0 partial, 0 released (0 partially), 0 quarantined HugePageFiller: 64 pages free in 2 hugepages, 0.1250 free HugePageFiller: among non-fulls, 0.2500 free HugePageFiller: 128 used pages in subreleased hugepages (128 of them in partially released) HugePageFiller: 1 hugepages partially released, 0.2500 released HugePageFiller: 0.6667 of used pages hugepageable)")); - } else { - EXPECT_THAT( - buffer, - testing::StartsWith( - R"(HugePageFiller: densely pack small requests into hugepages -HugePageFiller: 2 total, 1 full, 0 partial, 1 released (0 partially), 0 quarantined -HugePageFiller: 0 pages free in 2 hugepages, 0.0000 free -HugePageFiller: among non-fulls, 0.0000 free -HugePageFiller: 128 used pages in subreleased hugepages (0 of them in partially released) -HugePageFiller: 1 hugepages partially released, 0.5000 released -HugePageFiller: 0.6667 of used pages hugepageable)")); - } // Cleanup remaining allocs. 
- Delete(a1); - Delete(a2); - Delete(a3); - Delete(a4); + DeleteVector(a1); + DeleteVector(a2); + DeleteVector(a3); + DeleteVector(a4); } static double BytesToMiB(size_t bytes) { return bytes / (1024.0 * 1024.0); } @@ -989,50 +1386,54 @@ TEST_P(FillerTest, HugePageFrac) { EXPECT_THAT(filler_.hugepage_frac(), AnyOf(Eq(0), Eq(1))); static const Length kQ = kPagesPerHugePage / 4; // These are all on one page: - auto a1 = Allocate(kQ); - auto a2 = Allocate(kQ); - auto a3 = Allocate(kQ - Length(1)); - auto a4 = Allocate(kQ + Length(1)); + auto a1 = AllocateVector(kQ); + ASSERT_TRUE(!a1.empty()); + auto a2 = AllocateVectorWithSpanAllocInfo(kQ, a1.front().span_alloc_info); + auto a3 = AllocateVectorWithSpanAllocInfo(kQ - Length(1), + a1.front().span_alloc_info); + auto a4 = AllocateVectorWithSpanAllocInfo(kQ + Length(1), + a1.front().span_alloc_info); // As are these: - auto a5 = Allocate(kPagesPerHugePage - kQ); - auto a6 = Allocate(kQ); + auto a5 = AllocateVector(kPagesPerHugePage - kQ); + ASSERT_TRUE(!a5.empty()); + auto a6 = AllocateVectorWithSpanAllocInfo(kQ, a5.front().span_alloc_info); - EXPECT_EQ(1, filler_.hugepage_frac()); + EXPECT_EQ(filler_.hugepage_frac(), 1); // Free space doesn't affect it... - Delete(a4); - Delete(a6); + DeleteVector(a4); + DeleteVector(a6); - EXPECT_EQ(1, filler_.hugepage_frac()); + EXPECT_EQ(filler_.hugepage_frac(), 1); // Releasing the hugepage does. - ASSERT_EQ(kQ + Length(1), ReleasePages(kQ + Length(1))); - EXPECT_EQ((3.0 * kQ.raw_num()) / (6.0 * kQ.raw_num() - 1.0), - filler_.hugepage_frac()); + ASSERT_EQ(ReleasePages(kQ + Length(1)), kQ + Length(1)); + EXPECT_EQ(filler_.hugepage_frac(), + (3.0 * kQ.raw_num()) / (6.0 * kQ.raw_num() - 1.0)); // Check our arithmetic in a couple scenarios. // 2 kQs on the release and 3 on the hugepage - Delete(a2); - EXPECT_EQ((3.0 * kQ.raw_num()) / (5.0 * kQ.raw_num() - 1), - filler_.hugepage_frac()); + DeleteVector(a2); + EXPECT_EQ(filler_.hugepage_frac(), + (3.0 * kQ.raw_num()) / (5.0 * kQ.raw_num() - 1)); // This releases the free page on the partially released hugepage. - ASSERT_EQ(kQ, ReleasePages(kQ)); - EXPECT_EQ((3.0 * kQ.raw_num()) / (5.0 * kQ.raw_num() - 1), - filler_.hugepage_frac()); + ASSERT_EQ(ReleasePages(kQ), kQ); + EXPECT_EQ(filler_.hugepage_frac(), + (3.0 * kQ.raw_num()) / (5.0 * kQ.raw_num() - 1)); // just-over-1 kQ on the release and 3 on the hugepage - Delete(a3); - EXPECT_EQ((3 * kQ.raw_num()) / (4.0 * kQ.raw_num()), filler_.hugepage_frac()); + DeleteVector(a3); + EXPECT_EQ(filler_.hugepage_frac(), (3 * kQ.raw_num()) / (4.0 * kQ.raw_num())); // This releases the free page on the partially released hugepage. - ASSERT_EQ(kQ - Length(1), ReleasePages(kQ - Length(1))); - EXPECT_EQ((3 * kQ.raw_num()) / (4.0 * kQ.raw_num()), filler_.hugepage_frac()); + ASSERT_EQ(ReleasePages(kQ - Length(1)), kQ - Length(1)); + EXPECT_EQ(filler_.hugepage_frac(), (3 * kQ.raw_num()) / (4.0 * kQ.raw_num())); // All huge! 
- Delete(a1); - EXPECT_EQ(1, filler_.hugepage_frac()); + DeleteVector(a1); + EXPECT_EQ(filler_.hugepage_frac(), 1); - Delete(a5); + DeleteVector(a5); } // Repeatedly grow from FLAG_bytes to FLAG_bytes * growth factor, then shrink @@ -1051,12 +1452,12 @@ TEST_P(FillerTest, DISABLED_ReleaseFrac) { std::vector allocs; while (filler_.used_pages() < baseline) { - allocs.push_back(AllocateRaw(Length(1))); + allocs.push_back(Allocate(Length(1))); } while (true) { while (filler_.used_pages() < peak) { - allocs.push_back(AllocateRaw(Length(1))); + allocs.push_back(Allocate(Length(1))); } const double peak_frac = filler_.hugepage_frac(); // VSS @@ -1067,7 +1468,7 @@ TEST_P(FillerTest, DISABLED_ReleaseFrac) { size_t limit = allocs.size(); while (filler_.used_pages() > baseline) { --limit; - DeleteRaw(allocs[limit]); + Delete(allocs[limit]); } allocs.resize(limit); while (filler_.free_pages() > free_target) { @@ -1080,345 +1481,518 @@ TEST_P(FillerTest, DISABLED_ReleaseFrac) { } } -TEST_P(FillerTest, ReleaseAccounting) { +// Make sure we release appropriate number of pages when using +// ReleasePartialPages. +TEST_P(FillerTest, ReleasePagesFromPartialAllocs) { const Length N = kPagesPerHugePage; - auto big = Allocate(N - Length(2)); - auto tiny1 = Allocate(Length(1)); - auto tiny2 = Allocate(Length(1)); - auto half1 = Allocate(N / 2); - auto half2 = Allocate(N / 2); + auto big = AllocateVector(N - Length(2)); + ASSERT_TRUE(!big.empty()); + auto tiny1 = + AllocateWithSpanAllocInfo(Length(1), big.front().span_alloc_info); + auto tiny2 = + AllocateWithSpanAllocInfo(Length(1), big.front().span_alloc_info); + auto half1 = AllocateVector(N / 2); + ASSERT_TRUE(!half1.empty()); + auto half2 = + AllocateVectorWithSpanAllocInfo(N / 2, half1.front().span_alloc_info); + + DeleteVector(half1); + DeleteVector(big); + + ASSERT_EQ(filler_.size(), NHugePages(2)); - Delete(half1); - Delete(big); + // We should pick the [empty big][full tiny] hugepage here. + EXPECT_EQ(ReleasePartialPages(N - Length(2)), N - Length(2)); + EXPECT_EQ(filler_.unmapped_pages(), N - Length(2)); + // This shouldn't trigger a release. + Delete(tiny1); + EXPECT_EQ(filler_.unmapped_pages(), N - Length(2)); + // Until we call ReleasePartialPages() again. + EXPECT_EQ(ReleasePartialPages(Length(1)), Length(1)); + + // As should this, but this will drop the whole hugepage. + Delete(tiny2); + EXPECT_EQ(filler_.unmapped_pages(), Length(0)); + EXPECT_EQ(filler_.size(), NHugePages(1)); + + // We should release tiny2 here. + EXPECT_EQ(ReleasePartialPages(Length(1)), Length(1)); + EXPECT_EQ(filler_.unmapped_pages(), Length(0)); + EXPECT_EQ(filler_.size(), NHugePages(1)); + + // Check subrelease stats. + EXPECT_EQ(filler_.used_pages(), N / 2); + EXPECT_EQ(filler_.used_pages_in_any_subreleased(), Length(0)); + EXPECT_EQ(filler_.used_pages_in_partial_released(), Length(0)); + EXPECT_EQ(filler_.used_pages_in_released(), Length(0)); + + // Now we pick the half/half hugepage. We should be able to release pages from + // full allocs with ReleasePartialPages even though partially-released allocs + // are empty. + EXPECT_EQ(ReleasePartialPages(kMaxValidPages), N / 2); + EXPECT_EQ(filler_.unmapped_pages(), N / 2); + + // Check subrelease stats. 
+ EXPECT_EQ(filler_.used_pages(), N / 2); + EXPECT_EQ(filler_.used_pages_in_any_subreleased(), N / 2); + EXPECT_EQ(filler_.used_pages_in_partial_released(), Length(0)); + EXPECT_EQ(filler_.used_pages_in_released(), N / 2); + + DeleteVector(half2); + EXPECT_EQ(filler_.size(), NHugePages(0)); + EXPECT_EQ(filler_.unmapped_pages(), Length(0)); +} - ASSERT_EQ(NHugePages(2), filler_.size()); +TEST_P(FillerTest, ReleaseAccounting) { + const Length N = kPagesPerHugePage; + auto big = AllocateVector(N - Length(2)); + ASSERT_TRUE(!big.empty()); + auto tiny1 = + AllocateWithSpanAllocInfo(Length(1), big.front().span_alloc_info); + auto tiny2 = + AllocateWithSpanAllocInfo(Length(1), big.front().span_alloc_info); + auto half1 = AllocateVector(N / 2); + ASSERT_TRUE(!half1.empty()); + auto half2 = + AllocateVectorWithSpanAllocInfo(N / 2, half1.front().span_alloc_info); + ASSERT_TRUE(!half2.empty()); + + DeleteVector(half1); + DeleteVector(big); + + ASSERT_EQ(filler_.size(), NHugePages(2)); // We should pick the [empty big][full tiny] hugepage here. - EXPECT_EQ(N - Length(2), ReleasePages(N - Length(2))); - EXPECT_EQ(N - Length(2), filler_.unmapped_pages()); + EXPECT_EQ(ReleasePages(N - Length(2)), N - Length(2)); + EXPECT_EQ(filler_.unmapped_pages(), N - Length(2)); // This shouldn't trigger a release Delete(tiny1); - if (GetParam() == FillerPartialRerelease::Retain) { - EXPECT_EQ(N - Length(2), filler_.unmapped_pages()); - // Until we call ReleasePages() - EXPECT_EQ(Length(1), ReleasePages(Length(1))); - } - EXPECT_EQ(N - Length(1), filler_.unmapped_pages()); + EXPECT_EQ(filler_.unmapped_pages(), N - Length(2)); + // Until we call ReleasePages() + EXPECT_EQ(ReleasePages(Length(1)), Length(1)); + EXPECT_EQ(filler_.unmapped_pages(), N - Length(1)); // As should this, but this will drop the whole hugepage Delete(tiny2); - EXPECT_EQ(Length(0), filler_.unmapped_pages()); - EXPECT_EQ(NHugePages(1), filler_.size()); + EXPECT_EQ(filler_.unmapped_pages(), Length(0)); + EXPECT_EQ(filler_.size(), NHugePages(1)); // This shouldn't trigger any release: we just claim credit for the // releases we did automatically on tiny2. 
- if (GetParam() == FillerPartialRerelease::Retain) { - EXPECT_EQ(Length(1), ReleasePages(Length(1))); - } else { - EXPECT_EQ(Length(2), ReleasePages(Length(2))); - } - EXPECT_EQ(Length(0), filler_.unmapped_pages()); - EXPECT_EQ(NHugePages(1), filler_.size()); + EXPECT_EQ(ReleasePages(Length(1)), Length(1)); + EXPECT_EQ(filler_.unmapped_pages(), Length(0)); + EXPECT_EQ(filler_.size(), NHugePages(1)); // Check subrelease stats - EXPECT_EQ(N / 2, filler_.used_pages()); - EXPECT_EQ(Length(0), filler_.used_pages_in_any_subreleased()); - EXPECT_EQ(Length(0), filler_.used_pages_in_partial_released()); - EXPECT_EQ(Length(0), filler_.used_pages_in_released()); + EXPECT_EQ(filler_.used_pages(), N / 2); + EXPECT_EQ(filler_.used_pages_in_any_subreleased(), Length(0)); + EXPECT_EQ(filler_.used_pages_in_partial_released(), Length(0)); + EXPECT_EQ(filler_.used_pages_in_released(), Length(0)); // Now we pick the half/half hugepage - EXPECT_EQ(N / 2, ReleasePages(kMaxValidPages)); - EXPECT_EQ(N / 2, filler_.unmapped_pages()); + EXPECT_EQ(ReleasePages(kMaxValidPages), N / 2); + EXPECT_EQ(filler_.unmapped_pages(), N / 2); // Check subrelease stats - EXPECT_EQ(N / 2, filler_.used_pages()); - EXPECT_EQ(N / 2, filler_.used_pages_in_any_subreleased()); - EXPECT_EQ(Length(0), filler_.used_pages_in_partial_released()); - EXPECT_EQ(N / 2, filler_.used_pages_in_released()); - - // Check accounting for partially released hugepages with partial rerelease - if (GetParam() == FillerPartialRerelease::Retain) { - // Allocating and deallocating a small object causes the page to turn from - // a released hugepage into a partially released hugepage. - auto tiny3 = Allocate(Length(1)); - auto tiny4 = Allocate(Length(1)); - Delete(tiny4); - EXPECT_EQ(N / 2 + Length(1), filler_.used_pages()); - EXPECT_EQ(N / 2 + Length(1), filler_.used_pages_in_any_subreleased()); - EXPECT_EQ(N / 2 + Length(1), filler_.used_pages_in_partial_released()); - EXPECT_EQ(Length(0), filler_.used_pages_in_released()); - Delete(tiny3); - } - - Delete(half2); - EXPECT_EQ(NHugePages(0), filler_.size()); - EXPECT_EQ(Length(0), filler_.unmapped_pages()); + EXPECT_EQ(filler_.used_pages(), N / 2); + EXPECT_EQ(filler_.used_pages_in_any_subreleased(), N / 2); + EXPECT_EQ(filler_.used_pages_in_partial_released(), Length(0)); + EXPECT_EQ(filler_.used_pages_in_released(), N / 2); + + // Check accounting for partially released hugepages with partial rerelease. + // Allocating and deallocating a small object causes the page to turn from a + // released hugepage into a partially released hugepage. + // + // The number of objects for each allocation is same as that for half2 so to + // ensure that same alloc list is used. 
+ auto tiny3 = + AllocateWithSpanAllocInfo(Length(1), half2.front().span_alloc_info); + auto tiny4 = + AllocateWithSpanAllocInfo(Length(1), half2.front().span_alloc_info); + Delete(tiny4); + EXPECT_EQ(filler_.used_pages(), N / 2 + Length(1)); + EXPECT_EQ(filler_.used_pages_in_any_subreleased(), N / 2 + Length(1)); + EXPECT_EQ(filler_.used_pages_in_partial_released(), N / 2 + Length(1)); + EXPECT_EQ(filler_.used_pages_in_released(), Length(0)); + Delete(tiny3); + + DeleteVector(half2); + EXPECT_EQ(filler_.size(), NHugePages(0)); + EXPECT_EQ(filler_.unmapped_pages(), Length(0)); } TEST_P(FillerTest, ReleaseWithReuse) { const Length N = kPagesPerHugePage; - auto half = Allocate(N / 2); - auto tiny1 = Allocate(N / 4); - auto tiny2 = Allocate(N / 4); + auto half = AllocateVector(N / 2); + ASSERT_TRUE(!half.empty()); + auto tiny1 = + AllocateVectorWithSpanAllocInfo(N / 4, half.front().span_alloc_info); + auto tiny2 = + AllocateVectorWithSpanAllocInfo(N / 4, half.front().span_alloc_info); - Delete(half); - - ASSERT_EQ(NHugePages(1), filler_.size()); + DeleteVector(half); + ASSERT_EQ(filler_.size(), NHugePages(1)); // We should be able to release the pages from half1. - EXPECT_EQ(N / 2, ReleasePages(kMaxValidPages)); - EXPECT_EQ(N / 2, filler_.unmapped_pages()); + EXPECT_EQ(ReleasePages(kMaxValidPages), N / 2); + EXPECT_EQ(filler_.unmapped_pages(), N / 2); + EXPECT_EQ(filler_.previously_released_huge_pages(), NHugePages(0)); // Release tiny1, release more. - Delete(tiny1); + DeleteVector(tiny1); - EXPECT_EQ(N / 4, ReleasePages(kMaxValidPages)); - EXPECT_EQ(3 * N / 4, filler_.unmapped_pages()); + EXPECT_EQ(ReleasePages(kMaxValidPages), N / 4); + EXPECT_EQ(filler_.unmapped_pages(), 3 * N / 4); + EXPECT_EQ(filler_.previously_released_huge_pages(), NHugePages(0)); // Repopulate, confirm we can't release anything and unmapped pages goes to 0. - tiny1 = Allocate(N / 4); + tiny1 = AllocateVectorWithSpanAllocInfo(N / 4, half.front().span_alloc_info); EXPECT_EQ(Length(0), ReleasePages(kMaxValidPages)); EXPECT_EQ(N / 2, filler_.unmapped_pages()); + EXPECT_EQ(filler_.previously_released_huge_pages(), NHugePages(0)); // Continue repopulating. - half = Allocate(N / 2); - EXPECT_EQ(Length(0), ReleasePages(kMaxValidPages)); - EXPECT_EQ(Length(0), filler_.unmapped_pages()); - EXPECT_EQ(NHugePages(1), filler_.size()); + half = AllocateVectorWithSpanAllocInfo(N / 2, half.front().span_alloc_info); + EXPECT_EQ(ReleasePages(kMaxValidPages), Length(0)); + EXPECT_EQ(filler_.unmapped_pages(), Length(0)); + EXPECT_EQ(filler_.size(), NHugePages(1)); + EXPECT_EQ(filler_.previously_released_huge_pages(), NHugePages(1)); // Release everything and cleanup. - Delete(half); - Delete(tiny1); - Delete(tiny2); - EXPECT_EQ(NHugePages(0), filler_.size()); - EXPECT_EQ(Length(0), filler_.unmapped_pages()); + DeleteVector(half); + DeleteVector(tiny1); + DeleteVector(tiny2); + EXPECT_EQ(filler_.size(), NHugePages(0)); + EXPECT_EQ(filler_.unmapped_pages(), Length(0)); + EXPECT_EQ(filler_.previously_released_huge_pages(), NHugePages(0)); } -TEST_P(FillerTest, AvoidArbitraryQuarantineVMGrowth) { +TEST_P(FillerTest, CheckPreviouslyReleasedStats) { const Length N = kPagesPerHugePage; - // Guarantee we have a ton of released pages go empty. 
- for (int i = 0; i < 10 * 1000; ++i) { - auto half1 = Allocate(N / 2); - auto half2 = Allocate(N / 2); - Delete(half1); - ASSERT_EQ(N / 2, ReleasePages(N / 2)); - Delete(half2); - } + auto half = AllocateVector(N / 2); + ASSERT_TRUE(!half.empty()); + auto tiny1 = + AllocateVectorWithSpanAllocInfo(N / 4, half.front().span_alloc_info); + auto tiny2 = + AllocateVectorWithSpanAllocInfo(N / 4, half.front().span_alloc_info); - auto s = filler_.stats(); - EXPECT_GE(1024 * 1024 * 1024, s.system_bytes); -} + DeleteVector(half); + ASSERT_EQ(filler_.size(), NHugePages(1)); -TEST_P(FillerTest, StronglyPreferNonDonated) { - // We donate several huge pages of varying fullnesses. Then we make several - // allocations that would be perfect fits for the donated hugepages, *after* - // making one allocation that won't fit, to ensure that a huge page is - // contributed normally. Finally, we verify that we can still get the - // donated huge pages back. (I.e. they weren't used.) - std::vector donated; - ASSERT_GE(kPagesPerHugePage, Length(10)); - for (auto i = Length(1); i <= Length(3); ++i) { - donated.push_back(Allocate(kPagesPerHugePage - i, /*donated=*/true)); - } + // We should be able to release the pages from half1. + EXPECT_EQ(ReleasePages(kMaxValidPages), N / 2); + EXPECT_EQ(filler_.previously_released_huge_pages(), NHugePages(0)); - std::vector regular; - for (auto i = Length(4); i >= Length(1); --i) { - regular.push_back(Allocate(i)); + std::string buffer(1024 * 1024, '\0'); + { + PageHeapSpinLockHolder l; + Printer printer(&*buffer.begin(), buffer.size()); + filler_.Print(printer, true); } - - for (const PAlloc& alloc : donated) { - // All the donated huge pages should be freeable. - EXPECT_TRUE(Delete(alloc)); + buffer.resize(strlen(buffer.c_str())); + EXPECT_THAT(buffer, testing::HasSubstr( + "HugePageFiller: 0 hugepages became full after " + "being previously released, " + "out of which 0 pages are hugepage backed.")); + + // Repopulate. + ASSERT_TRUE(!tiny1.empty()); + half = AllocateVectorWithSpanAllocInfo(N / 2, tiny1.front().span_alloc_info); + EXPECT_EQ(ReleasePages(kMaxValidPages), Length(0)); + EXPECT_EQ(filler_.previously_released_huge_pages(), NHugePages(1)); + buffer.resize(1024 * 1024); + { + PageHeapSpinLockHolder l; + Printer printer(&*buffer.begin(), buffer.size()); + filler_.Print(printer, true); } - for (const PAlloc& alloc : regular) { - Delete(alloc); + buffer.resize(strlen(buffer.c_str())); + EXPECT_THAT(buffer, + testing::HasSubstr("HugePageFiller: 1 hugepages became full " + "after being previously released, " + "out of which 0 pages are hugepage backed.")); + + // Release everything and cleanup. + DeleteVector(half); + DeleteVector(tiny1); + DeleteVector(tiny2); + EXPECT_EQ(filler_.previously_released_huge_pages(), NHugePages(0)); + buffer.resize(1024 * 1024); + { + PageHeapSpinLockHolder l; + Printer printer(&*buffer.begin(), buffer.size()); + filler_.Print(printer, true); } + buffer.resize(strlen(buffer.c_str())); + EXPECT_THAT(buffer, + testing::HasSubstr("HugePageFiller: 0 hugepages became full " + "after being previously released, " + "out of which 0 pages are hugepage backed.")); } -TEST_P(FillerTest, ParallelUnlockingSubrelease) { - if (GetParam() == FillerPartialRerelease::Retain) { - // When rerelease happens without going to Unback(), this test - // (intentionally) deadlocks, as we never receive the call. - return; - } +// Make sure that previously_released_huge_pages stat is correct when a huge +// page toggles from full -> released -> full -> released. 
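// Sketch of a helper equivalent to the print-and-trim sequence these tests
// repeat inline (DumpFillerStats is an invented name; the calls simply mirror
// the inline pattern used in CheckPreviouslyReleasedStats): print everything
// under the page heap lock into a fixed-size buffer, then trim the buffer at
// the first NUL terminator.
template <typename FillerT>
std::string DumpFillerStats(FillerT& filler) {
  std::string buffer(1024 * 1024, '\0');
  {
    PageHeapSpinLockHolder l;
    Printer printer(&*buffer.begin(), buffer.size());
    filler.Print(printer, /*everything=*/true);
  }
  buffer.resize(strlen(buffer.c_str()));
  return buffer;
}
// The test below drives one hugepage through the full -> released -> full ->
// released toggle described above.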
+TEST_P(FillerTest, CheckFullReleasedFullReleasedState) { + const Length N = kPagesPerHugePage; + auto half = AllocateVector(N / 2); + ASSERT_TRUE(!half.empty()); + ASSERT_EQ(filler_.size(), NHugePages(1)); - // Verify that we can deallocate a partial huge page and successfully unlock - // the pageheap_lock without introducing race conditions around the metadata - // for PageTracker::released_. - // - // Currently, HPAA unbacks *all* subsequent deallocations to a huge page once - // we have broken up *any* part of it. - // - // If multiple deallocations are in-flight, we need to leave sufficient - // breadcrumbs to ourselves (PageTracker::releasing_ is a Length, not a bool) - // so that one deallocation completing does not have us "forget" that another - // deallocation is about to unback other parts of the hugepage. - // - // If PageTracker::releasing_ were a bool, the completion of "t1" and - // subsequent reallocation of "a2" in this test would mark the entirety of the - // page as full, so we would choose to *not* unback a2 (when deallocated) or - // a3 (when deallocated by t3). - constexpr Length N = kPagesPerHugePage; + // We should be able to release the N/2 pages that are free. + EXPECT_EQ(ReleasePages(kMaxValidPages), N / 2); + EXPECT_EQ(filler_.previously_released_huge_pages(), NHugePages(0)); - auto a1 = AllocateRaw(N / 2); - auto a2 = AllocateRaw(Length(1)); - auto a3 = AllocateRaw(Length(1)); + std::string buffer(1024 * 1024, '\0'); + { + PageHeapSpinLockHolder l; + Printer printer(&*buffer.begin(), buffer.size()); + filler_.Print(printer, true); + } + buffer.resize(strlen(buffer.c_str())); + EXPECT_THAT(buffer, + testing::HasSubstr("HugePageFiller: 0 hugepages became full " + "after being previously released, " + "out of which 0 pages are hugepage backed.")); + + // Repopulate. + auto half1 = + AllocateVectorWithSpanAllocInfo(N / 2, half.front().span_alloc_info); + EXPECT_EQ(ReleasePages(kMaxValidPages), Length(0)); + EXPECT_EQ(filler_.previously_released_huge_pages(), NHugePages(1)); + buffer.resize(1024 * 1024); + { + PageHeapSpinLockHolder l; + Printer printer(&*buffer.begin(), buffer.size()); + filler_.Print(printer, true); + } - // Trigger subrelease. The filler now has a partial hugepage, so subsequent - // calls to Delete() will cause us to unback the remainder of it. - EXPECT_GT(ReleasePages(kMaxValidPages), Length(0)); + buffer.resize(strlen(buffer.c_str())); + EXPECT_THAT(buffer, + testing::HasSubstr("HugePageFiller: 1 hugepages became full " + "after being previously released, " + "out of which 0 pages are hugepage backed.")); - auto m1 = absl::make_unique(); - auto m2 = absl::make_unique(); + // Release again. + DeleteVector(half1); + EXPECT_EQ(ReleasePages(kMaxValidPages), N / 2); + EXPECT_EQ(filler_.previously_released_huge_pages(), NHugePages(0)); - m1->Lock(); - m2->Lock(); + buffer.resize(1024 * 1024); + { + PageHeapSpinLockHolder l; + Printer printer(&*buffer.begin(), buffer.size()); + filler_.Print(printer, true); + } + buffer.resize(strlen(buffer.c_str())); + EXPECT_THAT(buffer, - absl::BlockingCounter counter(2); - BlockingUnback::counter = &counter; + testing::HasSubstr("HugePageFiller: 0 hugepages became full " + "after being previously released, " + "out of which 0 pages are hugepage backed.")); - std::thread t1([&]() { - BlockingUnback::set_lock(m1.get()); + // Release everything and cleanup. 
+ DeleteVector(half); + EXPECT_EQ(filler_.previously_released_huge_pages(), NHugePages(0)); + buffer.resize(1024 * 1024); + { + PageHeapSpinLockHolder l; + Printer printer(&*buffer.begin(), buffer.size()); + filler_.Print(printer, true); + } + buffer.resize(strlen(buffer.c_str())); + EXPECT_THAT(buffer, + testing::HasSubstr("HugePageFiller: 0 hugepages became full " + "after being previously released, " + "out of which 0 pages are hugepage backed.")); +} - DeleteRaw(a2); - }); +TEST_P(FillerTest, AvoidArbitraryQuarantineVMGrowth) { + const Length N = kPagesPerHugePage; + // Guarantee we have a ton of released pages go empty. + for (int i = 0; i < 10 * 1000; ++i) { + auto half1 = AllocateVector(N / 2); + auto half2 = AllocateVector(N / 2); + DeleteVector(half1); + ASSERT_EQ(ReleasePages(N / 2), N / 2); + DeleteVector(half2); + } - std::thread t2([&]() { - BlockingUnback::set_lock(m2.get()); + auto s = filler_.stats(); + EXPECT_LE(s.system_bytes, 1024 * 1024 * 1024); +} - DeleteRaw(a3); - }); +TEST_P(FillerTest, StronglyPreferNonDonated) { + // We donate several huge pages of varying fullnesses. Then we make several + // allocations that would be perfect fits for the donated hugepages, *after* + // making one allocation that won't fit, to ensure that a huge page is + // contributed normally. Finally, we verify that we can still get the + // donated huge pages back. (I.e. they weren't used.) + std::vector> donated; + SpanAllocInfo info = {1, AccessDensityPrediction::kSparse}; + ASSERT_GE(kPagesPerHugePage, Length(10)); + for (auto i = Length(1); i <= Length(3); ++i) { + donated.push_back(AllocateVectorWithSpanAllocInfo(kPagesPerHugePage - i, + info, + /*donated=*/true)); + } - // Wait for t1 and t2 to block. - counter.Wait(); + std::vector> regular; + // Only sparsely-accessed spans are allocated from donated hugepages. So + // create a hugepage with a sparsely-accessed span. The test should prefer + // this hugepage for sparsely-accessed spans and should allocate a new + // hugepage for densely-accessed spans. + regular.push_back(AllocateVectorWithSpanAllocInfo(Length(4), info)); - // At this point, t1 and t2 are blocked (as if they were on a long-running - // syscall) on "unback" (m1 and m2, respectively). pageheap_lock is not held. - // - // Allocating a4 will complete the hugepage, but we have on-going releaser - // threads. - auto a4 = AllocateRaw((N / 2) - Length(2)); - EXPECT_EQ(NHugePages(1), filler_.size()); - - // Let one of the threads proceed. The huge page consists of: - // * a1 (N/2 ): Allocated - // * a2 ( 1): Unbacked - // * a3 ( 1): Unbacking (blocked on m2) - // * a4 (N/2-2): Allocated - m1->Unlock(); - t1.join(); - - // Reallocate a2. We should still consider the huge page partially backed for - // purposes of subreleasing. - a2 = AllocateRaw(Length(1)); - EXPECT_EQ(NHugePages(1), filler_.size()); - DeleteRaw(a2); - - // Let the other thread proceed. The huge page consists of: - // * a1 (N/2 ): Allocated - // * a2 ( 1): Unbacked - // * a3 ( 1): Unbacked - // * a4 (N/2-2): Allocated - m2->Unlock(); - t2.join(); - - EXPECT_EQ(filler_.used_pages(), N - Length(2)); - EXPECT_EQ(filler_.unmapped_pages(), Length(2)); - EXPECT_EQ(filler_.free_pages(), Length(0)); + for (auto i = Length(3); i >= Length(1); --i) { + regular.push_back(AllocateVector(i)); + } - // Clean up. - DeleteRaw(a1); - DeleteRaw(a4); + for (const std::vector& alloc : donated) { + // All the donated huge pages should be freeable. 
+ EXPECT_TRUE(DeleteVector(alloc)); + } - BlockingUnback::counter = nullptr; + for (const std::vector& alloc : regular) { + DeleteVector(alloc); + } } -TEST_P(FillerTest, SkipSubrelease) { +TEST_P(FillerTest, SkipPartialAllocSubrelease) { // This test is sensitive to the number of pages per hugepage, as we are // printing raw stats. if (kPagesPerHugePage != Length(256)) { GTEST_SKIP(); } + if (std::get<0>(GetParam()) == + HugePageFillerDenseTrackerType::kSpansAllocated) { + GTEST_SKIP() << "Skipping test for kSpansAllocated"; + } - // Generate a peak, wait for time interval a, generate a trough, subrelease, - // wait for time interval b, generate another peak. - const auto peak_trough_peak = [&](absl::Duration a, absl::Duration b, - absl::Duration peak_interval, - bool expected_subrelease) { + // Firstly, this test generates a peak (long-term demand peak) and waits for + // time interval a. Then, it generates a higher peak plus a short-term + // fluctuation peak, and waits for time interval b. It then generates a trough + // in demand and tries to subrelease. Finally, it waits for time interval c to + // generate the highest peak for evaluating subrelease correctness. Skip + // subrelease selects those demand points using provided time intervals. + const auto demand_pattern = [&](absl::Duration a, absl::Duration b, + absl::Duration c, + SkipSubreleaseIntervals intervals, + bool expected_subrelease) { const Length N = kPagesPerHugePage; - PAlloc half = Allocate(N / 2); - PAlloc tiny1 = Allocate(N / 4); - PAlloc tiny2 = Allocate(N / 4); - - // To force a peak, we allocate 3/4 and 1/4 of a huge page. This is - // necessary after we delete `half` below, as a half huge page for the peak - // would fill into the gap previously occupied by it. + // First peak: min_demand 3/4N, max_demand 1N. PAlloc peak1a = Allocate(3 * N / 4); - PAlloc peak1b = Allocate(N / 4); - EXPECT_EQ(filler_.used_pages(), 2 * N); + PAlloc peak1b = AllocateWithSpanAllocInfo(N / 4, peak1a.span_alloc_info); + Advance(a); + // Second peak: min_demand 0, max_demand 2N. Delete(peak1a); Delete(peak1b); - Advance(a); - - Delete(half); - - EXPECT_EQ(expected_subrelease ? N / 2 : Length(0), - ReleasePages(10 * N, peak_interval)); - Advance(b); + PAlloc half = Allocate(N / 2); + PAlloc tiny1 = AllocateWithSpanAllocInfo(N / 4, half.span_alloc_info); + PAlloc tiny2 = AllocateWithSpanAllocInfo(N / 4, half.span_alloc_info); + // To force a peak, we allocate 3/4 and 1/4 of a huge page. This is + // necessary after we delete `half` below, as a half huge page for the + // peak would fill into the gap previously occupied by it. PAlloc peak2a = Allocate(3 * N / 4); - PAlloc peak2b = Allocate(N / 4); + PAlloc peak2b = AllocateWithSpanAllocInfo(N / 4, peak2a.span_alloc_info); + EXPECT_EQ(filler_.used_pages(), 2 * N); + Delete(peak2a); + Delete(peak2b); + Advance(b); + Delete(half); + EXPECT_EQ(filler_.free_pages(), Length(N / 2)); + // The number of released pages is limited to the number of free pages. + EXPECT_EQ(expected_subrelease ? N / 2 : Length(0), + ReleasePartialPages(10 * N, intervals)); + Advance(c); + // Third peak: min_demand 1/2N, max_demand (2+1/2)N. 
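    // (Sketch of the arithmetic behind that max_demand: tiny1 and tiny2 keep
    // N / 2 pages live while peak3 and peak4 add N each, giving (2 + 1/2)N.)
    EXPECT_EQ(N / 2 + N + N, 2 * N + N / 2);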
PAlloc peak3a = Allocate(3 * N / 4); - PAlloc peak3b = Allocate(N / 4); + PAlloc peak3b = AllocateWithSpanAllocInfo(N / 4, peak3a.span_alloc_info); + + PAlloc peak4a = Allocate(3 * N / 4); + PAlloc peak4b = AllocateWithSpanAllocInfo(N / 4, peak4a.span_alloc_info); Delete(tiny1); Delete(tiny2); - Delete(peak2a); - Delete(peak2b); Delete(peak3a); Delete(peak3b); + Delete(peak4a); + Delete(peak4b); EXPECT_EQ(filler_.used_pages(), Length(0)); EXPECT_EQ(filler_.unmapped_pages(), Length(0)); EXPECT_EQ(filler_.free_pages(), Length(0)); - EXPECT_EQ(expected_subrelease ? N / 2 : Length(0), ReleasePages(10 * N)); + EXPECT_EQ(expected_subrelease ? N / 2 : Length(0), + ReleasePartialPages(10 * N)); }; { - SCOPED_TRACE("peak-trough-peak 1"); - peak_trough_peak(absl::Minutes(2), absl::Minutes(2), absl::Minutes(3), - false); + // Skip subrelease feature is disabled if all intervals are zero. + SCOPED_TRACE("demand_pattern 1"); + demand_pattern(absl::Minutes(1), absl::Minutes(1), absl::Minutes(4), + SkipSubreleaseIntervals{}, true); } Advance(absl::Minutes(30)); { - SCOPED_TRACE("peak-trough-peak 2"); - peak_trough_peak(absl::Minutes(2), absl::Minutes(7), absl::Minutes(3), - false); + // Uses short-term and long-term intervals for skipping subrelease. It + // incorrectly skips 128 pages. + SCOPED_TRACE("demand_pattern 2"); + demand_pattern(absl::Minutes(3), absl::Minutes(2), absl::Minutes(7), + SkipSubreleaseIntervals{.short_interval = absl::Minutes(3), + .long_interval = absl::Minutes(6)}, + false); } Advance(absl::Minutes(30)); { - SCOPED_TRACE("peak-trough-peak 3"); - peak_trough_peak(absl::Minutes(5), absl::Minutes(3), absl::Minutes(2), - true); + // Uses short-term and long-term intervals for skipping subrelease, + // subreleasing all free pages. + SCOPED_TRACE("demand_pattern 3"); + demand_pattern(absl::Minutes(4), absl::Minutes(2), absl::Minutes(3), + SkipSubreleaseIntervals{.short_interval = absl::Minutes(1), + .long_interval = absl::Minutes(2)}, + true); + } + Advance(absl::Minutes(30)); + + { + // Uses only short-term interval for skipping subrelease. It correctly + // skips 128 pages. + SCOPED_TRACE("demand_pattern 4"); + demand_pattern(absl::Minutes(4), absl::Minutes(2), absl::Minutes(3), + SkipSubreleaseIntervals{.short_interval = absl::Minutes(3)}, + false); } Advance(absl::Minutes(30)); - // This captures a corner case: If we hit another peak immediately after a - // subrelease decision (in the same time series epoch), do not count this as - // a correct subrelease decision. { - SCOPED_TRACE("peak-trough-peak 4"); - peak_trough_peak(absl::Milliseconds(10), absl::Milliseconds(10), - absl::Minutes(2), false); + // Uses only long-term interval for skipping subrelease, subreleased all + // free pages. + SCOPED_TRACE("demand_pattern 5"); + demand_pattern(absl::Minutes(4), absl::Minutes(2), absl::Minutes(3), + SkipSubreleaseIntervals{.long_interval = absl::Minutes(2)}, + true); + } + + Advance(absl::Minutes(30)); + + // Repeats the "demand_pattern 9" test using short-term and long-term + // intervals, to show that subrelease decisions are evaluated independently. 
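  // Aside, before the repeated pattern below: a sketch (invented names,
  // simplified math) of the quantity those intervals feed into, matching the
  // "sum of short-term fluctuations and long-term trends" wording checked at
  // the end of this test. Free pages needed to cover the predicted demand are
  // retained; only the excess is eligible for subrelease.
  auto pages_safe_to_release = [](Length used, Length free_pages,
                                  Length short_term_fluctuation,
                                  Length long_term_trend) {
    const Length predicted_demand = short_term_fluctuation + long_term_trend;
    if (predicted_demand <= used) return free_pages;
    const Length headroom = predicted_demand - used;
    return headroom >= free_pages ? Length(0) : free_pages - headroom;
  };
  (void)pages_safe_to_release;  // Illustration only; not used by the checks below.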
+ { + SCOPED_TRACE("demand_pattern 6"); + demand_pattern(absl::Milliseconds(10), absl::Milliseconds(10), + absl::Milliseconds(10), + SkipSubreleaseIntervals{.short_interval = absl::Minutes(1), + .long_interval = absl::Minutes(2)}, + false); } Advance(absl::Minutes(30)); @@ -1429,2099 +2003,1032 @@ TEST_P(FillerTest, SkipSubrelease) { std::string buffer(1024 * 1024, '\0'); { + PageHeapSpinLockHolder l; Printer printer(&*buffer.begin(), buffer.size()); - filler_.Print(&printer, true); + filler_.Print(printer, true); } buffer.resize(strlen(buffer.c_str())); - EXPECT_THAT(buffer, testing::HasSubstr(R"( -HugePageFiller: Since the start of the execution, 4 subreleases (512 pages) were skipped due to recent (120s) peaks. -HugePageFiller: 25.0000% of decisions confirmed correct, 0 pending (25.0000% of pages, 0 pending). + if (!dense_tracker_sorted_on_allocs_) { + EXPECT_THAT(buffer, testing::HasSubstr(R"( +HugePageFiller: Since the start of the execution, 3 subreleases (384 pages) were skipped due to the sum of short-term (60s) fluctuations and long-term (120s) trends. +HugePageFiller: 33.3333% of decisions confirmed correct, 0 pending (33.3333% of pages, 0 pending). )")); + } } -class FillerStatsTrackerTest : public testing::Test { - private: - static int64_t clock_; - static int64_t FakeClock() { return clock_; } - static double GetFakeClockFrequency() { - return absl::ToDoubleNanoseconds(absl::Seconds(2)); +TEST_P(FillerTest, SkipPartialAllocSubrelease_SpansAllocated) { + // This test is sensitive to the number of pages per hugepage, as we are + // printing raw stats. + if (kPagesPerHugePage != Length(256)) { + GTEST_SKIP(); } + if (std::get<0>(GetParam()) != + HugePageFillerDenseTrackerType::kSpansAllocated) { + GTEST_SKIP() << "Skipping test for !kSpansAllocated"; + } + randomize_density_ = false; + SpanAllocInfo info = {kPagesPerHugePage.raw_num(), + AccessDensityPrediction::kDense}; + + // Firstly, this test generates a peak (long-term demand peak) and waits for + // time interval a. Then, it generates a higher peak plus a short-term + // fluctuation peak, and waits for time interval b. It then generates a trough + // in demand and tries to subrelease. Finally, it waits for time interval c to + // generate the highest peak for evaluating subrelease correctness. Skip + // subrelease selects those demand points using provided time intervals. + const auto demand_pattern = [&](absl::Duration a, absl::Duration b, + absl::Duration c, + SkipSubreleaseIntervals intervals, + bool expected_subrelease) { + const Length N = kPagesPerHugePage; + // First peak: min_demand 3/4N, max_demand 1N. + std::vector peak1a = + AllocateVectorWithSpanAllocInfo(3 * N / 4, info); + ASSERT_TRUE(!peak1a.empty()); + std::vector peak1b = + AllocateVectorWithSpanAllocInfo(N / 4, peak1a.front().span_alloc_info); + Advance(a); + // Second peak: min_demand 0, max_demand 2N. 
+ DeleteVector(peak1a); + DeleteVector(peak1b); - protected: - static constexpr absl::Duration kWindow = absl::Minutes(10); - - using StatsTrackerType = FillerStatsTracker<16>; - StatsTrackerType tracker_{ - Clock{.now = FakeClock, .freq = GetFakeClockFrequency}, kWindow, - absl::Minutes(5)}; + std::vector half = AllocateVectorWithSpanAllocInfo(N / 2, info); + ASSERT_TRUE(!half.empty()); + std::vector tiny1 = + AllocateVectorWithSpanAllocInfo(N / 4, half.front().span_alloc_info); + std::vector tiny2 = + AllocateVectorWithSpanAllocInfo(N / 4, half.front().span_alloc_info); - void Advance(absl::Duration d) { - clock_ += static_cast(absl::ToDoubleSeconds(d) * - GetFakeClockFrequency()); - } + // To force a peak, we allocate 3/4 and 1/4 of a huge page. This is + // necessary after we delete `half` below, as a half huge page for the + // peak would fill into the gap previously occupied by it. + std::vector peak2a = + AllocateVectorWithSpanAllocInfo(3 * N / 4, info); + ASSERT_TRUE(!peak2a.empty()); + std::vector peak2b = + AllocateVectorWithSpanAllocInfo(N / 4, peak2a.front().span_alloc_info); + EXPECT_EQ(filler_.used_pages(), 2 * N); + DeleteVector(peak2a); + DeleteVector(peak2b); + Advance(b); + DeleteVector(half); + EXPECT_EQ(filler_.free_pages(), Length(N / 2)); + // The number of released pages is limited to the number of free pages. + EXPECT_EQ(expected_subrelease ? N / 2 : Length(0), + ReleasePartialPages(10 * N, intervals)); + + Advance(c); + half = AllocateVectorWithSpanAllocInfo(N / 2, half.front().span_alloc_info); + // Third peak: min_demand 1/2N, max_demand (2+1/2)N. + std::vector peak3a = + AllocateVectorWithSpanAllocInfo(3 * N / 4, info); + ASSERT_TRUE(!peak3a.empty()); + std::vector peak3b = + AllocateVectorWithSpanAllocInfo(N / 4, peak3a.front().span_alloc_info); + + std::vector peak4a = + AllocateVectorWithSpanAllocInfo(3 * N / 4, info); + ASSERT_TRUE(!peak4a.empty()); + std::vector peak4b = + AllocateVectorWithSpanAllocInfo(N / 4, peak4a.front().span_alloc_info); + + DeleteVector(half); + DeleteVector(tiny1); + DeleteVector(tiny2); + DeleteVector(peak3a); + DeleteVector(peak3b); + DeleteVector(peak4a); + DeleteVector(peak4b); - // Generates four data points for the tracker that represent "interesting" - // points (i.e., min/max pages demand, min/max hugepages). - void GenerateInterestingPoints(Length num_pages, HugeLength num_hugepages, - Length num_free_pages); + EXPECT_EQ(filler_.used_pages(), Length(0)); + EXPECT_EQ(filler_.unmapped_pages(), Length(0)); + EXPECT_EQ(filler_.free_pages(), Length(0)); - // Generates a data point with a particular amount of demand pages, while - // ignoring the specific number of hugepages. - void GenerateDemandPoint(Length num_pages, Length num_free_pages); -}; + EXPECT_EQ(Length(0), ReleasePartialPages(10 * N)); + }; -int64_t FillerStatsTrackerTest::clock_{0}; - -void FillerStatsTrackerTest::GenerateInterestingPoints(Length num_pages, - HugeLength num_hugepages, - Length num_free_pages) { - for (int i = 0; i <= 1; ++i) { - for (int j = 0; j <= 1; ++j) { - StatsTrackerType::FillerStats stats; - stats.num_pages = num_pages + Length((i == 0) ? 4 : 8 * j); - stats.free_pages = num_free_pages + Length(10 * i + j); - stats.unmapped_pages = Length(10); - stats.used_pages_in_subreleased_huge_pages = num_pages; - stats.huge_pages[StatsTrackerType::kRegular] = - num_hugepages + ((i == 1) ? 
NHugePages(4) : NHugePages(8) * j); - stats.huge_pages[StatsTrackerType::kDonated] = num_hugepages; - stats.huge_pages[StatsTrackerType::kPartialReleased] = NHugePages(i); - stats.huge_pages[StatsTrackerType::kReleased] = NHugePages(j); - tracker_.Report(stats); - } + { + // Skip subrelease feature is disabled if all intervals are zero. + SCOPED_TRACE("demand_pattern 1"); + demand_pattern(absl::Minutes(1), absl::Minutes(1), absl::Minutes(4), + SkipSubreleaseIntervals{}, true); } -} -void FillerStatsTrackerTest::GenerateDemandPoint(Length num_pages, - Length num_free_pages) { - HugeLength hp = NHugePages(1); - StatsTrackerType::FillerStats stats; - stats.num_pages = num_pages; - stats.free_pages = num_free_pages; - stats.unmapped_pages = Length(0); - stats.used_pages_in_subreleased_huge_pages = Length(0); - stats.huge_pages[StatsTrackerType::kRegular] = hp; - stats.huge_pages[StatsTrackerType::kDonated] = hp; - stats.huge_pages[StatsTrackerType::kPartialReleased] = hp; - stats.huge_pages[StatsTrackerType::kReleased] = hp; - tracker_.Report(stats); -} + Advance(absl::Minutes(30)); -// Tests that the tracker aggregates all data correctly. The output is tested by -// comparing the text output of the tracker. While this is a bit verbose, it is -// much cleaner than extracting and comparing all data manually. -TEST_F(FillerStatsTrackerTest, Works) { - // Ensure that the beginning (when free pages are 0) is outside the 5-min - // window the instrumentation is recording. - GenerateInterestingPoints(Length(1), NHugePages(1), Length(1)); - Advance(absl::Minutes(5)); + { + // Uses short-term and long-term intervals for skipping subrelease. It + // incorrectly skips 128 pages. + SCOPED_TRACE("demand_pattern 2"); + demand_pattern(absl::Minutes(3), absl::Minutes(2), absl::Minutes(7), + SkipSubreleaseIntervals{.short_interval = absl::Minutes(3), + .long_interval = absl::Minutes(6)}, + false); + } - GenerateInterestingPoints(Length(100), NHugePages(5), Length(200)); + Advance(absl::Minutes(30)); - Advance(absl::Minutes(1)); + { + // Uses short-term and long-term intervals for skipping subrelease, + // subreleasing all free pages. + SCOPED_TRACE("demand_pattern 3"); + demand_pattern(absl::Minutes(4), absl::Minutes(2), absl::Minutes(3), + SkipSubreleaseIntervals{.short_interval = absl::Minutes(1), + .long_interval = absl::Minutes(2)}, + true); + } + Advance(absl::Minutes(30)); - GenerateInterestingPoints(Length(200), NHugePages(10), Length(100)); + { + // Uses only short-term interval for skipping subrelease. It correctly + // skips 128 pages. + SCOPED_TRACE("demand_pattern 4"); + demand_pattern(absl::Minutes(4), absl::Minutes(2), absl::Minutes(3), + SkipSubreleaseIntervals{.short_interval = absl::Minutes(3)}, + false); + } - Advance(absl::Minutes(1)); + Advance(absl::Minutes(30)); - // Test text output (time series summary). { - std::string buffer(1024 * 1024, '\0'); - Printer printer(&*buffer.begin(), buffer.size()); - { - tracker_.Print(&printer); - buffer.erase(printer.SpaceRequired()); - } - - EXPECT_THAT(buffer, StrEq(R"(HugePageFiller: time series over 5 min interval + // Uses only long-term interval for skipping subrelease, subreleased all + // free pages. 
+ SCOPED_TRACE("demand_pattern 5"); + demand_pattern(absl::Minutes(4), absl::Minutes(2), absl::Minutes(3), + SkipSubreleaseIntervals{.long_interval = absl::Minutes(2)}, + true); + } -HugePageFiller: realized fragmentation: 0.8 MiB -HugePageFiller: minimum free pages: 110 (100 backed) -HugePageFiller: at peak demand: 208 pages (and 111 free, 10 unmapped) -HugePageFiller: at peak demand: 26 hps (14 regular, 10 donated, 1 partial, 1 released) -HugePageFiller: at peak hps: 208 pages (and 111 free, 10 unmapped) -HugePageFiller: at peak hps: 26 hps (14 regular, 10 donated, 1 partial, 1 released) + Advance(absl::Minutes(30)); -HugePageFiller: Since the start of the execution, 0 subreleases (0 pages) were skipped due to recent (0s) peaks. -HugePageFiller: 0.0000% of decisions confirmed correct, 0 pending (0.0000% of pages, 0 pending). -HugePageFiller: Subrelease stats last 10 min: total 0 pages subreleased, 0 hugepages broken -)")); + // This captures a corner case: If we hit another peak immediately after a + // subrelease decision (in the same time series epoch), do not count this as + // a correct subrelease decision. + { + SCOPED_TRACE("demand_pattern 6"); + demand_pattern(absl::Milliseconds(10), absl::Milliseconds(10), + absl::Milliseconds(10), + SkipSubreleaseIntervals{.short_interval = absl::Minutes(1), + .long_interval = absl::Minutes(2)}, + false); } - // Test pbtxt output (full time series). + Advance(absl::Minutes(30)); + + // Ensure that the tracker is updated. + auto tiny = Allocate(Length(1)); + Delete(tiny); + + std::string buffer(1024 * 1024, '\0'); { - std::string buffer(1024 * 1024, '\0'); + PageHeapSpinLockHolder l; Printer printer(&*buffer.begin(), buffer.size()); - { - PbtxtRegion region(&printer, kTop, /*indent=*/0); - tracker_.PrintInPbtxt(®ion); - } - buffer.erase(printer.SpaceRequired()); - - EXPECT_THAT(buffer, StrEq(R"( - filler_skipped_subrelease { - skipped_subrelease_interval_ms: 0 - skipped_subrelease_pages: 0 - correctly_skipped_subrelease_pages: 0 - pending_skipped_subrelease_pages: 0 - skipped_subrelease_count: 0 - correctly_skipped_subrelease_count: 0 - pending_skipped_subrelease_count: 0 - } - filler_stats_timeseries { - window_ms: 37500 - epochs: 16 - min_free_pages_interval_ms: 300000 - min_free_pages: 110 - min_free_backed_pages: 100 - measurements { - epoch: 6 - timestamp_ms: 0 - min_free_pages: 11 - min_free_backed_pages: 1 - num_pages_subreleased: 0 - num_hugepages_broken: 0 - at_minimum_demand { - num_pages: 1 - regular_huge_pages: 5 - donated_huge_pages: 1 - partial_released_huge_pages: 1 - released_huge_pages: 0 - used_pages_in_subreleased_huge_pages: 1 - } - at_maximum_demand { - num_pages: 9 - regular_huge_pages: 5 - donated_huge_pages: 1 - partial_released_huge_pages: 1 - released_huge_pages: 1 - used_pages_in_subreleased_huge_pages: 1 - } - at_minimum_huge_pages { - num_pages: 5 - regular_huge_pages: 1 - donated_huge_pages: 1 - partial_released_huge_pages: 0 - released_huge_pages: 0 - used_pages_in_subreleased_huge_pages: 1 - } - at_maximum_huge_pages { - num_pages: 5 - regular_huge_pages: 9 - donated_huge_pages: 1 - partial_released_huge_pages: 0 - released_huge_pages: 1 - used_pages_in_subreleased_huge_pages: 1 - } - } - measurements { - epoch: 14 - timestamp_ms: 300000 - min_free_pages: 210 - min_free_backed_pages: 200 - num_pages_subreleased: 0 - num_hugepages_broken: 0 - at_minimum_demand { - num_pages: 100 - regular_huge_pages: 9 - donated_huge_pages: 5 - partial_released_huge_pages: 1 - released_huge_pages: 0 - 
used_pages_in_subreleased_huge_pages: 100 - } - at_maximum_demand { - num_pages: 108 - regular_huge_pages: 9 - donated_huge_pages: 5 - partial_released_huge_pages: 1 - released_huge_pages: 1 - used_pages_in_subreleased_huge_pages: 100 - } - at_minimum_huge_pages { - num_pages: 104 - regular_huge_pages: 5 - donated_huge_pages: 5 - partial_released_huge_pages: 0 - released_huge_pages: 0 - used_pages_in_subreleased_huge_pages: 100 - } - at_maximum_huge_pages { - num_pages: 104 - regular_huge_pages: 13 - donated_huge_pages: 5 - partial_released_huge_pages: 0 - released_huge_pages: 1 - used_pages_in_subreleased_huge_pages: 100 - } - } - measurements { - epoch: 15 - timestamp_ms: 337500 - min_free_pages: 110 - min_free_backed_pages: 100 - num_pages_subreleased: 0 - num_hugepages_broken: 0 - at_minimum_demand { - num_pages: 200 - regular_huge_pages: 14 - donated_huge_pages: 10 - partial_released_huge_pages: 1 - released_huge_pages: 0 - used_pages_in_subreleased_huge_pages: 200 - } - at_maximum_demand { - num_pages: 208 - regular_huge_pages: 14 - donated_huge_pages: 10 - partial_released_huge_pages: 1 - released_huge_pages: 1 - used_pages_in_subreleased_huge_pages: 200 - } - at_minimum_huge_pages { - num_pages: 204 - regular_huge_pages: 10 - donated_huge_pages: 10 - partial_released_huge_pages: 0 - released_huge_pages: 0 - used_pages_in_subreleased_huge_pages: 200 - } - at_maximum_huge_pages { - num_pages: 204 - regular_huge_pages: 18 - donated_huge_pages: 10 - partial_released_huge_pages: 0 - released_huge_pages: 1 - used_pages_in_subreleased_huge_pages: 200 - } - } + filler_.Print(printer, true); } + buffer.resize(strlen(buffer.c_str())); + + if (!dense_tracker_sorted_on_allocs_) { + EXPECT_THAT(buffer, testing::HasSubstr(R"( +HugePageFiller: Since the start of the execution, 3 subreleases (384 pages) were skipped due to the sum of short-term (60s) fluctuations and long-term (120s) trends. +HugePageFiller: 33.3333% of decisions confirmed correct, 0 pending (33.3333% of pages, 0 pending). )")); } } -TEST_F(FillerStatsTrackerTest, InvalidDurations) { - // These should not crash. 
- tracker_.min_free_pages(absl::InfiniteDuration()); - tracker_.min_free_pages(kWindow + absl::Seconds(1)); - tracker_.min_free_pages(-(kWindow + absl::Seconds(1))); - tracker_.min_free_pages(-absl::InfiniteDuration()); -} - -TEST_F(FillerStatsTrackerTest, ComputeRecentPeaks) { - GenerateDemandPoint(Length(3000), Length(1000)); - Advance(absl::Minutes(1.25)); - GenerateDemandPoint(Length(1500), Length(0)); - Advance(absl::Minutes(1)); - GenerateDemandPoint(Length(100), Length(2000)); - Advance(absl::Minutes(1)); - GenerateDemandPoint(Length(200), Length(3000)); - - GenerateDemandPoint(Length(200), Length(3000)); - FillerStatsTracker<>::FillerStats stats = - tracker_.GetRecentPeak(absl::Minutes(3)); - EXPECT_EQ(stats.num_pages, Length(1500)); - EXPECT_EQ(stats.free_pages, Length(0)); - - FillerStatsTracker<>::FillerStats stats2 = - tracker_.GetRecentPeak(absl::Minutes(5)); - EXPECT_EQ(stats2.num_pages, Length(3000)); - EXPECT_EQ(stats2.free_pages, Length(1000)); - - Advance(absl::Minutes(4)); - GenerateDemandPoint(Length(200), Length(3000)); - - FillerStatsTracker<>::FillerStats stats3 = - tracker_.GetRecentPeak(absl::Minutes(4)); - EXPECT_EQ(stats3.num_pages, Length(200)); - EXPECT_EQ(stats3.free_pages, Length(3000)); - - Advance(absl::Minutes(5)); - GenerateDemandPoint(Length(200), Length(3000)); - - FillerStatsTracker<>::FillerStats stats4 = - tracker_.GetRecentPeak(absl::Minutes(5)); - EXPECT_EQ(stats4.num_pages, Length(200)); - EXPECT_EQ(stats4.free_pages, Length(3000)); -} +TEST_P(FillerTest, SkipSubrelease) { + // This test is sensitive to the number of pages per hugepage, as we are + // printing raw stats. + if (kPagesPerHugePage != Length(256)) { + GTEST_SKIP(); + } + if (std::get<0>(GetParam()) == + HugePageFillerDenseTrackerType::kSpansAllocated) { + GTEST_SKIP() << "Skipping test for kSpansAllocated"; + } -TEST_F(FillerStatsTrackerTest, TrackCorrectSubreleaseDecisions) { - // First peak (large) - GenerateDemandPoint(Length(1000), Length(1000)); + // Firstly, this test generates a peak (long-term demand peak) and waits for + // time interval a. Then, it generates a higher peak plus a short-term + // fluctuation peak, and waits for time interval b. It then generates a trough + // in demand and tries to subrelease. Finally, it waits for time interval c to + // generate the highest peak for evaluating subrelease correctness. Skip + // subrelease selects those demand points using provided time intervals. + const auto demand_pattern = [&](absl::Duration a, absl::Duration b, + absl::Duration c, + SkipSubreleaseIntervals intervals, + bool expected_subrelease) { + const Length N = kPagesPerHugePage; + // First peak: min_demand 3/4N, max_demand 1N. + PAlloc peak1a = Allocate(3 * N / 4); + PAlloc peak1b = AllocateWithSpanAllocInfo(N / 4, peak1a.span_alloc_info); + Advance(a); + // Second peak: min_demand 0, max_demand 2N. + Delete(peak1a); + Delete(peak1b); - // Incorrect subrelease: Subrelease to 1000 - Advance(absl::Minutes(1)); - GenerateDemandPoint(Length(100), Length(1000)); - tracker_.ReportSkippedSubreleasePages(Length(900), Length(1000), - absl::Minutes(3)); + PAlloc half = Allocate(N / 2); + PAlloc tiny1 = AllocateWithSpanAllocInfo(N / 4, half.span_alloc_info); + PAlloc tiny2 = AllocateWithSpanAllocInfo(N / 4, half.span_alloc_info); - // Second peak (small) - Advance(absl::Minutes(1)); - GenerateDemandPoint(Length(500), Length(1000)); + // To force a peak, we allocate 3/4 and 1/4 of a huge page. 
This is + // necessary after we delete `half` below, as a half huge page for the + // peak would fill into the gap previously occupied by it. + PAlloc peak2a = Allocate(3 * N / 4); + PAlloc peak2b = AllocateWithSpanAllocInfo(N / 4, peak2a.span_alloc_info); + EXPECT_EQ(filler_.used_pages(), 2 * N); + Delete(peak2a); + Delete(peak2b); + Advance(b); + Delete(half); + EXPECT_EQ(filler_.free_pages(), Length(N / 2)); + // The number of released pages is limited to the number of free pages. + EXPECT_EQ(expected_subrelease ? N / 2 : Length(0), + ReleasePages(10 * N, intervals)); - EXPECT_EQ(tracker_.total_skipped().pages, Length(900)); - EXPECT_EQ(tracker_.total_skipped().count, 1); - EXPECT_EQ(tracker_.correctly_skipped().pages, Length(0)); - EXPECT_EQ(tracker_.correctly_skipped().count, 0); - EXPECT_EQ(tracker_.pending_skipped().pages, Length(900)); - EXPECT_EQ(tracker_.pending_skipped().count, 1); + Advance(c); + // Third peak: min_demand 1/2N, max_demand (2+1/2)N. + PAlloc peak3a = Allocate(3 * N / 4); + PAlloc peak3b = AllocateWithSpanAllocInfo(N / 4, peak3a.span_alloc_info); - // Correct subrelease: Subrelease to 500 - Advance(absl::Minutes(1)); - GenerateDemandPoint(Length(500), Length(100)); - tracker_.ReportSkippedSubreleasePages(Length(50), Length(550), - absl::Minutes(3)); - GenerateDemandPoint(Length(500), Length(50)); - tracker_.ReportSkippedSubreleasePages(Length(50), Length(500), - absl::Minutes(3)); - GenerateDemandPoint(Length(500), Length(0)); - - EXPECT_EQ(tracker_.total_skipped().pages, Length(1000)); - EXPECT_EQ(tracker_.total_skipped().count, 3); - EXPECT_EQ(tracker_.correctly_skipped().pages, Length(0)); - EXPECT_EQ(tracker_.correctly_skipped().count, 0); - EXPECT_EQ(tracker_.pending_skipped().pages, Length(1000)); - EXPECT_EQ(tracker_.pending_skipped().count, 3); - - // Third peak (large, too late for first peak) - Advance(absl::Minutes(1)); - GenerateDemandPoint(Length(1100), Length(1000)); + PAlloc peak4a = Allocate(3 * N / 4); + PAlloc peak4b = AllocateWithSpanAllocInfo(N / 4, peak4a.span_alloc_info); - Advance(absl::Minutes(5)); - GenerateDemandPoint(Length(1100), Length(1000)); + Delete(tiny1); + Delete(tiny2); + Delete(peak3a); + Delete(peak3b); + Delete(peak4a); + Delete(peak4b); - EXPECT_EQ(tracker_.total_skipped().pages, Length(1000)); - EXPECT_EQ(tracker_.total_skipped().count, 3); - EXPECT_EQ(tracker_.correctly_skipped().pages, Length(100)); - EXPECT_EQ(tracker_.correctly_skipped().count, 2); - EXPECT_EQ(tracker_.pending_skipped().pages, Length(0)); - EXPECT_EQ(tracker_.pending_skipped().count, 0); -} + EXPECT_EQ(filler_.used_pages(), Length(0)); + EXPECT_EQ(filler_.unmapped_pages(), Length(0)); + EXPECT_EQ(filler_.free_pages(), Length(0)); + EXPECT_EQ(expected_subrelease ? N / 2 : Length(0), ReleasePages(10 * N)); + }; -TEST_F(FillerStatsTrackerTest, SubreleaseCorrectnessWithChangingIntervals) { - // First peak (large) - GenerateDemandPoint(Length(1000), Length(1000)); + { + // Skip subrelease feature is disabled if all intervals are zero. + SCOPED_TRACE("demand_pattern 1"); + demand_pattern(absl::Minutes(1), absl::Minutes(1), absl::Minutes(4), + SkipSubreleaseIntervals{}, true); + } - Advance(absl::Minutes(1)); - GenerateDemandPoint(Length(100), Length(1000)); + Advance(absl::Minutes(30)); - tracker_.ReportSkippedSubreleasePages(Length(50), Length(1000), - absl::Minutes(4)); - Advance(absl::Minutes(1)); + { + // Uses short-term and long-term intervals for skipping subrelease. It + // incorrectly skips 128 pages. 
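    // ("128 pages" here is just kPagesPerHugePage / 2 under the 256-page
    // assumption this test skips on above.)
    EXPECT_EQ(kPagesPerHugePage / 2, Length(128));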
+ SCOPED_TRACE("demand_pattern 2"); + demand_pattern(absl::Minutes(3), absl::Minutes(2), absl::Minutes(7), + SkipSubreleaseIntervals{.short_interval = absl::Minutes(3), + .long_interval = absl::Minutes(6)}, + false); + } - // With two correctness intervals in the same epoch, take the maximum - tracker_.ReportSkippedSubreleasePages(Length(100), Length(1000), - absl::Minutes(1)); - tracker_.ReportSkippedSubreleasePages(Length(200), Length(1000), - absl::Minutes(7)); - - Advance(absl::Minutes(5)); - GenerateDemandPoint(Length(1100), Length(1000)); - Advance(absl::Minutes(10)); - GenerateDemandPoint(Length(1100), Length(1000)); - - EXPECT_EQ(tracker_.total_skipped().pages, Length(350)); - EXPECT_EQ(tracker_.total_skipped().count, 3); - EXPECT_EQ(tracker_.correctly_skipped().pages, Length(300)); - EXPECT_EQ(tracker_.correctly_skipped().count, 2); - EXPECT_EQ(tracker_.pending_skipped().pages, Length(0)); - EXPECT_EQ(tracker_.pending_skipped().count, 0); -} + Advance(absl::Minutes(30)); -std::vector FillerTest::GenerateInterestingAllocs() { - PAlloc a = Allocate(Length(1)); - EXPECT_EQ(ReleasePages(kMaxValidPages), kPagesPerHugePage - Length(1)); - Delete(a); - // Get the report on the released page - EXPECT_EQ(ReleasePages(kMaxValidPages), Length(1)); + { + // Uses short-term and long-term intervals for skipping subrelease, + // subreleasing all free pages. + SCOPED_TRACE("demand_pattern 3"); + demand_pattern(absl::Minutes(4), absl::Minutes(2), absl::Minutes(3), + SkipSubreleaseIntervals{.short_interval = absl::Minutes(1), + .long_interval = absl::Minutes(2)}, + true); + } + Advance(absl::Minutes(30)); - // Use a maximally-suboptimal pattern to get lots of hugepages into the - // filler. - std::vector result; - static_assert(kPagesPerHugePage > Length(7), - "Not enough pages per hugepage!"); - for (auto i = Length(0); i < Length(7); ++i) { - result.push_back(Allocate(kPagesPerHugePage - i - Length(1))); + { + // Uses only short-term interval for skipping subrelease. It correctly + // skips 128 pages. + SCOPED_TRACE("demand_pattern 4"); + demand_pattern(absl::Minutes(4), absl::Minutes(2), absl::Minutes(3), + SkipSubreleaseIntervals{.short_interval = absl::Minutes(3)}, + false); } - // Get two released hugepages. - EXPECT_EQ(ReleasePages(Length(7)), Length(7)); - EXPECT_EQ(ReleasePages(Length(6)), Length(6)); + Advance(absl::Minutes(30)); - // Fill some of the remaining pages with small allocations. - for (int i = 0; i < 9; ++i) { - result.push_back(Allocate(Length(1))); + { + // Uses only long-term interval for skipping subrelease, subreleased all + // free pages. + SCOPED_TRACE("demand_pattern 5"); + demand_pattern(absl::Minutes(4), absl::Minutes(2), absl::Minutes(3), + SkipSubreleaseIntervals{.long_interval = absl::Minutes(2)}, + true); } - // Finally, donate one hugepage. - result.push_back(Allocate(Length(1), /*donated=*/true)); - return result; -} + Advance(absl::Minutes(30)); -// Test the output of Print(). This is something of a change-detector test, -// but that's not all bad in this case. -TEST_P(FillerTest, Print) { - if (kPagesPerHugePage != Length(256)) { - // The output is hardcoded on this assumption, and dynamically calculating - // it would be way too much of a pain. - return; + // This captures a corner case: If we hit another peak immediately after a + // subrelease decision (in the same time series epoch), do not count this as + // a correct subrelease decision. 
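  // A sketch of that rule (invented names; the real bookkeeping lives in the
  // filler's stats tracker): a peak only confirms an earlier skip decision if
  // it lands in a later epoch of the time series.
  auto peak_confirms_skip = [](int decision_epoch, int peak_epoch) {
    return peak_epoch > decision_epoch;
  };
  (void)peak_confirms_skip;  // Illustration only; demand_pattern 6 below
                             // exercises the same-epoch case.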
+ { + SCOPED_TRACE("demand_pattern 6"); + demand_pattern(absl::Milliseconds(10), absl::Milliseconds(10), + absl::Milliseconds(10), + SkipSubreleaseIntervals{.short_interval = absl::Minutes(1), + .long_interval = absl::Minutes(2)}, + false); } - auto allocs = GenerateInterestingAllocs(); + + Advance(absl::Minutes(30)); + + // Ensure that the tracker is updated. + auto tiny = Allocate(Length(1)); + Delete(tiny); std::string buffer(1024 * 1024, '\0'); { + PageHeapSpinLockHolder l; Printer printer(&*buffer.begin(), buffer.size()); - filler_.Print(&printer, /*everything=*/true); - buffer.erase(printer.SpaceRequired()); + filler_.Print(printer, true); } + buffer.resize(strlen(buffer.c_str())); - EXPECT_THAT( - buffer, - StrEq(R"(HugePageFiller: densely pack small requests into hugepages -HugePageFiller: 8 total, 3 full, 3 partial, 2 released (0 partially), 0 quarantined -HugePageFiller: 261 pages free in 8 hugepages, 0.1274 free -HugePageFiller: among non-fulls, 0.3398 free -HugePageFiller: 499 used pages in subreleased hugepages (0 of them in partially released) -HugePageFiller: 2 hugepages partially released, 0.0254 released -HugePageFiller: 0.7187 of used pages hugepageable -HugePageFiller: Since startup, 269 pages subreleased, 3 hugepages broken, (0 pages, 0 hugepages due to reaching tcmalloc limit) - -HugePageFiller: fullness histograms - -HugePageFiller: # of regular hps with a<= # of free pages (GetParam()) != + HugePageFillerDenseTrackerType::kSpansAllocated) { + GTEST_SKIP() << "Skipping test for kSpansAllocated"; + } + randomize_density_ = false; + SpanAllocInfo info = {kPagesPerHugePage.raw_num(), + AccessDensityPrediction::kDense}; + + // Firstly, this test generates a peak (long-term demand peak) and waits for + // time interval a. Then, it generates a higher peak plus a short-term + // fluctuation peak, and waits for time interval b. It then generates a trough + // in demand and tries to subrelease. Finally, it waits for time interval c to + // generate the highest peak for evaluating subrelease correctness. Skip + // subrelease selects those demand points using provided time intervals. + const auto demand_pattern = [&](absl::Duration a, absl::Duration b, + absl::Duration c, + SkipSubreleaseIntervals intervals, + bool expected_subrelease) { + const Length N = kPagesPerHugePage; + // First peak: min_demand 3/4N, max_demand 1N. + std::vector peak1a = + AllocateVectorWithSpanAllocInfo(3 * N / 4, info); + ASSERT_TRUE(!peak1a.empty()); + std::vector peak1b = + AllocateVectorWithSpanAllocInfo(N / 4, peak1a.front().span_alloc_info); + Advance(a); + // Second peak: min_demand 0, max_demand 2N. + DeleteVector(peak1a); + DeleteVector(peak1b); -HugePageFiller: time series over 5 min interval + std::vector half = AllocateVectorWithSpanAllocInfo(N / 2, info); + ASSERT_TRUE(!half.empty()); + std::vector tiny1 = + AllocateVectorWithSpanAllocInfo(N / 4, half.front().span_alloc_info); + std::vector tiny2 = + AllocateVectorWithSpanAllocInfo(N / 4, half.front().span_alloc_info); -HugePageFiller: realized fragmentation: 0.0 MiB -HugePageFiller: minimum free pages: 0 (0 backed) -HugePageFiller: at peak demand: 1774 pages (and 261 free, 13 unmapped) -HugePageFiller: at peak demand: 8 hps (5 regular, 1 donated, 0 partial, 2 released) -HugePageFiller: at peak hps: 1774 pages (and 261 free, 13 unmapped) -HugePageFiller: at peak hps: 8 hps (5 regular, 1 donated, 0 partial, 2 released) + // To force a peak, we allocate 3/4 and 1/4 of a huge page. 
This is + // necessary after we delete `half` below, as a half huge page for the + // peak would fill into the gap previously occupied by it. + std::vector peak2a = + AllocateVectorWithSpanAllocInfo(3 * N / 4, info); + ASSERT_TRUE(!peak2a.empty()); + std::vector peak2b = + AllocateVectorWithSpanAllocInfo(N / 4, peak2a.front().span_alloc_info); + EXPECT_EQ(filler_.used_pages(), 2 * N); + DeleteVector(peak2a); + DeleteVector(peak2b); + Advance(b); + DeleteVector(half); + EXPECT_EQ(filler_.free_pages(), Length(N / 2)); + // The number of released pages is limited to the number of free pages. + EXPECT_EQ(expected_subrelease ? N / 2 : Length(0), + ReleasePages(10 * N, intervals)); + + Advance(c); + // Third peak: min_demand 1/2N, max_demand (2+1/2)N. + std::vector peak3a = + AllocateVectorWithSpanAllocInfo(3 * N / 4, info); + ASSERT_TRUE(!peak3a.empty()); + std::vector peak3b = + AllocateVectorWithSpanAllocInfo(N / 4, peak3a.front().span_alloc_info); + + std::vector peak4a = + AllocateVectorWithSpanAllocInfo(3 * N / 4, info); + ASSERT_TRUE(!peak4a.empty()); + std::vector peak4b = + AllocateVectorWithSpanAllocInfo(N / 4, peak4a.front().span_alloc_info); + + DeleteVector(tiny1); + DeleteVector(tiny2); + DeleteVector(peak3a); + DeleteVector(peak3b); + DeleteVector(peak4a); + DeleteVector(peak4b); + + EXPECT_EQ(filler_.used_pages(), Length(0)); + EXPECT_EQ(filler_.unmapped_pages(), Length(0)); + EXPECT_EQ(filler_.free_pages(), Length(0)); + EXPECT_EQ(Length(0), ReleasePages(10 * N)); + }; + + { + // Skip subrelease feature is disabled if all intervals are zero. + SCOPED_TRACE("demand_pattern 1"); + demand_pattern(absl::Minutes(1), absl::Minutes(1), absl::Minutes(4), + SkipSubreleaseIntervals{}, true); + } + + Advance(absl::Minutes(30)); + + { + // Uses short-term and long-term intervals for skipping subrelease. It + // incorrectly skips 128 pages. + SCOPED_TRACE("demand_pattern 2"); + demand_pattern(absl::Minutes(3), absl::Minutes(2), absl::Minutes(7), + SkipSubreleaseIntervals{.short_interval = absl::Minutes(3), + .long_interval = absl::Minutes(6)}, + false); + } + + Advance(absl::Minutes(30)); + + { + // Uses short-term and long-term intervals for skipping subrelease, + // subreleasing all free pages. + SCOPED_TRACE("demand_pattern 3"); + demand_pattern(absl::Minutes(4), absl::Minutes(2), absl::Minutes(3), + SkipSubreleaseIntervals{.short_interval = absl::Minutes(1), + .long_interval = absl::Minutes(2)}, + true); + } + Advance(absl::Minutes(30)); + + { + // Uses only short-term interval for skipping subrelease. It correctly + // skips 128 pages. + SCOPED_TRACE("demand_pattern 4"); + demand_pattern(absl::Minutes(4), absl::Minutes(2), absl::Minutes(3), + SkipSubreleaseIntervals{.short_interval = absl::Minutes(3)}, + false); + } + + Advance(absl::Minutes(30)); + + { + // Uses only long-term interval for skipping subrelease, subreleased all + // free pages. + SCOPED_TRACE("demand_pattern 5"); + demand_pattern(absl::Minutes(4), absl::Minutes(2), absl::Minutes(3), + SkipSubreleaseIntervals{.long_interval = absl::Minutes(2)}, + true); + } + + Advance(absl::Minutes(30)); + + // Repeats the "demand_pattern 9" test using short-term and long-term + // intervals, to show that subrelease decisions are evaluated independently. 
+ { + SCOPED_TRACE("demand_pattern 6"); + demand_pattern(absl::Milliseconds(10), absl::Milliseconds(10), + absl::Milliseconds(10), + SkipSubreleaseIntervals{.short_interval = absl::Minutes(1), + .long_interval = absl::Minutes(2)}, + false); + } + + Advance(absl::Minutes(30)); + + // Ensure that the tracker is updated. + auto tiny = Allocate(Length(1)); + Delete(tiny); + + std::string buffer(1024 * 1024, '\0'); + { + PageHeapSpinLockHolder l; + Printer printer(&*buffer.begin(), buffer.size()); + filler_.Print(printer, true); + } + buffer.resize(strlen(buffer.c_str())); -HugePageFiller: Since the start of the execution, 0 subreleases (0 pages) were skipped due to recent (0s) peaks. + EXPECT_THAT(buffer, testing::HasSubstr(R"( +HugePageFiller: Since the start of the execution, 4 subreleases (511 pages) were skipped due to the sum of short-term (60s) fluctuations and long-term (120s) trends. HugePageFiller: 0.0000% of decisions confirmed correct, 0 pending (0.0000% of pages, 0 pending). -HugePageFiller: Subrelease stats last 10 min: total 269 pages subreleased, 3 hugepages broken )")); - for (const auto& alloc : allocs) { - Delete(alloc); - } } -// Test the output of PrintInPbtxt(). This is something of a change-detector -// test, but that's not all bad in this case. -TEST_P(FillerTest, PrintInPbtxt) { +TEST_P(FillerTest, LifetimeTelemetryTest) { + // This test is sensitive to the number of pages per hugepage, as we are + // printing raw stats. if (kPagesPerHugePage != Length(256)) { - // The output is hardcoded on this assumption, and dynamically calculating - // it would be way too much of a pain. - return; + GTEST_SKIP(); } - auto allocs = GenerateInterestingAllocs(); + + const Length N = kPagesPerHugePage; + SpanAllocInfo info_sparsely_accessed = {1, AccessDensityPrediction::kSparse}; + PAlloc small_alloc = AllocateWithSpanAllocInfo(N / 4, info_sparsely_accessed); + PAlloc large_alloc = + AllocateWithSpanAllocInfo(3 * N / 4, info_sparsely_accessed); std::string buffer(1024 * 1024, '\0'); - Printer printer(&*buffer.begin(), buffer.size()); { - PbtxtRegion region(&printer, kTop, /*indent=*/0); - filler_.PrintInPbtxt(®ion); - } - buffer.erase(printer.SpaceRequired()); - - EXPECT_THAT(buffer, StrEq(R"( - filler_full_huge_pages: 3 - filler_partial_huge_pages: 3 - filler_released_huge_pages: 2 - filler_partially_released_huge_pages: 0 - filler_free_pages: 261 - filler_used_pages_in_subreleased: 499 - filler_used_pages_in_partial_released: 0 - filler_unmapped_bytes: 0 - filler_hugepageable_used_bytes: 10444800 - filler_num_pages_subreleased: 269 - filler_num_hugepages_broken: 3 - filler_num_pages_subreleased_due_to_limit: 0 - filler_num_hugepages_broken_due_to_limit: 0 - filler_tracker { - type: REGULAR - free_pages_histogram { - lower_bound: 0 - upper_bound: 0 - value: 3 - } - free_pages_histogram { - lower_bound: 1 - upper_bound: 1 - value: 1 - } - free_pages_histogram { - lower_bound: 2 - upper_bound: 2 - value: 0 - } - free_pages_histogram { - lower_bound: 3 - upper_bound: 3 - value: 0 - } - free_pages_histogram { - lower_bound: 4 - upper_bound: 15 - value: 1 - } - free_pages_histogram { - lower_bound: 16 - upper_bound: 31 - value: 0 - } - free_pages_histogram { - lower_bound: 32 - upper_bound: 47 - value: 0 - } - free_pages_histogram { - lower_bound: 48 - upper_bound: 63 - value: 0 - } - free_pages_histogram { - lower_bound: 64 - upper_bound: 79 - value: 0 - } - free_pages_histogram { - lower_bound: 80 - upper_bound: 95 - value: 0 - } - free_pages_histogram { - lower_bound: 96 - upper_bound: 
111 - value: 0 - } - free_pages_histogram { - lower_bound: 112 - upper_bound: 127 - value: 0 - } - free_pages_histogram { - lower_bound: 128 - upper_bound: 143 - value: 0 - } - free_pages_histogram { - lower_bound: 144 - upper_bound: 159 - value: 0 - } - free_pages_histogram { - lower_bound: 160 - upper_bound: 175 - value: 0 - } - free_pages_histogram { - lower_bound: 176 - upper_bound: 191 - value: 0 - } - free_pages_histogram { - lower_bound: 192 - upper_bound: 207 - value: 0 - } - free_pages_histogram { - lower_bound: 208 - upper_bound: 223 - value: 0 - } - free_pages_histogram { - lower_bound: 224 - upper_bound: 239 - value: 0 - } - free_pages_histogram { - lower_bound: 240 - upper_bound: 251 - value: 0 - } - free_pages_histogram { - lower_bound: 252 - upper_bound: 252 - value: 0 - } - free_pages_histogram { - lower_bound: 253 - upper_bound: 253 - value: 0 - } - free_pages_histogram { - lower_bound: 254 - upper_bound: 254 - value: 0 - } - free_pages_histogram { - lower_bound: 255 - upper_bound: 255 - value: 0 - } - longest_free_range_histogram { - lower_bound: 0 - upper_bound: 0 - value: 3 - } - longest_free_range_histogram { - lower_bound: 1 - upper_bound: 1 - value: 1 - } - longest_free_range_histogram { - lower_bound: 2 - upper_bound: 2 - value: 0 - } - longest_free_range_histogram { - lower_bound: 3 - upper_bound: 3 - value: 0 - } - longest_free_range_histogram { - lower_bound: 4 - upper_bound: 15 - value: 1 - } - longest_free_range_histogram { - lower_bound: 16 - upper_bound: 31 - value: 0 - } - longest_free_range_histogram { - lower_bound: 32 - upper_bound: 47 - value: 0 - } - longest_free_range_histogram { - lower_bound: 48 - upper_bound: 63 - value: 0 - } - longest_free_range_histogram { - lower_bound: 64 - upper_bound: 79 - value: 0 - } - longest_free_range_histogram { - lower_bound: 80 - upper_bound: 95 - value: 0 - } - longest_free_range_histogram { - lower_bound: 96 - upper_bound: 111 - value: 0 - } - longest_free_range_histogram { - lower_bound: 112 - upper_bound: 127 - value: 0 - } - longest_free_range_histogram { - lower_bound: 128 - upper_bound: 143 - value: 0 - } - longest_free_range_histogram { - lower_bound: 144 - upper_bound: 159 - value: 0 - } - longest_free_range_histogram { - lower_bound: 160 - upper_bound: 175 - value: 0 - } - longest_free_range_histogram { - lower_bound: 176 - upper_bound: 191 - value: 0 - } - longest_free_range_histogram { - lower_bound: 192 - upper_bound: 207 - value: 0 - } - longest_free_range_histogram { - lower_bound: 208 - upper_bound: 223 - value: 0 - } - longest_free_range_histogram { - lower_bound: 224 - upper_bound: 239 - value: 0 - } - longest_free_range_histogram { - lower_bound: 240 - upper_bound: 251 - value: 0 - } - longest_free_range_histogram { - lower_bound: 252 - upper_bound: 252 - value: 0 - } - longest_free_range_histogram { - lower_bound: 253 - upper_bound: 253 - value: 0 - } - longest_free_range_histogram { - lower_bound: 254 - upper_bound: 254 - value: 0 - } - longest_free_range_histogram { - lower_bound: 255 - upper_bound: 255 - value: 0 - } - allocations_histogram { - lower_bound: 1 - upper_bound: 1 - value: 1 - } - allocations_histogram { - lower_bound: 2 - upper_bound: 2 - value: 1 - } - allocations_histogram { - lower_bound: 3 - upper_bound: 3 - value: 1 - } - allocations_histogram { - lower_bound: 4 - upper_bound: 4 - value: 2 - } - allocations_histogram { - lower_bound: 5 - upper_bound: 16 - value: 0 - } - allocations_histogram { - lower_bound: 17 - upper_bound: 32 - value: 0 - } - allocations_histogram { - 
lower_bound: 33 - upper_bound: 48 - value: 0 - } - allocations_histogram { - lower_bound: 49 - upper_bound: 64 - value: 0 - } - allocations_histogram { - lower_bound: 65 - upper_bound: 80 - value: 0 - } - allocations_histogram { - lower_bound: 81 - upper_bound: 96 - value: 0 - } - allocations_histogram { - lower_bound: 97 - upper_bound: 112 - value: 0 - } - allocations_histogram { - lower_bound: 113 - upper_bound: 128 - value: 0 - } - allocations_histogram { - lower_bound: 129 - upper_bound: 144 - value: 0 - } - allocations_histogram { - lower_bound: 145 - upper_bound: 160 - value: 0 - } - allocations_histogram { - lower_bound: 161 - upper_bound: 176 - value: 0 - } - allocations_histogram { - lower_bound: 177 - upper_bound: 192 - value: 0 - } - allocations_histogram { - lower_bound: 193 - upper_bound: 208 - value: 0 - } - allocations_histogram { - lower_bound: 209 - upper_bound: 224 - value: 0 - } - allocations_histogram { - lower_bound: 225 - upper_bound: 240 - value: 0 - } - allocations_histogram { - lower_bound: 241 - upper_bound: 252 - value: 0 - } - allocations_histogram { - lower_bound: 253 - upper_bound: 253 - value: 0 - } - allocations_histogram { - lower_bound: 254 - upper_bound: 254 - value: 0 - } - allocations_histogram { - lower_bound: 255 - upper_bound: 255 - value: 0 - } - allocations_histogram { - lower_bound: 256 - upper_bound: 256 - value: 0 - } + PageHeapSpinLockHolder l; + Printer printer(&*buffer.begin(), buffer.size()); + filler_.Print(printer, true); } - filler_tracker { - type: DONATED - free_pages_histogram { - lower_bound: 0 - upper_bound: 0 - value: 0 - } - free_pages_histogram { - lower_bound: 1 - upper_bound: 1 - value: 0 - } - free_pages_histogram { - lower_bound: 2 - upper_bound: 2 - value: 0 - } - free_pages_histogram { - lower_bound: 3 - upper_bound: 3 - value: 0 - } - free_pages_histogram { - lower_bound: 4 - upper_bound: 15 - value: 0 - } - free_pages_histogram { - lower_bound: 16 - upper_bound: 31 - value: 0 - } - free_pages_histogram { - lower_bound: 32 - upper_bound: 47 - value: 0 - } - free_pages_histogram { - lower_bound: 48 - upper_bound: 63 - value: 0 - } - free_pages_histogram { - lower_bound: 64 - upper_bound: 79 - value: 0 - } - free_pages_histogram { - lower_bound: 80 - upper_bound: 95 - value: 0 - } - free_pages_histogram { - lower_bound: 96 - upper_bound: 111 - value: 0 - } - free_pages_histogram { - lower_bound: 112 - upper_bound: 127 - value: 0 - } - free_pages_histogram { - lower_bound: 128 - upper_bound: 143 - value: 0 - } - free_pages_histogram { - lower_bound: 144 - upper_bound: 159 - value: 0 - } - free_pages_histogram { - lower_bound: 160 - upper_bound: 175 - value: 0 - } - free_pages_histogram { - lower_bound: 176 - upper_bound: 191 - value: 0 - } - free_pages_histogram { - lower_bound: 192 - upper_bound: 207 - value: 0 - } - free_pages_histogram { - lower_bound: 208 - upper_bound: 223 - value: 0 - } - free_pages_histogram { - lower_bound: 224 - upper_bound: 239 - value: 0 - } - free_pages_histogram { - lower_bound: 240 - upper_bound: 251 - value: 0 - } - free_pages_histogram { - lower_bound: 252 - upper_bound: 252 - value: 0 - } - free_pages_histogram { - lower_bound: 253 - upper_bound: 253 - value: 0 - } - free_pages_histogram { - lower_bound: 254 - upper_bound: 254 - value: 0 - } - free_pages_histogram { - lower_bound: 255 - upper_bound: 255 - value: 1 - } - longest_free_range_histogram { - lower_bound: 0 - upper_bound: 0 - value: 0 - } - longest_free_range_histogram { - lower_bound: 1 - upper_bound: 1 - value: 0 - } - 
longest_free_range_histogram { - lower_bound: 2 - upper_bound: 2 - value: 0 - } - longest_free_range_histogram { - lower_bound: 3 - upper_bound: 3 - value: 0 - } - longest_free_range_histogram { - lower_bound: 4 - upper_bound: 15 - value: 0 - } - longest_free_range_histogram { - lower_bound: 16 - upper_bound: 31 - value: 0 - } - longest_free_range_histogram { - lower_bound: 32 - upper_bound: 47 - value: 0 - } - longest_free_range_histogram { - lower_bound: 48 - upper_bound: 63 - value: 0 - } - longest_free_range_histogram { - lower_bound: 64 - upper_bound: 79 - value: 0 - } - longest_free_range_histogram { - lower_bound: 80 - upper_bound: 95 - value: 0 - } - longest_free_range_histogram { - lower_bound: 96 - upper_bound: 111 - value: 0 - } - longest_free_range_histogram { - lower_bound: 112 - upper_bound: 127 - value: 0 - } - longest_free_range_histogram { - lower_bound: 128 - upper_bound: 143 - value: 0 - } - longest_free_range_histogram { - lower_bound: 144 - upper_bound: 159 - value: 0 - } - longest_free_range_histogram { - lower_bound: 160 - upper_bound: 175 - value: 0 - } - longest_free_range_histogram { - lower_bound: 176 - upper_bound: 191 - value: 0 - } - longest_free_range_histogram { - lower_bound: 192 - upper_bound: 207 - value: 0 - } - longest_free_range_histogram { - lower_bound: 208 - upper_bound: 223 - value: 0 - } - longest_free_range_histogram { - lower_bound: 224 - upper_bound: 239 - value: 0 - } - longest_free_range_histogram { - lower_bound: 240 - upper_bound: 251 - value: 0 - } - longest_free_range_histogram { - lower_bound: 252 - upper_bound: 252 - value: 0 - } - longest_free_range_histogram { - lower_bound: 253 - upper_bound: 253 - value: 0 - } - longest_free_range_histogram { - lower_bound: 254 - upper_bound: 254 - value: 0 - } - longest_free_range_histogram { - lower_bound: 255 - upper_bound: 255 - value: 1 - } - allocations_histogram { - lower_bound: 1 - upper_bound: 1 - value: 1 - } - allocations_histogram { - lower_bound: 2 - upper_bound: 2 - value: 0 - } - allocations_histogram { - lower_bound: 3 - upper_bound: 3 - value: 0 - } - allocations_histogram { - lower_bound: 4 - upper_bound: 4 - value: 0 - } - allocations_histogram { - lower_bound: 5 - upper_bound: 16 - value: 0 - } - allocations_histogram { - lower_bound: 17 - upper_bound: 32 - value: 0 - } - allocations_histogram { - lower_bound: 33 - upper_bound: 48 - value: 0 - } - allocations_histogram { - lower_bound: 49 - upper_bound: 64 - value: 0 - } - allocations_histogram { - lower_bound: 65 - upper_bound: 80 - value: 0 - } - allocations_histogram { - lower_bound: 81 - upper_bound: 96 - value: 0 - } - allocations_histogram { - lower_bound: 97 - upper_bound: 112 - value: 0 - } - allocations_histogram { - lower_bound: 113 - upper_bound: 128 - value: 0 - } - allocations_histogram { - lower_bound: 129 - upper_bound: 144 - value: 0 - } - allocations_histogram { - lower_bound: 145 - upper_bound: 160 - value: 0 - } - allocations_histogram { - lower_bound: 161 - upper_bound: 176 - value: 0 - } - allocations_histogram { - lower_bound: 177 - upper_bound: 192 - value: 0 - } - allocations_histogram { - lower_bound: 193 - upper_bound: 208 - value: 0 - } - allocations_histogram { - lower_bound: 209 - upper_bound: 224 - value: 0 - } - allocations_histogram { - lower_bound: 225 - upper_bound: 240 - value: 0 - } - allocations_histogram { - lower_bound: 241 - upper_bound: 252 - value: 0 - } - allocations_histogram { - lower_bound: 253 - upper_bound: 253 - value: 0 - } - allocations_histogram { - lower_bound: 254 - 
upper_bound: 254 - value: 0 - } - allocations_histogram { - lower_bound: 255 - upper_bound: 255 - value: 0 - } - allocations_histogram { - lower_bound: 256 - upper_bound: 256 - value: 0 - } + buffer.resize(strlen(buffer.c_str())); + EXPECT_THAT(buffer, testing::HasSubstr(R"( +HugePageFiller: # of sparsely-accessed regular hps with lifetime a <= # hps < b +HugePageFiller: < 0 ms <= 1 < 1 ms <= 0 < 10 ms <= 0 < 100 ms <= 0 < 1000 ms <= 0 < 10000 ms <= 0 +HugePageFiller: < 100000 ms <= 0 < 1000000 ms <= 0 + +HugePageFiller: # of densely-accessed regular hps with lifetime a <= # hps < b +HugePageFiller: < 0 ms <= 0 < 1 ms <= 0 < 10 ms <= 0 < 100 ms <= 0 < 1000 ms <= 0 < 10000 ms <= 0 +HugePageFiller: < 100000 ms <= 0 < 1000000 ms <= 0 + +HugePageFiller: # of donated hps with lifetime a <= # hps < b +HugePageFiller: < 0 ms <= 0 < 1 ms <= 0 < 10 ms <= 0 < 100 ms <= 0 < 1000 ms <= 0 < 10000 ms <= 0 +HugePageFiller: < 100000 ms <= 0 < 1000000 ms <= 0 + +HugePageFiller: # of sparsely-accessed partial released hps with lifetime a <= # hps < b +HugePageFiller: < 0 ms <= 0 < 1 ms <= 0 < 10 ms <= 0 < 100 ms <= 0 < 1000 ms <= 0 < 10000 ms <= 0 +HugePageFiller: < 100000 ms <= 0 < 1000000 ms <= 0 + +HugePageFiller: # of densely-accessed partial released hps with lifetime a <= # hps < b +HugePageFiller: < 0 ms <= 0 < 1 ms <= 0 < 10 ms <= 0 < 100 ms <= 0 < 1000 ms <= 0 < 10000 ms <= 0 +HugePageFiller: < 100000 ms <= 0 < 1000000 ms <= 0 + +HugePageFiller: # of sparsely-accessed released hps with lifetime a <= # hps < b +HugePageFiller: < 0 ms <= 0 < 1 ms <= 0 < 10 ms <= 0 < 100 ms <= 0 < 1000 ms <= 0 < 10000 ms <= 0 +HugePageFiller: < 100000 ms <= 0 < 1000000 ms <= 0 + +HugePageFiller: # of densely-accessed released hps with lifetime a <= # hps < b +HugePageFiller: < 0 ms <= 0 < 1 ms <= 0 < 10 ms <= 0 < 100 ms <= 0 < 1000 ms <= 0 < 10000 ms <= 0 +HugePageFiller: < 100000 ms <= 0 < 1000000 ms <= 0 + +HugePageFiller: # of hps with >= 224 free pages, with different lifetimes. 
+HugePageFiller: # of sparsely-accessed regular hps with lifetime a <= # hps < b +HugePageFiller: < 0 ms <= 0 < 1 ms <= 0 < 10 ms <= 0 < 100 ms <= 0 < 1000 ms <= 0 < 10000 ms <= 0 +HugePageFiller: < 100000 ms <= 0 < 1000000 ms <= 0 + +HugePageFiller: # of densely-accessed regular hps with lifetime a <= # hps < b +HugePageFiller: < 0 ms <= 0 < 1 ms <= 0 < 10 ms <= 0 < 100 ms <= 0 < 1000 ms <= 0 < 10000 ms <= 0 +HugePageFiller: < 100000 ms <= 0 < 1000000 ms <= 0 + +HugePageFiller: # of donated hps with lifetime a <= # hps < b +HugePageFiller: < 0 ms <= 0 < 1 ms <= 0 < 10 ms <= 0 < 100 ms <= 0 < 1000 ms <= 0 < 10000 ms <= 0 +HugePageFiller: < 100000 ms <= 0 < 1000000 ms <= 0 + +HugePageFiller: # of sparsely-accessed partial released hps with lifetime a <= # hps < b +HugePageFiller: < 0 ms <= 0 < 1 ms <= 0 < 10 ms <= 0 < 100 ms <= 0 < 1000 ms <= 0 < 10000 ms <= 0 +HugePageFiller: < 100000 ms <= 0 < 1000000 ms <= 0 + +HugePageFiller: # of densely-accessed partial released hps with lifetime a <= # hps < b +HugePageFiller: < 0 ms <= 0 < 1 ms <= 0 < 10 ms <= 0 < 100 ms <= 0 < 1000 ms <= 0 < 10000 ms <= 0 +HugePageFiller: < 100000 ms <= 0 < 1000000 ms <= 0 + +HugePageFiller: # of sparsely-accessed released hps with lifetime a <= # hps < b +HugePageFiller: < 0 ms <= 0 < 1 ms <= 0 < 10 ms <= 0 < 100 ms <= 0 < 1000 ms <= 0 < 10000 ms <= 0 +HugePageFiller: < 100000 ms <= 0 < 1000000 ms <= 0 + +HugePageFiller: # of densely-accessed released hps with lifetime a <= # hps < b +HugePageFiller: < 0 ms <= 0 < 1 ms <= 0 < 10 ms <= 0 < 100 ms <= 0 < 1000 ms <= 0 < 10000 ms <= 0 +HugePageFiller: < 100000 ms <= 0 < 1000000 ms <= 0 + +HugePageFiller: # of hps with lifetime >= 100000 ms. +HugePageFiller: # of sparsely-accessed regular hps with a <= # of allocations < b +HugePageFiller: < 0<= 0 < 1<= 0 < 2<= 0 < 3<= 0 < 4<= 0 < 5<= 0 +HugePageFiller: < 6<= 0 < 7<= 0 < 8<= 0 < 16<= 0 < 32<= 0 < 48<= 0 +HugePageFiller: < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0 <128<= 0 <144<= 0 +HugePageFiller: <160<= 0 <176<= 0 <192<= 0 <208<= 0 <224<= 0 <240<= 0 +HugePageFiller: <248<= 0 <249<= 0 <250<= 0 <251<= 0 <252<= 0 <253<= 0 +HugePageFiller: <254<= 0 <255<= 0 + +HugePageFiller: # of densely-accessed regular hps with a <= # of allocations < b +HugePageFiller: < 0<= 0 < 1<= 0 < 2<= 0 < 3<= 0 < 4<= 0 < 5<= 0 +HugePageFiller: < 6<= 0 < 7<= 0 < 8<= 0 < 16<= 0 < 32<= 0 < 48<= 0 +HugePageFiller: < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0 <128<= 0 <144<= 0 +HugePageFiller: <160<= 0 <176<= 0 <192<= 0 <208<= 0 <224<= 0 <240<= 0 +HugePageFiller: <248<= 0 <249<= 0 <250<= 0 <251<= 0 <252<= 0 <253<= 0 +HugePageFiller: <254<= 0 <255<= 0 + +HugePageFiller: # of donated hps with a <= # of allocations < b +HugePageFiller: < 0<= 0 < 1<= 0 < 2<= 0 < 3<= 0 < 4<= 0 < 5<= 0 +HugePageFiller: < 6<= 0 < 7<= 0 < 8<= 0 < 16<= 0 < 32<= 0 < 48<= 0 +HugePageFiller: < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0 <128<= 0 <144<= 0 +HugePageFiller: <160<= 0 <176<= 0 <192<= 0 <208<= 0 <224<= 0 <240<= 0 +HugePageFiller: <248<= 0 <249<= 0 <250<= 0 <251<= 0 <252<= 0 <253<= 0 +HugePageFiller: <254<= 0 <255<= 0 + +HugePageFiller: # of sparsely-accessed partial released hps with a <= # of allocations < b +HugePageFiller: < 0<= 0 < 1<= 0 < 2<= 0 < 3<= 0 < 4<= 0 < 5<= 0 +HugePageFiller: < 6<= 0 < 7<= 0 < 8<= 0 < 16<= 0 < 32<= 0 < 48<= 0 +HugePageFiller: < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0 <128<= 0 <144<= 0 +HugePageFiller: <160<= 0 <176<= 0 <192<= 0 <208<= 0 <224<= 0 <240<= 0 +HugePageFiller: <248<= 0 <249<= 0 <250<= 0 <251<= 0 <252<= 0 <253<= 0 +HugePageFiller: <254<= 0 <255<= 0 + 
+HugePageFiller: # of densely-accessed partial released hps with a <= # of allocations < b +HugePageFiller: < 0<= 0 < 1<= 0 < 2<= 0 < 3<= 0 < 4<= 0 < 5<= 0 +HugePageFiller: < 6<= 0 < 7<= 0 < 8<= 0 < 16<= 0 < 32<= 0 < 48<= 0 +HugePageFiller: < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0 <128<= 0 <144<= 0 +HugePageFiller: <160<= 0 <176<= 0 <192<= 0 <208<= 0 <224<= 0 <240<= 0 +HugePageFiller: <248<= 0 <249<= 0 <250<= 0 <251<= 0 <252<= 0 <253<= 0 +HugePageFiller: <254<= 0 <255<= 0 + +HugePageFiller: # of sparsely-accessed released hps with a <= # of allocations < b +HugePageFiller: < 0<= 0 < 1<= 0 < 2<= 0 < 3<= 0 < 4<= 0 < 5<= 0 +HugePageFiller: < 6<= 0 < 7<= 0 < 8<= 0 < 16<= 0 < 32<= 0 < 48<= 0 +HugePageFiller: < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0 <128<= 0 <144<= 0 +HugePageFiller: <160<= 0 <176<= 0 <192<= 0 <208<= 0 <224<= 0 <240<= 0 +HugePageFiller: <248<= 0 <249<= 0 <250<= 0 <251<= 0 <252<= 0 <253<= 0 +HugePageFiller: <254<= 0 <255<= 0 + +HugePageFiller: # of densely-accessed released hps with a <= # of allocations < b +HugePageFiller: < 0<= 0 < 1<= 0 < 2<= 0 < 3<= 0 < 4<= 0 < 5<= 0 +HugePageFiller: < 6<= 0 < 7<= 0 < 8<= 0 < 16<= 0 < 32<= 0 < 48<= 0 +HugePageFiller: < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0 <128<= 0 <144<= 0 +HugePageFiller: <160<= 0 <176<= 0 <192<= 0 <208<= 0 <224<= 0 <240<= 0 +HugePageFiller: <248<= 0 <249<= 0 <250<= 0 <251<= 0 <252<= 0 <253<= 0 +HugePageFiller: <254<= 0 <255<= 0 +)")); + + Advance(absl::Seconds(101)); + { + PageHeapSpinLockHolder l; + Printer printer(&*buffer.begin(), buffer.size()); + filler_.Print(printer, true); } - filler_tracker { - type: PARTIAL - free_pages_histogram { - lower_bound: 0 - upper_bound: 0 - value: 0 - } - free_pages_histogram { - lower_bound: 1 - upper_bound: 1 - value: 0 - } - free_pages_histogram { - lower_bound: 2 - upper_bound: 2 - value: 0 - } - free_pages_histogram { - lower_bound: 3 - upper_bound: 3 - value: 0 - } - free_pages_histogram { - lower_bound: 4 - upper_bound: 15 - value: 0 - } - free_pages_histogram { - lower_bound: 16 - upper_bound: 31 - value: 0 - } - free_pages_histogram { - lower_bound: 32 - upper_bound: 47 - value: 0 - } - free_pages_histogram { - lower_bound: 48 - upper_bound: 63 - value: 0 - } - free_pages_histogram { - lower_bound: 64 - upper_bound: 79 - value: 0 - } - free_pages_histogram { - lower_bound: 80 - upper_bound: 95 - value: 0 - } - free_pages_histogram { - lower_bound: 96 - upper_bound: 111 - value: 0 - } - free_pages_histogram { - lower_bound: 112 - upper_bound: 127 - value: 0 - } - free_pages_histogram { - lower_bound: 128 - upper_bound: 143 - value: 0 - } - free_pages_histogram { - lower_bound: 144 - upper_bound: 159 - value: 0 - } - free_pages_histogram { - lower_bound: 160 - upper_bound: 175 - value: 0 - } - free_pages_histogram { - lower_bound: 176 - upper_bound: 191 - value: 0 - } - free_pages_histogram { - lower_bound: 192 - upper_bound: 207 - value: 0 - } - free_pages_histogram { - lower_bound: 208 - upper_bound: 223 - value: 0 - } - free_pages_histogram { - lower_bound: 224 - upper_bound: 239 - value: 0 - } - free_pages_histogram { - lower_bound: 240 - upper_bound: 251 - value: 0 - } - free_pages_histogram { - lower_bound: 252 - upper_bound: 252 - value: 0 - } - free_pages_histogram { - lower_bound: 253 - upper_bound: 253 - value: 0 - } - free_pages_histogram { - lower_bound: 254 - upper_bound: 254 - value: 0 - } - free_pages_histogram { - lower_bound: 255 - upper_bound: 255 - value: 0 - } - longest_free_range_histogram { - lower_bound: 0 - upper_bound: 0 - value: 0 - } - longest_free_range_histogram { 
- lower_bound: 1 - upper_bound: 1 - value: 0 - } - longest_free_range_histogram { - lower_bound: 2 - upper_bound: 2 - value: 0 - } - longest_free_range_histogram { - lower_bound: 3 - upper_bound: 3 - value: 0 - } - longest_free_range_histogram { - lower_bound: 4 - upper_bound: 15 - value: 0 - } - longest_free_range_histogram { - lower_bound: 16 - upper_bound: 31 - value: 0 - } - longest_free_range_histogram { - lower_bound: 32 - upper_bound: 47 - value: 0 - } - longest_free_range_histogram { - lower_bound: 48 - upper_bound: 63 - value: 0 - } - longest_free_range_histogram { - lower_bound: 64 - upper_bound: 79 - value: 0 - } - longest_free_range_histogram { - lower_bound: 80 - upper_bound: 95 - value: 0 - } - longest_free_range_histogram { - lower_bound: 96 - upper_bound: 111 - value: 0 - } - longest_free_range_histogram { - lower_bound: 112 - upper_bound: 127 - value: 0 - } - longest_free_range_histogram { - lower_bound: 128 - upper_bound: 143 - value: 0 - } - longest_free_range_histogram { - lower_bound: 144 - upper_bound: 159 - value: 0 - } - longest_free_range_histogram { - lower_bound: 160 - upper_bound: 175 - value: 0 - } - longest_free_range_histogram { - lower_bound: 176 - upper_bound: 191 - value: 0 - } - longest_free_range_histogram { - lower_bound: 192 - upper_bound: 207 - value: 0 - } - longest_free_range_histogram { - lower_bound: 208 - upper_bound: 223 - value: 0 - } - longest_free_range_histogram { - lower_bound: 224 - upper_bound: 239 - value: 0 - } - longest_free_range_histogram { - lower_bound: 240 - upper_bound: 251 - value: 0 - } - longest_free_range_histogram { - lower_bound: 252 - upper_bound: 252 - value: 0 - } - longest_free_range_histogram { - lower_bound: 253 - upper_bound: 253 - value: 0 - } - longest_free_range_histogram { - lower_bound: 254 - upper_bound: 254 - value: 0 - } - longest_free_range_histogram { - lower_bound: 255 - upper_bound: 255 - value: 0 - } - allocations_histogram { - lower_bound: 1 - upper_bound: 1 - value: 0 - } - allocations_histogram { - lower_bound: 2 - upper_bound: 2 - value: 0 - } - allocations_histogram { - lower_bound: 3 - upper_bound: 3 - value: 0 - } - allocations_histogram { - lower_bound: 4 - upper_bound: 4 - value: 0 - } - allocations_histogram { - lower_bound: 5 - upper_bound: 16 - value: 0 - } - allocations_histogram { - lower_bound: 17 - upper_bound: 32 - value: 0 - } - allocations_histogram { - lower_bound: 33 - upper_bound: 48 - value: 0 - } - allocations_histogram { - lower_bound: 49 - upper_bound: 64 - value: 0 - } - allocations_histogram { - lower_bound: 65 - upper_bound: 80 - value: 0 - } - allocations_histogram { - lower_bound: 81 - upper_bound: 96 - value: 0 - } - allocations_histogram { - lower_bound: 97 - upper_bound: 112 - value: 0 - } - allocations_histogram { - lower_bound: 113 - upper_bound: 128 - value: 0 - } - allocations_histogram { - lower_bound: 129 - upper_bound: 144 - value: 0 - } - allocations_histogram { - lower_bound: 145 - upper_bound: 160 - value: 0 - } - allocations_histogram { - lower_bound: 161 - upper_bound: 176 - value: 0 - } - allocations_histogram { - lower_bound: 177 - upper_bound: 192 - value: 0 - } - allocations_histogram { - lower_bound: 193 - upper_bound: 208 - value: 0 - } - allocations_histogram { - lower_bound: 209 - upper_bound: 224 - value: 0 - } - allocations_histogram { - lower_bound: 225 - upper_bound: 240 - value: 0 - } - allocations_histogram { - lower_bound: 241 - upper_bound: 252 - value: 0 - } - allocations_histogram { - lower_bound: 253 - upper_bound: 253 - value: 0 - } - 
allocations_histogram { - lower_bound: 254 - upper_bound: 254 - value: 0 - } - allocations_histogram { - lower_bound: 255 - upper_bound: 255 - value: 0 - } - allocations_histogram { - lower_bound: 256 - upper_bound: 256 - value: 0 - } + + EXPECT_THAT(buffer, testing::HasSubstr(R"( +HugePageFiller: # of sparsely-accessed regular hps with lifetime a <= # hps < b +HugePageFiller: < 0 ms <= 0 < 1 ms <= 0 < 10 ms <= 0 < 100 ms <= 0 < 1000 ms <= 0 < 10000 ms <= 0 +HugePageFiller: < 100000 ms <= 1 < 1000000 ms <= 0 +)")); + + EXPECT_THAT(buffer, testing::HasSubstr(R"( +HugePageFiller: # of hps with >= 224 free pages, with different lifetimes. +HugePageFiller: # of sparsely-accessed regular hps with lifetime a <= # hps < b +HugePageFiller: < 0 ms <= 0 < 1 ms <= 0 < 10 ms <= 0 < 100 ms <= 0 < 1000 ms <= 0 < 10000 ms <= 0 +HugePageFiller: < 100000 ms <= 0 < 1000000 ms <= 0 +)")); + + EXPECT_THAT(buffer, testing::HasSubstr(R"( +HugePageFiller: # of hps with lifetime >= 100000 ms. +HugePageFiller: # of sparsely-accessed regular hps with a <= # of allocations < b +HugePageFiller: < 0<= 0 < 1<= 1 < 2<= 0 < 3<= 0 < 4<= 0 < 5<= 0 +HugePageFiller: < 6<= 0 < 7<= 0 < 8<= 0 < 16<= 0 < 32<= 0 < 48<= 0 +HugePageFiller: < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0 <128<= 0 <144<= 0 +HugePageFiller: <160<= 0 <176<= 0 <192<= 0 <208<= 0 <224<= 0 <240<= 0 +HugePageFiller: <248<= 0 <249<= 0 <250<= 0 <251<= 0 <252<= 0 <253<= 0 +HugePageFiller: <254<= 0 <255<= 0 +)")); + + Delete(small_alloc); + Delete(large_alloc); +} + +TEST_P(FillerTest, SkipSubReleaseDemandPeak) { + // Tests that HugePageFiller can cap filler's short-term long-term + // skip-subrelease mechanism using the demand measured by subrelease + // intervals. + + const Length N = kPagesPerHugePage; + + // We trigger the demand such that short-term + long-term demand exceeds the + // peak demand. We should be able to sub-release memory from the HugeFiller + // up to the peak demand measured in the previous intervals. + + // min_demand = 0.75N, max_demand = 2.5N + std::vector peak1a = AllocateVector(3 * N / 4); + ASSERT_TRUE(!peak1a.empty()); + std::vector peak1b = AllocateVectorWithSpanAllocInfo( + 3 * N / 4, peak1a.front().span_alloc_info); + std::vector half1a = + AllocateVectorWithSpanAllocInfo(N / 2, peak1a.front().span_alloc_info); + std::vector half1b = + AllocateVectorWithSpanAllocInfo(N / 2, peak1a.front().span_alloc_info); + EXPECT_EQ(filler_.used_pages(), 2 * N + N / 2); + Advance(absl::Minutes(1)); + + // min_demand = 2N, max_demand = 2.5N + DeleteVector(half1b); + std::vector half1c = + AllocateVectorWithSpanAllocInfo(N / 2, peak1a.front().span_alloc_info); + EXPECT_EQ(filler_.used_pages(), 2 * N + N / 2); + EXPECT_EQ(filler_.free_pages(), N / 2); + Advance(absl::Minutes(1)); + + // At this point, short-term fluctuation, which is the maximum of the + // difference between max_demand and min_demand in the previous two + // intervals, is equal to 1.75N. Long-term demand, which is the maximum of + // min_demand in the previous two intervals, is 2N. As peak demand of 2.5N is + // lower than 3.75N, we should be able to subrelease 0.5N pages. 
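  // A minimal sketch of that calculation, using only the values fixed above
  // (illustrative; the filler's internal bookkeeping is more involved):
  //   interval 1: min_demand = 0.75N, max_demand = 2.5N  -> swing 1.75N
  //   interval 2: min_demand = 2N,    max_demand = 2.5N  -> swing 0.5N
  //   short-term fluctuation = max(1.75N, 0.5N)            = 1.75N
  //   long-term trend        = max(0.75N, 2N)              = 2N
  //   demand requirement     = min(1.75N + 2N, peak 2.5N)  = 2.5N
  //   releasable             = backed 3N - requirement 2.5N = 0.5N,
  // which is what the EXPECT_EQ below asserts.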
+ EXPECT_EQ(Length(N / 2), + ReleasePages(10 * N, SkipSubreleaseIntervals{ + .short_interval = absl::Minutes(2), + .long_interval = absl::Minutes(2)})); + DeleteVector(peak1a); + DeleteVector(peak1b); + DeleteVector(half1a); + DeleteVector(half1c); +} + +TEST_P(FillerTest, ReportSkipSubreleases) { + // Tests that HugePageFiller reports skipped subreleases using demand + // requirement that is the smaller of two (recent peak and its + // current capacity). This fix makes evaluating skip subrelease more accurate, + // which is useful for cross-comparing performance of different + // skip-subrelease intervals. + + // This test is sensitive to the number of pages per hugepage, as we are + // printing raw stats. + if (kPagesPerHugePage != Length(256)) { + GTEST_SKIP(); } - filler_tracker { - type: RELEASED - free_pages_histogram { - lower_bound: 0 - upper_bound: 0 - value: 0 - } - free_pages_histogram { - lower_bound: 1 - upper_bound: 1 - value: 0 - } - free_pages_histogram { - lower_bound: 2 - upper_bound: 2 - value: 0 - } - free_pages_histogram { - lower_bound: 3 - upper_bound: 3 - value: 0 - } - free_pages_histogram { - lower_bound: 4 - upper_bound: 15 - value: 2 - } - free_pages_histogram { - lower_bound: 16 - upper_bound: 31 - value: 0 - } - free_pages_histogram { - lower_bound: 32 - upper_bound: 47 - value: 0 - } - free_pages_histogram { - lower_bound: 48 - upper_bound: 63 - value: 0 - } - free_pages_histogram { - lower_bound: 64 - upper_bound: 79 - value: 0 - } - free_pages_histogram { - lower_bound: 80 - upper_bound: 95 - value: 0 - } - free_pages_histogram { - lower_bound: 96 - upper_bound: 111 - value: 0 - } - free_pages_histogram { - lower_bound: 112 - upper_bound: 127 - value: 0 - } - free_pages_histogram { - lower_bound: 128 - upper_bound: 143 - value: 0 - } - free_pages_histogram { - lower_bound: 144 - upper_bound: 159 - value: 0 - } - free_pages_histogram { - lower_bound: 160 - upper_bound: 175 - value: 0 - } - free_pages_histogram { - lower_bound: 176 - upper_bound: 191 - value: 0 - } - free_pages_histogram { - lower_bound: 192 - upper_bound: 207 - value: 0 - } - free_pages_histogram { - lower_bound: 208 - upper_bound: 223 - value: 0 - } - free_pages_histogram { - lower_bound: 224 - upper_bound: 239 - value: 0 - } - free_pages_histogram { - lower_bound: 240 - upper_bound: 251 - value: 0 - } - free_pages_histogram { - lower_bound: 252 - upper_bound: 252 - value: 0 - } - free_pages_histogram { - lower_bound: 253 - upper_bound: 253 - value: 0 - } - free_pages_histogram { - lower_bound: 254 - upper_bound: 254 - value: 0 - } - free_pages_histogram { - lower_bound: 255 - upper_bound: 255 - value: 0 - } - longest_free_range_histogram { - lower_bound: 0 - upper_bound: 0 - value: 0 - } - longest_free_range_histogram { - lower_bound: 1 - upper_bound: 1 - value: 0 - } - longest_free_range_histogram { - lower_bound: 2 - upper_bound: 2 - value: 0 - } - longest_free_range_histogram { - lower_bound: 3 - upper_bound: 3 - value: 0 - } - longest_free_range_histogram { - lower_bound: 4 - upper_bound: 15 - value: 2 - } - longest_free_range_histogram { - lower_bound: 16 - upper_bound: 31 - value: 0 - } - longest_free_range_histogram { - lower_bound: 32 - upper_bound: 47 - value: 0 - } - longest_free_range_histogram { - lower_bound: 48 - upper_bound: 63 - value: 0 - } - longest_free_range_histogram { - lower_bound: 64 - upper_bound: 79 - value: 0 - } - longest_free_range_histogram { - lower_bound: 80 - upper_bound: 95 - value: 0 - } - longest_free_range_histogram { - lower_bound: 96 - upper_bound: 
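  // The tests in this file repeatedly capture the filler's human-readable
  // stats with the same boilerplate. A minimal sketch of that pattern as a
  // hypothetical helper (the helper and its name are illustrative only, not
  // part of this change):
  //
  //   std::string CapturePrintedStats() {
  //     std::string buffer(1024 * 1024, '\0');
  //     {
  //       PageHeapSpinLockHolder l;
  //       Printer printer(&*buffer.begin(), buffer.size());
  //       filler_.Print(printer, /*everything=*/true);
  //     }
  //     buffer.resize(strlen(buffer.c_str()));
  //     return buffer;
  //   }
  //
  // with which the expectations reduce to
  //   EXPECT_THAT(CapturePrintedStats(), testing::HasSubstr("..."));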
111 - value: 0 - } - longest_free_range_histogram { - lower_bound: 112 - upper_bound: 127 - value: 0 - } - longest_free_range_histogram { - lower_bound: 128 - upper_bound: 143 - value: 0 - } - longest_free_range_histogram { - lower_bound: 144 - upper_bound: 159 - value: 0 - } - longest_free_range_histogram { - lower_bound: 160 - upper_bound: 175 - value: 0 - } - longest_free_range_histogram { - lower_bound: 176 - upper_bound: 191 - value: 0 - } - longest_free_range_histogram { - lower_bound: 192 - upper_bound: 207 - value: 0 - } - longest_free_range_histogram { - lower_bound: 208 - upper_bound: 223 - value: 0 - } - longest_free_range_histogram { - lower_bound: 224 - upper_bound: 239 - value: 0 - } - longest_free_range_histogram { - lower_bound: 240 - upper_bound: 251 - value: 0 - } - longest_free_range_histogram { - lower_bound: 252 - upper_bound: 252 - value: 0 - } - longest_free_range_histogram { - lower_bound: 253 - upper_bound: 253 - value: 0 - } - longest_free_range_histogram { - lower_bound: 254 - upper_bound: 254 - value: 0 - } - longest_free_range_histogram { - lower_bound: 255 - upper_bound: 255 - value: 0 - } - allocations_histogram { - lower_bound: 1 - upper_bound: 1 - value: 2 - } - allocations_histogram { - lower_bound: 2 - upper_bound: 2 - value: 0 - } - allocations_histogram { - lower_bound: 3 - upper_bound: 3 - value: 0 - } - allocations_histogram { - lower_bound: 4 - upper_bound: 4 - value: 0 - } - allocations_histogram { - lower_bound: 5 - upper_bound: 16 - value: 0 - } - allocations_histogram { - lower_bound: 17 - upper_bound: 32 - value: 0 - } - allocations_histogram { - lower_bound: 33 - upper_bound: 48 - value: 0 - } - allocations_histogram { - lower_bound: 49 - upper_bound: 64 - value: 0 - } - allocations_histogram { - lower_bound: 65 - upper_bound: 80 - value: 0 - } - allocations_histogram { - lower_bound: 81 - upper_bound: 96 - value: 0 - } - allocations_histogram { - lower_bound: 97 - upper_bound: 112 - value: 0 - } - allocations_histogram { - lower_bound: 113 - upper_bound: 128 - value: 0 - } - allocations_histogram { - lower_bound: 129 - upper_bound: 144 - value: 0 - } - allocations_histogram { - lower_bound: 145 - upper_bound: 160 - value: 0 - } - allocations_histogram { - lower_bound: 161 - upper_bound: 176 - value: 0 - } - allocations_histogram { - lower_bound: 177 - upper_bound: 192 - value: 0 - } - allocations_histogram { - lower_bound: 193 - upper_bound: 208 - value: 0 - } - allocations_histogram { - lower_bound: 209 - upper_bound: 224 - value: 0 - } - allocations_histogram { - lower_bound: 225 - upper_bound: 240 - value: 0 - } - allocations_histogram { - lower_bound: 241 - upper_bound: 252 - value: 0 - } - allocations_histogram { - lower_bound: 253 - upper_bound: 253 - value: 0 - } - allocations_histogram { - lower_bound: 254 - upper_bound: 254 - value: 0 - } - allocations_histogram { - lower_bound: 255 - upper_bound: 255 - value: 0 - } - allocations_histogram { - lower_bound: 256 - upper_bound: 256 - value: 0 - } + if (std::get<0>(GetParam()) == + HugePageFillerDenseTrackerType::kSpansAllocated) { + GTEST_SKIP() << "Skipping test for kSpansAllocated"; } - filler_skipped_subrelease { - skipped_subrelease_interval_ms: 0 - skipped_subrelease_pages: 0 - correctly_skipped_subrelease_pages: 0 - pending_skipped_subrelease_pages: 0 - skipped_subrelease_count: 0 - correctly_skipped_subrelease_count: 0 - pending_skipped_subrelease_count: 0 - } - filler_stats_timeseries { - window_ms: 1000 - epochs: 600 - min_free_pages_interval_ms: 300000 - min_free_pages: 0 - 
min_free_backed_pages: 0 - measurements { - epoch: 599 - timestamp_ms: 0 - min_free_pages: 0 - min_free_backed_pages: 0 - num_pages_subreleased: 269 - num_hugepages_broken: 3 - at_minimum_demand { - num_pages: 0 - regular_huge_pages: 0 - donated_huge_pages: 0 - partial_released_huge_pages: 0 - released_huge_pages: 0 - used_pages_in_subreleased_huge_pages: 0 - } - at_maximum_demand { - num_pages: 1774 - regular_huge_pages: 5 - donated_huge_pages: 1 - partial_released_huge_pages: 0 - released_huge_pages: 2 - used_pages_in_subreleased_huge_pages: 499 - } - at_minimum_huge_pages { - num_pages: 0 - regular_huge_pages: 0 - donated_huge_pages: 0 - partial_released_huge_pages: 0 - released_huge_pages: 0 - used_pages_in_subreleased_huge_pages: 0 - } - at_maximum_huge_pages { - num_pages: 1774 - regular_huge_pages: 5 - donated_huge_pages: 1 - partial_released_huge_pages: 0 - released_huge_pages: 2 - used_pages_in_subreleased_huge_pages: 499 - } - } + const Length N = kPagesPerHugePage; + // Reports skip subrelease using the recent demand peak (2.5N): it is smaller + // than the total number of pages (3N) when 0.25N free pages are skipped. The + // skipping is correct as the future demand is 2.5N. + std::vector peak1a = AllocateVector(3 * N / 4); + ASSERT_TRUE(!peak1a.empty()); + std::vector peak1b = + AllocateVectorWithSpanAllocInfo(N / 4, peak1a.front().span_alloc_info); + std::vector peak2a = AllocateVectorWithSpanAllocInfo( + 3 * N / 4, peak1a.front().span_alloc_info); + std::vector peak2b = + AllocateVectorWithSpanAllocInfo(N / 4, peak1a.front().span_alloc_info); + std::vector half1 = + AllocateVectorWithSpanAllocInfo(N / 2, peak1a.front().span_alloc_info); + Advance(absl::Minutes(2)); + DeleteVector(half1); + DeleteVector(peak1b); + DeleteVector(peak2b); + std::vector peak3a = AllocateVectorWithSpanAllocInfo( + 3 * N / 4, peak1a.front().span_alloc_info); + EXPECT_EQ(filler_.free_pages(), 3 * N / 4); + // Subreleases 0.5N free pages and skips 0.25N free pages. + EXPECT_EQ(N / 2, + ReleasePages(10 * N, SkipSubreleaseIntervals{ + .short_interval = absl::Minutes(3), + .long_interval = absl::Minutes(3)})); + Advance(absl::Minutes(3)); + std::vector tiny1 = + AllocateVectorWithSpanAllocInfo(N / 4, peak1a.front().span_alloc_info); + EXPECT_EQ(filler_.used_pages(), 2 * N + N / 2); + EXPECT_EQ(filler_.unmapped_pages(), N / 2); + EXPECT_EQ(filler_.free_pages(), Length(0)); + DeleteVector(peak1a); + DeleteVector(peak2a); + DeleteVector(peak3a); + DeleteVector(tiny1); + EXPECT_EQ(filler_.used_pages(), Length(0)); + EXPECT_EQ(filler_.unmapped_pages(), Length(0)); + EXPECT_EQ(filler_.free_pages(), Length(0)); + // Accounts for pages that are eagerly unmapped (unmapping_unaccounted_). + EXPECT_EQ(N + N / 2, ReleasePages(10 * N)); + + Advance(absl::Minutes(30)); + + // Reports skip subrelease using HugePageFiller's capacity (N pages): it is + // smaller than the recent peak (2N) when 0.5N pages are skipped. They are + // correctly skipped as the future demand is N. 
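  // Putting the two phases together (illustrative arithmetic only):
  //   phase 1: requirement = min(recent peak 2.5N, capacity 3N) = 2.5N,
  //            so 0.5N of the 0.75N free pages are subreleased and 0.25N are
  //            skipped; the future demand of 2.5N confirms the skip.
  //   phase 2: requirement = min(recent peak 2N, capacity N) = N,
  //            so all 0.5N free pages are skipped; the future demand of N
  //            confirms the skip.
  // With N = 256 that is 0.25N + 0.5N = 192 skipped pages across 2 decisions,
  // matching the "2 subreleases (192 pages)" summary line expected below.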
+ std::vector peak4a = AllocateVector(3 * N / 4); + ASSERT_TRUE(!peak4a.empty()); + std::vector peak4b = + AllocateVectorWithSpanAllocInfo(N / 4, peak4a.front().span_alloc_info); + std::vector peak5a = AllocateVector(3 * N / 4); + ASSERT_TRUE(!peak5a.empty()); + std::vector peak5b = + AllocateVectorWithSpanAllocInfo(N / 4, peak5a.front().span_alloc_info); + Advance(absl::Minutes(2)); + DeleteVector(peak4a); + DeleteVector(peak4b); + DeleteVector(peak5a); + DeleteVector(peak5b); + std::vector half2 = AllocateVector(N / 2); + EXPECT_EQ(Length(0), + ReleasePages(10 * N, SkipSubreleaseIntervals{ + .short_interval = absl::Minutes(3), + .long_interval = absl::Minutes(3)})); + Advance(absl::Minutes(3)); + std::vector half3 = AllocateVector(N / 2); + DeleteVector(half2); + DeleteVector(half3); + EXPECT_EQ(filler_.used_pages(), Length(0)); + EXPECT_EQ(filler_.unmapped_pages(), Length(0)); + EXPECT_EQ(filler_.free_pages(), Length(0)); + EXPECT_EQ(Length(0), ReleasePages(10 * N)); + Advance(absl::Minutes(30)); + // Ensures that the tracker is updated. + auto tiny2 = Allocate(Length(1)); + Delete(tiny2); + + std::string buffer(1024 * 1024, '\0'); + { + Printer printer(&*buffer.begin(), buffer.size()); + filler_.Print(printer, true); } + buffer.resize(strlen(buffer.c_str())); + + EXPECT_THAT(buffer, testing::HasSubstr(R"( +HugePageFiller: Since the start of the execution, 2 subreleases (192 pages) were skipped due to the sum of short-term (180s) fluctuations and long-term (180s) trends. +HugePageFiller: 100.0000% of decisions confirmed correct, 0 pending (100.0000% of pages, 0 pending). )")); - for (const auto& alloc : allocs) { - Delete(alloc); +} + +TEST_P(FillerTest, ReportSkipSubreleases_SpansAllocated) { + // Tests that HugePageFiller reports skipped subreleases using demand + // requirement that is the smaller of two (recent peak and its + // current capacity). This fix makes evaluating skip subrelease more accurate, + // which is useful for cross-comparing performance of different + // skip-subrelease intervals. + + // This test is sensitive to the number of pages per hugepage, as we are + // printing raw stats. + if (kPagesPerHugePage != Length(256)) { + GTEST_SKIP(); + } + if (std::get<0>(GetParam()) != + HugePageFillerDenseTrackerType::kSpansAllocated) { + GTEST_SKIP() << "Skipping test for !kSpansAllocated"; + } + randomize_density_ = false; + const Length N = kPagesPerHugePage; + SpanAllocInfo info = {kPagesPerHugePage.raw_num(), + AccessDensityPrediction::kDense}; + // Reports skip subrelease using the recent demand peak (2.5N): it is smaller + // than the total number of pages (3N) when 0.25N free pages are skipped. The + // skipping is correct as the future demand is 2.5N. + std::vector peak1a = AllocateVectorWithSpanAllocInfo(3 * N / 4, info); + ASSERT_TRUE(!peak1a.empty()); + std::vector peak1b = + AllocateVectorWithSpanAllocInfo(N / 4, peak1a.front().span_alloc_info); + std::vector peak2a = AllocateVectorWithSpanAllocInfo( + 3 * N / 4, peak1a.front().span_alloc_info); + std::vector peak2b = + AllocateVectorWithSpanAllocInfo(N / 4, peak1a.front().span_alloc_info); + std::vector half1 = + AllocateVectorWithSpanAllocInfo(N / 2, peak1a.front().span_alloc_info); + Advance(absl::Minutes(2)); + DeleteVector(half1); + DeleteVector(peak1b); + DeleteVector(peak2b); + std::vector peak3a = AllocateVectorWithSpanAllocInfo( + 3 * N / 4, peak1a.front().span_alloc_info); + EXPECT_EQ(filler_.free_pages(), 3 * N / 4); + // Subreleases 0.75N free pages. 
+ EXPECT_EQ(3 * N / 4, + ReleasePages(10 * N, SkipSubreleaseIntervals{ + .short_interval = absl::Seconds(1), + .long_interval = absl::Seconds(1)})); + Advance(absl::Minutes(3)); + std::vector tiny1 = + AllocateVectorWithSpanAllocInfo(N / 4, peak1a.front().span_alloc_info); + EXPECT_EQ(filler_.used_pages(), 2 * N + N / 2); + EXPECT_EQ(filler_.unmapped_pages(), N / 2); + EXPECT_EQ(filler_.free_pages(), Length(0)); + DeleteVector(peak1a); + DeleteVector(peak2a); + DeleteVector(peak3a); + DeleteVector(tiny1); + EXPECT_EQ(filler_.used_pages(), Length(0)); + EXPECT_EQ(filler_.unmapped_pages(), Length(0)); + EXPECT_EQ(filler_.free_pages(), Length(0)); + // Accounts for pages that are eagerly unmapped (unmapping_unaccounted_). + EXPECT_EQ(N / 2, ReleasePages(10 * N)); + + Advance(absl::Minutes(30)); + + // Reports skip subrelease using HugePageFiller's capacity (N pages): it is + // smaller than the recent peak (2N) when 0.5N pages are skipped. They are + // correctly skipped as the future demand is N. + std::vector peak4a = AllocateVectorWithSpanAllocInfo(3 * N / 4, info); + ASSERT_TRUE(!peak4a.empty()); + std::vector peak4b = + AllocateVectorWithSpanAllocInfo(N / 4, peak4a.front().span_alloc_info); + std::vector peak5a = AllocateVectorWithSpanAllocInfo(3 * N / 4, info); + ASSERT_TRUE(!peak5a.empty()); + std::vector peak5b = + AllocateVectorWithSpanAllocInfo(N / 4, peak5a.front().span_alloc_info); + Advance(absl::Minutes(2)); + DeleteVector(peak4a); + DeleteVector(peak4b); + DeleteVector(peak5a); + DeleteVector(peak5b); + std::vector half2 = AllocateVectorWithSpanAllocInfo(N / 2, info); + EXPECT_EQ(Length(0), + ReleasePages(10 * N, SkipSubreleaseIntervals{ + .short_interval = absl::Seconds(1), + .long_interval = absl::Seconds(1)})); + Advance(absl::Minutes(3)); + std::vector half3 = AllocateVectorWithSpanAllocInfo(N / 2, info); + DeleteVector(half2); + DeleteVector(half3); + EXPECT_EQ(filler_.used_pages(), Length(0)); + EXPECT_EQ(filler_.unmapped_pages(), Length(0)); + EXPECT_EQ(filler_.free_pages(), Length(0)); + EXPECT_EQ(Length(0), ReleasePages(10 * N)); + Advance(absl::Minutes(30)); + // Ensures that the tracker is updated. + auto tiny2 = Allocate(Length(1)); + Delete(tiny2); + + std::string buffer(1024 * 1024, '\0'); + { + Printer printer(&*buffer.begin(), buffer.size()); + filler_.Print(printer, true); + } + buffer.resize(strlen(buffer.c_str())); + + EXPECT_THAT(buffer, testing::HasSubstr(R"( +HugePageFiller: Since the start of the execution, 2 subreleases (191 pages) were skipped due to the sum of short-term (1s) fluctuations and long-term (1s) trends. +HugePageFiller: 0.0000% of decisions confirmed correct, 0 pending (0.0000% of pages, 0 pending). +)")); +} + +std::vector FillerTest::GenerateInterestingAllocs() { + SpanAllocInfo info_sparsely_accessed = {1, AccessDensityPrediction::kSparse}; + SpanAllocInfo info_densely_accessed = {kMaxValidPages.raw_num(), + AccessDensityPrediction::kDense}; + PAlloc a = AllocateWithSpanAllocInfo(Length(1), info_sparsely_accessed); + EXPECT_EQ(ReleasePages(kMaxValidPages), kPagesPerHugePage - Length(1)); + Delete(a); + // Get the report on the released page + EXPECT_EQ(ReleasePages(kMaxValidPages), Length(1)); + + // Use a maximally-suboptimal pattern to get lots of hugepages into the + // filler. 
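  // Concretely, each of the seven iterations below requests
  // kPagesPerHugePage - i - 1 pages once with sparsely-accessed and once with
  // densely-accessed span info, leaving i + 1 pages free on the hugepages it
  // fills; together with the single donated hugepage at the end, this is what
  // yields the 15 hugepages counted by the filler-stats tests later in this
  // file (the per-category split depends on the dense-tracker configuration).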
+ std::vector result; + static_assert(kPagesPerHugePage > Length(7), + "Not enough pages per hugepage!"); + for (auto i = Length(0); i < Length(7); ++i) { + std::vector temp = AllocateVectorWithSpanAllocInfo( + kPagesPerHugePage - i - Length(1), info_sparsely_accessed); + result.insert(result.end(), temp.begin(), temp.end()); + temp = AllocateVectorWithSpanAllocInfo(kPagesPerHugePage - i - Length(1), + info_densely_accessed); + result.insert(result.end(), temp.begin(), temp.end()); + } + + // Get released hugepages. + Length l = ReleasePages(Length(7)); + EXPECT_TRUE(l == Length(7) || l == Length(28)); + l = ReleasePages(Length(7)); + EXPECT_EQ(l, Length(7)); + l = ReleasePages(Length(6)); + EXPECT_EQ(l, Length(6)); + l = ReleasePages(Length(6)); + EXPECT_TRUE(l == Length(6) || l == Length(9)); + + // Fill some of the remaining pages with small allocations. + for (int i = 0; i < 9; ++i) { + result.push_back( + AllocateWithSpanAllocInfo(Length(1), info_sparsely_accessed)); + result.push_back( + AllocateWithSpanAllocInfo(Length(1), info_densely_accessed)); } + + // Finally, donate one hugepage. + result.push_back(AllocateWithSpanAllocInfo(Length(1), info_sparsely_accessed, + /*donated=*/true)); + return result; } // Testing subrelase stats: ensure that the cumulative number of released // pages and broken hugepages is no less than those of the last 10 mins TEST_P(FillerTest, CheckSubreleaseStats) { + if (std::get<0>(GetParam()) == + HugePageFillerDenseTrackerType::kSpansAllocated) { + GTEST_SKIP() << "Skipping test for kSpansAllocated"; + } // Get lots of hugepages into the filler. Advance(absl::Minutes(1)); - std::vector result; + std::vector> result; static_assert(kPagesPerHugePage > Length(10), "Not enough pages per hugepage!"); + // Fix the object count since very specific statistics are being tested. + const AccessDensityPrediction kDensity = + absl::Bernoulli(gen_, 0.5) ? 
AccessDensityPrediction::kSparse + : AccessDensityPrediction::kDense; + const size_t kObjects = (1 << absl::Uniform(gen_, 0, 8)); + const SpanAllocInfo kAllocInfo = {kObjects, kDensity}; + for (int i = 0; i < 10; ++i) { - result.push_back(Allocate(kPagesPerHugePage - Length(i + 1))); + result.push_back(AllocateVectorWithSpanAllocInfo( + kPagesPerHugePage - Length(i + 1), kAllocInfo)); } // Breaking up 2 hugepages, releasing 19 pages due to reaching limit, @@ -3531,6 +3038,7 @@ TEST_P(FillerTest, CheckSubreleaseStats) { Advance(absl::Minutes(1)); SubreleaseStats subrelease = filler_.subrelease_stats(); EXPECT_EQ(subrelease.total_pages_subreleased, Length(0)); + EXPECT_EQ(subrelease.total_partial_alloc_pages_subreleased, Length(0)); EXPECT_EQ(subrelease.total_hugepages_broken.raw_num(), 0); EXPECT_EQ(subrelease.num_pages_subreleased, Length(19)); EXPECT_EQ(subrelease.num_hugepages_broken.raw_num(), 2); @@ -3539,10 +3047,11 @@ TEST_P(FillerTest, CheckSubreleaseStats) { // Do some work so that the timeseries updates its stats for (int i = 0; i < 5; ++i) { - result.push_back(Allocate(Length(1))); + result.push_back(AllocateVectorWithSpanAllocInfo(Length(1), kAllocInfo)); } subrelease = filler_.subrelease_stats(); EXPECT_EQ(subrelease.total_pages_subreleased, Length(19)); + EXPECT_EQ(subrelease.total_partial_alloc_pages_subreleased, Length(0)); EXPECT_EQ(subrelease.total_hugepages_broken.raw_num(), 2); EXPECT_EQ(subrelease.num_pages_subreleased, Length(0)); EXPECT_EQ(subrelease.num_hugepages_broken.raw_num(), 0); @@ -3556,6 +3065,7 @@ TEST_P(FillerTest, CheckSubreleaseStats) { subrelease = filler_.subrelease_stats(); EXPECT_EQ(subrelease.total_pages_subreleased, Length(19)); + EXPECT_EQ(subrelease.total_partial_alloc_pages_subreleased, Length(0)); EXPECT_EQ(subrelease.total_hugepages_broken.raw_num(), 2); EXPECT_EQ(subrelease.num_pages_subreleased, Length(21)); EXPECT_EQ(subrelease.num_hugepages_broken.raw_num(), 3); @@ -3565,10 +3075,11 @@ TEST_P(FillerTest, CheckSubreleaseStats) { Advance(absl::Minutes(10)); // This forces timeseries to wrap // Do some work for (int i = 0; i < 5; ++i) { - result.push_back(Allocate(Length(1))); + result.push_back(AllocateVectorWithSpanAllocInfo(Length(1), kAllocInfo)); } subrelease = filler_.subrelease_stats(); EXPECT_EQ(subrelease.total_pages_subreleased, Length(40)); + EXPECT_EQ(subrelease.total_partial_alloc_pages_subreleased, Length(0)); EXPECT_EQ(subrelease.total_hugepages_broken.raw_num(), 5); EXPECT_EQ(subrelease.num_pages_subreleased, Length(0)); EXPECT_EQ(subrelease.num_hugepages_broken.raw_num(), 0); @@ -3577,8 +3088,9 @@ TEST_P(FillerTest, CheckSubreleaseStats) { std::string buffer(1024 * 1024, '\0'); { + PageHeapSpinLockHolder l; Printer printer(&*buffer.begin(), buffer.size()); - filler_.Print(&printer, /*everything=*/true); + filler_.Print(printer, /*everything=*/true); buffer.erase(printer.SpaceRequired()); } @@ -3590,10 +3102,119 @@ TEST_P(FillerTest, CheckSubreleaseStats) { "limit)")); ASSERT_THAT(buffer, testing::EndsWith( "HugePageFiller: Subrelease stats last 10 min: total " - "21 pages subreleased, 3 hugepages broken\n")); + "21 pages subreleased (0 pages from partial allocs), " + "3 hugepages broken\n")); for (const auto& alloc : result) { - Delete(alloc); + DeleteVector(alloc); + } +} + +TEST_P(FillerTest, CheckSubreleaseStats_SpansAllocated) { + if (std::get<0>(GetParam()) != + HugePageFillerDenseTrackerType::kSpansAllocated) { + GTEST_SKIP() << "Skipping test for !kSpansAllocated"; + } + randomize_density_ = false; + // Get lots of 
hugepages into the filler. + Advance(absl::Minutes(1)); + std::vector> result; + std::vector> temporary; + static_assert(kPagesPerHugePage > Length(10), + "Not enough pages per hugepage!"); + // Fix the object count since very specific statistics are being tested. + const AccessDensityPrediction kDensity = + absl::Bernoulli(gen_, 0.5) ? AccessDensityPrediction::kSparse + : AccessDensityPrediction::kDense; + const size_t kObjects = (1 << absl::Uniform(gen_, 0, 8)); + const SpanAllocInfo kAllocInfo = {kObjects, kDensity}; + + for (int i = 0; i < 10; ++i) { + result.push_back(AllocateVectorWithSpanAllocInfo( + kPagesPerHugePage - Length(i + 1), kAllocInfo)); + temporary.push_back( + AllocateVectorWithSpanAllocInfo(Length(i + 1), kAllocInfo)); + } + for (const auto& alloc : temporary) { + DeleteVector(alloc); + } + + // Breaking up 2 hugepages, releasing 19 pages due to reaching limit, + EXPECT_EQ(HardReleasePages(Length(10)), Length(10)); + EXPECT_EQ(HardReleasePages(Length(9)), Length(9)); + + Advance(absl::Minutes(1)); + SubreleaseStats subrelease = filler_.subrelease_stats(); + EXPECT_EQ(subrelease.total_pages_subreleased, Length(0)); + EXPECT_EQ(subrelease.total_partial_alloc_pages_subreleased, Length(0)); + EXPECT_EQ(subrelease.total_hugepages_broken.raw_num(), 0); + EXPECT_EQ(subrelease.num_pages_subreleased, Length(19)); + EXPECT_EQ(subrelease.num_hugepages_broken.raw_num(), 2); + EXPECT_EQ(subrelease.total_pages_subreleased_due_to_limit, Length(19)); + EXPECT_EQ(subrelease.total_hugepages_broken_due_to_limit.raw_num(), 2); + + // Do some work so that the timeseries updates its stats + for (int i = 0; i < 5; ++i) { + result.push_back(AllocateVectorWithSpanAllocInfo(Length(1), kAllocInfo)); + } + subrelease = filler_.subrelease_stats(); + EXPECT_EQ(subrelease.total_pages_subreleased, Length(19)); + EXPECT_EQ(subrelease.total_partial_alloc_pages_subreleased, Length(0)); + EXPECT_EQ(subrelease.total_hugepages_broken.raw_num(), 2); + EXPECT_EQ(subrelease.num_pages_subreleased, Length(0)); + EXPECT_EQ(subrelease.num_hugepages_broken.raw_num(), 0); + EXPECT_EQ(subrelease.total_pages_subreleased_due_to_limit, Length(19)); + EXPECT_EQ(subrelease.total_hugepages_broken_due_to_limit.raw_num(), 2); + + // Breaking up 3 hugepages, releasing 21 pages (background thread) + EXPECT_EQ(ReleasePages(Length(8)), Length(8)); + EXPECT_EQ(ReleasePages(Length(7)), Length(7)); + EXPECT_EQ(ReleasePages(Length(6)), Length(6)); + + subrelease = filler_.subrelease_stats(); + EXPECT_EQ(subrelease.total_pages_subreleased, Length(19)); + EXPECT_EQ(subrelease.total_partial_alloc_pages_subreleased, Length(0)); + EXPECT_EQ(subrelease.total_hugepages_broken.raw_num(), 2); + EXPECT_EQ(subrelease.num_pages_subreleased, Length(21)); + EXPECT_EQ(subrelease.num_hugepages_broken.raw_num(), 3); + EXPECT_EQ(subrelease.total_pages_subreleased_due_to_limit, Length(19)); + EXPECT_EQ(subrelease.total_hugepages_broken_due_to_limit.raw_num(), 2); + + Advance(absl::Minutes(10)); // This forces timeseries to wrap + // Do some work + for (int i = 0; i < 5; ++i) { + result.push_back(AllocateVectorWithSpanAllocInfo(Length(1), kAllocInfo)); + } + subrelease = filler_.subrelease_stats(); + EXPECT_EQ(subrelease.total_pages_subreleased, Length(40)); + EXPECT_EQ(subrelease.total_partial_alloc_pages_subreleased, Length(0)); + EXPECT_EQ(subrelease.total_hugepages_broken.raw_num(), 5); + EXPECT_EQ(subrelease.num_pages_subreleased, Length(0)); + EXPECT_EQ(subrelease.num_hugepages_broken.raw_num(), 0); + 
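  // (As in the non-SpansAllocated variant above: the num_* counters appear to
  // track subrelease activity accumulated since the last timeseries update
  // and are folded into the cumulative total_* counters once further work
  // advances the stats; that is the pattern these expectations encode.)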
EXPECT_EQ(subrelease.total_pages_subreleased_due_to_limit, Length(19)); + EXPECT_EQ(subrelease.total_hugepages_broken_due_to_limit.raw_num(), 2); + + std::string buffer(1024 * 1024, '\0'); + { + PageHeapSpinLockHolder l; + Printer printer(&*buffer.begin(), buffer.size()); + filler_.Print(printer, /*everything=*/true); + buffer.erase(printer.SpaceRequired()); + } + + ASSERT_THAT( + buffer, + testing::HasSubstr( + "HugePageFiller: Since startup, 40 pages subreleased, 5 hugepages " + "broken, (19 pages, 2 hugepages due to reaching tcmalloc " + "limit)")); + ASSERT_THAT(buffer, testing::EndsWith( + "HugePageFiller: Subrelease stats last 10 min: total " + "21 pages subreleased (0 pages from partial allocs), " + "3 hugepages broken\n")); + + for (const auto& alloc : result) { + DeleteVector(alloc); } } @@ -3613,8 +3234,12 @@ TEST_P(FillerTest, ConstantBrokenHugePages) { auto size = Length(absl::Uniform(rng, 2, kPagesPerHugePage.raw_num() - 1)); alloc_small.push_back(Allocate(Length(1))); - alloc.push_back(Allocate(size - Length(1))); - dead.push_back(Allocate(kPagesPerHugePage - size)); + SpanAllocInfo info = alloc_small.back().span_alloc_info; + std::vector temp = + AllocateVectorWithSpanAllocInfo(size - Length(1), info); + alloc.insert(alloc.end(), temp.begin(), temp.end()); + temp = AllocateVectorWithSpanAllocInfo(kPagesPerHugePage - size, info); + dead.insert(dead.end(), temp.begin(), temp.end()); } ASSERT_EQ(filler_.size(), kHugePages); @@ -3629,8 +3254,9 @@ TEST_P(FillerTest, ConstantBrokenHugePages) { std::string buffer(1024 * 1024, '\0'); { + PageHeapSpinLockHolder l; Printer printer(&*buffer.begin(), buffer.size()); - filler_.Print(&printer, /*everything=*/false); + filler_.Print(printer, /*everything=*/false); buffer.erase(printer.SpaceRequired()); } @@ -3664,29 +3290,28 @@ TEST_P(FillerTest, ConstantBrokenHugePages) { TEST_P(FillerTest, CheckBufferSize) { const int kEpochs = 600; const absl::Duration kEpochLength = absl::Seconds(1); - - PAlloc big = Allocate(kPagesPerHugePage - Length(4)); + std::vector big = AllocateVector(kPagesPerHugePage - Length(4)); for (int i = 0; i < kEpochs; i += 2) { - auto tiny = Allocate(Length(2)); + auto tiny = AllocateVector(Length(2)); Advance(kEpochLength); - Delete(tiny); + DeleteVector(tiny); Advance(kEpochLength); } - Delete(big); + DeleteVector(big); std::string buffer(1024 * 1024, '\0'); Printer printer(&*buffer.begin(), buffer.size()); { - PbtxtRegion region(&printer, kTop, /*indent=*/0); - filler_.PrintInPbtxt(®ion); + PageHeapSpinLockHolder l; + PbtxtRegion region(printer, kTop); + filler_.PrintInPbtxt(region); } // We assume a maximum buffer size of 1 MiB. When increasing this size, ensure // that all places processing mallocz protos get updated as well. size_t buffer_size = printer.SpaceRequired(); - printf("HugePageFiller buffer size: %zu\n", buffer_size); EXPECT_LE(buffer_size, 1024 * 1024); } @@ -3699,32 +3324,34 @@ TEST_P(FillerTest, ReleasePriority) { // We will ensure that we fill full huge pages, then deallocate some parts of // those to provide space for subrelease. 
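  // Sketch of the setup that follows: for every hugepage we keep a random
  // number of pages live and allocate the remainder into `dead`, then free
  // all of `dead`; each tracker is left with its own used_pages() count, and
  // the test buckets trackers by that count to check that release starts from
  // the least-used hugepages (per the 5/2020 priority noted below).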
absl::BitGen rng; - std::vector alloc; + std::vector> alloc; alloc.reserve(kHugePages.raw_num()); - std::vector dead; + std::vector> dead; dead.reserve(kHugePages.raw_num()); - absl::flat_hash_set unique_pages; + absl::flat_hash_set unique_pages; unique_pages.reserve(kHugePages.raw_num()); for (HugeLength i; i < kHugePages; ++i) { Length size(absl::Uniform(rng, 1, kPagesPerHugePage.raw_num() - 1)); - PAlloc a = Allocate(size); - unique_pages.insert(a.pt); + std::vector a = AllocateVector(size); + ASSERT_TRUE(!a.empty()); + for (const auto& pa : a) unique_pages.insert(pa.pt); alloc.push_back(a); - dead.push_back(Allocate(kPagesPerHugePage - size)); + dead.push_back(AllocateVectorWithSpanAllocInfo(kPagesPerHugePage - size, + a.front().span_alloc_info)); } ASSERT_EQ(filler_.size(), kHugePages); for (auto& a : dead) { - Delete(a); + DeleteVector(a); } // As of 5/2020, our release priority is to subrelease huge pages with the // fewest used pages. Bucket unique_pages by that used_pages(). - std::vector> ordered(kPagesPerHugePage.raw_num()); + std::vector> ordered(kPagesPerHugePage.raw_num()); for (auto* pt : unique_pages) { // None of these should be released yet. EXPECT_FALSE(pt->released()); @@ -3786,13 +3413,883 @@ TEST_P(FillerTest, ReleasePriority) { } for (auto& a : alloc) { + DeleteVector(a); + } +} + +TEST_P(FillerTest, b258965495) { + // 1 huge page: 2 pages allocated, kPagesPerHugePage-2 free, 0 released + auto a1 = AllocateVector(Length(2)); + ASSERT_TRUE(!a1.empty()); + EXPECT_EQ(filler_.size(), NHugePages(1)); + + ASSERT_TRUE(blocking_unback_.success_); + // 1 huge page: 2 pages allocated, 0 free, kPagesPerHugePage-2 released + EXPECT_EQ(HardReleasePages(kPagesPerHugePage), kPagesPerHugePage - Length(2)); + + blocking_unback_.success_ = false; + // 1 huge page: 3 pages allocated, 0 free, kPagesPerHugePage-3 released + auto a2 = AllocateWithSpanAllocInfo(Length(1), a1.front().span_alloc_info); + EXPECT_EQ(filler_.size(), NHugePages(1)); + // Even if PartialRerelease::Return, returning a2 fails, so a2's pages stay + // freed rather than released. + // + // 1 huge page: 2 pages allocated, 1 free, kPagesPerHugePage-3 released + Delete(a2); + + blocking_unback_.success_ = true; + // During the deallocation of a1 under PartialRerelease::Return, but before we + // mark the pages as free (PageTracker::MaybeRelease), we have: + // + // 1 huge page: 2 pages allocated, 1 free, kPagesPerHugePage-1 released + // + // The page appears fully (free_pages() <= released_pages()), rather than + // partially released, so we look for it on the wrong list. + DeleteVector(a1); +} + +TEST_P(FillerTest, CheckFillerStats) { + if (std::get<0>(GetParam()) == + HugePageFillerDenseTrackerType::kSpansAllocated) { + GTEST_SKIP() << "Skipping test for kSpansAllocated"; + } + if (kPagesPerHugePage != Length(256)) { + // The output is hardcoded on this assumption, and dynamically calculating + // it would be way too much of a pain. + return; + } + // We prevent randomly choosing the number of objects per span since this + // test has hardcoded output which will change if the objects per span are + // chosen at random. + randomize_density_ = false; + auto allocs = GenerateInterestingAllocs(); + + const HugePageFillerStats stats = filler_.GetStats(); + for (int i = 0; i < AccessDensityPrediction::kPredictionCounts; ++i) { + EXPECT_GE(stats.n_fully_released[i].raw_num(), 0); + } + // Check sparsely-accessed filler stats. 
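  // The HugePageFillerStats arrays are indexed by AccessDensityPrediction:
  // kSparse and kDense hold the per-density counts, and kPredictionCounts is
  // used as the aggregate row, so each total checked further below is the sum
  // of the sparse and dense values (e.g. n_total: 8 + 7 = 15), as the
  // expectations encode.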
+ EXPECT_EQ(stats.n_fully_released[AccessDensityPrediction::kSparse].raw_num(), + 2); + EXPECT_EQ(stats.n_released[AccessDensityPrediction::kSparse].raw_num(), 2); + EXPECT_EQ( + stats.n_partial_released[AccessDensityPrediction::kSparse].raw_num(), 0); + EXPECT_EQ(stats.n_total[AccessDensityPrediction::kSparse].raw_num(), 8); + EXPECT_EQ(stats.n_full[AccessDensityPrediction::kSparse].raw_num(), 3); + EXPECT_EQ(stats.n_partial[AccessDensityPrediction::kSparse].raw_num(), 3); + + // Check densely-accessed filler stats. + EXPECT_EQ(stats.n_fully_released[AccessDensityPrediction::kDense].raw_num(), + 2); + EXPECT_EQ(stats.n_released[AccessDensityPrediction::kDense].raw_num(), 2); + EXPECT_EQ(stats.n_partial_released[AccessDensityPrediction::kDense].raw_num(), + 0); + EXPECT_EQ(stats.n_total[AccessDensityPrediction::kDense].raw_num(), 7); + EXPECT_EQ(stats.n_full[AccessDensityPrediction::kDense].raw_num(), 3); + EXPECT_EQ(stats.n_partial[AccessDensityPrediction::kDense].raw_num(), 2); + + // Check total filler stats. + EXPECT_EQ(stats.n_fully_released[AccessDensityPrediction::kPredictionCounts] + .raw_num(), + 4); + EXPECT_EQ( + stats.n_released[AccessDensityPrediction::kPredictionCounts].raw_num(), + 4); + EXPECT_EQ(stats.n_partial_released[AccessDensityPrediction::kPredictionCounts] + .raw_num(), + 0); + EXPECT_EQ(stats.n_total[AccessDensityPrediction::kPredictionCounts].raw_num(), + 15); + EXPECT_EQ(stats.n_full[AccessDensityPrediction::kPredictionCounts].raw_num(), + 6); + EXPECT_EQ( + stats.n_partial[AccessDensityPrediction::kPredictionCounts].raw_num(), 5); + + for (const auto& alloc : allocs) { + Delete(alloc); + } +} + +TEST_P(FillerTest, CheckFillerStats_SpansAllocated) { + if (std::get<0>(GetParam()) != + HugePageFillerDenseTrackerType::kSpansAllocated) { + GTEST_SKIP() << "Skipping test for !kSpansAllocated"; + } + if (kPagesPerHugePage != Length(256)) { + // The output is hardcoded on this assumption, and dynamically calculating + // it would be way too much of a pain. + return; + } + // We prevent randomly choosing the number of objects per span since this + // test has hardcoded output which will change if the objects per span are + // chosen at random. + randomize_density_ = false; + auto allocs = GenerateInterestingAllocs(); + + const HugePageFillerStats stats = filler_.GetStats(); + for (int i = 0; i < AccessDensityPrediction::kPredictionCounts; ++i) { + EXPECT_GE(stats.n_fully_released[i].raw_num(), 0); + } + // Check sparsely-accessed filler stats. + EXPECT_EQ(stats.n_fully_released[AccessDensityPrediction::kSparse].raw_num(), + 4); + EXPECT_EQ(stats.n_released[AccessDensityPrediction::kSparse].raw_num(), 4); + EXPECT_EQ( + stats.n_partial_released[AccessDensityPrediction::kSparse].raw_num(), 0); + EXPECT_EQ(stats.n_total[AccessDensityPrediction::kSparse].raw_num(), 8); + EXPECT_EQ(stats.n_full[AccessDensityPrediction::kSparse].raw_num(), 3); + EXPECT_EQ(stats.n_partial[AccessDensityPrediction::kSparse].raw_num(), 1); + + // Check densely-accessed filler stats. + EXPECT_EQ(stats.n_fully_released[AccessDensityPrediction::kDense].raw_num(), + 1); + EXPECT_EQ(stats.n_released[AccessDensityPrediction::kDense].raw_num(), 1); + EXPECT_EQ(stats.n_partial_released[AccessDensityPrediction::kDense].raw_num(), + 0); + EXPECT_EQ(stats.n_total[AccessDensityPrediction::kDense].raw_num(), 7); + EXPECT_EQ(stats.n_full[AccessDensityPrediction::kDense].raw_num(), 6); + EXPECT_EQ(stats.n_partial[AccessDensityPrediction::kDense].raw_num(), 0); + + // Check total filler stats. 
+ EXPECT_EQ(stats.n_fully_released[AccessDensityPrediction::kPredictionCounts] + .raw_num(), + 5); + EXPECT_EQ( + stats.n_released[AccessDensityPrediction::kPredictionCounts].raw_num(), + 5); + EXPECT_EQ(stats.n_partial_released[AccessDensityPrediction::kPredictionCounts] + .raw_num(), + 0); + EXPECT_EQ(stats.n_total[AccessDensityPrediction::kPredictionCounts].raw_num(), + 15); + EXPECT_EQ(stats.n_full[AccessDensityPrediction::kPredictionCounts].raw_num(), + 9); + EXPECT_EQ( + stats.n_partial[AccessDensityPrediction::kPredictionCounts].raw_num(), 1); + + for (const auto& alloc : allocs) { + Delete(alloc); + } +} + +// Test the output of Print(). This is something of a change-detector test, +// but that's not all bad in this case. +TEST_P(FillerTest, Print) { + // Skip test for kSpansAllocated since the output is hard coded. + if (std::get<0>(GetParam()) == + HugePageFillerDenseTrackerType::kSpansAllocated) { + GTEST_SKIP() << "Skipping test for kSpansAllocated"; + } + if (kPagesPerHugePage != Length(256)) { + // The output is hardcoded on this assumption, and dynamically calculating + // it would be way too much of a pain. + return; + } + // We prevent randomly choosing the number of objects per span since this + // test has hardcoded output which will change if the objects per span are + // chosen at random. + randomize_density_ = false; + auto allocs = GenerateInterestingAllocs(); + + std::string buffer(1024 * 1024, '\0'); + { + PageHeapSpinLockHolder l; + Printer printer(&*buffer.begin(), buffer.size()); + filler_.Print(printer, /*everything=*/true); + buffer.erase(printer.SpaceRequired()); + } + + EXPECT_THAT( + buffer, + StrEq(R"(HugePageFiller: densely pack small requests into hugepages +HugePageFiller: Overall, 15 total, 6 full, 5 partial, 4 released (0 partially), 0 quarantined +HugePageFiller: those with sparsely-accessed spans, 8 total, 3 full, 3 partial, 2 released (0 partially), 0 quarantined +HugePageFiller: those with densely-accessed spans, 7 total, 3 full, 2 partial, 2 released (0 partially), 0 quarantined +HugePageFiller: 267 pages free in 15 hugepages, 0.0695 free +HugePageFiller: among non-fulls, 0.2086 free +HugePageFiller: 998 used pages in subreleased hugepages (0 of them in partially released) +HugePageFiller: 4 hugepages partially released, 0.0254 released +HugePageFiller: 0.7186 of used pages hugepageable +HugePageFiller: Since startup, 282 pages subreleased, 5 hugepages broken, (0 pages, 0 hugepages due to reaching tcmalloc limit) +HugePageFiller: 0 hugepages became full after being previously released, out of which 0 pages are hugepage backed. + +HugePageFiller: fullness histograms + +HugePageFiller: # of sparsely-accessed regular hps with a<= # of free pages = 224 free pages, with different lifetimes. 
+HugePageFiller: # of sparsely-accessed regular hps with lifetime a <= # hps < b +HugePageFiller: < 0 ms <= 0 < 1 ms <= 0 < 10 ms <= 0 < 100 ms <= 0 < 1000 ms <= 0 < 10000 ms <= 0 +HugePageFiller: < 100000 ms <= 0 < 1000000 ms <= 0 + +HugePageFiller: # of densely-accessed regular hps with lifetime a <= # hps < b +HugePageFiller: < 0 ms <= 0 < 1 ms <= 0 < 10 ms <= 0 < 100 ms <= 0 < 1000 ms <= 0 < 10000 ms <= 0 +HugePageFiller: < 100000 ms <= 0 < 1000000 ms <= 0 + +HugePageFiller: # of donated hps with lifetime a <= # hps < b +HugePageFiller: < 0 ms <= 1 < 1 ms <= 0 < 10 ms <= 0 < 100 ms <= 0 < 1000 ms <= 0 < 10000 ms <= 0 +HugePageFiller: < 100000 ms <= 0 < 1000000 ms <= 0 + +HugePageFiller: # of sparsely-accessed partial released hps with lifetime a <= # hps < b +HugePageFiller: < 0 ms <= 0 < 1 ms <= 0 < 10 ms <= 0 < 100 ms <= 0 < 1000 ms <= 0 < 10000 ms <= 0 +HugePageFiller: < 100000 ms <= 0 < 1000000 ms <= 0 + +HugePageFiller: # of densely-accessed partial released hps with lifetime a <= # hps < b +HugePageFiller: < 0 ms <= 0 < 1 ms <= 0 < 10 ms <= 0 < 100 ms <= 0 < 1000 ms <= 0 < 10000 ms <= 0 +HugePageFiller: < 100000 ms <= 0 < 1000000 ms <= 0 + +HugePageFiller: # of sparsely-accessed released hps with lifetime a <= # hps < b +HugePageFiller: < 0 ms <= 0 < 1 ms <= 0 < 10 ms <= 0 < 100 ms <= 0 < 1000 ms <= 0 < 10000 ms <= 0 +HugePageFiller: < 100000 ms <= 0 < 1000000 ms <= 0 + +HugePageFiller: # of densely-accessed released hps with lifetime a <= # hps < b +HugePageFiller: < 0 ms <= 0 < 1 ms <= 0 < 10 ms <= 0 < 100 ms <= 0 < 1000 ms <= 0 < 10000 ms <= 0 +HugePageFiller: < 100000 ms <= 0 < 1000000 ms <= 0 + +HugePageFiller: # of hps with lifetime >= 100000 ms. +HugePageFiller: # of sparsely-accessed regular hps with a <= # of allocations < b +HugePageFiller: < 0<= 0 < 1<= 0 < 2<= 0 < 3<= 0 < 4<= 0 < 5<= 0 +HugePageFiller: < 6<= 0 < 7<= 0 < 8<= 0 < 16<= 0 < 32<= 0 < 48<= 0 +HugePageFiller: < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0 <128<= 0 <144<= 0 +HugePageFiller: <160<= 0 <176<= 0 <192<= 0 <208<= 0 <224<= 0 <240<= 0 +HugePageFiller: <248<= 0 <249<= 0 <250<= 0 <251<= 0 <252<= 0 <253<= 0 +HugePageFiller: <254<= 0 <255<= 0 + +HugePageFiller: # of densely-accessed regular hps with a <= # of allocations < b +HugePageFiller: < 0<= 0 < 1<= 0 < 2<= 0 < 3<= 0 < 4<= 0 < 5<= 0 +HugePageFiller: < 6<= 0 < 7<= 0 < 8<= 0 < 16<= 0 < 32<= 0 < 48<= 0 +HugePageFiller: < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0 <128<= 0 <144<= 0 +HugePageFiller: <160<= 0 <176<= 0 <192<= 0 <208<= 0 <224<= 0 <240<= 0 +HugePageFiller: <248<= 0 <249<= 0 <250<= 0 <251<= 0 <252<= 0 <253<= 0 +HugePageFiller: <254<= 0 <255<= 0 + +HugePageFiller: # of donated hps with a <= # of allocations < b +HugePageFiller: < 0<= 0 < 1<= 0 < 2<= 0 < 3<= 0 < 4<= 0 < 5<= 0 +HugePageFiller: < 6<= 0 < 7<= 0 < 8<= 0 < 16<= 0 < 32<= 0 < 48<= 0 +HugePageFiller: < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0 <128<= 0 <144<= 0 +HugePageFiller: <160<= 0 <176<= 0 <192<= 0 <208<= 0 <224<= 0 <240<= 0 +HugePageFiller: <248<= 0 <249<= 0 <250<= 0 <251<= 0 <252<= 0 <253<= 0 +HugePageFiller: <254<= 0 <255<= 0 + +HugePageFiller: # of sparsely-accessed partial released hps with a <= # of allocations < b +HugePageFiller: < 0<= 0 < 1<= 0 < 2<= 0 < 3<= 0 < 4<= 0 < 5<= 0 +HugePageFiller: < 6<= 0 < 7<= 0 < 8<= 0 < 16<= 0 < 32<= 0 < 48<= 0 +HugePageFiller: < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0 <128<= 0 <144<= 0 +HugePageFiller: <160<= 0 <176<= 0 <192<= 0 <208<= 0 <224<= 0 <240<= 0 +HugePageFiller: <248<= 0 <249<= 0 <250<= 0 <251<= 0 <252<= 0 <253<= 0 +HugePageFiller: <254<= 0 <255<= 0 + 
+HugePageFiller: # of densely-accessed partial released hps with a <= # of allocations < b +HugePageFiller: < 0<= 0 < 1<= 0 < 2<= 0 < 3<= 0 < 4<= 0 < 5<= 0 +HugePageFiller: < 6<= 0 < 7<= 0 < 8<= 0 < 16<= 0 < 32<= 0 < 48<= 0 +HugePageFiller: < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0 <128<= 0 <144<= 0 +HugePageFiller: <160<= 0 <176<= 0 <192<= 0 <208<= 0 <224<= 0 <240<= 0 +HugePageFiller: <248<= 0 <249<= 0 <250<= 0 <251<= 0 <252<= 0 <253<= 0 +HugePageFiller: <254<= 0 <255<= 0 + +HugePageFiller: # of sparsely-accessed released hps with a <= # of allocations < b +HugePageFiller: < 0<= 0 < 1<= 0 < 2<= 0 < 3<= 0 < 4<= 0 < 5<= 0 +HugePageFiller: < 6<= 0 < 7<= 0 < 8<= 0 < 16<= 0 < 32<= 0 < 48<= 0 +HugePageFiller: < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0 <128<= 0 <144<= 0 +HugePageFiller: <160<= 0 <176<= 0 <192<= 0 <208<= 0 <224<= 0 <240<= 0 +HugePageFiller: <248<= 0 <249<= 0 <250<= 0 <251<= 0 <252<= 0 <253<= 0 +HugePageFiller: <254<= 0 <255<= 0 + +HugePageFiller: # of densely-accessed released hps with a <= # of allocations < b +HugePageFiller: < 0<= 0 < 1<= 0 < 2<= 0 < 3<= 0 < 4<= 0 < 5<= 0 +HugePageFiller: < 6<= 0 < 7<= 0 < 8<= 0 < 16<= 0 < 32<= 0 < 48<= 0 +HugePageFiller: < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0 <128<= 0 <144<= 0 +HugePageFiller: <160<= 0 <176<= 0 <192<= 0 <208<= 0 <224<= 0 <240<= 0 +HugePageFiller: <248<= 0 <249<= 0 <250<= 0 <251<= 0 <252<= 0 <253<= 0 +HugePageFiller: <254<= 0 <255<= 0 + +HugePageFiller: 0 of sparsely-accessed regular pages hugepage backed out of 5. +HugePageFiller: 0 of densely-accessed regular pages hugepage backed out of 5. +HugePageFiller: 0 of donated pages hugepage backed out of 1. +HugePageFiller: 0 of sparsely-accessed partial released pages hugepage backed out of 0. +HugePageFiller: 0 of densely-accessed partial released pages hugepage backed out of 0. +HugePageFiller: 0 of sparsely-accessed released pages hugepage backed out of 2. +HugePageFiller: 0 of densely-accessed released pages hugepage backed out of 2. + +HugePageFiller: time series over 5 min interval + +HugePageFiller: realized fragmentation: 0.0 MiB +HugePageFiller: minimum free pages: 0 (0 backed) +HugePageFiller: at peak demand: 3547 pages (and 267 free, 26 unmapped) +HugePageFiller: at peak demand: 15 hps (10 regular, 1 donated, 0 partial, 4 released) + +HugePageFiller: Since the start of the execution, 0 subreleases (0 pages) were skipped due to the sum of short-term (0s) fluctuations and long-term (0s) trends. +HugePageFiller: 0.0000% of decisions confirmed correct, 0 pending (0.0000% of pages, 0 pending). +HugePageFiller: Subrelease stats last 10 min: total 282 pages subreleased (0 pages from partial allocs), 5 hugepages broken +)")); + + absl::flat_hash_set expected_pts, actual_pts; + for (const auto& alloc : allocs) { + expected_pts.insert(alloc.pt); + } + actual_pts.reserve(expected_pts.size()); + + bool dupe_seen = false; + { + PageHeapSpinLockHolder l; + filler_.ForEachHugePage([&](const PageTracker& pt) { + // We are holding the page heap lock, so refrain from allocating + // (including using Google Test helpers). + dupe_seen = dupe_seen || actual_pts.contains(&pt); + + if (actual_pts.size() == actual_pts.capacity()) { + return; + } + + TC_CHECK(actual_pts.insert(&pt).second); + }); + } + EXPECT_FALSE(dupe_seen); + EXPECT_THAT(actual_pts, Eq(expected_pts)); + + for (const auto& alloc : allocs) { + Delete(alloc); + } +} + +// Test Get and Put operations on the filler work correctly when number of +// objects are provided. 
We expect that Get requests with sparsely-accessed +// and densely-accessed spans are satisfied by their respective allocs. +TEST_P(FillerTest, GetsAndPuts) { + randomize_density_ = false; + absl::BitGen rng; + std::vector sparsely_accessed_allocs; + std::vector densely_accessed_allocs; + SpanAllocInfo sparsely_accessed_info = {1, AccessDensityPrediction::kSparse}; + SpanAllocInfo densely_accessed_info = {kMaxValidPages.raw_num(), + AccessDensityPrediction::kDense}; + static const HugeLength kNumHugePages = NHugePages(64); + for (auto i = Length(0); i < kNumHugePages.in_pages(); ++i) { + ASSERT_EQ(filler_.pages_allocated(), i); + // Randomly select whether the next span should be sparsely-accessed or + // densely-accessed. + if (absl::Bernoulli(rng, 0.5)) { + sparsely_accessed_allocs.push_back( + AllocateWithSpanAllocInfo(Length(1), sparsely_accessed_info)); + EXPECT_EQ( + filler_.pages_allocated(AccessDensityPrediction::kSparse).raw_num(), + sparsely_accessed_allocs.size()); + } else { + densely_accessed_allocs.push_back( + AllocateWithSpanAllocInfo(Length(1), densely_accessed_info)); + EXPECT_EQ( + filler_.pages_allocated(AccessDensityPrediction::kDense).raw_num(), + densely_accessed_allocs.size()); + } + } + EXPECT_GE(filler_.size(), kNumHugePages); + EXPECT_LE(filler_.size(), kNumHugePages + NHugePages(1)); + // clean up, check for failures + for (auto a : densely_accessed_allocs) { + Delete(a); + } + ASSERT_EQ(filler_.pages_allocated(AccessDensityPrediction::kDense), + Length(0)); + for (auto a : sparsely_accessed_allocs) { Delete(a); } + ASSERT_EQ(filler_.pages_allocated(AccessDensityPrediction::kSparse), + Length(0)); + ASSERT_EQ(filler_.pages_allocated(), Length(0)); +} + +// Test that filler tries to release pages from the sparsely-accessed allocs +// before attempting to release pages from the densely-accessed allocs. +TEST_P(FillerTest, ReleasePrioritySparseAndDenseAllocs) { + randomize_density_ = false; + const Length N = kPagesPerHugePage; + const Length kToBeReleased(4); + SpanAllocInfo sparsely_accessed_info = {1, AccessDensityPrediction::kSparse}; + auto sparsely_accessed_alloc = AllocateVectorWithSpanAllocInfo( + N - kToBeReleased, sparsely_accessed_info); + ASSERT_EQ(sparsely_accessed_alloc.size(), 1); + SpanAllocInfo densely_accessed_info = {kMaxValidPages.raw_num(), + AccessDensityPrediction::kDense}; + auto densely_accessed_alloc = + AllocateVectorWithSpanAllocInfo(N - kToBeReleased, densely_accessed_info); + for (auto a : densely_accessed_alloc) { + ASSERT_EQ(a.pt, densely_accessed_alloc.front().pt); + } + EXPECT_EQ(ReleasePages(Length(1)), kToBeReleased); + auto get_released_pages = [&](const std::vector& alloc) { + return alloc.front().pt->released_pages(); + }; + EXPECT_EQ(get_released_pages(sparsely_accessed_alloc), kToBeReleased); + EXPECT_EQ(get_released_pages(densely_accessed_alloc), Length(0)); + EXPECT_EQ(ReleasePages(Length(1)), kToBeReleased); + EXPECT_EQ(get_released_pages(densely_accessed_alloc), kToBeReleased); + EXPECT_EQ(get_released_pages(sparsely_accessed_alloc), kToBeReleased); + DeleteVector(sparsely_accessed_alloc); + DeleteVector(densely_accessed_alloc); +} + +// Repeatedly grow from FLAG_bytes to FLAG_bytes * growth factor, then shrink +// back down by random deletion. Then release partial hugepages until +// pageheap is bounded by some fraction of usage. Measure the blowup in VSS +// footprint. 
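For orientation before the test body: the "blowup" bounded here is, in effect, the ratio of the filler's hugepage footprint (filler_.size(), used as the VSS proxy) to the pages still live in it, and the loop below asserts that this ratio stays at or below 2 after each release pass. A minimal sketch of that measurement, illustrative only (it simply restates the EXPECT_LE inside the test):

  // Blowup of the filler's footprint relative to live pages; BoundedVSS
  // checks the equivalent of blowup <= 2 once partial hugepages are released.
  double blowup = static_cast<double>(filler_.size().in_bytes()) /
                  static_cast<double>(filler_.pages_allocated().in_bytes());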
+TEST_P(FillerTest, BoundedVSS) { + randomize_density_ = false; + absl::BitGen rng; + const Length baseline = LengthFromBytes(absl::GetFlag(FLAGS_bytes)); + const Length peak = baseline * absl::GetFlag(FLAGS_growth_factor); + + std::vector allocs; + while (filler_.used_pages() < baseline) { + allocs.push_back(Allocate(Length(1))); + } + EXPECT_EQ(filler_.pages_allocated().raw_num(), allocs.size()); + + for (int i = 0; i < 10; ++i) { + while (filler_.used_pages() < peak) { + allocs.push_back(Allocate(Length(1))); + } + std::shuffle(allocs.begin(), allocs.end(), rng); + size_t limit = allocs.size(); + while (filler_.used_pages() > baseline) { + --limit; + Delete(allocs[limit]); + } + allocs.resize(limit); + ReleasePages(kMaxValidPages); + // Compare the total size of the hugepages in the filler and the allocated + // pages. + EXPECT_LE(filler_.size().in_bytes(), + 2 * filler_.pages_allocated().in_bytes()); + } + while (!allocs.empty()) { + Delete(allocs.back()); + allocs.pop_back(); + } +} + +// In b/265337869, we observed failures in the huge_page_filler due to mixing +// of hugepages between sparsely-accessed and densely-accessed allocs. The test +// below reproduces the buggy situation. +TEST_P(FillerTest, CounterUnderflow) { + randomize_density_ = false; + const Length N = kPagesPerHugePage; + const Length kToBeReleased(kPagesPerHugePage / 2 + Length(1)); + // First allocate a densely-accessed span, then release the remaining pages on + // the hugepage. This would move the hugepage to + // regular_alloc_partial_released_. + SpanAllocInfo densely_accessed_info = {kMaxValidPages.raw_num(), + AccessDensityPrediction::kDense}; + auto densely_accessed_alloc = + AllocateVectorWithSpanAllocInfo(N - kToBeReleased, densely_accessed_info); + EXPECT_EQ(ReleasePages(Length(kToBeReleased)), kToBeReleased); + // Then allocate a sparsely-accessed objects span. The previous hugepage + // should not be used since while allocating a sparsely-accessed objects span, + // we do not check densely-accessed alloc. + SpanAllocInfo sparsely_accessed_info = {1, AccessDensityPrediction::kSparse}; + auto sparsely_accessed_alloc = AllocateVectorWithSpanAllocInfo( + Length(kToBeReleased), sparsely_accessed_info); + for (const auto& a1 : sparsely_accessed_alloc) { + for (const auto& a2 : densely_accessed_alloc) { + EXPECT_NE(a1.pt, a2.pt); + } + } + DeleteVector(sparsely_accessed_alloc); + DeleteVector(densely_accessed_alloc); +} + +// In b/270916852, we observed that the huge_page_filler may fail to release +// memory when densely-accessed alloc is being used. This is due to the +// presence of partially released and fully released pages in densely-accessed +// alloc. The comparator in use does not make correct choices in presence of +// such hugepages. The test below reproduces the buggy situation. +TEST_P(FillerTest, ReleasePagesFromDenseAlloc) { + // Skip test for kSpansAllocated since the test assumes hugepages can be + // partially allocated. + if (std::get<0>(GetParam()) == + HugePageFillerDenseTrackerType::kSpansAllocated) { + GTEST_SKIP() << "Skipping test for kSpansAllocated"; + } + randomize_density_ = false; + constexpr size_t kCandidatesForReleasingMemory = + HugePageFiller::kCandidatesForReleasingMemory; + // Make kCandidate memory allocations of length kPagesPerHugepage/2 + 1. Note + // that a fresh hugepage will be used for each alloction. 
+ const Length kToBeUsed1(kPagesPerHugePage / 2 + Length(1)); + std::vector allocs; + SpanAllocInfo densely_accessed_info = {kMaxValidPages.raw_num(), + AccessDensityPrediction::kDense}; + for (int i = 0; i < kCandidatesForReleasingMemory; ++i) { + std::vector temp = + AllocateVectorWithSpanAllocInfo(kToBeUsed1, densely_accessed_info); + allocs.insert(allocs.end(), temp.begin(), temp.end()); + } + // Release the free portion from these hugepages. + const Length kExpectedReleased1 = + kCandidatesForReleasingMemory * (kPagesPerHugePage - kToBeUsed1); + EXPECT_EQ(ReleasePages(kExpectedReleased1), kExpectedReleased1); + // Allocate kCandidate (does not really matter) more hugepages with + // allocations of length kPagesPerHugepage/2 + 2. These allocations also need + // one fresh hugepage each and they use more pages than the previously + // allocated hugepages. + const Length kToBeUsed2(kPagesPerHugePage / 2 + Length(2)); + for (int i = 0; i < kCandidatesForReleasingMemory; ++i) { + std::vector temp = + AllocateVectorWithSpanAllocInfo(kToBeUsed2, densely_accessed_info); + allocs.insert(allocs.end(), temp.begin(), temp.end()); + } + // Try to release more memory. We should continue to make progress and return + // all of the pages we tried to. + const Length kExpectedReleased2 = + kCandidatesForReleasingMemory * (kPagesPerHugePage - kToBeUsed2); + EXPECT_EQ(ReleasePages(kExpectedReleased2), kExpectedReleased2); + EXPECT_EQ(filler_.free_pages(), Length(0)); + + for (auto alloc : allocs) { + Delete(alloc); + } +} + +TEST_P(FillerTest, ReleasePagesFromDenseAlloc_SpansAllocated) { + // Skip test for kSpansAllocated since the test assumes hugepages can be + // partially allocated. + if (std::get<0>(GetParam()) != + HugePageFillerDenseTrackerType::kSpansAllocated) { + GTEST_SKIP() << "Skipping test for !kSpansAllocated"; + } + randomize_density_ = false; + constexpr size_t kCandidatesForReleasingMemory = + HugePageFiller::kCandidatesForReleasingMemory; + // Make kCandidate memory allocations of length kPagesPerHugepage/2 + 1. Note + // that a fresh hugepage will be used for each alloction. + const Length kToBeUsed1(kPagesPerHugePage / 2 + Length(1)); + std::vector allocs; + std::vector allocs_to_be_released; + SpanAllocInfo densely_accessed_info = {kMaxValidPages.raw_num(), + AccessDensityPrediction::kDense}; + for (int i = 0; i < kCandidatesForReleasingMemory; ++i) { + std::vector temp = + AllocateVectorWithSpanAllocInfo(kToBeUsed1, densely_accessed_info); + allocs.insert(allocs.end(), temp.begin(), temp.end()); + temp = AllocateVectorWithSpanAllocInfo(kPagesPerHugePage - kToBeUsed1, + densely_accessed_info); + allocs_to_be_released.insert(allocs_to_be_released.end(), temp.begin(), + temp.end()); + } + // Release the allocs that were made so that the actual we care about are on + // fresh hugepages. + DeleteVector(allocs_to_be_released); + allocs_to_be_released.clear(); + // Release the free portion from these hugepages. + const Length kExpectedReleased1 = + kCandidatesForReleasingMemory * (kPagesPerHugePage - kToBeUsed1); + EXPECT_EQ(ReleasePages(kExpectedReleased1), kExpectedReleased1); + // Fill up the hugepages again so that subsequent allocations are made on + // fresh hugepages. 
+ for (int i = 0; i < kCandidatesForReleasingMemory; ++i) { + std::vector temp = AllocateVectorWithSpanAllocInfo( + kPagesPerHugePage - kToBeUsed1, densely_accessed_info); + allocs_to_be_released.insert(allocs_to_be_released.end(), temp.begin(), + temp.end()); + } + // Allocate kCandidate (does not really matter) more hugepages with + // allocations of length kPagesPerHugepage/2 + 2. These allocations also need + // one fresh hugepage each and they use more pages than the previously + // allocated hugepages. + std::vector allocs_to_be_released_2; + const Length kToBeUsed2(kPagesPerHugePage / 2 + Length(2)); + for (int i = 0; i < kCandidatesForReleasingMemory; ++i) { + std::vector temp = + AllocateVectorWithSpanAllocInfo(kToBeUsed2, densely_accessed_info); + allocs.insert(allocs.end(), temp.begin(), temp.end()); + temp = AllocateVectorWithSpanAllocInfo(kPagesPerHugePage - kToBeUsed2, + densely_accessed_info); + allocs_to_be_released_2.insert(allocs_to_be_released_2.end(), temp.begin(), + temp.end()); + } + // Release the allocs that were made so that the actual we care about are on + // fresh hugepages. + DeleteVector(allocs_to_be_released_2); + allocs_to_be_released_2.clear(); + // Try to release more memory. We should continue to make progress and return + // all of the pages we tried to. + const Length kExpectedReleased2 = + kCandidatesForReleasingMemory * (kPagesPerHugePage - kToBeUsed2); + EXPECT_EQ(ReleasePages(kExpectedReleased2), kExpectedReleased2); + EXPECT_EQ(filler_.free_pages(), Length(0)); + + for (auto alloc : allocs) { + Delete(alloc); + } + DeleteVector(allocs_to_be_released); +} + +TEST_P(FillerTest, ReleasedPagesStatistics) { + constexpr Length N = kPagesPerHugePage / 4; + + std::vector a1 = AllocateVector(N); + ASSERT_TRUE(!a1.empty()); + + const Length released = ReleasePages(kPagesPerHugePage); + // We should have released some memory. + EXPECT_NE(released, Length(0)); + // Since we have only a single allocation, its pages should all be used on + // released pages. + EXPECT_EQ(filler_.size(), NHugePages(1)); + EXPECT_EQ(filler_.used_pages(), N); + EXPECT_EQ(filler_.free_pages(), Length(0)); + EXPECT_EQ(filler_.used_pages_in_released(), N); + EXPECT_EQ(filler_.used_pages_in_any_subreleased(), N); + + // Now differentiate fully released from partially released. Make an + // allocation and return it. + std::vector a2 = + AllocateVectorWithSpanAllocInfo(N, a1.front().span_alloc_info); + + // We now have N pages for a1, N pages for a2, and 2N pages + // released. + EXPECT_EQ(filler_.size(), NHugePages(1)); + EXPECT_EQ(filler_.used_pages(), 2 * N); + EXPECT_EQ(filler_.free_pages(), Length(0)); + EXPECT_EQ(filler_.used_pages_in_released(), 2 * N); + EXPECT_EQ(filler_.used_pages_in_any_subreleased(), 2 * N); + + DeleteVector(a2); + + // We now have N pages for a1, N pages free (but mapped), and 2N pages + // released. 
+ EXPECT_EQ(filler_.used_pages(), N); + EXPECT_EQ(filler_.free_pages(), N); + EXPECT_EQ(filler_.used_pages_in_released(), Length(0)); + EXPECT_EQ(filler_.used_pages_in_any_subreleased(), N); + + DeleteVector(a1); } -INSTANTIATE_TEST_SUITE_P(All, FillerTest, - testing::Values(FillerPartialRerelease::Return, - FillerPartialRerelease::Retain)); +INSTANTIATE_TEST_SUITE_P( + All, FillerTest, + testing::Combine(testing::Values( + HugePageFillerDenseTrackerType::kLongestFreeRangeAndChunks, + HugePageFillerDenseTrackerType::kSpansAllocated))); + +TEST(SkipSubreleaseIntervalsTest, EmptyIsNotEnabled) { + // When we have a limit hit, we pass SkipSubreleaseIntervals{} to the + // filler. Make sure it doesn't signal that we should skip the limit. + EXPECT_FALSE(SkipSubreleaseIntervals{}.SkipSubreleaseEnabled()); +} } // namespace } // namespace tcmalloc_internal diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_page_subrelease.h b/contrib/libs/tcmalloc/tcmalloc/huge_page_subrelease.h new file mode 100644 index 000000000000..fcdf018ec2c1 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/huge_page_subrelease.h @@ -0,0 +1,731 @@ +#pragma clang system_header +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_HUGE_PAGE_SUBRELEASE_H_ +#define TCMALLOC_HUGE_PAGE_SUBRELEASE_H_ + +#include +#include + +#include + +#include "absl/base/optimization.h" +#include "absl/strings/string_view.h" +#include "absl/time/time.h" +#include "tcmalloc/common.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/clock.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/timeseries_tracker.h" +#include "tcmalloc/pages.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// This and the following classes implement the adaptive hugepage subrelease +// mechanism and realized fragmentation metric described in "Adaptive Hugepage +// Subrelease for Non-moving Memory Allocators in Warehouse-Scale Computers" +// (ISMM 2021). + +// Tracks correctness of skipped subrelease decisions over time. +template +class SkippedSubreleaseCorrectnessTracker { + public: + struct SkippedSubreleaseDecision { + Length pages; // Number of pages we skipped subreleasing. + size_t count; // Number of times we skipped a subrelease. 
+ + SkippedSubreleaseDecision() : pages(0), count(0) {} + explicit SkippedSubreleaseDecision(Length pages) : pages(pages), count(1) {} + explicit SkippedSubreleaseDecision(Length pages, size_t count) + : pages(pages), count(count) {} + + SkippedSubreleaseDecision& operator+=(SkippedSubreleaseDecision rhs) { + pages += rhs.pages; + count += rhs.count; + return *this; + } + + static SkippedSubreleaseDecision Zero() { + return SkippedSubreleaseDecision(); + } + }; + + explicit constexpr SkippedSubreleaseCorrectnessTracker(Clock clock, + absl::Duration w) + : window_(w), + epoch_length_(window_ / kEpochs), + last_confirmed_peak_(0), + tracker_(clock, w) {} + + // Not copyable or movable + SkippedSubreleaseCorrectnessTracker( + const SkippedSubreleaseCorrectnessTracker&) = delete; + SkippedSubreleaseCorrectnessTracker& operator=( + const SkippedSubreleaseCorrectnessTracker&) = delete; + + void ReportSkippedSubreleasePages(Length skipped_pages, Length peak_pages, + absl::Duration summary_interval) { + total_skipped_ += SkippedSubreleaseDecision(skipped_pages); + pending_skipped_ += SkippedSubreleaseDecision(skipped_pages); + + SkippedSubreleaseUpdate update; + update.decision = SkippedSubreleaseDecision(skipped_pages); + update.num_pages_at_decision = peak_pages; + update.correctness_interval_epochs = summary_interval / epoch_length_; + tracker_.Report(update); + } + + void ReportUpdatedPeak(Length current_peak) { + // Record this peak for the current epoch (so we don't double-count correct + // predictions later) and advance the tracker. + SkippedSubreleaseUpdate update; + update.confirmed_peak = current_peak; + if (tracker_.Report(update)) { + // Also keep track of the largest peak we have confirmed this epoch. + last_confirmed_peak_ = Length(0); + } + + // Recompute currently pending decisions. + pending_skipped_ = SkippedSubreleaseDecision::Zero(); + + Length largest_peak_already_confirmed = last_confirmed_peak_; + + tracker_.IterBackwards( + [&](size_t offset, const SkippedSubreleaseEntry& e) { + // Do not clear any decisions in the current epoch. + if (offset == 0) { + return; + } + + if (e.decisions.count > 0 && + e.max_num_pages_at_decision > largest_peak_already_confirmed && + offset <= e.correctness_interval_epochs) { + if (e.max_num_pages_at_decision <= current_peak) { + // We can confirm a subrelease decision as correct and it had not + // been confirmed correct by an earlier peak yet. + correctly_skipped_ += e.decisions; + } else { + pending_skipped_ += e.decisions; + } + } + + // Did we clear any earlier decisions based on a peak in this epoch? + // Keep track of the peak, so we do not clear them again. + largest_peak_already_confirmed = + std::max(largest_peak_already_confirmed, e.max_confirmed_peak); + }, + -1); + + last_confirmed_peak_ = std::max(last_confirmed_peak_, current_peak); + } + + inline SkippedSubreleaseDecision total_skipped() const { + return total_skipped_; + } + + inline SkippedSubreleaseDecision correctly_skipped() const { + return correctly_skipped_; + } + + inline SkippedSubreleaseDecision pending_skipped() const { + return pending_skipped_; + } + + private: + struct SkippedSubreleaseUpdate { + // A subrelease decision that was made at this time step: How much did we + // decide not to release? + SkippedSubreleaseDecision decision; + + // What does our future demand have to be for this to be correct? If there + // were multiple subrelease decisions in the same epoch, use the max. 
+ Length num_pages_at_decision; + + // How long from the time of the decision do we have before the decision + // will be determined incorrect? + int64_t correctness_interval_epochs = 0; + + // At this time step, we confirmed a demand peak at this level, which means + // all subrelease decisions in earlier time steps that had peak_demand_pages + // <= this confirmed_peak were confirmed correct and don't need to be + // considered again in the future. + Length confirmed_peak; + }; + + struct SkippedSubreleaseEntry { + SkippedSubreleaseDecision decisions = SkippedSubreleaseDecision::Zero(); + Length max_num_pages_at_decision; + int64_t correctness_interval_epochs = 0; + Length max_confirmed_peak; + + static SkippedSubreleaseEntry Nil() { return SkippedSubreleaseEntry(); } + + void Report(SkippedSubreleaseUpdate e) { + decisions += e.decision; + correctness_interval_epochs = + std::max(correctness_interval_epochs, e.correctness_interval_epochs); + max_num_pages_at_decision = + std::max(max_num_pages_at_decision, e.num_pages_at_decision); + max_confirmed_peak = std::max(max_confirmed_peak, e.confirmed_peak); + } + }; + + const absl::Duration window_; + const absl::Duration epoch_length_; + + // The largest peak we processed this epoch. This is required to avoid us + // double-counting correctly predicted decisions. + Length last_confirmed_peak_; + + SkippedSubreleaseDecision total_skipped_; + SkippedSubreleaseDecision correctly_skipped_; + SkippedSubreleaseDecision pending_skipped_; + + TimeSeriesTracker + tracker_; +}; + +struct SkipSubreleaseIntervals { + // Interval that locates recent short-term demand fluctuation. + absl::Duration short_interval; + // Interval that locates recent long-term demand trend. + absl::Duration long_interval; + // Checks if the skip subrelease feature is enabled. + bool SkipSubreleaseEnabled() const { + if (short_interval != absl::ZeroDuration() || + long_interval != absl::ZeroDuration()) { + return true; + } + return false; + } +}; + +struct SubreleaseStats { + Length total_pages_subreleased; // cumulative since startup + Length total_partial_alloc_pages_subreleased; // cumulative since startup + Length num_pages_subreleased; + Length num_partial_alloc_pages_subreleased; + HugeLength total_hugepages_broken{NHugePages(0)}; // cumulative since startup + HugeLength num_hugepages_broken{NHugePages(0)}; + + bool is_limit_hit = false; + // Keep these limit-related stats cumulative since startup only + Length total_pages_subreleased_due_to_limit; + HugeLength total_hugepages_broken_due_to_limit{NHugePages(0)}; + + void reset() { + total_pages_subreleased += num_pages_subreleased; + total_partial_alloc_pages_subreleased += + num_partial_alloc_pages_subreleased; + total_hugepages_broken += num_hugepages_broken; + num_pages_subreleased = Length(0); + num_partial_alloc_pages_subreleased = Length(0); + num_hugepages_broken = NHugePages(0); + } + + // Must be called at the beginning of each subrelease request + void set_limit_hit(bool value) { is_limit_hit = value; } + + // This only has a well-defined meaning within ReleaseCandidates where + // set_limit_hit() has been called earlier. Do not use anywhere else. + bool limit_hit() { return is_limit_hit; } +}; + +// Track subrelease statistics over a time window. 
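A minimal usage sketch of SkipSubreleaseIntervals as defined above (the interval values are arbitrary): the feature counts as enabled as soon as either interval is non-zero, which is what the EmptyIsNotEnabled test earlier in this patch verifies for the default-constructed case.

  // Arbitrary example values; either one alone would also enable the feature.
  SkipSubreleaseIntervals intervals;
  intervals.short_interval = absl::Seconds(60);
  intervals.long_interval = absl::Minutes(5);
  TC_ASSERT(intervals.SkipSubreleaseEnabled());  // true: both intervals are set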
+template +class SubreleaseStatsTracker { + public: + enum Type { + kRegular, + kSparse = kRegular, + kDense, + kDonated, + kPartialReleased, + kReleased, + kNumTypes + }; + + struct SubreleaseStats { + Length num_pages; + Length free_pages; + Length unmapped_pages; + Length used_pages_in_subreleased_huge_pages; + HugeLength huge_pages[kNumTypes]; + Length num_pages_subreleased; + Length num_partial_alloc_pages_subreleased; + HugeLength num_hugepages_broken = NHugePages(0); + + HugeLength total_huge_pages() const { + HugeLength total_huge_pages; + for (int i = 0; i < kNumTypes; i++) { + total_huge_pages += huge_pages[i]; + } + return total_huge_pages; + } + }; + + struct NumberOfFreePages { + Length free; + Length free_backed; + }; + + explicit constexpr SubreleaseStatsTracker(Clock clock, absl::Duration w, + absl::Duration summary_interval) + : summary_interval_(summary_interval), + window_(w), + epoch_length_(window_ / kEpochs), + tracker_(clock, w), + skipped_subrelease_correctness_(clock, w) { + // The summary_interval is used in two trackers: SubreleaseStatsTracker for + // evaluating realized fragmentation, and + // SkippedSubreleaseCorrectnessTracker for evaluating the correctness of + // skipped subrelease. Here we check the length of the two trackers are + // sufficient for the evaluation. + TC_ASSERT_LE(summary_interval, w); + } + + // Not copyable or movable + SubreleaseStatsTracker(const SubreleaseStatsTracker&) = delete; + SubreleaseStatsTracker& operator=(const SubreleaseStatsTracker&) = delete; + + void Report(const SubreleaseStats& stats) { + if (ABSL_PREDICT_FALSE(tracker_.Report(stats))) { + if (ABSL_PREDICT_FALSE(pending_skipped().count > 0)) { + // Consider the peak within the just completed epoch to confirm the + // correctness of any recent subrelease decisions. + skipped_subrelease_correctness_.ReportUpdatedPeak(std::max( + stats.num_pages, + tracker_.GetEpochAtOffset(1).stats[kStatsAtMaxDemand].num_pages)); + } + } + } + + void Print(Printer& out, absl::string_view field) const; + void PrintSubreleaseStatsInPbtxt(PbtxtRegion& hpaa, + absl::string_view field) const; + void PrintTimeseriesStatsInPbtxt(PbtxtRegion& hpaa, + absl::string_view field) const; + + // Calculates demand requirements for the skip subrelease: we do not + // subrelease if the number of free pages are than (or equal to) the demand + // computed by GetRecentDemand. The demand requirement is the sum of + // short-term demand fluctuation peak with in the last and + // long-term demand trend in the previous . When both are set, + // short_interval should be (significantly) shorter or equal to long_interval + // to avoid realized fragmentation caused by non-recent (short-term) demand + // spikes. The demand is capped to the peak observed in the time series. + Length GetRecentDemand(absl::Duration short_interval, + absl::Duration long_interval) { + return GetRecentDemand(short_interval, long_interval, window_); + } + + // Calculates demand requirements for the skip subrelease: we do not + // subrelease if the number of free pages are than (or equal to) the demand + // computed by GetRecentDemand. The demand requirement is the sum of + // short-term demand fluctuation peak with in the last and + // long-term demand trend in the previous . When both are set, + // short_interval should be (significantly) shorter or equal to long_interval + // to avoid realized fragmentation caused by non-recent (short-term) demand + // spikes. 
The demand is capped to the peak observed in the time series over + // the last . + Length GetRecentDemand(absl::Duration short_interval, + absl::Duration long_interval, + absl::Duration peak_interval) { + Length demand_trend = + CalculateCombinedDemandTrend(short_interval, long_interval); + Length demand_peak = CalculateDemandPeak(peak_interval); + return std::min(demand_peak, demand_trend); + } + + // Reports a skipped subrelease, which is evaluated by coming peaks within the + // realized fragmentation interval. The purpose is these skipped pages would + // only create realized fragmentation if peaks in that interval are + // smaller than peak_pages. + void ReportSkippedSubreleasePages(Length pages, Length peak_pages) { + ReportSkippedSubreleasePages(pages, peak_pages, summary_interval_); + } + + // Reports a skipped subrelease, which is evaluated by coming peaks within the + // given time interval. + void ReportSkippedSubreleasePages(Length pages, Length peak_pages, + absl::Duration summary_interval) { + if (pages == Length(0)) { + return; + } + skipped_subrelease_correctness_.ReportSkippedSubreleasePages( + pages, peak_pages, summary_interval); + } + + inline typename SkippedSubreleaseCorrectnessTracker< + kEpochs>::SkippedSubreleaseDecision + total_skipped() const { + return skipped_subrelease_correctness_.total_skipped(); + } + + inline typename SkippedSubreleaseCorrectnessTracker< + kEpochs>::SkippedSubreleaseDecision + correctly_skipped() const { + return skipped_subrelease_correctness_.correctly_skipped(); + } + + inline typename SkippedSubreleaseCorrectnessTracker< + kEpochs>::SkippedSubreleaseDecision + pending_skipped() const { + return skipped_subrelease_correctness_.pending_skipped(); + } + + // Returns the minimum number of free pages throughout the tracker period. + // The first value of the pair is the number of all free pages, the second + // value contains only the backed ones. + NumberOfFreePages min_free_pages(absl::Duration w) const { + NumberOfFreePages mins; + mins.free = Length::max(); + mins.free_backed = Length::max(); + + int64_t num_epochs = std::clamp(w / epoch_length_, int64_t{0}, + static_cast(kEpochs)); + + tracker_.IterBackwards( + [&](size_t offset, const SubreleaseStatsEntry& e) { + if (!e.empty()) { + mins.free = std::min(mins.free, e.min_free_pages); + mins.free_backed = + std::min(mins.free_backed, e.min_free_backed_pages); + } + }, + num_epochs); + mins.free = (mins.free == Length::max()) ? Length(0) : mins.free; + mins.free_backed = + (mins.free_backed == Length::max()) ? Length(0) : mins.free_backed; + return mins; + } + + // Returns the realized fragmentation, which is the minimum number of free + // backed pages over the last summary_interval_ (default 5 min). + Length RealizedFragmentation() const { + Length min_free_backed = Length::max(); + int64_t num_epochs = + std::min(summary_interval_ / epoch_length_, kEpochs); + tracker_.IterBackwards( + [&](size_t offset, const SubreleaseStatsEntry& e) { + if (!e.empty()) { + min_free_backed = + std::min(min_free_backed, e.min_free_backed_pages); + } + }, + num_epochs); + min_free_backed = + (min_free_backed == Length::max()) ? Length(0) : min_free_backed; + return min_free_backed; + } + + private: + // We collect subrelease statistics at four "interesting points" within each + // time step: at min/max demand of pages and at min/max use of hugepages. This + // allows us to approximate the envelope of the different metrics. 
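To make the requirement above concrete, a worked sketch (the numbers are illustrative and match the ComputeRecentDemandAndCappedToPeak test later in this patch): with a short-term fluctuation of 2500 pages over short_interval, a long-term trend of 1500 pages over long_interval, and an observed peak of 3000 pages over peak_interval,

  // Combined trend (2500 + 1500 = 4000 pages) is capped to the observed peak.
  Length requirement = std::min(Length(3000), Length(2500) + Length(1500));
  // requirement == Length(3000)

The filler then compares its free pages against this requirement when deciding whether to skip subrelease, as described above.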
+ enum StatsType { kStatsAtMinDemand, kStatsAtMaxDemand, kNumStatsTypes }; + + struct SubreleaseStatsEntry { + // Collect stats at "interesting points" (minimum/maximum page demand + // and at minimum/maximum usage of huge pages). + SubreleaseStats stats[kNumStatsTypes] = {}; + static constexpr Length kDefaultValue = Length::max(); + Length min_free_pages = kDefaultValue; + Length min_free_backed_pages = kDefaultValue; + Length num_pages_subreleased; + Length num_partial_alloc_pages_subreleased; + HugeLength num_hugepages_broken = NHugePages(0); + + static SubreleaseStatsEntry Nil() { return SubreleaseStatsEntry(); } + + void Report(const SubreleaseStats& e) { + if (empty()) { + for (int i = 0; i < kNumStatsTypes; i++) { + stats[i] = e; + } + } + + if (e.num_pages < stats[kStatsAtMinDemand].num_pages) { + stats[kStatsAtMinDemand] = e; + } + + if (e.num_pages > stats[kStatsAtMaxDemand].num_pages) { + stats[kStatsAtMaxDemand] = e; + } + + min_free_pages = + std::min(min_free_pages, e.free_pages + e.unmapped_pages); + min_free_backed_pages = std::min(min_free_backed_pages, e.free_pages); + + // Subrelease stats + num_pages_subreleased += e.num_pages_subreleased; + num_partial_alloc_pages_subreleased += + e.num_partial_alloc_pages_subreleased; + num_hugepages_broken += e.num_hugepages_broken; + } + + bool empty() const { return min_free_pages == kDefaultValue; } + }; + + // Gets the peak demand recorded in the time series over the last + // . + Length CalculateDemandPeak(absl::Duration peak_interval) { + Length max_demand_pages; + int64_t num_epochs = + std::min(peak_interval / epoch_length_, kEpochs); + tracker_.IterBackwards( + [&](size_t offset, const SubreleaseStatsEntry& e) { + if (!e.empty()) { + // Identify the maximum number of demand pages we have seen within + // the time interval. + if (e.stats[kStatsAtMaxDemand].num_pages > max_demand_pages) { + max_demand_pages = e.stats[kStatsAtMaxDemand].num_pages; + } + } + }, + num_epochs); + return max_demand_pages; + } + + // Gets the combined demand trend, which is the sum of the maximum demand + // difference in and the maxmin demand in . + Length CalculateCombinedDemandTrend(absl::Duration short_interval, + absl::Duration long_interval) { + if (short_interval != absl::ZeroDuration() && + long_interval != absl::ZeroDuration()) { + short_interval = std::min(short_interval, long_interval); + } + last_skip_subrelease_intervals_.short_interval = + std::min(short_interval, window_); + last_skip_subrelease_intervals_.long_interval = + std::min(long_interval, window_); + Length short_term_fluctuation_pages, long_term_trend_pages; + int short_epochs = std::min(short_interval / epoch_length_, kEpochs); + int long_epochs = std::min(long_interval / epoch_length_, kEpochs); + + tracker_.IterBackwards( + [&](size_t offset, const SubreleaseStatsEntry& e) { + if (!e.empty()) { + Length demand_difference = e.stats[kStatsAtMaxDemand].num_pages - + e.stats[kStatsAtMinDemand].num_pages; + // Identifies the highest demand fluctuation (i.e., difference + // between max_demand and min_demand) that we have seen within the + // time interval. + if (demand_difference > short_term_fluctuation_pages) { + short_term_fluctuation_pages = demand_difference; + } + } + }, + short_epochs); + tracker_.IterBackwards( + [&](size_t offset, const SubreleaseStatsEntry& e) { + if (!e.empty()) { + // Identifies the long-term demand peak (i.e., largest minimum + // demand) that we have seen within the time interval. 
+ if (e.stats[kStatsAtMinDemand].num_pages > long_term_trend_pages) { + long_term_trend_pages = e.stats[kStatsAtMinDemand].num_pages; + } + } + }, + long_epochs); + return short_term_fluctuation_pages + long_term_trend_pages; + } + + // The tracker reports pages that have been free for at least this interval, + // as well as peaks within this interval. The interval is also used for + // deciding correctness of skipped subreleases by associating past skipping + // decisions to peaks within this interval. + const absl::Duration summary_interval_; + + const absl::Duration window_; + const absl::Duration epoch_length_; + + TimeSeriesTracker tracker_; + SkippedSubreleaseCorrectnessTracker skipped_subrelease_correctness_; + + // Records most recent intervals for skipping subreleases, plus expected next + // intervals for evaluating skipped subreleases. All for reporting and + // debugging only. + SkipSubreleaseIntervals last_skip_subrelease_intervals_; +}; + +// Evaluates a/b, avoiding division by zero. +inline double safe_div(Length a, Length b) { + return safe_div(a.raw_num(), b.raw_num()); +} + +template +void SubreleaseStatsTracker::Print(Printer& out, + absl::string_view field) const { + NumberOfFreePages free_pages = min_free_pages(summary_interval_); + out.printf("%s: time series over %d min interval\n\n", field, + absl::ToInt64Minutes(summary_interval_)); + + // Realized fragmentation is equivalent to backed minimum free pages over a + // 5-min interval. It is printed for convenience but not included in pbtxt. + out.printf("%s: realized fragmentation: %.1f MiB\n", field, + free_pages.free_backed.in_mib()); + out.printf("%s: minimum free pages: %zu (%zu backed)\n", field, + free_pages.free.raw_num(), free_pages.free_backed.raw_num()); + + SubreleaseStatsEntry at_peak_demand; + + tracker_.IterBackwards( + [&](size_t offset, const SubreleaseStatsEntry& e) { + if (!e.empty()) { + if (at_peak_demand.empty() || + at_peak_demand.stats[kStatsAtMaxDemand].num_pages < + e.stats[kStatsAtMaxDemand].num_pages) { + at_peak_demand = e; + } + } + }, + summary_interval_ / epoch_length_); + + out.printf( + "%s: at peak demand: %zu pages (and %zu free, %zu unmapped)\n" + "%s: at peak demand: %zu hps (%zu regular, %zu donated, " + "%zu partial, %zu released)\n", + field, at_peak_demand.stats[kStatsAtMaxDemand].num_pages.raw_num(), + at_peak_demand.stats[kStatsAtMaxDemand].free_pages.raw_num(), + at_peak_demand.stats[kStatsAtMaxDemand].unmapped_pages.raw_num(), field, + at_peak_demand.stats[kStatsAtMaxDemand].total_huge_pages().raw_num(), + at_peak_demand.stats[kStatsAtMaxDemand].huge_pages[kRegular].raw_num(), + at_peak_demand.stats[kStatsAtMaxDemand].huge_pages[kDonated].raw_num(), + at_peak_demand.stats[kStatsAtMaxDemand] + .huge_pages[kPartialReleased] + .raw_num(), + at_peak_demand.stats[kStatsAtMaxDemand].huge_pages[kReleased].raw_num()); + + out.printf( + "\n%s: Since the start of the execution, %zu subreleases (%zu" + " pages) were skipped due to the sum of short-term (%ds)" + " fluctuations and long-term (%ds) trends.\n", + field, total_skipped().count, total_skipped().pages.raw_num(), + absl::ToInt64Seconds(last_skip_subrelease_intervals_.short_interval), + absl::ToInt64Seconds(last_skip_subrelease_intervals_.long_interval)); + + Length skipped_pages = total_skipped().pages - pending_skipped().pages; + double correctly_skipped_pages_percentage = + safe_div(100.0 * correctly_skipped().pages, skipped_pages); + + size_t skipped_count = total_skipped().count - pending_skipped().count; + double 
correctly_skipped_count_percentage = + safe_div(100.0 * correctly_skipped().count, skipped_count); + + out.printf( + "%s: %.4f%% of decisions confirmed correct, %zu " + "pending (%.4f%% of pages, %zu pending).\n", + field, correctly_skipped_count_percentage, pending_skipped().count, + correctly_skipped_pages_percentage, pending_skipped().pages.raw_num()); + + // Print subrelease stats + Length total_subreleased; + Length total_partial_alloc_pages_subreleased; + HugeLength total_broken = NHugePages(0); + tracker_.Iter( + [&](size_t offset, const SubreleaseStatsEntry& e) { + total_subreleased += e.num_pages_subreleased; + total_partial_alloc_pages_subreleased += + e.num_partial_alloc_pages_subreleased; + total_broken += e.num_hugepages_broken; + }, + tracker_.kSkipEmptyEntries); + out.printf( + "%s: Subrelease stats last %d min: total " + "%zu pages subreleased (%zu pages from partial allocs), " + "%zu hugepages broken\n", + field, static_cast(absl::ToInt64Minutes(window_)), + total_subreleased.raw_num(), + total_partial_alloc_pages_subreleased.raw_num(), total_broken.raw_num()); +} + +template +void SubreleaseStatsTracker::PrintSubreleaseStatsInPbtxt( + PbtxtRegion& hpaa, absl::string_view field) const { + PbtxtRegion region = hpaa.CreateSubRegion(field); + region.PrintI64("skipped_subrelease_short_interval_ms", + absl::ToInt64Milliseconds( + last_skip_subrelease_intervals_.short_interval)); + region.PrintI64( + "skipped_subrelease_long_interval_ms", + absl::ToInt64Milliseconds(last_skip_subrelease_intervals_.long_interval)); + region.PrintI64("skipped_subrelease_pages", total_skipped().pages.raw_num()); + region.PrintI64("correctly_skipped_subrelease_pages", + correctly_skipped().pages.raw_num()); + region.PrintI64("pending_skipped_subrelease_pages", + pending_skipped().pages.raw_num()); + region.PrintI64("skipped_subrelease_count", total_skipped().count); + region.PrintI64("correctly_skipped_subrelease_count", + correctly_skipped().count); + region.PrintI64("pending_skipped_subrelease_count", pending_skipped().count); +} + +template +void SubreleaseStatsTracker::PrintTimeseriesStatsInPbtxt( + PbtxtRegion& hpaa, absl::string_view field) const { + PbtxtRegion region = hpaa.CreateSubRegion(field); + region.PrintI64("window_ms", absl::ToInt64Milliseconds(epoch_length_)); + region.PrintI64("epochs", kEpochs); + + NumberOfFreePages free_pages = min_free_pages(summary_interval_); + region.PrintI64("min_free_pages_interval_ms", + absl::ToInt64Milliseconds(summary_interval_)); + region.PrintI64("min_free_pages", free_pages.free.raw_num()); + region.PrintI64("min_free_backed_pages", free_pages.free_backed.raw_num()); + + static const char* labels[kNumStatsTypes] = {"at_minimum_demand", + "at_maximum_demand"}; + + tracker_.Iter( + [&](size_t offset, const SubreleaseStatsEntry& e) GOOGLE_MALLOC_SECTION { + auto subregion = region.CreateSubRegion("measurements"); + subregion.PrintI64("epoch", offset); + subregion.PrintI64("min_free_pages", e.min_free_pages.raw_num()); + subregion.PrintI64("min_free_backed_pages", + e.min_free_backed_pages.raw_num()); + subregion.PrintI64("num_pages_subreleased", + e.num_pages_subreleased.raw_num()); + subregion.PrintI64("num_hugepages_broken", + e.num_hugepages_broken.raw_num()); + subregion.PrintI64("partial_alloc_pages_subreleased", + e.num_partial_alloc_pages_subreleased.raw_num()); + for (int i = 0; i < kNumStatsTypes; i++) { + auto m = subregion.CreateSubRegion(labels[i]); + SubreleaseStats stats = e.stats[i]; + m.PrintI64("num_pages", stats.num_pages.raw_num()); 
+ m.PrintI64("regular_huge_pages", + stats.huge_pages[kRegular].raw_num()); + m.PrintI64("donated_huge_pages", + stats.huge_pages[kDonated].raw_num()); + m.PrintI64("partial_released_huge_pages", + stats.huge_pages[kPartialReleased].raw_num()); + m.PrintI64("released_huge_pages", + stats.huge_pages[kReleased].raw_num()); + m.PrintI64("used_pages_in_subreleased_huge_pages", + stats.used_pages_in_subreleased_huge_pages.raw_num()); + } + }, + tracker_.kSkipEmptyEntries); +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_HUGE_PAGE_SUBRELEASE_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_page_subrelease_test.cc b/contrib/libs/tcmalloc/tcmalloc/huge_page_subrelease_test.cc new file mode 100644 index 000000000000..ad4a263c6645 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/huge_page_subrelease_test.cc @@ -0,0 +1,351 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/huge_page_subrelease.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include // NOLINT(build/c++11) +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/algorithm/container.h" +#include "absl/base/attributes.h" +#include "absl/base/internal/sysinfo.h" +#include "absl/base/macros.h" +#include "absl/container/flat_hash_set.h" +#include "absl/flags/flag.h" +#include "absl/memory/memory.h" +#include "absl/random/random.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" +#include "absl/strings/string_view.h" +#include "absl/synchronization/blocking_counter.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "tcmalloc/common.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/clock.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/pages.h" +#include "tcmalloc/span.h" +#include "tcmalloc/stats.h" + +using tcmalloc::tcmalloc_internal::Length; +using testing::StrEq; + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +class StatsTrackerTest : public testing::Test { + private: + static int64_t clock_; + static int64_t FakeClock() { return clock_; } + static double GetFakeClockFrequency() { + return absl::ToDoubleNanoseconds(absl::Seconds(2)); + } + + protected: + static constexpr absl::Duration kWindow = absl::Minutes(10); + + using StatsTrackerType = SubreleaseStatsTracker<16>; + StatsTrackerType tracker_{ + Clock{.now = FakeClock, .freq = GetFakeClockFrequency}, kWindow, + absl::Minutes(5)}; + + void Advance(absl::Duration d) { + clock_ += static_cast(absl::ToDoubleSeconds(d) * + GetFakeClockFrequency()); + } + + // Generates four data points for the tracker that represent "interesting" + // points (i.e., min/max pages demand, min/max hugepages). 
+ void GenerateInterestingPoints(Length num_pages, HugeLength num_hugepages, + Length num_free_pages); + + // Generates a data point with a particular amount of demand pages, while + // ignoring the specific number of hugepages. + void GenerateDemandPoint(Length num_pages, Length num_free_pages); + + void SetUp() override { + // Resets the clock used by SubreleaseStatsTracker, allowing each test + // starts in epoch 0. + clock_ = 0; + } +}; + +int64_t StatsTrackerTest::clock_{0}; + +void StatsTrackerTest::GenerateInterestingPoints(Length num_pages, + HugeLength num_hugepages, + Length num_free_pages) { + for (int i = 0; i <= 1; ++i) { + for (int j = 0; j <= 1; ++j) { + StatsTrackerType::SubreleaseStats stats; + stats.num_pages = num_pages + Length((i == 0) ? 4 : 8 * j); + stats.free_pages = num_free_pages + Length(10 * i + j); + stats.unmapped_pages = Length(10); + stats.used_pages_in_subreleased_huge_pages = num_pages; + stats.huge_pages[StatsTrackerType::kRegular] = + num_hugepages + ((i == 1) ? NHugePages(4) : NHugePages(8) * j); + stats.huge_pages[StatsTrackerType::kDonated] = num_hugepages; + stats.huge_pages[StatsTrackerType::kPartialReleased] = NHugePages(i); + stats.huge_pages[StatsTrackerType::kReleased] = NHugePages(j); + tracker_.Report(stats); + } + } +} + +void StatsTrackerTest::GenerateDemandPoint(Length num_pages, + Length num_free_pages) { + HugeLength hp = NHugePages(1); + StatsTrackerType::SubreleaseStats stats; + stats.num_pages = num_pages; + stats.free_pages = num_free_pages; + stats.unmapped_pages = Length(0); + stats.used_pages_in_subreleased_huge_pages = Length(0); + stats.huge_pages[StatsTrackerType::kRegular] = hp; + stats.huge_pages[StatsTrackerType::kDonated] = hp; + stats.huge_pages[StatsTrackerType::kPartialReleased] = hp; + stats.huge_pages[StatsTrackerType::kReleased] = hp; + tracker_.Report(stats); +} + +// Tests that the tracker aggregates all data correctly. The output is tested by +// comparing the text output of the tracker. While this is a bit verbose, it is +// much cleaner than extracting and comparing all data manually. +TEST_F(StatsTrackerTest, Works) { + // Ensure that the beginning (when free pages are 0) is outside the 5-min + // window the instrumentation is recording. + GenerateInterestingPoints(Length(1), NHugePages(1), Length(1)); + Advance(absl::Minutes(5)); + + GenerateInterestingPoints(Length(100), NHugePages(5), Length(200)); + + Advance(absl::Minutes(1)); + + GenerateInterestingPoints(Length(200), NHugePages(10), Length(100)); + + Advance(absl::Minutes(1)); + + // Test text output (time series summary). + { + std::string buffer(1024 * 1024, '\0'); + Printer printer(&*buffer.begin(), buffer.size()); + { + tracker_.Print(printer, "StatsTracker"); + buffer.erase(printer.SpaceRequired()); + } + + EXPECT_THAT(buffer, StrEq(R"(StatsTracker: time series over 5 min interval + +StatsTracker: realized fragmentation: 0.8 MiB +StatsTracker: minimum free pages: 110 (100 backed) +StatsTracker: at peak demand: 208 pages (and 111 free, 10 unmapped) +StatsTracker: at peak demand: 26 hps (14 regular, 10 donated, 1 partial, 1 released) + +StatsTracker: Since the start of the execution, 0 subreleases (0 pages) were skipped due to the sum of short-term (0s) fluctuations and long-term (0s) trends. +StatsTracker: 0.0000% of decisions confirmed correct, 0 pending (0.0000% of pages, 0 pending). 
+StatsTracker: Subrelease stats last 10 min: total 0 pages subreleased (0 pages from partial allocs), 0 hugepages broken +)")); + } +} + +TEST_F(StatsTrackerTest, InvalidDurations) { + // These should not crash. + tracker_.min_free_pages(absl::InfiniteDuration()); + tracker_.min_free_pages(kWindow + absl::Seconds(1)); + tracker_.min_free_pages(-(kWindow + absl::Seconds(1))); + tracker_.min_free_pages(-absl::InfiniteDuration()); +} + +TEST_F(StatsTrackerTest, ComputeRecentDemand) { + // Generates max and min demand in each epoch to create short-term demand + // fluctuations. + GenerateDemandPoint(Length(1500), Length(2000)); + GenerateDemandPoint(Length(3000), Length(1000)); + Advance(absl::Minutes(1.25)); + GenerateDemandPoint(Length(500), Length(1000)); + GenerateDemandPoint(Length(1500), Length(0)); + Advance(absl::Minutes(1)); + GenerateDemandPoint(Length(50), Length(1000)); + GenerateDemandPoint(Length(100), Length(2000)); + Advance(absl::Minutes(1)); + GenerateDemandPoint(Length(100), Length(2000)); + GenerateDemandPoint(Length(300), Length(3000)); + + Length short_long_peak_pages = + tracker_.GetRecentDemand(absl::Minutes(2), absl::Minutes(3)); + EXPECT_EQ(short_long_peak_pages, Length(700)); + Length short_long_peak_pages2 = + tracker_.GetRecentDemand(absl::Minutes(5), absl::Minutes(5)); + EXPECT_EQ(short_long_peak_pages2, Length(3000)); + + Advance(absl::Minutes(4)); + GenerateDemandPoint(Length(150), Length(500)); + GenerateDemandPoint(Length(200), Length(3000)); + + Length short_long_peak_pages3 = + tracker_.GetRecentDemand(absl::Minutes(1), absl::ZeroDuration()); + EXPECT_EQ(short_long_peak_pages3, Length(50)); + + Advance(absl::Minutes(5)); + GenerateDemandPoint(Length(100), Length(700)); + GenerateDemandPoint(Length(150), Length(800)); + + Length short_long_peak_pages4 = + tracker_.GetRecentDemand(absl::ZeroDuration(), absl::Minutes(5)); + EXPECT_EQ(short_long_peak_pages4, Length(100)); + // The short_interval needs to be shorter or equal to the long_interval when + // they are both set. We cap short_interval to long_interval when this is not + // the case. + EXPECT_EQ(tracker_.GetRecentDemand(absl::Minutes(2), absl::Minutes(1)), + tracker_.GetRecentDemand(absl::Minutes(1), absl::Minutes(1))); +} + +TEST_F(StatsTrackerTest, ComputeRecentDemandAndCappedToPeak) { + // Generates max and min demand in each epoch to create short-term demand + // fluctuations. + GenerateDemandPoint(Length(50), Length(2000)); + GenerateDemandPoint(Length(3000), Length(1000)); + Advance(absl::Minutes(2)); + GenerateDemandPoint(Length(1500), Length(0)); + Advance(absl::Minutes(1)); + GenerateDemandPoint(Length(50), Length(1000)); + GenerateDemandPoint(Length(100), Length(2000)); + // The calculated demand is 2500 (maximum demand diff) + 1500 (max + // min_demand), but capped by the peak observed in the time series. + Length demand_1 = + tracker_.GetRecentDemand(absl::Minutes(5), absl::Minutes(5)); + EXPECT_EQ(demand_1, Length(3000)); + // Capped by the peak observed in 2 mins. + Length demand_2 = tracker_.GetRecentDemand(absl::Minutes(5), absl::Minutes(5), + absl::Minutes(2)); + EXPECT_EQ(demand_2, Length(1500)); +} + +// Tests that we can compute the realized fragmentation correctly. 
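As a cross-check of the expected output above: the realized fragmentation reported by the tracker is simply the smallest number of backed free pages seen over the summary interval (5 minutes here). In the Works test that minimum is 100 backed pages; assuming the usual 2 MiB huge pages with kPagesPerHugePage == 256 (so 8 KiB pages, an assumption of this note), 100 pages x 8 KiB = 800 KiB, which prints as the "realized fragmentation: 0.8 MiB" line.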
+TEST_F(StatsTrackerTest, ComputeRealizedFragmentation) { + GenerateDemandPoint(Length(50), Length(500)); + Advance(absl::Minutes(2)); + GenerateDemandPoint(Length(3000), Length(1000)); + Advance(absl::Minutes(1)); + GenerateDemandPoint(Length(1500), Length(2000)); + Advance(absl::Minutes(2)); + Length fragmentation_1 = tracker_.RealizedFragmentation(); + EXPECT_EQ(fragmentation_1, Length(500)); + + Advance(absl::Minutes(30)); + GenerateDemandPoint(Length(1500), Length(2000)); + Advance(absl::Minutes(2)); + Length fragmentation_2 = tracker_.RealizedFragmentation(); + EXPECT_EQ(fragmentation_2, Length(2000)); +} + +TEST_F(StatsTrackerTest, TrackCorrectSubreleaseDecisions) { + // First peak (large) + GenerateDemandPoint(Length(1000), Length(1000)); + + // Incorrect subrelease: Subrelease to 1000 + Advance(absl::Minutes(1)); + GenerateDemandPoint(Length(100), Length(1000)); + tracker_.ReportSkippedSubreleasePages(Length(900), Length(1000)); + + // Second peak (small) + Advance(absl::Minutes(1)); + GenerateDemandPoint(Length(500), Length(1000)); + + EXPECT_EQ(tracker_.total_skipped().pages, Length(900)); + EXPECT_EQ(tracker_.total_skipped().count, 1); + EXPECT_EQ(tracker_.correctly_skipped().pages, Length(0)); + EXPECT_EQ(tracker_.correctly_skipped().count, 0); + EXPECT_EQ(tracker_.pending_skipped().pages, Length(900)); + EXPECT_EQ(tracker_.pending_skipped().count, 1); + + // Correct subrelease: Subrelease to 500 + Advance(absl::Minutes(1)); + GenerateDemandPoint(Length(500), Length(100)); + tracker_.ReportSkippedSubreleasePages(Length(50), Length(550)); + GenerateDemandPoint(Length(500), Length(50)); + tracker_.ReportSkippedSubreleasePages(Length(50), Length(500)); + GenerateDemandPoint(Length(500), Length(0)); + + EXPECT_EQ(tracker_.total_skipped().pages, Length(1000)); + EXPECT_EQ(tracker_.total_skipped().count, 3); + EXPECT_EQ(tracker_.correctly_skipped().pages, Length(0)); + EXPECT_EQ(tracker_.correctly_skipped().count, 0); + EXPECT_EQ(tracker_.pending_skipped().pages, Length(1000)); + EXPECT_EQ(tracker_.pending_skipped().count, 3); + + // Third peak (large, too late for first peak) + Advance(absl::Minutes(4)); + GenerateDemandPoint(Length(1100), Length(1000)); + + Advance(absl::Minutes(5)); + GenerateDemandPoint(Length(1100), Length(1000)); + + EXPECT_EQ(tracker_.total_skipped().pages, Length(1000)); + EXPECT_EQ(tracker_.total_skipped().count, 3); + EXPECT_EQ(tracker_.correctly_skipped().pages, Length(100)); + EXPECT_EQ(tracker_.correctly_skipped().count, 2); + EXPECT_EQ(tracker_.pending_skipped().pages, Length(0)); + EXPECT_EQ(tracker_.pending_skipped().count, 0); +} + +TEST_F(StatsTrackerTest, SubreleaseCorrectnessWithChangingIntervals) { + // First peak (large) + GenerateDemandPoint(Length(1000), Length(1000)); + + Advance(absl::Minutes(1)); + GenerateDemandPoint(Length(100), Length(1000)); + + tracker_.ReportSkippedSubreleasePages(Length(50), Length(1000), + absl::Minutes(4)); + Advance(absl::Minutes(1)); + + // With two correctness intervals in the same epoch, take the maximum + tracker_.ReportSkippedSubreleasePages(Length(100), Length(1000), + absl::Minutes(1)); + tracker_.ReportSkippedSubreleasePages(Length(200), Length(1000), + absl::Minutes(7)); + + Advance(absl::Minutes(5)); + GenerateDemandPoint(Length(1100), Length(1000)); + Advance(absl::Minutes(10)); + GenerateDemandPoint(Length(1100), Length(1000)); + + EXPECT_EQ(tracker_.total_skipped().pages, Length(350)); + EXPECT_EQ(tracker_.total_skipped().count, 3); + EXPECT_EQ(tracker_.correctly_skipped().pages, Length(300)); + 
EXPECT_EQ(tracker_.correctly_skipped().count, 2); + EXPECT_EQ(tracker_.pending_skipped().pages, Length(0)); + EXPECT_EQ(tracker_.pending_skipped().count, 0); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_pages.h b/contrib/libs/tcmalloc/tcmalloc/huge_pages.h index 4498994f7568..6c0b60aaf452 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_pages.h +++ b/contrib/libs/tcmalloc/tcmalloc/huge_pages.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -27,6 +28,7 @@ #include #include "tcmalloc/common.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" #include "tcmalloc/internal/optimization.h" #include "tcmalloc/pages.h" @@ -40,32 +42,40 @@ inline constexpr Length kPagesPerHugePage = // A single aligned huge page. struct HugePage { - void *start_addr() const { - ASSERT(pn <= kMaxPageNumber); - return reinterpret_cast(pn << kHugePageShift); + void* start_addr() const { + TC_ASSERT_LE(pn, kMaxPageNumber); + return reinterpret_cast(pn << kHugePageShift); } PageId first_page() const { - ASSERT(pn <= kMaxPageNumber); + TC_ASSERT_LE(pn, kMaxPageNumber); return PageId(pn << (kHugePageShift - kPageShift)); } size_t index() const { - ASSERT(pn <= kMaxPageNumber); + TC_ASSERT_LE(pn, kMaxPageNumber); return pn; } + template + friend H AbslHashValue(H h, const HugePage& p) { + return H::combine(std::move(h), p.pn); + } + static constexpr uintptr_t kMaxPageNumber = std::numeric_limits::max() >> kHugePageShift; + template + friend void AbslStringify(Sink& sink, const HugePage& v) { + absl::Format(&sink, "%p", v.start_addr()); + } + uintptr_t pn; }; struct HugeLength { - size_t n; - constexpr HugeLength() : n(0) {} - explicit HugeLength(double x) : n(ceil(x)) { ASSERT(x >= 0); } + explicit HugeLength(double x) : n(ceil(x)) { TC_ASSERT_GE(x, 0); } constexpr size_t raw_num() const { return n; } constexpr size_t in_bytes() const { return n * kHugePageSize; } constexpr size_t in_mib() const { @@ -77,10 +87,43 @@ struct HugeLength { // It is possible to have a HugeLength that corresponds to more // bytes than can be addressed (i.e. > size_t.) Check for that. 
bool overflows() const; + static constexpr HugeLength min() { + return HugeLength(static_cast(0)); + } + static constexpr HugeLength max() { + return HugeLength(static_cast(HugePage::kMaxPageNumber)); + } + + template + friend void AbslStringify(Sink& sink, const HugeLength& v) { + absl::Format(&sink, "%zu", v.in_bytes()); + } private: + size_t n; + explicit constexpr HugeLength(size_t x) : n(x) {} friend constexpr HugeLength NHugePages(size_t n); + friend HugeLength& operator++(HugeLength&); + friend HugeLength& operator--(HugeLength&); + friend constexpr bool operator<(HugeLength, HugeLength); + friend constexpr bool operator>(HugeLength, HugeLength); + friend constexpr bool operator<=(HugeLength, HugeLength); + friend constexpr bool operator>=(HugeLength, HugeLength); + friend constexpr bool operator==(HugeLength, HugeLength); + friend constexpr bool operator!=(HugeLength, HugeLength); + friend constexpr size_t operator/(HugeLength, HugeLength); + friend constexpr HugeLength operator%(HugeLength, HugeLength); + friend constexpr HugeLength operator*(HugeLength, size_t); + friend HugeLength& operator*=(HugeLength&, size_t); + friend constexpr HugeLength operator/(HugeLength, size_t); + friend constexpr HugePage operator+(HugePage lhs, HugeLength rhs); + friend constexpr HugePage operator-(HugePage lhs, HugeLength rhs); + friend HugePage& operator+=(HugePage& lhs, HugeLength rhs); + friend constexpr HugeLength operator+(HugeLength lhs, HugeLength rhs); + friend HugeLength& operator+=(HugeLength& lhs, HugeLength rhs); + friend constexpr HugeLength operator-(HugeLength lhs, HugeLength rhs); + friend HugeLength& operator-=(HugeLength& lhs, HugeLength rhs); }; // Literal constructors (made explicit to avoid accidental uses when @@ -100,19 +143,19 @@ inline constexpr HugeLength HLFromPages(Length pages) { kPagesPerHugePage); } -inline HugeLength &operator++(HugeLength &len) { // NOLINT(runtime/references) +inline HugeLength& operator++(HugeLength& len) { // NOLINT(runtime/references) len.n++; return len; } -inline HugePage &operator++(HugePage &p) { // NOLINT(runtime/references) - ASSERT(p.pn + 1 <= HugePage::kMaxPageNumber); +inline HugePage& operator++(HugePage& p) { // NOLINT(runtime/references) + TC_ASSERT_LE(p.pn + 1, HugePage::kMaxPageNumber); p.pn++; return p; } -inline HugeLength &operator--(HugeLength &len) { // NOLINT(runtime/references) - ASSERT(len.n >= 1); +inline HugeLength& operator--(HugeLength& len) { // NOLINT(runtime/references) + TC_ASSERT_GE(len.n, 1); len.n--; return len; } @@ -191,7 +234,7 @@ inline constexpr HugeLength operator/(HugeLength lhs, size_t rhs) { return NHugePages(lhs.n / rhs); } -inline HugeLength &operator*=(HugeLength &lhs, size_t rhs) { +inline HugeLength& operator*=(HugeLength& lhs, size_t rhs) { lhs.n *= rhs; return lhs; } @@ -203,7 +246,7 @@ inline constexpr HugeLength operator%(HugeLength lhs, HugeLength rhs) { TCMALLOC_ATTRIBUTE_CONST inline constexpr HugePage operator+(HugePage lhs, HugeLength rhs) { - ASSERT(lhs.pn + rhs.n <= HugePage::kMaxPageNumber); + TC_ASSERT_LE(lhs.pn + rhs.n, HugePage::kMaxPageNumber); return HugePage{lhs.pn + rhs.n}; } @@ -214,16 +257,16 @@ inline constexpr HugePage operator+(HugeLength lhs, HugePage rhs) { TCMALLOC_ATTRIBUTE_CONST inline constexpr HugePage operator-(HugePage lhs, HugeLength rhs) { - return ASSERT(lhs.pn >= rhs.n), HugePage{lhs.pn - rhs.n}; + return TC_ASSERT_GE(lhs.pn, rhs.n), HugePage{lhs.pn - rhs.n}; } TCMALLOC_ATTRIBUTE_CONST inline constexpr HugeLength operator-(HugePage lhs, HugePage rhs) { - return 
ASSERT(lhs.pn >= rhs.pn), NHugePages(lhs.pn - rhs.pn); + return TC_ASSERT_GE(lhs.pn, rhs.pn), NHugePages(lhs.pn - rhs.pn); } -inline HugePage &operator+=(HugePage &lhs, HugeLength rhs) { - ASSERT(lhs.pn + rhs.n <= HugePage::kMaxPageNumber); +inline HugePage& operator+=(HugePage& lhs, HugeLength rhs) { + TC_ASSERT_LE(lhs.pn + rhs.n, HugePage::kMaxPageNumber); lhs.pn += rhs.n; return lhs; } @@ -233,18 +276,18 @@ inline constexpr HugeLength operator+(HugeLength lhs, HugeLength rhs) { return NHugePages(lhs.n + rhs.n); } -inline HugeLength &operator+=(HugeLength &lhs, HugeLength rhs) { +inline HugeLength& operator+=(HugeLength& lhs, HugeLength rhs) { lhs.n += rhs.n; return lhs; } TCMALLOC_ATTRIBUTE_CONST inline constexpr HugeLength operator-(HugeLength lhs, HugeLength rhs) { - return ASSERT(lhs.n >= rhs.n), NHugePages(lhs.n - rhs.n); + return TC_ASSERT_GE(lhs.n, rhs.n), NHugePages(lhs.n - rhs.n); } -inline HugeLength &operator-=(HugeLength &lhs, HugeLength rhs) { - ASSERT(lhs.n >= rhs.n); +inline HugeLength& operator-=(HugeLength& lhs, HugeLength rhs) { + TC_ASSERT_GE(lhs.n, rhs.n); lhs.n -= rhs.n; return lhs; } @@ -253,7 +296,7 @@ inline bool HugeLength::overflows() const { return *this > HLFromBytes(std::numeric_limits::max()); } -inline void PrintTo(const HugeLength &n, ::std::ostream *os) { +inline void PrintTo(const HugeLength& n, ::std::ostream* os) { *os << n.raw_num() << "hps"; } @@ -263,16 +306,25 @@ inline HugePage HugePageContaining(PageId p) { } TCMALLOC_ATTRIBUTE_CONST -inline HugePage HugePageContaining(void *p) { +inline HugePage HugePageContaining(void* p) { return HugePageContaining(PageIdContaining(p)); } // A set of contiguous huge pages. struct HugeRange { - void *start_addr() const { return first.start_addr(); } - void *end_addr() const { return (first + n).start_addr(); } + HugeRange() = default; + constexpr HugeRange(HugePage p, HugeLength len) : first(p), n(len) {} + + constexpr HugeRange(const HugeRange&) = default; + constexpr HugeRange& operator=(const HugeRange&) = default; + + constexpr HugeRange(HugeRange&&) = default; + constexpr HugeRange& operator=(HugeRange&&) = default; + + void* start_addr() const { return first.start_addr(); } + void* end_addr() const { return (first + n).start_addr(); } size_t byte_len() const { - return static_cast(end_addr()) - static_cast(start_addr()); + return static_cast(end_addr()) - static_cast(start_addr()); } // Assume any range starting at 0 is bogus. @@ -285,7 +337,7 @@ struct HugeRange { HugePage operator[](HugeLength i) const { return first + i; } template - friend H AbslHashValue(H h, const HugeRange &r) { + friend H AbslHashValue(H h, const HugeRange& r) { return H::combine(std::move(h), r.start().start_addr(), r.len().raw_num()); } @@ -320,7 +372,7 @@ inline constexpr bool operator==(HugeRange lhs, HugeRange rhs) { // REQUIRES: a and b are disjoint but adjacent (in that order) inline HugeRange Join(HugeRange a, HugeRange b) { - CHECK_CONDITION(a.precedes(b)); + TC_CHECK(a.precedes(b)); return {a.start(), a.len() + b.len()}; } @@ -328,7 +380,7 @@ inline HugeRange Join(HugeRange a, HugeRange b) { // Splits r into two ranges, one of length n. The other is either the rest // of the space (if any) or Nil. 
 inline std::pair<HugeRange, HugeRange> Split(HugeRange r, HugeLength n) {
-  ASSERT(r.len() >= n);
+  TC_ASSERT_GE(r.len(), n);
   if (r.len() > n) {
     return {HugeRange::Make(r.start(), n),
             HugeRange::Make(r.start() + n, r.len() - n)};
diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_region.h b/contrib/libs/tcmalloc/tcmalloc/huge_region.h
index 0262c007b219..64ea99c65062 100644
--- a/contrib/libs/tcmalloc/tcmalloc/huge_region.h
+++ b/contrib/libs/tcmalloc/tcmalloc/huge_region.h
@@ -1,3 +1,4 @@
+#pragma clang system_header
 // Copyright 2019 The TCMalloc Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
@@ -19,11 +20,18 @@
 
 #include 
 
+#include "absl/base/attributes.h"
 #include "absl/base/internal/cycleclock.h"
-#include "tcmalloc/huge_allocator.h"
-#include "tcmalloc/huge_page_filler.h"
+#include "absl/base/optimization.h"
+#include "absl/time/time.h"
+#include "tcmalloc/huge_cache.h"
+#include "tcmalloc/huge_page_subrelease.h"
+#include "tcmalloc/huge_pages.h"
+#include "tcmalloc/internal/clock.h"
+#include "tcmalloc/internal/config.h"
 #include "tcmalloc/internal/linked_list.h"
 #include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/range_tracker.h"
 #include "tcmalloc/pages.h"
 #include "tcmalloc/stats.h"
 
@@ -31,6 +39,20 @@ GOOGLE_MALLOC_SECTION_BEGIN
 namespace tcmalloc {
 namespace tcmalloc_internal {
 
+constexpr double kFractionToReleaseFromRegion = 0.1;
+enum class HugeRegionUsageOption : bool {
+  // This is the default behavior. We use slack to determine when to use
+  // HugeRegion. When slack is greater than 64MB (to ignore small binaries), and
+  // greater than the number of small allocations, we allocate large allocations
+  // from HugeRegion.
+  kDefault,
+  // When the experiment TEST_ONLY_TCMALLOC_USE_HUGE_REGIONS_MORE_OFTEN is
+  // enabled, we use the number of abandoned pages in addition to slack to make
+  // a decision. If the size of abandoned pages plus slack exceeds 64MB (to
+  // ignore small binaries), we use HugeRegion for large allocations.
+  kUseForAllLargeAllocs
+};
+
 // Track allocations from a fixed-size multiple huge page region.
 // Similar to PageTracker but a few important differences:
 // - crosses multiple hugepages
@@ -51,21 +73,23 @@ class HugeRegion : public TList<HugeRegion>::Elem {
   static constexpr HugeLength size() { return kRegionSize; }
 
   // REQUIRES: r.len() == size(); r unbacked.
-  HugeRegion(HugeRange r, MemoryModifyFunction unback);
+  HugeRegion(HugeRange r,
+             MemoryModifyFunction& unback ABSL_ATTRIBUTE_LIFETIME_BOUND);
   HugeRegion() = delete;
 
   // If available, return a range of n free pages, setting *from_released =
   // true iff the returned range is currently unbacked.
   // Returns false if no range available.
-  bool MaybeGet(Length n, PageId *p, bool *from_released);
+  bool MaybeGet(Length n, PageId* p, bool* from_released);
 
-  // Return [p, p + n) for new allocations.
+  // Return r for new allocations.
   // If release=true, release any hugepages made empty as a result.
-  // REQUIRES: [p, p + n) was the result of a previous MaybeGet.
-  void Put(PageId p, Length n, bool release);
+  // REQUIRES: Range{p, n} was the result of a previous MaybeGet.
+  void Put(Range r, bool release);
 
-  // Release any hugepages that are unused but backed.
-  HugeLength Release();
+  // Releases up to the desired number of pages from free-and-backed hugepages
+  // in the region.
+  HugeLength Release(Length desired);
 
   // Is p located in this region?
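// For illustration only: a minimal sketch of how a caller might drive the
// revised page-level API declared above. The region, the four-page request,
// and the helper name are hypothetical; the point is the MaybeGet /
// Put(Range) / Release(Length) calling convention, where Release() now takes
// a page count and unbacks only hugepages that are completely free.
inline void UseHugeRegionSketch(HugeRegion& region) {
  PageId p;
  bool from_released;
  // Ask for four pages; from_released says whether the returned range must be
  // backed before it can be used.
  if (!region.MaybeGet(Length(4), &p, &from_released)) {
    return;  // No contiguous run of four free pages in this region.
  }
  // ... use [p, p + 4) ...

  // Deallocation now takes a Range; release=false defers unbacking.
  region.Put(Range(p, Length(4)), /*release=*/false);

  // Later, ask to release up to one hugepage worth of fully-free, backed
  // memory; the return value is how many hugepages were actually unbacked.
  HugeLength released = region.Release(kPagesPerHugePage);
  (void)released;
}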
bool contains(PageId p) { return location_.contains(p); } @@ -77,57 +101,53 @@ class HugeRegion : public TList::Elem { } Length unmapped_pages() const { return (size() - nbacked_).in_pages(); } - void AddSpanStats(SmallSpanStats *small, LargeSpanStats *large, - PageAgeHistograms *ages) const; + void AddSpanStats(SmallSpanStats* small, LargeSpanStats* large) const; HugeLength backed() const; - void Print(Printer *out) const; - void PrintInPbtxt(PbtxtRegion *detail) const; + // Returns the number of hugepages that have been fully free (i.e. no + // allocated pages on them), but are backed. We release hugepages lazily when + // huge-regions-more-often feature is enabled. + HugeLength free_backed() const; + + void Print(Printer& out) const; + void PrintInPbtxt(PbtxtRegion& detail) const; BackingStats stats() const; // We don't define this as operator< because it's a rather specialized order. - bool BetterToAllocThan(const HugeRegion *rhs) const { + bool BetterToAllocThan(const HugeRegion* rhs) const { return longest_free() < rhs->longest_free(); } - void prepend_it(HugeRegion *other) { this->prepend(other); } + void prepend_it(HugeRegion* other) { this->prepend(other); } - void append_it(HugeRegion *other) { this->append(other); } + void append_it(HugeRegion* other) { this->append(other); } private: RangeTracker tracker_; HugeRange location_; - static int64_t AverageWhens(Length a, int64_t a_when, Length b, - int64_t b_when) { - const double aw = static_cast(a.raw_num()) * a_when; - const double bw = static_cast(b.raw_num()) * b_when; - return static_cast((aw + bw) / (a.raw_num() + b.raw_num())); - } - Length longest_free() const { return Length(tracker_.longest_free()); } - // Adjust counts of allocs-per-hugepage for [p, p + n) being added/removed. + // Adjust counts of allocs-per-hugepage for r being added/removed. - // *from_released is set to true iff [p, p + n) is currently unbacked - void Inc(PageId p, Length n, bool *from_released); + // *from_released is set to true iff r is currently unbacked + void Inc(Range r, bool* from_released); // If release is true, unback any hugepage that becomes empty. - void Dec(PageId p, Length n, bool release); + void Dec(Range r, bool release); - void UnbackHugepages(bool should[kNumHugePages]); + HugeLength UnbackHugepages(bool should_unback[kNumHugePages]); // How many pages are used in each hugepage? Length pages_used_[kNumHugePages]; // Is this hugepage backed? bool backed_[kNumHugePages]; HugeLength nbacked_; - int64_t whens_[kNumHugePages]; HugeLength total_unbacked_{NHugePages(0)}; - MemoryModifyFunction unback_; + MemoryModifyFunction& unback_; }; // Manage a set of regions from which we allocate. @@ -135,31 +155,61 @@ class HugeRegion : public TList::Elem { template class HugeRegionSet { public: - HugeRegionSet() : n_(0) {} + // For testing with mock clock. + HugeRegionSet(HugeRegionUsageOption use_huge_region_more_often, Clock clock) + : n_(0), + use_huge_region_more_often_(use_huge_region_more_often), + regionstats_tracker_(clock, absl::Minutes(10), absl::Minutes(5)) {} + + explicit HugeRegionSet(HugeRegionUsageOption use_huge_region_more_often) + : HugeRegionSet( + use_huge_region_more_often, + Clock{.now = absl::base_internal::CycleClock::Now, + .freq = absl::base_internal::CycleClock::Frequency}) {} // If available, return a range of n free pages, setting *from_released = // true iff the returned range is currently unbacked. // Returns false if no range available. 
-  bool MaybeGet(Length n, PageId *page, bool *from_released);
+  bool MaybeGet(Length n, PageId* page, bool* from_released);
 
   // Return an allocation to a region (if one matches!)
-  bool MaybePut(PageId p, Length n);
+  bool MaybePut(Range r);
 
   // Add region to the set.
-  void Contribute(Region *region);
-
-  // Unback any totally unused hugepages; return the number of pages
-  // we managed to release.
-  HugeLength Release();
-
-  void Print(Printer *out) const;
-  void PrintInPbtxt(PbtxtRegion *hpaa) const;
-  void AddSpanStats(SmallSpanStats *small, LargeSpanStats *large,
-                    PageAgeHistograms *ages) const;
+  void Contribute(Region* region);
+
+  // Tries to release up to the desired number of pages from fully-free but
+  // backed hugepages in HugeRegions. The intervals argument defines the
+  // skip-subrelease intervals, but unlike HugePageFiller skip-subrelease, it
+  // only releases free hugepages.
+  // Releases all free and backed hugepages to the system when hit_limit is set
+  // to true. Else, it uses the intervals to determine recent demand as seen by
+  // HugeRegions to compute realized fragmentation. It may only release as much
+  // memory in free pages as determined by the realized fragmentation.
+  // Returns the number of pages actually released.
+  Length ReleasePagesByPeakDemand(Length desired,
+                                  SkipSubreleaseIntervals intervals,
+                                  bool hit_limit);
+
+  // Releases hugepages that are unused but backed.
+  // Releases up to release_fraction times the number of free-but-backed
+  // hugepages from each huge region. Note that this clamps release_fraction
+  // between 0 and 1 if a fraction outside those bounds is specified.
+  Length ReleasePages(double release_fraction);
+
+  void Print(Printer& out) const;
+  void PrintInPbtxt(PbtxtRegion& hpaa) const;
+  void AddSpanStats(SmallSpanStats* small, LargeSpanStats* large) const;
   BackingStats stats() const;
+  HugeLength free_backed() const;
+  size_t ActiveRegions() const;
+  bool UseHugeRegionMoreOften() const {
+    return use_huge_region_more_often_ ==
+           HugeRegionUsageOption::kUseForAllLargeAllocs;
+  }
 
  private:
-  void Fix(Region *r) {
+  void Fix(Region* r) {
     // We've changed r's fragmentation--move it through the list to the
     // correct home (if needed).
     Rise(r);
@@ -167,7 +217,7 @@ class HugeRegionSet {
   }
 
   // Check if r has to move forward in the list.
-  void Rise(Region *r) {
+  void Rise(Region* r) {
     auto prev = list_.at(r);
     --prev;
     if (prev == list_.end()) return;  // we're at the front
@@ -183,7 +233,7 @@ class HugeRegionSet {
   }
 
   // Check if r has to move backward in the list.
-  void Fall(Region *r) {
+  void Fall(Region* r) {
     auto next = list_.at(r);
     ++next;
     if (next == list_.end()) return;  // we're at the back
@@ -199,8 +249,8 @@ class HugeRegionSet {
   }
 
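// For illustration only: a sketch of how the set-level release API above
// might be called from a periodic maintenance pass. The function name, the
// 1000-page target, and the interval values are hypothetical; the
// SkipSubreleaseIntervals fields follow the short_interval / long_interval
// usage shown in GetDesiredReleasablePages below. The set itself would have
// been constructed with, e.g.,
// HugeRegionSet<Region> set(HugeRegionUsageOption::kUseForAllLargeAllocs).
template <class Region>
Length MaintenanceReleaseSketch(HugeRegionSet<Region>& set) {
  SkipSubreleaseIntervals intervals;
  intervals.short_interval = absl::Minutes(1);
  intervals.long_interval = absl::Minutes(5);
  // Ask for up to 1000 pages, but let recent demand (skip-subrelease) shrink
  // the target; hit_limit=false means we are not under memory-limit pressure.
  return set.ReleasePagesByPeakDemand(Length(1000), intervals,
                                      /*hit_limit=*/false);
}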
   // Add r in its sorted place.
-  void AddToList(Region *r) {
-    for (Region *curr : list_) {
+  void AddToList(Region* r) {
+    for (Region* curr : list_) {
       if (r->BetterToAllocThan(curr)) {
         curr->prepend_it(r);
         return;
@@ -211,65 +261,123 @@ class HugeRegionSet {
     list_.append(r);
   }
 
+  using StatsTrackerType = SubreleaseStatsTracker<600>;
+  StatsTrackerType::SubreleaseStats GetSubreleaseStats() const {
+    StatsTrackerType::SubreleaseStats stats;
+    for (Region* region : list_) {
+      stats.num_pages += region->used_pages();
+      stats.free_pages += region->free_pages();
+      stats.unmapped_pages += region->unmapped_pages();
+      stats.huge_pages[StatsTrackerType::kRegular] += region->size();
+    }
+    stats.num_pages_subreleased = subrelease_stats_.num_pages_subreleased;
+    return stats;
+  }
+
+  Length used_pages() const {
+    Length used;
+    for (Region* region : list_) {
+      used += region->used_pages();
+    }
+    return used;
+  }
+
+  Length free_pages() const {
+    Length free;
+    for (Region* region : list_) {
+      free += region->free_pages();
+    }
+    return free;
+  }
+
+  HugeLength size() const {
+    HugeLength size;
+    for (Region* region : list_) {
+      size += region->size();
+    }
+    return size;
+  }
+
   size_t n_;
+  HugeRegionUsageOption use_huge_region_more_often_;
   // Sorted by longest_free increasing.
   TList<Region> list_;
+
+  // Computes the recent demand and uses it to determine the number of pages
+  // that may be released. The desired argument sets an upper bound on the
+  // number of pages to release.
+  // Returns the number of pages that may be released based on recent demand.
+  Length GetDesiredReleasablePages(Length desired,
+                                   SkipSubreleaseIntervals intervals);
+
+  // Functionality related to tracking demand.
+  void UpdateStatsTracker();
+  StatsTrackerType regionstats_tracker_;
+  SubreleaseStats subrelease_stats_;
 };
 
 // REQUIRES: r.len() == size(); r unbacked.
-inline HugeRegion::HugeRegion(HugeRange r, MemoryModifyFunction unback)
+inline HugeRegion::HugeRegion(HugeRange r, MemoryModifyFunction& unback)
     : tracker_{},
       location_(r),
       pages_used_{},
      backed_{},
       nbacked_(NHugePages(0)),
      unback_(unback) {
-  int64_t now = absl::base_internal::CycleClock::Now();
   for (int i = 0; i < kNumHugePages; ++i) {
-    whens_[i] = now;
     // These are already 0 but for clarity...
     pages_used_[i] = Length(0);
     backed_[i] = false;
   }
 }
 
-inline bool HugeRegion::MaybeGet(Length n, PageId *p, bool *from_released) {
+inline bool HugeRegion::MaybeGet(Length n, PageId* p, bool* from_released) {
   if (n > longest_free()) return false;
+  TC_ASSERT_GT(n, Length(0));
   auto index = Length(tracker_.FindAndMark(n.raw_num()));
 
   PageId page = location_.start().first_page() + index;
   *p = page;
 
   // the last hugepage we touch
-  Inc(page, n, from_released);
+  Inc(Range{page, n}, from_released);
   return true;
 }
 
 // If release=true, release any hugepages made empty as a result.
-inline void HugeRegion::Put(PageId p, Length n, bool release) {
-  Length index = p - location_.start().first_page();
-  tracker_.Unmark(index.raw_num(), n.raw_num());
+inline void HugeRegion::Put(Range r, bool release) {
+  Length index = r.p - location_.start().first_page();
+  tracker_.Unmark(index.raw_num(), r.n.raw_num());
 
-  Dec(p, n, release);
+  Dec(r, release);
 }
 
-// Release any hugepages that are unused but backed.
-inline HugeLength HugeRegion::Release() {
-  HugeLength r = NHugePages(0);
-  bool should_unback_[kNumHugePages] = {};
+// Release hugepages that are unused but backed.
+// TODO(b/199203282): We release pages, rounded up to a hugepage, from
+// free but backed hugepages from the region. 
We can explore a more +// sophisticated mechanism similar to Filler/Cache, that accounts for a recent +// peak while releasing pages. +inline HugeLength HugeRegion::Release(Length desired) { + if (desired == Length(0)) return NHugePages(0); + + const Length free_yet_backed = free_backed().in_pages(); + const Length to_release = std::min(desired, free_yet_backed); + + HugeLength release_target = NHugePages(0); + bool should_unback[kNumHugePages] = {}; for (size_t i = 0; i < kNumHugePages; ++i) { if (backed_[i] && pages_used_[i] == Length(0)) { - should_unback_[i] = true; - ++r; + should_unback[i] = true; + ++release_target; } + + if (release_target.in_pages() >= to_release) break; } - UnbackHugepages(should_unback_); - return r; + return UnbackHugepages(should_unback); } -inline void HugeRegion::AddSpanStats(SmallSpanStats *small, - LargeSpanStats *large, - PageAgeHistograms *ages) const { +inline void HugeRegion::AddSpanStats(SmallSpanStats* small, + LargeSpanStats* large) const { size_t index = 0, n; Length f, u; // This is complicated a bit by the backed/unbacked status of pages. @@ -283,16 +391,14 @@ inline void HugeRegion::AddSpanStats(SmallSpanStats *small, size_t i = (hp - location_.start()) / NHugePages(1); const bool backed = backed_[i]; Length truncated; - int64_t when = 0; while (n > 0 && backed_[i] == backed) { const PageId lim = (location_.start() + NHugePages(i + 1)).first_page(); Length here = std::min(Length(n), lim - p); - when = AverageWhens(truncated, when, here, whens_[i]); truncated += here; n -= here.raw_num(); p += here; i++; - ASSERT(i < kNumHugePages || n == 0); + TC_ASSERT(i < kNumHugePages || n == 0); } n = truncated.raw_num(); const bool released = !backed; @@ -320,13 +426,20 @@ inline void HugeRegion::AddSpanStats(SmallSpanStats *small, } } - if (ages != nullptr) { - ages->RecordRange(Length(n), released, when); - } index += n; } - CHECK_CONDITION(f == free_pages()); - CHECK_CONDITION(u == unmapped_pages()); + TC_CHECK_EQ(f, free_pages()); + TC_CHECK_EQ(u, unmapped_pages()); +} + +inline HugeLength HugeRegion::free_backed() const { + HugeLength r = NHugePages(0); + for (size_t i = 0; i < kNumHugePages; ++i) { + if (backed_[i] && pages_used_[i] == Length(0)) { + ++r; + } + } + return r; } inline HugeLength HugeRegion::backed() const { @@ -340,13 +453,13 @@ inline HugeLength HugeRegion::backed() const { return b; } -inline void HugeRegion::Print(Printer *out) const { +inline void HugeRegion::Print(Printer& out) const { const size_t kib_used = used_pages().in_bytes() / 1024; const size_t kib_free = free_pages().in_bytes() / 1024; const size_t kib_longest_free = longest_free().in_bytes() / 1024; const HugeLength unbacked = size() - backed(); const size_t mib_unbacked = unbacked.in_mib(); - out->printf( + out.printf( "HugeRegion: %zu KiB used, %zu KiB free, " "%zu KiB contiguous space, %zu MiB unbacked, " "%zu MiB unbacked lifetime\n", @@ -354,13 +467,14 @@ inline void HugeRegion::Print(Printer *out) const { total_unbacked_.in_bytes() / 1024 / 1024); } -inline void HugeRegion::PrintInPbtxt(PbtxtRegion *detail) const { - detail->PrintI64("used_bytes", used_pages().in_bytes()); - detail->PrintI64("free_bytes", free_pages().in_bytes()); - detail->PrintI64("longest_free_range_bytes", longest_free().in_bytes()); +inline void HugeRegion::PrintInPbtxt(PbtxtRegion& detail) const { + detail.PrintI64("used_bytes", used_pages().in_bytes()); + detail.PrintI64("free_bytes", free_pages().in_bytes()); + detail.PrintI64("longest_free_range_bytes", longest_free().in_bytes()); const 
HugeLength unbacked = size() - backed(); - detail->PrintI64("unbacked_bytes", unbacked.in_bytes()); - detail->PrintI64("total_unbacked_bytes", total_unbacked_.in_bytes()); + detail.PrintI64("unbacked_bytes", unbacked.in_bytes()); + detail.PrintI64("total_unbacked_bytes", total_unbacked_.in_bytes()); + detail.PrintI64("backed_fully_free_bytes", free_backed().in_bytes()); } inline BackingStats HugeRegion::stats() const { @@ -371,86 +485,156 @@ inline BackingStats HugeRegion::stats() const { return s; } -inline void HugeRegion::Inc(PageId p, Length n, bool *from_released) { +inline void HugeRegion::Inc(Range r, bool* from_released) { bool should_back = false; - const int64_t now = absl::base_internal::CycleClock::Now(); - while (n > Length(0)) { - const HugePage hp = HugePageContaining(p); + while (r.n > Length(0)) { + const HugePage hp = HugePageContaining(r.p); const size_t i = (hp - location_.start()) / NHugePages(1); const PageId lim = (hp + NHugePages(1)).first_page(); - Length here = std::min(n, lim - p); + Length here = std::min(r.n, lim - r.p); if (pages_used_[i] == Length(0) && !backed_[i]) { backed_[i] = true; should_back = true; ++nbacked_; - whens_[i] = now; } pages_used_[i] += here; - ASSERT(pages_used_[i] <= kPagesPerHugePage); - p += here; - n -= here; + TC_ASSERT_LE(pages_used_[i], kPagesPerHugePage); + r.p += here; + r.n -= here; } *from_released = should_back; } -inline void HugeRegion::Dec(PageId p, Length n, bool release) { - const int64_t now = absl::base_internal::CycleClock::Now(); - bool should_unback_[kNumHugePages] = {}; - while (n > Length(0)) { - const HugePage hp = HugePageContaining(p); +inline void HugeRegion::Dec(Range r, bool release) { + bool should_unback[kNumHugePages] = {}; + while (r.n > Length(0)) { + const HugePage hp = HugePageContaining(r.p); const size_t i = (hp - location_.start()) / NHugePages(1); const PageId lim = (hp + NHugePages(1)).first_page(); - Length here = std::min(n, lim - p); - ASSERT(here > Length(0)); - ASSERT(pages_used_[i] >= here); - ASSERT(backed_[i]); - whens_[i] = - AverageWhens(here, now, kPagesPerHugePage - pages_used_[i], whens_[i]); + Length here = std::min(r.n, lim - r.p); + TC_ASSERT_GT(here, Length(0)); + TC_ASSERT_GE(pages_used_[i], here); + TC_ASSERT(backed_[i]); pages_used_[i] -= here; if (pages_used_[i] == Length(0)) { - should_unback_[i] = true; + should_unback[i] = true; } - p += here; - n -= here; + r.p += here; + r.n -= here; } if (release) { - UnbackHugepages(should_unback_); + UnbackHugepages(should_unback); } } -inline void HugeRegion::UnbackHugepages(bool should[kNumHugePages]) { - const int64_t now = absl::base_internal::CycleClock::Now(); +inline HugeLength HugeRegion::UnbackHugepages( + bool should_unback[kNumHugePages]) { + HugeLength released = NHugePages(0); size_t i = 0; while (i < kNumHugePages) { - if (!should[i]) { + if (!should_unback[i]) { i++; continue; } size_t j = i; - while (j < kNumHugePages && should[j]) { - backed_[j] = false; - whens_[j] = now; + while (j < kNumHugePages && should_unback[j]) { j++; } HugeLength hl = NHugePages(j - i); - nbacked_ -= hl; HugePage p = location_.start() + NHugePages(i); - unback_(p.start_addr(), hl.in_bytes()); - total_unbacked_ += hl; + if (ABSL_PREDICT_TRUE(unback_(HugeRange(p, hl)))) { + nbacked_ -= hl; + total_unbacked_ += hl; + + for (size_t k = i; k < j; k++) { + TC_ASSERT(should_unback[k]); + backed_[k] = false; + } + + released += hl; + } i = j; } + + return released; +} + +template +inline Length HugeRegionSet::GetDesiredReleasablePages( + Length 
desired, SkipSubreleaseIntervals intervals) { + if (!intervals.SkipSubreleaseEnabled()) { + return desired; + } + UpdateStatsTracker(); + + Length required_pages; + required_pages = regionstats_tracker_.GetRecentDemand( + intervals.short_interval, intervals.long_interval); + + Length current_pages = used_pages() + free_pages(); + + if (required_pages != Length(0)) { + Length new_desired; + if (required_pages < current_pages) { + new_desired = current_pages - required_pages; + } + + // Because we currently release pages from fully backed and free hugepages, + // make sure that the realized fragmentation in HugeRegion is at least equal + // to kPagesPerHugePage. Otherwise, return zero to make sure we do not + // release any pages. + if (new_desired < kPagesPerHugePage) { + new_desired = Length(0); + } + + if (new_desired >= desired) { + return desired; + } + + // Compute the number of releasable pages from HugeRegion. We do not + // subrelease pages yet. Instead, we only release hugepages that are fully + // free but backed. Note: the remaining target should always be smaller or + // equal to the number of free pages according to the mechanism (recent peak + // is always larger or equal to current used_pages), however, we still + // calculate allowed release using the minimum of the two to avoid relying + // on that assumption. + Length free_backed_pages = free_backed().in_pages(); + Length releasable_pages = std::min(free_backed_pages, new_desired); + + // Reports the amount of memory that we didn't release due to this + // mechanism, but never more than skipped free pages. In other words, + // skipped_pages is zero if all free pages are allowed to be released by + // this mechanism. Note, only free pages in the smaller of the two + // (current_pages and required_pages) are skipped, the rest are allowed to + // be subreleased. + Length skipped_pages = std::min((free_backed_pages - releasable_pages), + (desired - new_desired)); + + regionstats_tracker_.ReportSkippedSubreleasePages( + skipped_pages, std::min(current_pages, required_pages)); + return new_desired; + } + + return desired; +} + +template +inline void HugeRegionSet::UpdateStatsTracker() { + regionstats_tracker_.Report(GetSubreleaseStats()); + subrelease_stats_.reset(); } // If available, return a range of n free pages, setting *from_released = // true iff the returned range is currently unbacked. // Returns false if no range available. template -inline bool HugeRegionSet::MaybeGet(Length n, PageId *page, - bool *from_released) { - for (Region *region : list_) { +inline bool HugeRegionSet::MaybeGet(Length n, PageId* page, + bool* from_released) { + for (Region* region : list_) { if (region->MaybeGet(n, page, from_released)) { Fix(region); + UpdateStatsTracker(); return true; } } @@ -459,11 +643,16 @@ inline bool HugeRegionSet::MaybeGet(Length n, PageId *page, // Return an allocation to a region (if one matches!) template -inline bool HugeRegionSet::MaybePut(PageId p, Length n) { - for (Region *region : list_) { - if (region->contains(p)) { - region->Put(p, n, true); +inline bool HugeRegionSet::MaybePut(Range r) { + // When HugeRegionMoreOften experiment is enabled, we do not release + // free-but-backed hugepages when we deallocate pages, but we do that + // periodically on the background thread. 
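// Worked example (made-up numbers, all in pages) of the skip-subrelease
// arithmetic in GetDesiredReleasablePages above; the 256-pages-per-hugepage
// figure stands in for kPagesPerHugePage and is only an assumption of this
// example. Requires <algorithm> for std::min.
inline void SkipSubreleaseArithmeticSketch() {
  const int desired = 1000;     // caller's release target
  const int current = 5000;     // used_pages() + free_pages()
  const int required = 4400;    // recent demand from the stats tracker
  const int free_backed = 800;  // pages on fully-free but backed hugepages
  const int pages_per_hugepage = 256;

  int new_desired = current > required ? current - required : 0;  // 600
  if (new_desired < pages_per_hugepage) new_desired = 0;
  if (new_desired > desired) new_desired = desired;

  const int releasable = std::min(free_backed, new_desired);  // 600
  const int skipped =
      std::min(free_backed - releasable, desired - new_desired);  // 200
  // 600 pages may be released; 200 skipped pages are reported to the tracker.
  (void)releasable;
  (void)skipped;
}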
+ const bool release = !UseHugeRegionMoreOften(); + for (Region* region : list_) { + if (region->contains(r.p)) { + region->Put(r, release); Fix(region); + UpdateStatsTracker(); return true; } } @@ -473,77 +662,164 @@ inline bool HugeRegionSet::MaybePut(PageId p, Length n) { // Add region to the set. template -inline void HugeRegionSet::Contribute(Region *region) { +inline void HugeRegionSet::Contribute(Region* region) { n_++; AddToList(region); + UpdateStatsTracker(); } -// Unback any totally unused hugepages; return the number of pages -// we managed to release. template -inline HugeLength HugeRegionSet::Release() { - HugeLength hl = NHugePages(0); - for (Region *region : list_) { - hl += region->Release(); +inline Length HugeRegionSet::ReleasePagesByPeakDemand( + Length desired, SkipSubreleaseIntervals intervals, bool hit_limit) { + // Because we are releasing fully-freed hugepages, in cases when malloc + // release rate is set to zero, we would still want to release some pages, + // provided it is allowed by the demand-based release strategy. We try to + // release up to 10% of the free and backed hugepages. + if (!hit_limit && desired == Length(0)) { + size_t new_desired = + kFractionToReleaseFromRegion * free_backed().in_pages().raw_num(); + desired = Length(new_desired); + } + + // Only reduce desired if skip subrelease is on. + // + // Additionally, if we hit the limit, we should not be applying skip + // subrelease. OOM may be imminent. + if (intervals.SkipSubreleaseEnabled() && !hit_limit) { + desired = GetDesiredReleasablePages(desired, intervals); } - return hl; + subrelease_stats_.set_limit_hit(hit_limit); + + Length released; + if (desired != Length(0)) { + for (Region* region : list_) { + released += region->Release(desired - released).in_pages(); + if (released >= desired) break; + } + } + + subrelease_stats_.num_pages_subreleased += released; + + // Keep separate stats if the on going release is triggered by reaching + // tcmalloc limit. 
+ if (subrelease_stats_.limit_hit()) { + subrelease_stats_.total_pages_subreleased_due_to_limit += released; + } + + return released; +} + +template +inline Length HugeRegionSet::ReleasePages(double release_fraction) { + const Length free_yet_backed = free_backed().in_pages(); + const size_t to_release = + free_yet_backed.raw_num() * std::clamp(release_fraction, 0, 1); + const Length to_release_pages = Length(to_release); + + Length released; + for (Region* region : list_) { + released += region->Release(to_release_pages - released).in_pages(); + if (released >= to_release_pages) return released; + } + return released; } template -inline void HugeRegionSet::Print(Printer *out) const { - out->printf("HugeRegionSet: 1 MiB+ allocations best-fit into %zu MiB slabs\n", - Region::size().in_bytes() / 1024 / 1024); - out->printf("HugeRegionSet: %zu total regions\n", n_); +inline void HugeRegionSet::Print(Printer& out) const { + out.printf("HugeRegionSet: 1 MiB+ allocations best-fit into %zu MiB slabs\n", + Region::size().in_bytes() / 1024 / 1024); + out.printf("HugeRegionSet: %zu total regions\n", n_); Length total_free; HugeLength total_backed = NHugePages(0); + HugeLength total_free_backed = NHugePages(0); - for (Region *region : list_) { + for (Region* region : list_) { region->Print(out); total_free += region->free_pages(); total_backed += region->backed(); + total_free_backed += region->free_backed(); } - out->printf("HugeRegionSet: %zu hugepages backed out of %zu total\n", - total_backed.raw_num(), Region::size().raw_num() * n_); + out.printf( + "HugeRegionSet: %zu hugepages backed, %zu backed and free, " + "out of %zu total\n", + total_backed.raw_num(), total_free_backed.raw_num(), + Region::size().raw_num() * n_); const Length in_pages = total_backed.in_pages(); - out->printf("HugeRegionSet: %zu pages free in backed region, %.4f free\n", - total_free.raw_num(), - in_pages > Length(0) ? static_cast(total_free.raw_num()) / - static_cast(in_pages.raw_num()) - : 0.0); + out.printf("HugeRegionSet: %zu pages free in backed region, %.4f free\n", + total_free.raw_num(), + in_pages > Length(0) ? static_cast(total_free.raw_num()) / + static_cast(in_pages.raw_num()) + : 0.0); + + // Subrelease telemetry. 
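// For illustration only: a sketch of the periodic, fraction-based release
// path implemented in ReleasePages() above, as it might be driven from a
// background maintenance tick. The function name and cadence are
// hypothetical; kFractionToReleaseFromRegion and ReleasePages() are the
// entities defined in this header.
template <class Region>
void BackgroundReleaseTickSketch(HugeRegionSet<Region>& set) {
  // Release up to 10% (kFractionToReleaseFromRegion) of the pages that sit on
  // fully-free but still-backed hugepages.
  Length released = set.ReleasePages(kFractionToReleaseFromRegion);
  (void)released;
}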
+ out.printf( + "HugeRegion: Since startup, %zu pages subreleased, %zu hugepages " + "broken, (%zu pages, %zu hugepages due to reaching tcmalloc limit)\n", + subrelease_stats_.total_pages_subreleased.raw_num(), + subrelease_stats_.total_hugepages_broken.raw_num(), + subrelease_stats_.total_pages_subreleased_due_to_limit.raw_num(), + subrelease_stats_.total_hugepages_broken_due_to_limit.raw_num()); + + regionstats_tracker_.Print(out, "HugeRegion"); } template -inline void HugeRegionSet::PrintInPbtxt(PbtxtRegion *hpaa) const { - hpaa->PrintI64("min_huge_region_alloc_size", 1024 * 1024); - hpaa->PrintI64("huge_region_size", Region::size().in_bytes()); - for (Region *region : list_) { - auto detail = hpaa->CreateSubRegion("huge_region_details"); - region->PrintInPbtxt(&detail); +inline void HugeRegionSet::PrintInPbtxt(PbtxtRegion& hpaa) const { + hpaa.PrintI64("min_huge_region_alloc_size", 1024 * 1024); + hpaa.PrintI64("huge_region_size", Region::size().in_bytes()); + for (Region* region : list_) { + auto detail = hpaa.CreateSubRegion("huge_region_details"); + region->PrintInPbtxt(detail); } + + hpaa.PrintI64("region_num_pages_subreleased", + subrelease_stats_.total_pages_subreleased.raw_num()); + hpaa.PrintI64( + "region_num_pages_subreleased_due_to_limit", + subrelease_stats_.total_pages_subreleased_due_to_limit.raw_num()); + + regionstats_tracker_.PrintSubreleaseStatsInPbtxt(hpaa, + "region_skipped_subrelease"); + regionstats_tracker_.PrintTimeseriesStatsInPbtxt(hpaa, + "region_stats_timeseries"); } template -inline void HugeRegionSet::AddSpanStats(SmallSpanStats *small, - LargeSpanStats *large, - PageAgeHistograms *ages) const { - for (Region *region : list_) { - region->AddSpanStats(small, large, ages); +inline void HugeRegionSet::AddSpanStats(SmallSpanStats* small, + LargeSpanStats* large) const { + for (Region* region : list_) { + region->AddSpanStats(small, large); } } +template +inline size_t HugeRegionSet::ActiveRegions() const { + return n_; +} + template inline BackingStats HugeRegionSet::stats() const { BackingStats stats; - for (Region *region : list_) { + for (Region* region : list_) { stats += region->stats(); } return stats; } +template +inline HugeLength HugeRegionSet::free_backed() const { + HugeLength pages; + for (Region* region : list_) { + pages += region->free_backed(); + } + + return pages; +} + } // namespace tcmalloc_internal } // namespace tcmalloc GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_region_fuzz.cc b/contrib/libs/tcmalloc/tcmalloc/huge_region_fuzz.cc new file mode 100644 index 000000000000..5a299e20cc81 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/huge_region_fuzz.cc @@ -0,0 +1,262 @@ +// Copyright 2022 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
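// The fuzzer below supplies its own MemoryModifyFunction (MockUnback). For
// reference, a minimal always-succeeding implementation of that interface,
// with the same operator()(Range) signature the overrides below use, could
// look like this (the class name is hypothetical):
class AlwaysSucceedUnbackSketch final : public MemoryModifyFunction {
 public:
  [[nodiscard]] bool operator()(Range r) override {
    // Pretend the pages in r were returned to the OS and report success.
    (void)r;
    return true;
  }
};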
+ +#include +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "fuzztest/fuzztest.h" +#include "absl/base/attributes.h" +#include "absl/container/flat_hash_set.h" +#include "absl/log/check.h" +#include "tcmalloc/huge_cache.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/huge_region.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/pages.h" +#include "tcmalloc/stats.h" + +namespace tcmalloc::tcmalloc_internal { +namespace { + +class MockUnback final : public MemoryModifyFunction { + public: + [[nodiscard]] bool operator()(Range r) override { + release_callback_(); + + if (!unback_success_) { + return false; + } + + PageId end = r.p + r.n; + for (; r.p != end; ++r.p) { + released_.insert(r.p); + } + + return true; + } + + absl::flat_hash_set released_; + bool unback_success_ = true; + std::function release_callback_; +}; + +void FuzzRegion(const std::string& s) { + const char* data = s.data(); + size_t size = s.size(); + if (size < 4) { + return; + } + // data[0][0] - Simulate reentrancy from release. + // data[1...3] - Reserved + // + // TODO(b/271282540): Convert these to strongly typed fuzztest parameters. + const bool reentrant_release = data[0] & 0x1; + + data += 4; + size -= 4; + + const HugePage start = + HugePageContaining(reinterpret_cast(0x1faced200000)); + MockUnback unback; + HugeRegion region({start, region.size()}, unback); + + unback.released_.reserve(region.size().in_pages().raw_num()); + for (PageId p = start.first_page(), end = p + region.size().in_pages(); + p != end; ++p) { + unback.released_.insert(p); + } + + std::vector allocs; + std::vector> reentrant; + + std::string output; + output.resize(1 << 20); + + auto run_dsl = [&](const char* data, size_t size) { + for (size_t i = 0; i + 9 <= size; i += 9) { + const uint8_t op = data[i]; + uint64_t value; + memcpy(&value, &data[i + 1], sizeof(value)); + + switch (op & 0x7) { + case 0: { + // Allocate. + // + // value[0:17] - Length to allocate + const Length n = Length(std::max(value & ((1 << 18) - 1), 1)); + PageId p; + bool from_released; + if (!region.MaybeGet(n, &p, &from_released)) { + continue; + } + + allocs.emplace_back(p, n); + + if (from_released) { + bool did_release = false; + + for (PageId q = p, end = p + n; q != end; ++q) { + auto it = unback.released_.find(q); + if (it != unback.released_.end()) { + unback.released_.erase(it); + did_release = true; + } + } + + CHECK(did_release); + } + + break; + } + case 1: { + // Deallocate. + // + // value[0:17] - Index of allocs to remove. + // value[18] - Release + if (allocs.empty()) { + continue; + } + + int index = value & ((1 << 18) - 1); + const bool release = (value >> 18) & 0x1; + index %= allocs.size(); + + auto alloc = allocs[index]; + using std::swap; + swap(allocs[index], allocs.back()); + allocs.resize(allocs.size() - 1); + + region.Put(alloc, release); + break; + } + case 2: { + // Release + // value[0:17] - Length to release. 
+ const Length len = Length(value & ((1 << 18) - 1)); + const HugeLength max_expected = + std::min(region.free_backed(), HLFromPages(len)); + + const HugeLength actual = region.Release(len); + if (unback.unback_success_) { + if (max_expected > NHugePages(0) && len > Length(0)) { + TC_CHECK_GT(actual, NHugePages(0)); + } + TC_CHECK_LE(actual, max_expected); + } else { + TC_CHECK_EQ(actual, NHugePages(0)); + } + break; + } + case 3: { + // Stats + region.stats(); + SmallSpanStats small; + LargeSpanStats large; + region.AddSpanStats(&small, &large); + break; + } + case 4: { + // Toggle + unback.unback_success_ = !unback.unback_success_; + break; + } + case 5: { + // Not quite a runtime parameter: Interpret value as a subprogram + // in our dsl. + size_t subprogram = std::min(size - i - 9, value); + if (subprogram < 9) { + break; + } + reentrant.emplace_back(data + i + 9, subprogram); + i += size; + break; + } + case 6: { + // Gather stats in pbtxt format. + // + // value is unused. + Printer p(&output[0], output.size()); + { + PbtxtRegion r(p, kTop); + region.PrintInPbtxt(r); + } + CHECK_LE(p.SpaceRequired(), output.size()); + break; + } + case 7: { + // Print stats. + // + // value is unused. + Printer p(&output[0], output.size()); + region.Print(p); + break; + } + } + } + }; + + unback.release_callback_ = [&]() { + if (!reentrant_release) { + return; + } + + if (reentrant.empty()) { + return; + } + + ABSL_CONST_INIT static int depth = 0; + if (depth >= 5) { + return; + } + + auto [data, size] = reentrant.back(); + reentrant.pop_back(); + + depth++; + run_dsl(data, size); + depth--; + }; + + run_dsl(data, size); + + // Stop recursing, since region.Put below might cause us to "release" + // more pages to the system. + reentrant.clear(); + + for (const auto& alloc : allocs) { + region.Put(alloc, false); + } +} + +FUZZ_TEST(HugeRegionTest, FuzzRegion) + ; + +TEST(HugeRegionTest, b339521569) { + FuzzRegion(std::string( + "L\220\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" + "\000\000\000\000\000\301\233", + 115)); +} + +} // namespace +} // namespace tcmalloc::tcmalloc_internal diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_region_test.cc b/contrib/libs/tcmalloc/tcmalloc/huge_region_test.cc index 4370b9276249..9565e10666cc 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_region_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/huge_region_test.cc @@ -14,65 +14,72 @@ #include "tcmalloc/huge_region.h" +#include +#include #include -#include #include +#include +#include #include +#include +#include #include #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "absl/base/thread_annotations.h" +#include "absl/container/fixed_array.h" +#include "absl/memory/memory.h" #include "absl/random/random.h" +#include "absl/synchronization/mutex.h" #include "absl/time/clock.h" #include "absl/time/time.h" +#include "absl/types/span.h" #include "tcmalloc/common.h" +#include "tcmalloc/huge_cache.h" +#include "tcmalloc/huge_page_subrelease.h" #include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/clock.h" #include "tcmalloc/internal/logging.h" +#include "tcmalloc/pages.h" #include "tcmalloc/stats.h" 
+#include "tcmalloc/testing/thread_manager.h" namespace tcmalloc { namespace tcmalloc_internal { namespace { using testing::NiceMock; -using testing::StrictMock; +using testing::Return; class HugeRegionTest : public ::testing::Test { protected: HugeRegionTest() - : // an unlikely magic page - p_(HugePageContaining(reinterpret_cast(0x1faced200000))), - region_({p_, region_.size()}, MockUnback) { + : mock_(std::make_unique>()), + // an unlikely magic page + p_(HugePageContaining(reinterpret_cast(0x1faced200000))), + region_({p_, region_.size()}, *mock_) { // we usually don't care about backing calls, unless testing that // specifically. - mock_ = absl::make_unique>(); } ~HugeRegionTest() override { mock_.reset(nullptr); } - // This is wordy, but necessary for mocking: - class BackingInterface { + class MockBackingInterface : public MemoryModifyFunction { public: - virtual void Unback(void *p, size_t len) = 0; - virtual ~BackingInterface() {} - }; + MOCK_METHOD(bool, Unback, (PageId p, Length len), ()); - class MockBackingInterface : public BackingInterface { - public: - MOCK_METHOD2(Unback, void(void *p, size_t len)); + bool operator()(Range r) override { return Unback(r.p, r.n); } }; - static std::unique_ptr mock_; - - static void MockUnback(void *p, size_t len) { mock_->Unback(p, len); } + std::unique_ptr mock_; void CheckMock() { testing::Mock::VerifyAndClearExpectations(mock_.get()); } - void ExpectUnback(HugeRange r) { - void *ptr = r.start_addr(); - size_t bytes = r.byte_len(); - EXPECT_CALL(*mock_, Unback(ptr, bytes)).Times(1); + void ExpectUnback(HugeRange r, bool success = true) { + EXPECT_CALL(*mock_, Unback(r.start().first_page(), r.len().in_pages())) + .WillOnce(Return(success)); } struct Alloc { @@ -112,9 +119,9 @@ class HugeRegionTest : public ::testing::Test { return Allocate(n, &from_released); } - Alloc Allocate(Length n, bool *from_released) { + Alloc Allocate(Length n, bool* from_released) { Alloc ret; - CHECK_CONDITION(region_.MaybeGet(n, &ret.p, from_released)); + TC_CHECK(region_.MaybeGet(n, &ret.p, from_released)); ret.n = n; ret.mark = ++next_mark_; Mark(ret); @@ -123,17 +130,15 @@ class HugeRegionTest : public ::testing::Test { void Delete(Alloc a) { Check(a); - region_.Put(a.p, a.n, false); + region_.Put(Range(a.p, a.n), false); } void DeleteUnback(Alloc a) { Check(a); - region_.Put(a.p, a.n, true); + region_.Put(Range(a.p, a.n), true); } }; -std::unique_ptr HugeRegionTest::mock_; - TEST_F(HugeRegionTest, Basic) { Length total; std::vector allocs; @@ -195,8 +200,31 @@ TEST_F(HugeRegionTest, ReqsBacking) { } } +TEST_F(HugeRegionTest, ReleaseFrac) { + const Length n = kPagesPerHugePage; + bool from_released; + auto a = Allocate(n * 20, &from_released); + EXPECT_TRUE(from_released); + + Delete(a); + ExpectUnback({p_ + NHugePages(0), NHugePages(2)}); + EXPECT_EQ(NHugePages(2), region_.Release(NHugePages(2).in_pages())); + CheckMock(); + + ExpectUnback({p_ + NHugePages(2), NHugePages(1)}); + EXPECT_EQ(NHugePages(1), region_.Release(NHugePages(1).in_pages())); + CheckMock(); + + ExpectUnback({p_ + NHugePages(3), NHugePages(8)}); + EXPECT_EQ(NHugePages(8), region_.Release(NHugePages(8).in_pages())); + CheckMock(); + + ExpectUnback({p_ + NHugePages(11), NHugePages(9)}); + EXPECT_EQ(NHugePages(9), region_.Release(NHugePages(9).in_pages())); + CheckMock(); +} + TEST_F(HugeRegionTest, Release) { - mock_ = absl::make_unique>(); const Length n = kPagesPerHugePage; bool from_released; auto a = Allocate(n * 4 - Length(1), &from_released); @@ -220,18 +248,18 @@ 
TEST_F(HugeRegionTest, Release) { // overlap with others. Delete(b); ExpectUnback({p_ + NHugePages(4), NHugePages(2)}); - EXPECT_EQ(NHugePages(2), region_.Release()); + EXPECT_EQ(NHugePages(2), region_.Release(NHugePages(2).in_pages())); CheckMock(); // Now we're on exact boundaries so we should unback the whole range. Delete(d); ExpectUnback({p_ + NHugePages(12), NHugePages(2)}); - EXPECT_EQ(NHugePages(2), region_.Release()); + EXPECT_EQ(NHugePages(2), region_.Release(NHugePages(2).in_pages())); CheckMock(); Delete(a); ExpectUnback({p_ + NHugePages(0), NHugePages(4)}); - EXPECT_EQ(NHugePages(4), region_.Release()); + EXPECT_EQ(NHugePages(4), region_.Release(NHugePages(4).in_pages())); CheckMock(); // Should work just as well with aggressive Put(): @@ -247,8 +275,30 @@ TEST_F(HugeRegionTest, Release) { CheckMock(); } +TEST_F(HugeRegionTest, ReleaseFailure) { + const Length n = kPagesPerHugePage; + bool from_released; + auto a = Allocate(n * 4 - Length(1), &from_released); + EXPECT_TRUE(from_released); + EXPECT_EQ(NHugePages(4), region_.backed()); + + // Don't unback the first or last hugepage this touches -- since they + // overlap with others. + Delete(a); + ExpectUnback({p_, NHugePages(4)}, false); + EXPECT_EQ(NHugePages(0), region_.Release(NHugePages(4).in_pages())); + EXPECT_EQ(NHugePages(4), region_.backed()); + CheckMock(); + + // Reallocate. + a = Allocate(n * 4 - Length(1), &from_released); + EXPECT_FALSE(from_released); + Delete(a); + + EXPECT_EQ(NHugePages(4), region_.backed()); +} + TEST_F(HugeRegionTest, Reback) { - mock_ = absl::make_unique>(); const Length n = kPagesPerHugePage / 4; bool from_released; // Even in back/unback cycles we should still call the functions @@ -275,18 +325,151 @@ TEST_F(HugeRegionTest, Reback) { } } +class MemorySimulation final : public MemoryModifyFunction { + public: + MemorySimulation(absl::Mutex& mu, PageId base, + absl::Span> bytes) + : mu_(mu), base_(base), bytes_(bytes) {} + + bool operator()(Range r) override ABSL_NO_THREAD_SAFETY_ANALYSIS { + // TODO(b/73749855): Simulate with unlocking. 
+ mu_.AssertHeld(); + + size_t index = (r.p - base_).raw_num(); + for (size_t i = 0, n = r.n.raw_num(); i < n; ++i) { + bytes_[index + i].store(0, std::memory_order_release); + } + + return true; + } + + private: + absl::Mutex& mu_; + PageId base_; + absl::Span> bytes_; +}; + +TEST_F(HugeRegionTest, ReleaseFuzz) { + absl::Mutex mu; + absl::FixedArray> bytes( + region_.size().in_pages().raw_num()); + + MemorySimulation simulation(mu, p_.first_page(), absl::MakeSpan(bytes)); + + region_.~HugeRegion(); + new (®ion_) HugeRegion({p_, region_.size()}, simulation); + + const int kThreads = 10; + std::vector rngs(kThreads); + + absl::Mutex state_mu; + struct FuzzAlloc { + int tid; + Range r; + }; + std::vector allocs; + + ThreadManager threads; + threads.Start(kThreads, [&](int tid) { + switch (absl::Uniform(rngs[tid], 0, 4)) { + case 0: { + const size_t n = + absl::Uniform(rngs[tid], 1u, region_.size().in_pages().raw_num()); + + FuzzAlloc f; + f.tid = tid; + f.r.n = Length(n); + bool from_released; + { + absl::MutexLock l(&mu); + if (!region_.MaybeGet(f.r.n, &f.r.p, &from_released)) { + break; + } + } + + const size_t base = (f.r.p - p_.first_page()).raw_num(); + for (size_t i = 0; i < n; ++i) { + const int old_val = + bytes[base + i].exchange(tid, std::memory_order_acq_rel); + TC_CHECK_EQ(old_val, 0); + } + + { + absl::MutexLock l(&state_mu); + allocs.push_back(f); + } + break; + } + case 1: { + FuzzAlloc f; + + { + absl::MutexLock l(&state_mu); + if (allocs.empty()) { + break; + } + + const size_t index = absl::Uniform(rngs[tid], 0u, allocs.size()); + f = allocs[index]; + std::swap(allocs[index], allocs.back()); + allocs.resize(allocs.size() - 1); + } + + const size_t base = (f.r.p - p_.first_page()).raw_num(); + for (size_t i = 0; i < f.r.n.raw_num(); ++i) { + const int old_val = + bytes[base + i].exchange(0, std::memory_order_acq_rel); + TC_CHECK_EQ(old_val, f.tid); + } + + absl::MutexLock l(&mu); + region_.Put(f.r, false); + break; + } + case 2: { + absl::MutexLock l(&state_mu); + if (allocs.empty()) { + break; + } + + const size_t index = absl::Uniform(rngs[tid], 0u, allocs.size()); + FuzzAlloc f = allocs[index]; + + const size_t base = (f.r.p - p_.first_page()).raw_num(); + for (size_t i = 0; i < f.r.n.raw_num(); ++i) { + const int val = bytes[base + i].load(std::memory_order_acquire); + TC_CHECK_EQ(val, f.tid); + } + + break; + } + case 3: { + const Length to_release = Length( + absl::Uniform(rngs[tid], 0u, region_.size().in_pages().raw_num())); + + absl::MutexLock l(&mu); + region_.Release(to_release); + + break; + } + } + }); + + absl::SleepFor(absl::Seconds(1)); + + threads.Stop(); +} + TEST_F(HugeRegionTest, Stats) { const Length kLen = region_.size().in_pages(); const size_t kBytes = kLen.in_bytes(); struct Helper { - static void Stat(const Region ®ion, std::vector *small_backed, - std::vector *small_unbacked, LargeSpanStats *large, - BackingStats *stats, double *avg_age_backed, - double *avg_age_unbacked) { + static void Stat(const Region& region, std::vector* small_backed, + std::vector* small_unbacked, LargeSpanStats* large, + BackingStats* stats) { SmallSpanStats small; *large = LargeSpanStats(); - PageAgeHistograms ages(absl::base_internal::CycleClock::Now()); - region.AddSpanStats(&small, large, &ages); + region.AddSpanStats(&small, large); small_backed->clear(); small_unbacked->clear(); for (auto i = Length(0); i < kMaxPages; ++i) { @@ -300,20 +483,14 @@ TEST_F(HugeRegionTest, Stats) { } *stats = region.stats(); - - *avg_age_backed = ages.GetTotalHistogram(false)->avg_age(); - 
*avg_age_unbacked = ages.GetTotalHistogram(true)->avg_age(); } }; LargeSpanStats large; std::vector small_backed, small_unbacked; BackingStats stats; - double avg_age_backed, avg_age_unbacked; - absl::SleepFor(absl::Milliseconds(10)); - Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats, - &avg_age_backed, &avg_age_unbacked); + Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats); EXPECT_THAT(small_backed, testing::ElementsAre()); EXPECT_THAT(small_unbacked, testing::ElementsAre()); EXPECT_EQ(1, large.spans); @@ -322,8 +499,6 @@ TEST_F(HugeRegionTest, Stats) { EXPECT_EQ(kBytes, stats.system_bytes); EXPECT_EQ(0, stats.free_bytes); EXPECT_EQ(kBytes, stats.unmapped_bytes); - EXPECT_LE(0.01, avg_age_unbacked); - EXPECT_EQ(0, avg_age_backed); // We don't, in production, use small allocations from the region, but // the API supports it, so test it here. @@ -335,9 +510,7 @@ TEST_F(HugeRegionTest, Stats) { Allocate(Length(1)); const Length slack = kPagesPerHugePage - Length(9); - absl::SleepFor(absl::Milliseconds(20)); - Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats, - &avg_age_backed, &avg_age_unbacked); + Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats); EXPECT_THAT(small_backed, testing::ElementsAre()); EXPECT_THAT(small_unbacked, testing::ElementsAre()); EXPECT_EQ(2, large.spans); @@ -346,13 +519,9 @@ TEST_F(HugeRegionTest, Stats) { EXPECT_EQ(kBytes, stats.system_bytes); EXPECT_EQ(slack.in_bytes(), stats.free_bytes); EXPECT_EQ((region_.size() - NHugePages(1)).in_bytes(), stats.unmapped_bytes); - EXPECT_LE(0.02, avg_age_backed); - EXPECT_LE(0.03, avg_age_unbacked); Delete(a); - absl::SleepFor(absl::Milliseconds(30)); - Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats, - &avg_age_backed, &avg_age_unbacked); + Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats); EXPECT_THAT(small_backed, testing::ElementsAre(Length(1))); EXPECT_THAT(small_unbacked, testing::ElementsAre()); EXPECT_EQ(2, large.spans); @@ -361,14 +530,9 @@ TEST_F(HugeRegionTest, Stats) { EXPECT_EQ(kBytes, stats.system_bytes); EXPECT_EQ((slack + Length(1)).in_bytes(), stats.free_bytes); EXPECT_EQ((region_.size() - NHugePages(1)).in_bytes(), stats.unmapped_bytes); - EXPECT_LE((slack.raw_num() * 0.05 + 1 * 0.03) / (slack.raw_num() + 1), - avg_age_backed); - EXPECT_LE(0.06, avg_age_unbacked); Delete(b); - absl::SleepFor(absl::Milliseconds(40)); - Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats, - &avg_age_backed, &avg_age_unbacked); + Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats); EXPECT_THAT(small_backed, testing::ElementsAre(Length(1), Length(2))); EXPECT_THAT(small_unbacked, testing::ElementsAre()); EXPECT_EQ(2, large.spans); @@ -377,15 +541,9 @@ TEST_F(HugeRegionTest, Stats) { EXPECT_EQ(kBytes, stats.system_bytes); EXPECT_EQ((slack + Length(3)).in_bytes(), stats.free_bytes); EXPECT_EQ((region_.size() - NHugePages(1)).in_bytes(), stats.unmapped_bytes); - EXPECT_LE( - (slack.raw_num() * 0.09 + 1 * 0.07 + 2 * 0.04) / (slack.raw_num() + 3), - avg_age_backed); - EXPECT_LE(0.10, avg_age_unbacked); Delete(c); - absl::SleepFor(absl::Milliseconds(50)); - Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats, - &avg_age_backed, &avg_age_unbacked); + Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats); EXPECT_THAT(small_backed, testing::ElementsAre(Length(1), Length(2), Length(3))); EXPECT_THAT(small_unbacked, testing::ElementsAre()); @@ -395,15 +553,9 
@@ TEST_F(HugeRegionTest, Stats) { EXPECT_EQ(kBytes, stats.system_bytes); EXPECT_EQ((slack + Length(6)).in_bytes(), stats.free_bytes); EXPECT_EQ((region_.size() - NHugePages(1)).in_bytes(), stats.unmapped_bytes); - EXPECT_LE((slack.raw_num() * 0.14 + 1 * 0.12 + 2 * 0.09 + 3 * 0.05) / - (slack.raw_num() + 6), - avg_age_backed); - EXPECT_LE(0.15, avg_age_unbacked); Delete(barrier); - absl::SleepFor(absl::Milliseconds(60)); - Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats, - &avg_age_backed, &avg_age_unbacked); + Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats); EXPECT_THAT(small_backed, testing::ElementsAre(Length(1), Length(6))); EXPECT_THAT(small_unbacked, testing::ElementsAre()); EXPECT_EQ(2, large.spans); @@ -412,11 +564,6 @@ TEST_F(HugeRegionTest, Stats) { EXPECT_EQ(kBytes, stats.system_bytes); EXPECT_EQ((slack + Length(7)).in_bytes(), stats.free_bytes); EXPECT_EQ((region_.size() - NHugePages(1)).in_bytes(), stats.unmapped_bytes); - EXPECT_LE( - (slack.raw_num() * 0.20 + 1 * 0.18 + 2 * 0.15 + 3 * 0.11 + 1 * 0.06) / - (slack.raw_num() + 7), - avg_age_backed); - EXPECT_LE(0.21, avg_age_unbacked); } // Test that free regions are broken down properly when they cross @@ -429,11 +576,13 @@ TEST_F(HugeRegionTest, StatBreakdown) { Alloc d = Allocate(n - (n / 5) - Length(1)); // This unbacks the middle 2 hugepages, but not the beginning or // trailing region + ExpectUnback( + HugeRange::Make(HugePageContaining(b.p) + NHugePages(1), NHugePages(2))); DeleteUnback(b); Delete(c); SmallSpanStats small; LargeSpanStats large; - region_.AddSpanStats(&small, &large, nullptr); + region_.AddSpanStats(&small, &large); // Backed beginning of hugepage 0, unbacked range in middle of b, // long backed range from c, unbacked tail of allocation. EXPECT_EQ(4, large.spans); @@ -450,31 +599,439 @@ TEST_F(HugeRegionTest, StatBreakdown) { Delete(d); } -static void NilUnback(void *p, size_t bytes) {} +TEST_F(HugeRegionTest, StatBreakdownReleaseFailure) { + const Length n = kPagesPerHugePage; + Alloc a = Allocate(n / 4); + Alloc b = Allocate(n * 3 + n / 3); + Alloc c = Allocate((n - n / 3 - n / 4) + n * 5 + n / 5); + Alloc d = Allocate(n - (n / 5) - Length(1)); + // This tries to unback the middle 2 hugepages, but not the beginning or + // trailing region, but fails. + ExpectUnback( + HugeRange::Make(HugePageContaining(b.p) + NHugePages(1), NHugePages(2)), + /*success=*/false); + DeleteUnback(b); + Delete(c); + SmallSpanStats small; + LargeSpanStats large; + region_.AddSpanStats(&small, &large); + // Backed beginning of hugepage A/B/C/D and the unbacked tail of allocation. + EXPECT_EQ(2, large.spans); + // Tail end of A's page, all of B, all of C. + EXPECT_EQ((n - n / 4) + n * 8 + (n / 5), large.normal_pages); + // The above fill up 10 total pages. 
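The expectations in the StatBreakdown tests above reduce to a simple bookkeeping rule: walk the region page by page, group maximal runs of equal state, and attribute free runs to either backed ("normal") or returned pages. The sketch below is a simplified model of that accounting, not tcmalloc's AddSpanStats; PageState, ToyStats, and Accumulate are invented names, and short released runs are lumped into the large bucket to keep the sketch small.

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Invented names; tcmalloc's real accounting lives in HugeRegion::AddSpanStats
    // and fills SmallSpanStats / LargeSpanStats.
    enum class PageState { kUsed, kFreeBacked, kFreeReleased };

    struct ToyStats {
      int large_spans = 0;
      std::size_t normal_pages = 0;           // free and still backed
      std::size_t returned_pages = 0;         // free and returned to the OS
      std::vector<std::size_t> small_normal;  // histogram of short backed runs
    };

    ToyStats Accumulate(const std::vector<PageState>& pages,
                        std::size_t small_limit) {
      ToyStats s;
      s.small_normal.assign(small_limit, 0);
      for (std::size_t i = 0; i < pages.size();) {
        std::size_t j = i;
        while (j < pages.size() && pages[j] == pages[i]) ++j;  // maximal run
        const std::size_t len = j - i;
        if (pages[i] == PageState::kFreeBacked && len < small_limit) {
          ++s.small_normal[len];
        } else if (pages[i] != PageState::kUsed) {
          ++s.large_spans;
          (pages[i] == PageState::kFreeBacked ? s.normal_pages
                                              : s.returned_pages) += len;
        }
        i = j;
      }
      return s;
    }

    int main() {
      // One used page, one backed free page, then three released pages.
      std::vector<PageState> pages = {
          PageState::kUsed, PageState::kFreeBacked, PageState::kFreeReleased,
          PageState::kFreeReleased, PageState::kFreeReleased};
      ToyStats s = Accumulate(pages, /*small_limit=*/2);
      std::printf("large=%d normal=%zu returned=%zu small[1]=%zu\n",
                  s.large_spans, s.normal_pages, s.returned_pages,
                  s.small_normal[1]);
    }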
+ EXPECT_EQ((Region::size().raw_num() - 10) * n, large.returned_pages); + EXPECT_EQ(1, small.normal_length[1]); -class HugeRegionSetTest : public testing::Test { + EXPECT_EQ(Length(1) + large.normal_pages + large.returned_pages + + region_.used_pages(), + Region::size().in_pages()); + Delete(a); + Delete(d); +} + +class NilUnback final : public MemoryModifyFunction { + public: + bool operator()(Range r) override { return true; } +}; + +class HugeRegionSetTest + : public ::testing::TestWithParam { protected: typedef HugeRegion Region; - HugeRegionSetTest() { next_ = HugePageContaining(nullptr); } + static int64_t FakeClock() { return clock_; } + static double GetFakeClockFrequency() { + return absl::ToDoubleNanoseconds(absl::Seconds(2)); + } + static void Advance(absl::Duration d) { + clock_ += absl::ToDoubleSeconds(d) * GetFakeClockFrequency(); + } + + HugeRegionSetTest() + : set_(/*use_huge_region_more_often=*/GetParam(), + Clock{.now = FakeClock, .freq = GetFakeClockFrequency}) { + next_ = HugePageContaining(nullptr); + } std::unique_ptr GetRegion() { // These regions are backed by "real" memory, but we don't touch it. - std::unique_ptr r(new Region({next_, Region::size()}, NilUnback)); + std::unique_ptr r(new Region({next_, Region::size()}, nil_unback_)); next_ += Region::size(); return r; } - HugeRegionSet set_; - HugePage next_; - struct Alloc { PageId p; Length n; }; + + Alloc Allocate(Length n) { + bool from_released; + return Allocate(n, &from_released); + } + + Alloc Allocate(Length n, bool* from_released) { + Alloc ret; + TC_CHECK(set_.MaybeGet(n, &ret.p, from_released)); + ret.n = n; + return ret; + } + + void Delete(Alloc a) { TC_CHECK(set_.MaybePut(Range(a.p, a.n))); } + + Length ReleasePagesByPeakDemand(Length desired, + SkipSubreleaseIntervals intervals = {}, + bool hit_limit = false) { + return set_.ReleasePagesByPeakDemand(desired, intervals, hit_limit); + } + + Length HardReleasePages(Length desired) { + return set_.ReleasePagesByPeakDemand(desired, SkipSubreleaseIntervals{}, + /*hit_limit=*/true); + } + + bool UseHugeRegionMoreOften() const { return set_.UseHugeRegionMoreOften(); } + + NilUnback nil_unback_; + HugeRegionSet set_; + HugePage next_; + + static int64_t clock_; }; -TEST_F(HugeRegionSetTest, Set) { +int64_t HugeRegionSetTest::clock_{1234}; + +TEST_P(HugeRegionSetTest, Release) { + absl::BitGen rng; + PageId p; + constexpr Length kSize = kPagesPerHugePage + Length(1); + bool from_released; + ASSERT_FALSE(set_.MaybeGet(Length(1), &p, &from_released)); + auto r1 = GetRegion(); + set_.Contribute(r1.get()); + + std::vector allocs; + + while (set_.MaybeGet(kSize, &p, &from_released)) { + allocs.push_back({p, kSize}); + } + BackingStats stats = set_.stats(); + EXPECT_EQ(stats.unmapped_bytes, 0); + + for (auto a : allocs) { + ASSERT_TRUE(set_.MaybePut(Range(a.p, a.n))); + } + + stats = set_.stats(); + EXPECT_EQ(stats.unmapped_bytes, + UseHugeRegionMoreOften() ? 0 : stats.system_bytes); + // All the huge pages in the region would be free, but backed, when + // huge-region-more-often feature is enabled. + EXPECT_EQ(r1->free_backed().raw_num(), + UseHugeRegionMoreOften() ? Region::size().raw_num() : 0); + Length released = set_.ReleasePages(/*release_fraction=*/1.0); + stats = set_.stats(); + EXPECT_EQ(released.in_bytes(), + UseHugeRegionMoreOften() ? 
stats.system_bytes : 0); + EXPECT_EQ(r1->free_backed().in_bytes(), 0); + EXPECT_EQ(stats.unmapped_bytes, stats.system_bytes); +} + +// Tests that HugeRegions releases all pages in free hugepages when +// skip-subrelease intervals are not set. +TEST_P(HugeRegionSetTest, ReleasePagesWithoutIntervals) { + absl::BitGen rng; + PageId p; + constexpr Length kSize = kPagesPerHugePage + Length(1); + bool from_released; + ASSERT_FALSE(set_.MaybeGet(Length(1), &p, &from_released)); + auto r1 = GetRegion(); + set_.Contribute(r1.get()); + + std::vector allocs; + + while (set_.MaybeGet(kSize, &p, &from_released)) { + allocs.push_back({p, kSize}); + } + BackingStats stats = set_.stats(); + EXPECT_EQ(stats.unmapped_bytes, 0); + + for (auto a : allocs) { + ASSERT_TRUE(set_.MaybePut(Range(a.p, a.n))); + } + + stats = set_.stats(); + EXPECT_EQ(stats.unmapped_bytes, + UseHugeRegionMoreOften() ? 0 : stats.system_bytes); + // All the huge pages in the region would be free, but backed, when + // huge-region-more-often feature is enabled. + EXPECT_EQ(r1->free_backed().raw_num(), + UseHugeRegionMoreOften() ? Region::size().raw_num() : 0); + Length released = ReleasePagesByPeakDemand(r1->free_backed().in_pages()); + stats = set_.stats(); + EXPECT_EQ(released.in_bytes(), + UseHugeRegionMoreOften() ? stats.system_bytes : 0); + EXPECT_EQ(r1->free_backed().in_bytes(), 0); + EXPECT_EQ(stats.unmapped_bytes, stats.system_bytes); +} + +// Tests that releasing zero pages is allowed and does not crash. +TEST_P(HugeRegionSetTest, ReleaseZero) { + // Trying to release no pages should not crash. + EXPECT_EQ(ReleasePagesByPeakDemand( + Length(0), + SkipSubreleaseIntervals{.short_interval = absl::Seconds(10), + .long_interval = absl::Seconds(600)}), + Length(0)); +} + +// Tests that HugeRegions releases fraction of hugepages when desired pages is +// set to zero. Because HugeRegion releases complete hugepages, for cases when +// malloc release rate is set to zero, this ensures that we still release a +// fraction of free hugepages. +TEST_P(HugeRegionSetTest, ReleaseZeroPages) { + absl::BitGen rng; + PageId p; + constexpr Length kSize = kPagesPerHugePage + Length(1); + bool from_released; + ASSERT_FALSE(set_.MaybeGet(Length(1), &p, &from_released)); + auto r1 = GetRegion(); + set_.Contribute(r1.get()); + + std::vector allocs; + + while (set_.MaybeGet(kSize, &p, &from_released)) { + allocs.push_back({p, kSize}); + } + BackingStats stats = set_.stats(); + EXPECT_EQ(stats.unmapped_bytes, 0); + + for (auto a : allocs) { + ASSERT_TRUE(set_.MaybePut(Range(a.p, a.n))); + } + + stats = set_.stats(); + EXPECT_EQ(stats.unmapped_bytes, + UseHugeRegionMoreOften() ? 0 : stats.system_bytes); + // All the huge pages in the region would be free, but backed, when + // huge-region-more-often feature is enabled. + EXPECT_EQ(r1->free_backed().raw_num(), + UseHugeRegionMoreOften() ? Region::size().raw_num() : 0); + HugeLength expected = + UseHugeRegionMoreOften() + ? HugeLength(kFractionToReleaseFromRegion * Region::size().raw_num()) + : NHugePages(0); + Length released = ReleasePagesByPeakDemand(Length(0)); + stats = set_.stats(); + EXPECT_EQ(released.in_bytes(), + UseHugeRegionMoreOften() ? expected.in_bytes() : 0); +} + +// Tests the number of pages that are released for different skip subrelease +// intervals. +TEST_P(HugeRegionSetTest, SkipSubrelease) { + // This test is sensitive to the number of pages per hugepage, as we are + // printing raw stats. 
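The HugeRegionSetTest fixture above injects time through a Clock handle built from two function pointers, FakeClock and GetFakeClockFrequency, and Advance() moves that clock forward without sleeping, which is what lets the interval tests run instantly. A self-contained sketch of the same idea follows; the fake namespace and the ElapsedSeconds helper are invented for illustration.

    #include <cstdint>
    #include <cstdio>

    // Shape of the injected clock handle: a tick counter plus tick frequency,
    // both as plain function pointers.
    struct Clock {
      int64_t (*now)();
      double (*freq)();
    };

    namespace fake {
    int64_t ticks = 1234;
    int64_t Now() { return ticks; }
    double Freq() { return 2.0e9; }  // 2e9 ticks per second, as in the fixture
    // Advance virtual time by `seconds` without sleeping.
    void Advance(double seconds) {
      ticks += static_cast<int64_t>(seconds * Freq());
    }
    }  // namespace fake

    // Converts elapsed ticks back into seconds, the way a time-based release
    // policy would age its demand samples.
    double ElapsedSeconds(const Clock& clock, int64_t start_ticks) {
      return (clock.now() - start_ticks) / clock.freq();
    }

    int main() {
      Clock clock{fake::Now, fake::Freq};
      int64_t start = clock.now();
      fake::Advance(90.0);  // "wait" 90 seconds instantly
      std::printf("elapsed: %.1f s\n", ElapsedSeconds(clock, start));  // 90.0 s
    }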
+ if (kPagesPerHugePage != Length(256)) { + GTEST_SKIP(); + } + + if (!UseHugeRegionMoreOften()) { + GTEST_SKIP() << "Skipping this test as background release is available " + "only when we use huge regions more often."; + } + + auto r1 = GetRegion(); + set_.Contribute(r1.get()); + // Firstly, this test generates a peak (long-term demand peak) and waits for + // time interval a. Then, it generates a higher peak plus a short-term + // fluctuation peak, and waits for time interval b. It then generates a trough + // in demand and tries to subrelease. Finally, it waits for time interval c to + // generate the highest peak for evaluating subrelease correctness. Skip + // subrelease selects those demand points using provided time intervals. + const auto demand_pattern = [&](absl::Duration a, absl::Duration b, + absl::Duration c, + SkipSubreleaseIntervals intervals, + bool release_phase_1, bool release_phase_2) { + const Length N = kPagesPerHugePage; + // First peak: min_demand 3/4N, max_demand 1N. + Alloc peak1a = Allocate(3 * N / 4); + Alloc peak1b = Allocate(N / 4); + Advance(a); + // Second peak: min_demand 0, max_demand 2N. + Delete(peak1a); + Delete(peak1b); + + Alloc half = Allocate(N / 2); + Alloc tiny1 = Allocate(N / 4); + Alloc tiny2 = Allocate(N / 4); + + // To force a peak, we allocate 3/4 and 1/4 of a huge page. This is + // necessary after we delete `half` below, as a half huge page for the + // peak would fill into the gap previously occupied by it. + Alloc peak2a = Allocate(3 * N / 4); + Alloc peak2b = Allocate(N / 4); + // EXPECT_EQ(set_.used_pages(), 2 * N); + Delete(peak2a); + Delete(peak2b); + Advance(b); + Delete(half); + EXPECT_EQ(set_.free_backed(), NHugePages(1)); + // The number of released pages is limited to the number of free pages. + EXPECT_EQ(ReleasePagesByPeakDemand(10 * N, intervals), + release_phase_1 ? kPagesPerHugePage : Length(0)); + // + Advance(c); + // Third peak: min_demand 1/2N, max_demand (2+1/2)N. + Alloc peak3a = Allocate(3 * N / 4); + Alloc peak3b = Allocate(N / 4); + + Alloc peak4a = Allocate(3 * N / 4); + Alloc peak4b = Allocate(N / 4); + + Delete(tiny1); + Delete(tiny2); + Delete(peak3a); + Delete(peak3b); + Delete(peak4a); + Delete(peak4b); + + EXPECT_EQ(set_.free_backed(), NHugePages(3)); + EXPECT_EQ(ReleasePagesByPeakDemand(10 * N, intervals), + release_phase_2 ? NHugePages(3).in_pages() : Length(0)); + HardReleasePages(10 * N); + }; + + { + // Skip subrelease feature is disabled if all intervals are zero. + SCOPED_TRACE("demand_pattern 1"); + demand_pattern(absl::Minutes(1), absl::Minutes(1), absl::Minutes(4), + SkipSubreleaseIntervals{}, + /*release_phase_1=*/true, + /*release_phase_2=*/true); + } + + Advance(absl::Minutes(30)); + + { + // Uses short-term and long-term intervals for skipping subrelease. + SCOPED_TRACE("demand_pattern 2"); + demand_pattern(absl::Minutes(3), absl::Minutes(2), absl::Minutes(7), + SkipSubreleaseIntervals{.short_interval = absl::Minutes(3), + .long_interval = absl::Minutes(6)}, + /*release_phase_1=*/false, + /*release_phase_2=*/false); + } + + Advance(absl::Minutes(30)); + + { + // Uses short-term and long-term intervals for skipping subrelease, + // subreleasing all free pages. 
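Before the remaining demand_pattern cases below, the intuition the intervals encode: a release is skipped when demand observed within the short and long windows suggests the freed pages would soon be needed again, while hit_limit forces the release regardless. The function below is a deliberately simplified illustration of such a gate, not tcmalloc's actual formula; PagesToRelease and all of its parameters and numbers are invented.

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>

    // Simplified gate: keep enough backed-but-free pages to cover the gap
    // between the recent demand peak and current demand; release the surplus.
    std::size_t PagesToRelease(std::size_t desired, std::size_t free_backed,
                               std::size_t current_demand,
                               std::size_t recent_peak_demand, bool hit_limit) {
      if (hit_limit) {
        // Hard release: ignore demand history entirely.
        return std::min(desired, free_backed);
      }
      const std::size_t reserve = recent_peak_demand > current_demand
                                      ? recent_peak_demand - current_demand
                                      : 0;
      const std::size_t releasable =
          free_backed > reserve ? free_backed - reserve : 0;
      return std::min(desired, releasable);
    }

    int main() {
      // 512 free pages, but demand recently peaked 384 pages above current
      // usage: only 128 pages are considered safe to return.
      std::printf("%zu\n",
                  PagesToRelease(/*desired=*/1000, /*free_backed=*/512,
                                 /*current_demand=*/256,
                                 /*recent_peak_demand=*/640,
                                 /*hit_limit=*/false));  // prints 128
    }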
+ SCOPED_TRACE("demand_pattern 3"); + demand_pattern(absl::Minutes(4), absl::Minutes(2), absl::Minutes(3), + SkipSubreleaseIntervals{.short_interval = absl::Minutes(1), + .long_interval = absl::Minutes(2)}, + /*release_phase_1=*/true, + /*release_phase_2=*/false); + } + Advance(absl::Minutes(30)); + + { + // Uses only short-term interval for skipping subrelease. + SCOPED_TRACE("demand_pattern 4"); + demand_pattern(absl::Minutes(4), absl::Minutes(2), absl::Minutes(3), + SkipSubreleaseIntervals{.short_interval = absl::Minutes(3)}, + /*release_phase_1=*/false, + /*release_phase_2=*/false); + } + + Advance(absl::Minutes(30)); + + { + // Uses only long-term interval for skipping subrelease, subreleased all + // free pages. + SCOPED_TRACE("demand_pattern 5"); + demand_pattern(absl::Minutes(4), absl::Minutes(2), absl::Minutes(3), + SkipSubreleaseIntervals{.long_interval = absl::Minutes(2)}, + /*release_phase_1=*/true, + /*release_phase_2=*/true); + } + + Advance(absl::Minutes(30)); + + // This captures a corner case: If we hit another peak immediately after a + // subrelease decision (in the same time series epoch), do not count this as + // a correct subrelease decision. + { + SCOPED_TRACE("demand_pattern 6"); + demand_pattern(absl::Milliseconds(10), absl::Milliseconds(10), + absl::Milliseconds(10), + SkipSubreleaseIntervals{.short_interval = absl::Minutes(1), + .long_interval = absl::Minutes(2)}, + /*release_phase_1=*/false, + /*release_phase_2=*/false); + } + + Advance(absl::Minutes(30)); + + // Ensure that the tracker is updated. + auto tiny = Allocate(Length(1)); + Delete(tiny); + + std::string buffer(1024 * 1024, '\0'); + { + Printer printer(&*buffer.begin(), buffer.size()); + set_.Print(printer); + } + buffer.resize(strlen(buffer.c_str())); + + EXPECT_THAT(buffer, testing::HasSubstr(R"( +HugeRegion: Since the start of the execution, 7 subreleases (3840 pages) were skipped due to the sum of short-term (60s) fluctuations and long-term (120s) trends. +HugeRegion: 0.0000% of decisions confirmed correct, 0 pending (0.0000% of pages, 0 pending). +)")); +} + +// Tests that HugeRegion releases all free hugepages when hit_limit is set to +// true. +TEST_P(HugeRegionSetTest, HardRelease) { + absl::BitGen rng; + PageId p; + constexpr Length kSize = kPagesPerHugePage + Length(1); + bool from_released; + ASSERT_FALSE(set_.MaybeGet(Length(1), &p, &from_released)); + auto r1 = GetRegion(); + set_.Contribute(r1.get()); + + std::vector allocs; + + while (set_.MaybeGet(kSize, &p, &from_released)) { + allocs.push_back({p, kSize}); + } + BackingStats stats = set_.stats(); + EXPECT_EQ(stats.unmapped_bytes, 0); + + for (auto a : allocs) { + ASSERT_TRUE(set_.MaybePut(Range(a.p, a.n))); + } + + stats = set_.stats(); + EXPECT_EQ(stats.unmapped_bytes, + UseHugeRegionMoreOften() ? 0 : stats.system_bytes); + // All the huge pages in the region would be free, but backed, when + // huge-region-more-often feature is enabled. + EXPECT_EQ(r1->free_backed().raw_num(), + UseHugeRegionMoreOften() ? Region::size().raw_num() : 0); + Length released = HardReleasePages(r1->free_backed().in_pages()); + stats = set_.stats(); + EXPECT_EQ(released.in_bytes(), + UseHugeRegionMoreOften() ? 
stats.system_bytes : 0); + EXPECT_EQ(r1->free_backed().in_bytes(), 0); + EXPECT_EQ(stats.unmapped_bytes, stats.system_bytes); +} + +TEST_P(HugeRegionSetTest, Set) { absl::BitGen rng; PageId p; constexpr Length kSize = kPagesPerHugePage + Length(1); @@ -504,7 +1061,7 @@ TEST_F(HugeRegionSetTest, Set) { allocs.erase(allocs.begin() + allocs.size() / 2, allocs.end()); for (auto d : doomed) { - ASSERT_TRUE(set_.MaybePut(d.p, d.n)); + ASSERT_TRUE(set_.MaybePut(Range(d.p, d.n))); } for (size_t i = 0; i < 100 * 1000; ++i) { @@ -512,7 +1069,7 @@ TEST_F(HugeRegionSetTest, Set) { size_t index = absl::Uniform(rng, 0, N); std::swap(allocs[index], allocs[N - 1]); auto a = allocs.back(); - ASSERT_TRUE(set_.MaybePut(a.p, a.n)); + ASSERT_TRUE(set_.MaybePut(Range(a.p, a.n))); allocs.pop_back(); ASSERT_TRUE(set_.MaybeGet(kSize, &p, &from_released)); allocs.push_back({p, kSize}); @@ -521,16 +1078,15 @@ TEST_F(HugeRegionSetTest, Set) { // Random traffic should have defragmented our allocations into full // and empty regions, and released the empty ones. Annoyingly, we don't // know which region is which, so we have to do a bit of silliness: - std::vector regions = {r1.get(), r2.get(), r3.get(), r4.get()}; + std::vector regions = {r1.get(), r2.get(), r3.get(), r4.get()}; std::sort(regions.begin(), regions.end(), - [](const Region *a, const Region *b) -> bool { + [](const Region* a, const Region* b) -> bool { return a->used_pages() > b->used_pages(); }); for (int i = 0; i < regions.size(); i++) { - Log(kLog, __FILE__, __LINE__, i, regions[i]->used_pages().raw_num(), - regions[i]->free_pages().raw_num(), - regions[i]->unmapped_pages().raw_num()); + TC_LOG("i=%v used=%v free=%v unmapped=%v", i, regions[i]->used_pages(), + regions[i]->free_pages(), regions[i]->unmapped_pages()); } // Now first two should be "full" (ish) EXPECT_LE(Region::size().in_pages().raw_num() * 0.9, @@ -538,10 +1094,21 @@ TEST_F(HugeRegionSetTest, Set) { EXPECT_LE(Region::size().in_pages().raw_num() * 0.9, regions[1]->used_pages().raw_num()); // and last two "empty" (ish.) - EXPECT_LE(Region::size().in_pages().raw_num() * 0.9, - regions[2]->unmapped_pages().raw_num()); - EXPECT_LE(Region::size().in_pages().raw_num() * 0.9, - regions[3]->unmapped_pages().raw_num()); + if (UseHugeRegionMoreOften()) { + EXPECT_EQ(regions[2]->unmapped_pages().raw_num(), 0); + EXPECT_EQ(regions[3]->unmapped_pages().raw_num(), 0); + EXPECT_GT(regions[2]->free_backed().raw_num(), + Region::size().raw_num() * 0.9); + EXPECT_GT(regions[3]->free_backed().raw_num(), + Region::size().raw_num() * 0.9); + } else { + EXPECT_LE(Region::size().in_pages().raw_num() * 0.9, + regions[2]->unmapped_pages().raw_num()); + EXPECT_LE(Region::size().in_pages().raw_num() * 0.9, + regions[3]->unmapped_pages().raw_num()); + EXPECT_EQ(regions[2]->free_backed().raw_num(), 0); + EXPECT_EQ(regions[3]->free_backed().raw_num(), 0); + } // Check the stats line up. auto stats = set_.stats(); @@ -556,10 +1123,15 @@ TEST_F(HugeRegionSetTest, Set) { // Print out the stats for inspection of formats. 
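The suite above is also converted from TEST_F to TEST_P so that every test runs under both HugeRegionUsageOption values, wired up by the INSTANTIATE_TEST_SUITE_P call that follows the stats printout below. A minimal parameterized-fixture sketch in the same shape, assuming gtest with gtest_main is linked; ToyOption, ToyRegionTest, and BackedAfterDrain are invented names.

    #include "gtest/gtest.h"

    enum class ToyOption { kDefault, kAggressive };

    // Toy system under test: how many of `free_pages` stay backed after a drain.
    int BackedAfterDrain(int free_pages, bool aggressive_backing) {
      return aggressive_backing ? free_pages : 0;
    }

    class ToyRegionTest : public ::testing::TestWithParam<ToyOption> {
     protected:
      bool Aggressive() const { return GetParam() == ToyOption::kAggressive; }
    };

    TEST_P(ToyRegionTest, DrainKeepsPagesBackedOnlyWhenAggressive) {
      // Expectations can branch on the parameter, much like the checks on
      // UseHugeRegionMoreOften() above.
      EXPECT_EQ(BackedAfterDrain(64, Aggressive()), Aggressive() ? 64 : 0);
    }

    INSTANTIATE_TEST_SUITE_P(All, ToyRegionTest,
                             ::testing::Values(ToyOption::kDefault,
                                               ToyOption::kAggressive));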
std::vector buf(64 * 1024); Printer out(&buf[0], buf.size()); - set_.Print(&out); + set_.Print(out); printf("%s\n", &buf[0]); } +INSTANTIATE_TEST_SUITE_P( + All, HugeRegionSetTest, + testing::Values(HugeRegionUsageOption::kDefault, + HugeRegionUsageOption::kUseForAllLargeAllocs)); + } // namespace } // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/BUILD b/contrib/libs/tcmalloc/tcmalloc/internal/BUILD new file mode 100644 index 000000000000..38a2b6f54952 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/BUILD @@ -0,0 +1,1122 @@ +# Copyright 2019 The TCMalloc Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Internal libraries used for the implementation and testing of TCMalloc. + +load("@com_google_protobuf//bazel:cc_proto_library.bzl", "cc_proto_library") +load("@com_google_protobuf//bazel:proto_library.bzl", "proto_library") +load("//tcmalloc:copts.bzl", "TCMALLOC_DEFAULT_COPTS") +load("//tcmalloc:variants.bzl", "create_tcmalloc_benchmark") + +package(default_visibility = ["//visibility:private"]) + +licenses(["notice"]) + +cc_library( + name = "affinity", + testonly = 1, + srcs = ["affinity.cc"], + hdrs = ["affinity.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":config", + ":cpu_utils", + ":logging", + "@com_google_absl//absl/types:span", + ], +) + +cc_test( + name = "affinity_test", + srcs = ["affinity_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":affinity", + ":cpu_utils", + ":percpu", + "@com_google_absl//absl/types:span", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "allocation_guard", + srcs = ["allocation_guard.cc"], + hdrs = ["allocation_guard.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":config", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + ], +) + +cc_test( + name = "allocation_guard_test", + srcs = ["allocation_guard_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":allocation_guard", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "atomic_danger", + hdrs = ["atomic_danger.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = ["//visibility:private"], + deps = [":config"], +) + +cc_library( + name = "atomic_stats_counter", + hdrs = ["atomic_stats_counter.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [":config"], +) + +cc_library( + name = "clock", + hdrs = ["clock.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [":config"], +) + +cc_library( + name = "config", + hdrs = ["config.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + "@com_google_absl//absl/base:config", + "@com_google_absl//absl/base:core_headers", + ], +) + +cc_test( 
+ name = "config_test", + srcs = ["config_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":config", + ":util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest_main", + "@com_google_protobuf//:protobuf", + ], +) + +cc_library( + name = "declarations", + hdrs = ["declarations.h"], + visibility = [ + "//tcmalloc:__subpackages__", + ], +) + +cc_library( + name = "environment", + srcs = ["environment.cc"], + hdrs = ["environment.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [":config"], +) + +cc_test( + name = "environment_test", + srcs = ["environment_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":environment", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "explicitly_constructed", + hdrs = ["explicitly_constructed.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = ["//tcmalloc:__subpackages__"], + deps = [ + ":config", + ], +) + +cc_library( + name = "exponential_biased", + hdrs = ["exponential_biased.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], +) + +cc_test( + name = "exponential_biased_test", + srcs = ["exponential_biased_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":exponential_biased", + "//tcmalloc/testing:testutil", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "fake_profile", + hdrs = ["fake_profile.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + "//tcmalloc:malloc_extension", + "@com_google_absl//absl/functional:function_ref", + "@com_google_absl//absl/time", + ], +) + +cc_library( + name = "linked_list", + hdrs = ["linked_list.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":config", + ":logging", + "@com_google_absl//absl/base:core_headers", + ], +) + +create_tcmalloc_benchmark( + name = "linked_list_benchmark", + srcs = ["linked_list_benchmark.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + ":config", + ":linked_list", + ":logging", + ":mock_span", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/random", + ], +) + +cc_test( + name = "linked_list_test", + size = "small", + srcs = ["linked_list_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + deps = [ + ":linked_list", + ":mock_span", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/random", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "linux_syscall_support", + hdrs = ["linux_syscall_support.h"], + copts = TCMALLOC_DEFAULT_COPTS, +) + +cc_library( + name = "logging", + srcs = ["logging.cc"], + hdrs = ["logging.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__pkg__", + "//tcmalloc:__subpackages__", + ], + deps = [ + ":allocation_guard", + ":config", + ":environment", + ":parameter_accessors", + "//tcmalloc:malloc_extension", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/debugging:stacktrace", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/time", + ], +) + +cc_test( + name = "logging_test", + srcs = ["logging_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + data = [ + ], + deps = [ + ":logging", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:string_view", + 
"@com_google_googletest//:gtest_main", + ], +) + +cc_binary( + name = "logging_test_helper", + testonly = 1, + srcs = ["logging_test_helper.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", +) + +cc_library( + name = "memory_stats", + srcs = ["memory_stats.cc"], + hdrs = ["memory_stats.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":config", + ":logging", + ":page_size", + ":util", + "@com_google_absl//absl/strings", + ], +) + +cc_test( + name = "memory_stats_test", + srcs = ["memory_stats_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":memory_stats", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "memory_tag", + srcs = ["memory_tag.cc"], + hdrs = ["memory_tag.h"], + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":config", + ":logging", + ":optimization", + "@com_google_absl//absl/strings:string_view", + ], +) + +cc_library( + name = "mincore", + srcs = ["mincore.cc"], + hdrs = ["mincore.h"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":config", + ":page_size", + ], +) + +cc_library( + name = "mismatched_delete_state", + hdrs = ["mismatched_delete_state.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":config", + ":logging", + "@com_google_absl//absl/types:span", + ], +) + +cc_library( + name = "residency", + srcs = ["residency.cc"], + hdrs = ["residency.h"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":config", + ":logging", + ":page_size", + ":range_tracker", + ":util", + "@com_google_absl//absl/status", + ], +) + +cc_library( + name = "pageflags", + srcs = ["pageflags.cc"], + hdrs = ["pageflags.h"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = [ + "//tcmalloc:__pkg__", + "//tcmalloc:__subpackages__", + ], + deps = [ + ":config", + ":logging", + ":page_size", + ":util", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/cleanup", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings", + ], +) + +cc_test( + name = "pageflags_test", + srcs = ["pageflags_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + tags = [ + ], + deps = [ + ":allocation_guard", + ":pageflags", + ":util", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/log", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/random", + "@com_google_absl//absl/random:distributions", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +create_tcmalloc_benchmark( + name = "mincore_benchmark", + srcs = ["mincore_benchmark.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":config", + ":logging", + ":page_size", + "@com_github_google_benchmark//:benchmark", + ], +) + +cc_test( + name = "mincore_test", + srcs = ["mincore_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + deps = [ + ":mincore", + ":page_size", + "@com_github_google_benchmark//:benchmark", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "residency_test", + srcs = ["residency_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + deps = [ + ":allocation_guard", + ":config", + ":page_size", + 
":range_tracker", + ":residency", + ":util", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:string_view", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "mock_span", + testonly = 1, + hdrs = ["mock_span.h"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":linked_list", + ], +) + +cc_library( + name = "cache_topology", + srcs = ["cache_topology.cc"], + hdrs = ["cache_topology.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":config", + ":cpu_utils", + ":logging", + ":sysinfo", + ":util", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/strings", + ], +) + +cc_test( + name = "cache_topology_test", + srcs = ["cache_topology_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + deps = [ + ":cache_topology", + ":sysinfo", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "numa", + srcs = ["numa.cc"], + hdrs = ["numa.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":config", + ":cpu_utils", + ":environment", + ":logging", + ":percpu", + ":sysinfo", + ":util", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/functional:function_ref", + ], +) + +cc_test( + name = "numa_test", + srcs = ["numa_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + deps = [ + ":logging", + ":numa", + ":percpu", + ":sysinfo", + "//tcmalloc:want_numa_aware", + "@com_google_absl//absl/random", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", + "@com_google_absl//absl/types:span", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "optimization", + hdrs = ["optimization.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":config", + ":logging", + "@com_google_absl//absl/base:core_headers", + ], +) + +cc_library( + name = "overflow", + hdrs = ["overflow.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":config", + "@com_google_absl//absl/base:config", + ], +) + +cc_library( + name = "page_size", + srcs = ["page_size.cc"], + hdrs = ["page_size.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":config", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + ], +) + +cc_library( + name = "parameter_accessors", + hdrs = ["parameter_accessors.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + "//tcmalloc:malloc_extension", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/time", + ], +) + +cc_library( + name = "percpu", + srcs = [ + "percpu.cc", + "percpu_rseq_asm.S", + "percpu_rseq_unsupported.cc", + ], + hdrs = ["percpu.h"], + copts = TCMALLOC_DEFAULT_COPTS, + # See b/299621672 for details. 
+ linkstatic = 1, + textual_hdrs = [ + "percpu_rseq_aarch64.S", + "percpu_rseq_x86_64.S", + ], + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":config", + ":cpu_utils", + ":linux_syscall_support", + ":logging", + ":optimization", + ":sysinfo", + ":util", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + ], +) + +cc_test( + name = "percpu_test", + srcs = ["percpu_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":logging", + ":percpu", + "//tcmalloc/testing:testutil", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/log:absl_check", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "percpu_early_test", + srcs = ["percpu_early_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + tags = [ + ], + deps = [ + ":percpu", + "@com_google_absl//absl/base:core_headers", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "percpu_tcmalloc", + hdrs = ["percpu_tcmalloc.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":config", + ":linux_syscall_support", + ":logging", + ":mincore", + ":optimization", + ":percpu", + ":prefetch", + ":sysinfo", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/base:dynamic_annotations", + "@com_google_absl//absl/functional:function_ref", + "@com_google_absl//absl/numeric:bits", + ], +) + +cc_test( + name = "percpu_tcmalloc_test", + timeout = "long", + srcs = ["percpu_tcmalloc_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + malloc = ":system_malloc", + tags = ["noubsan"], + deps = [ + ":affinity", + ":config", + ":cpu_utils", + ":logging", + ":page_size", + ":percpu", + ":percpu_tcmalloc", + ":sysinfo", + ":util", + "//tcmalloc:malloc_extension", + "//tcmalloc/testing:testutil", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/container:fixed_array", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/functional:function_ref", + "@com_google_absl//absl/random", + "@com_google_absl//absl/random:seed_sequences", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:span", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "prefetch", + hdrs = ["prefetch.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [":config"], +) + +cc_test( + name = "prefetch_test", + srcs = ["prefetch_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":prefetch", + "@com_google_googletest//:gtest_main", + ], +) + +proto_library( + name = "profile_proto", + srcs = ["profile.proto"], +) + +cc_proto_library( + name = "profile_cc_proto", + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [":profile_proto"], +) + +cc_library( + name = "profile_builder", + srcs = ["profile_builder.cc"], + hdrs = ["profile_builder.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":logging", + ":pageflags", + ":residency", + "//tcmalloc:malloc_extension", + "//tcmalloc/internal:profile_cc_proto", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/container:btree", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/hash", + 
"@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:span", + ], +) + +cc_test( + name = "profile_builder_fuzz", + srcs = ["profile_builder_fuzz.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + data = glob(["testdata/profile_builder_fuzz/*"]), + deps = [ + ":profile_builder", + "@com_google_fuzztest//fuzztest", + "@com_google_fuzztest//fuzztest:fuzztest_gtest_main", + "@com_google_googletest//:gtest", + ], +) + +cc_test( + name = "profile_builder_test", + srcs = ["profile_builder_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + data = [ + "testdata/b180635896.so", + "testdata/gnu-property.so", + ], + deps = [ + ":environment", + ":fake_profile", + ":page_size", + ":pageflags", + ":profile_builder", + ":residency", + "//tcmalloc:malloc_extension", + "//tcmalloc/internal:profile_cc_proto", + "@com_google_absl//absl/base", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/meta:type_traits", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "profile_builder_no_tcmalloc_test", + srcs = ["profile_builder_no_tcmalloc_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = ":system_malloc", + tags = [ + "noubsan", + ], + deps = [ + ":profile_builder", + "//tcmalloc:malloc_extension", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:status_matchers", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "proc_maps", + srcs = ["proc_maps.cc"], + hdrs = ["proc_maps.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":config", + ":logging", + ":util", + "@com_google_absl//absl/strings:str_format", + ], +) + +cc_library( + name = "range_tracker", + hdrs = ["range_tracker.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":config", + ":logging", + ":optimization", + "@com_google_absl//absl/numeric:bits", + ], +) + +create_tcmalloc_benchmark( + name = "range_tracker_benchmark", + srcs = ["range_tracker_benchmark.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + ":config", + ":range_tracker", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/random", + "@com_google_absl//absl/random:distributions", + ], +) + +cc_test( + name = "range_tracker_test", + srcs = ["range_tracker_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + deps = [ + ":range_tracker", + "@com_google_absl//absl/container:fixed_array", + "@com_google_absl//absl/random", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "sampled_allocation", + hdrs = ["sampled_allocation.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = ["//tcmalloc:__subpackages__"], + deps = [ + ":logging", + ":sampled_allocation_recorder", + ], +) + +cc_test( + name = "sampled_allocation_test", + srcs = ["sampled_allocation_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":logging", + ":sampled_allocation", + "@com_google_absl//absl/base", + "@com_google_absl//absl/debugging:stacktrace", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "sampled_allocation_recorder", + hdrs = 
["sampled_allocation_recorder.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = ["//tcmalloc:__subpackages__"], + deps = [ + ":allocation_guard", + ":config", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/functional:function_ref", + ], +) + +cc_test( + name = "sampled_allocation_recorder_test", + srcs = ["sampled_allocation_recorder_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":explicitly_constructed", + ":sampled_allocation_recorder", + "//tcmalloc/testing:thread_manager", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "stacktrace_filter", + hdrs = ["stacktrace_filter.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":config", + "@com_google_absl//absl/hash", + "@com_google_absl//absl/types:span", + ], +) + +cc_test( + name = "stacktrace_filter_test", + timeout = "long", + srcs = ["stacktrace_filter_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":stacktrace_filter", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/types:span", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "sysinfo", + srcs = ["sysinfo.cc"], + hdrs = ["sysinfo.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__pkg__", + "//tcmalloc:__subpackages__", + ], + deps = [ + ":config", + ":cpu_utils", + ":logging", + ":util", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/functional:function_ref", + "@com_google_absl//absl/strings", + ], +) + +cc_test( + name = "sysinfo_fuzz", + srcs = ["sysinfo_fuzz.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":cpu_utils", + ":sysinfo", + "@com_google_fuzztest//fuzztest", + "@com_google_fuzztest//fuzztest:fuzztest_gtest_main", + "@com_google_googletest//:gtest", + ], +) + +cc_test( + name = "sysinfo_test", + srcs = ["sysinfo_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":allocation_guard", + ":cpu_utils", + ":sysinfo", + "@com_google_absl//absl/base", + "@com_google_absl//absl/random", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", + "@com_google_googletest//:gtest_main", + ], +) + +# An empty rule to force libc malloc instead of TCMalloc. 
+cc_library( + name = "system_malloc", + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = [ + "//tcmalloc:__subpackages__", + ], +) + +cc_library( + name = "timeseries_tracker", + hdrs = ["timeseries_tracker.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":clock", + ":logging", + "@com_google_absl//absl/base", + "@com_google_absl//absl/functional:function_ref", + "@com_google_absl//absl/numeric:bits", + "@com_google_absl//absl/numeric:int128", + "@com_google_absl//absl/time", + ], +) + +cc_test( + name = "timeseries_tracker_test", + srcs = ["timeseries_tracker_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":clock", + ":timeseries_tracker", + "@com_google_absl//absl/base", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "util", + srcs = ["util.cc"], + hdrs = ["util.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__pkg__", + "//tcmalloc:__subpackages__", + ], + deps = [ + ":config", + ":logging", + "@com_google_absl//absl/time", + ], +) + +cc_library( + name = "cpu_utils", + hdrs = ["cpu_utils.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":config", + "@com_google_absl//absl/base:core_headers", + ], +) diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/affinity.cc b/contrib/libs/tcmalloc/tcmalloc/internal/affinity.cc new file mode 100644 index 000000000000..78f60423e9df --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/affinity.cc @@ -0,0 +1,99 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "tcmalloc/internal/affinity.h" + +#include +#include +#include + +#include + +#include "absl/types/span.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/cpu_utils.h" +#include "tcmalloc/internal/logging.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +std::vector AllowedCpus() { + // We have no need for dynamically sized sets (currently >1024 CPUs for glibc) + // at the present time. We could change this in the future. + CpuSet allowed_cpus; + TC_CHECK(allowed_cpus.GetAffinity(0)); + int n = allowed_cpus.Count(), c = 0; + + std::vector result(n); + for (int i = 0; i < kMaxCpus && n; i++) { + if (allowed_cpus.IsSet(i)) { + result[c++] = i; + n--; + } + } + TC_CHECK_EQ(0, n); + + return result; +} + +static CpuSet SpanToCpuSetT(absl::Span mask) { + CpuSet result; + result.Zero(); + for (int cpu : mask) { + result.Set(cpu); + } + return result; +} + +ScopedAffinityMask::ScopedAffinityMask(absl::Span allowed_cpus) { + specified_cpus_ = SpanToCpuSetT(allowed_cpus); + // getaffinity should never fail. + TC_CHECK(original_cpus_.GetAffinity(0)); + // See destructor comments on setaffinity interactions. Tampered() will + // necessarily be true in this case. 
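AllowedCpus() above enumerates the calling thread's affinity mask through the CpuSet wrapper. The sketch below does roughly the same with raw glibc calls, which may help when reading the wrapper; it is Linux-only, and AllowedCpusSketch is an invented name rather than anything in this patch.

    #ifndef _GNU_SOURCE
    #define _GNU_SOURCE
    #endif
    #include <sched.h>

    #include <cstdio>
    #include <vector>

    std::vector<int> AllowedCpusSketch() {
      cpu_set_t set;
      CPU_ZERO(&set);
      std::vector<int> cpus;
      // pid 0 means the calling thread.
      if (sched_getaffinity(0, sizeof(set), &set) != 0) return cpus;
      for (int cpu = 0; cpu < CPU_SETSIZE; ++cpu) {
        if (CPU_ISSET(cpu, &set)) cpus.push_back(cpu);
      }
      return cpus;
    }

    int main() {
      for (int cpu : AllowedCpusSketch()) std::printf("%d ", cpu);
      std::printf("\n");
    }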
+ TC_CHECK(specified_cpus_.SetAffinity(0)); +} + +ScopedAffinityMask::ScopedAffinityMask(int allowed_cpu) { + specified_cpus_.Zero(); + specified_cpus_.Set(allowed_cpu); + + // getaffinity should never fail. + TC_CHECK(original_cpus_.GetAffinity(0)); + // See destructor comments on setaffinity interactions. Tampered() will + // necessarily be true in this case. + TC_CHECK(specified_cpus_.SetAffinity(0)); +} + +ScopedAffinityMask::~ScopedAffinityMask() { + // If something else has already reset our affinity, do not attempt to + // restrict towards our original mask. This is best-effort as the tampering + // may obviously occur during the destruction of *this. + if (!Tampered()) { + // Note: We do not assert success here, conflicts may restrict us from all + // 'original_cpus_'. + (void)original_cpus_.SetAffinity(0); + } +} + +bool ScopedAffinityMask::Tampered() { + CpuSet current_cpus; + TC_CHECK(current_cpus.GetAffinity(0)); + return !CPU_EQUAL_S(kCpuSetBytes, ¤t_cpus, + &specified_cpus_); // Mismatch => modified. +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/affinity.h b/contrib/libs/tcmalloc/tcmalloc/internal/affinity.h new file mode 100644 index 000000000000..aa9cf38431d1 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/affinity.h @@ -0,0 +1,71 @@ +#pragma clang system_header +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_AFFINITY_H_ +#define TCMALLOC_INTERNAL_AFFINITY_H_ + +#include + +#include + +#include "absl/types/span.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/cpu_utils.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Affinity helpers. + +// Returns a vector of the which cpus the currently allowed thread is allowed to +// run on. There are no guarantees that this will not change before, after, or +// even during, the call to AllowedCpus(). +std::vector AllowedCpus(); + +// Enacts a scoped affinity mask on the constructing thread. Attempts to +// restore the original affinity mask on destruction. +// +// REQUIRES: For test-use only. Do not use this in production code. +class ScopedAffinityMask { + public: + // When racing with an external restriction that has a zero-intersection with + // "allowed_cpus" we will construct, but immediately register as "Tampered()", + // without actual changes to affinity. + explicit ScopedAffinityMask(absl::Span allowed_cpus); + explicit ScopedAffinityMask(int allowed_cpu); + + // Restores original affinity iff our scoped affinity has not been externally + // modified (i.e. Tampered()). Otherwise, the updated affinity is preserved. + ~ScopedAffinityMask(); + + // Returns true if the affinity mask no longer matches what was set at point + // of construction. + // + // Note: This is instantaneous and not fool-proof. 
It's possible for an + // external affinity modification to subsequently align with our originally + // specified "allowed_cpus". In this case Tampered() will return false when + // time may have been spent executing previously on non-specified cpus. + bool Tampered(); + + private: + CpuSet original_cpus_, specified_cpus_; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_AFFINITY_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/affinity_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/affinity_test.cc new file mode 100644 index 000000000000..6fd2918f8ce3 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/affinity_test.cc @@ -0,0 +1,102 @@ +// Copyright 2022 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/affinity.h" + +#include +#include + +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/types/span.h" +#include "tcmalloc/internal/cpu_utils.h" +#include "tcmalloc/internal/percpu.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +static bool AffinityMatches(std::vector expected_affinity) { + CpuSet allowed_cpus; + EXPECT_TRUE(allowed_cpus.GetAffinity(0)) << errno; + for (int cpu : expected_affinity) { + if (!allowed_cpus.IsSet(cpu)) return false; + + allowed_cpus.CLR(cpu); + } + + // All cpus should now be accounted for. + return allowed_cpus.Count() == 0; +} + +TEST(AffinityInternalTest, AllowedCpus) { + ASSERT_THAT(AllowedCpus(), testing::Contains(subtle::percpu::GetRealCpu())); + ASSERT_TRUE(AffinityMatches(AllowedCpus())); +} + +TEST(AffinityInternalTest, ScopedAffinityTamper) { + // It would be convenient to use a ScopedAffinityMask here also, however, the + // tamper logic disables the destructor (this is intentional so as to leave us + // with the most consistent masks). + CpuSet original_cpus; +restart: + EXPECT_TRUE(original_cpus.GetAffinity(0)) << errno; + + // We require at least 2 cpus to run this test. + if (original_cpus.Count() == 1) return; + + for (int i = 0; i < kMaxCpus; i++) { + if (original_cpus.IsSet(i)) { + ScopedAffinityMask mask(i); + + // Progressing past this point _requires_ a successful false return. + if (mask.Tampered()) goto restart; + + EXPECT_FALSE(mask.Tampered()); + // Manually tamper. Note that the only way this can fail (external + // restriction away from "i", must in itself trigger tampering. + ASSERT_TRUE(original_cpus.SetAffinity(0)); + ASSERT_TRUE(mask.Tampered()); + break; + } + } + // We already restored original_cpus above. 
+} + +TEST(AffinityInternalTest, ScopedAffinityMask) { + auto original_cpus = AllowedCpus(); + +restart: + std::vector original_affinity = AllowedCpus(), temporary_affinity; + + for (int i = 0; i < original_affinity.size(); i++) { + if (AllowedCpus() != original_affinity) goto restart; + + temporary_affinity.push_back(original_affinity[i]); + ScopedAffinityMask mask(absl::MakeSpan(temporary_affinity)); + ASSERT_TRUE(AllowedCpus() == temporary_affinity || mask.Tampered()); + + if (mask.Tampered()) { + goto restart; + } + } + + EXPECT_EQ(original_affinity, AllowedCpus()); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/allocation_guard.cc b/contrib/libs/tcmalloc/tcmalloc/internal/allocation_guard.cc new file mode 100644 index 000000000000..c61bd6c06378 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/allocation_guard.cc @@ -0,0 +1,24 @@ +// Copyright 2023 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/allocation_guard.h" + +#include "absl/base/attributes.h" +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc::tcmalloc_internal { + +} // namespace tcmalloc::tcmalloc_internal +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/allocation_guard.h b/contrib/libs/tcmalloc/tcmalloc/internal/allocation_guard.h new file mode 100644 index 000000000000..ac8675602cbf --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/allocation_guard.h @@ -0,0 +1,57 @@ +#pragma clang system_header +// Copyright 2023 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_ALLOCATION_GUARD_H_ +#define TCMALLOC_INTERNAL_ALLOCATION_GUARD_H_ + +#include "absl/base/attributes.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/thread_annotations.h" +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc::tcmalloc_internal { + +// TODO(b/143069684): actually ensure no allocations in debug mode here. +struct AllocationGuard { + AllocationGuard() {} +}; + +// A SpinLockHolder that also enforces no allocations while the lock is held in +// debug mode. 
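The holder declared just below wraps a SpinLockHolder and aborts in debug builds when handed a cooperative lock, and its test further down verifies that with EXPECT_DEBUG_DEATH. A generic toy version of that debug-only precondition pattern, assuming gtest with gtest_main is linked; ToyLock and ToyGuard are invented for illustration and are unrelated to absl's types.

    #include <cstdlib>

    #include "gtest/gtest.h"

    struct ToyLock {
      bool cooperative = false;
    };

    // Aborts in debug builds if the lock violates the guard's precondition;
    // does nothing extra in NDEBUG builds.
    class ToyGuard {
     public:
      explicit ToyGuard(ToyLock* lock) {
    #ifndef NDEBUG
        if (lock->cooperative) std::abort();
    #endif
      }
    };

    TEST(ToyGuardTest, NoncooperativeLockIsFine) {
      ToyLock lock;
      ToyGuard guard(&lock);
    }

    TEST(ToyGuardDeathTest, CooperativeLockAbortsInDebug) {
      ToyLock lock;
      lock.cooperative = true;
      // Fires only in debug builds; in NDEBUG builds the statement just runs.
      EXPECT_DEBUG_DEATH({ ToyGuard guard(&lock); }, "");
    }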
+class ABSL_SCOPED_LOCKABLE AllocationGuardSpinLockHolder { + public: + explicit AllocationGuardSpinLockHolder(absl::base_internal::SpinLock* l) + ABSL_EXCLUSIVE_LOCK_FUNCTION(l) + : lock_holder_(l) { +#ifndef NDEBUG + if (l->IsCooperative()) { + abort(); + } +#endif // NDEBUG + } + + inline ~AllocationGuardSpinLockHolder() ABSL_UNLOCK_FUNCTION() = default; + + private: + absl::base_internal::SpinLockHolder lock_holder_; + // In debug mode, enforces no allocations. + AllocationGuard enforce_no_alloc_; +}; + +} // namespace tcmalloc::tcmalloc_internal +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_ALLOCATION_GUARD_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/allocation_guard_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/allocation_guard_test.cc new file mode 100644 index 000000000000..c1faa12e4063 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/allocation_guard_test.cc @@ -0,0 +1,40 @@ +// Copyright 2023 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/allocation_guard.h" + +#include + +#include "gtest/gtest.h" +#include "absl/base/const_init.h" +#include "absl/base/internal/spinlock.h" + +namespace tcmalloc::tcmalloc_internal { +namespace { + +TEST(AllocationGuard, Noncooperative) { + absl::base_internal::SpinLock lock(absl::kConstInit, + absl::base_internal::SCHEDULE_KERNEL_ONLY); + AllocationGuardSpinLockHolder h(&lock); +} + +TEST(AllocationGuard, CooperativeDeathTest) { + absl::base_internal::SpinLock lock; + + EXPECT_DEBUG_DEATH( + { AllocationGuardSpinLockHolder h(&lock); }, "SIGABRT received"); +} + +} // namespace +} // namespace tcmalloc::tcmalloc_internal diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/atomic_danger.h b/contrib/libs/tcmalloc/tcmalloc/internal/atomic_danger.h index 49c95d66cb5b..bfbfc9f3d6c1 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/atomic_danger.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/atomic_danger.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -19,6 +20,7 @@ #include #include +#include #include "tcmalloc/internal/config.h" diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/atomic_stats_counter.h b/contrib/libs/tcmalloc/tcmalloc/internal/atomic_stats_counter.h index da7f30646d5e..d155cc753d51 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/atomic_stats_counter.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/atomic_stats_counter.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -16,8 +17,8 @@ #define TCMALLOC_INTERNAL_ATOMIC_STATS_COUNTER_H_ #include +#include -#include "absl/base/macros.h" #include "tcmalloc/internal/config.h" GOOGLE_MALLOC_SECTION_BEGIN diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/bits.h b/contrib/libs/tcmalloc/tcmalloc/internal/bits.h deleted file mode 100644 index 80ca17085c0c..000000000000 
--- a/contrib/libs/tcmalloc/tcmalloc/internal/bits.h +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright 2019 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef TCMALLOC_INTERNAL_BITS_H_ -#define TCMALLOC_INTERNAL_BITS_H_ - -#include -#include - -#include "tcmalloc/internal/logging.h" - -namespace tcmalloc { -namespace tcmalloc_internal { - -class Bits { - public: - // Returns true if a value is zero or a power of two. - template - static constexpr - typename std::enable_if::value, bool>::type - IsZeroOrPow2(T n) { - return (n & (n - 1)) == 0; - } - - // Returns true if a value is a power of two. - template - static constexpr - typename std::enable_if::value, bool>::type - IsPow2(T n) { - return n != 0 && (n & (n - 1)) == 0; - } - - template - static constexpr typename std::enable_if::value, T>::type - Log2Floor(T n) { - if (n == 0) { - return -1; - } - - if (sizeof(T) <= sizeof(unsigned int)) { - return std::numeric_limits::digits - 1 - __builtin_clz(n); - } else if (sizeof(T) <= sizeof(unsigned long)) { - return std::numeric_limits::digits - 1 - __builtin_clzl(n); - } else { - static_assert(sizeof(T) <= sizeof(unsigned long long)); - return std::numeric_limits::digits - 1 - __builtin_clzll(n); - } - } - - template - static constexpr typename std::enable_if::value, T>::type - Log2Ceiling(T n) { - T floor = Log2Floor(n); - if (IsZeroOrPow2(n)) - return floor; - else - return floor + 1; - } - - template - static constexpr typename std::enable_if::value, T>::type - RoundUpToPow2(T n) { - if (n == 0) return 1; - return T{1} << Log2Ceiling(n); - } -}; - -} // namespace tcmalloc_internal -} // namespace tcmalloc - -#endif // TCMALLOC_INTERNAL_BITS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/bits_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/bits_test.cc deleted file mode 100644 index 0589b314d2c8..000000000000 --- a/contrib/libs/tcmalloc/tcmalloc/internal/bits_test.cc +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright 2019 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
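// A minimal sketch, assuming the removed Bits helpers are superseded by their
// absl/numeric/bits.h equivalents (an assumption; the replacement call sites
// are outside this hunk). With Clang/GCC these are constexpr, so the mapping
// can be checked at compile time:
//
//   Bits::IsPow2(n)        -> absl::has_single_bit(n)
//   Bits::IsZeroOrPow2(n)  -> n == 0 || absl::has_single_bit(n)
//   Bits::Log2Floor(n)     -> absl::bit_width(n) - 1   (for n != 0)
//   Bits::RoundUpToPow2(n) -> absl::bit_ceil(n)

#include <cstdint>

#include "absl/numeric/bits.h"

static_assert(absl::has_single_bit(65536u));
static_assert(absl::bit_ceil(1337u) == 2048u);
static_assert(absl::bit_width(uint64_t{0x40000000000}) - 1 == 42);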
- -#include "tcmalloc/internal/bits.h" - -#include -#include -#include - -#include "gmock/gmock.h" -#include "gtest/gtest.h" -#include "absl/random/random.h" - -namespace tcmalloc { -namespace tcmalloc_internal { -namespace { - -TEST(BitsTest, Log2EdgeCases) { - EXPECT_EQ(-1, Bits::Log2Floor(0u)); - EXPECT_EQ(-1, Bits::Log2Ceiling(0u)); - - for (int i = 0; i < 32; i++) { - uint32_t n = 1U << i; - EXPECT_EQ(i, Bits::Log2Floor(n)); - EXPECT_EQ(i, Bits::Log2Ceiling(n)); - if (n > 2) { - EXPECT_EQ(i - 1, Bits::Log2Floor(n - 1)); - EXPECT_EQ(i, Bits::Log2Floor(n + 1)); - EXPECT_EQ(i, Bits::Log2Ceiling(n - 1)); - EXPECT_EQ(i + 1, Bits::Log2Ceiling(n + 1)); - } - } - - EXPECT_EQ(Bits::Log2Ceiling(uint64_t{0x40000000000}), 42); - EXPECT_EQ(Bits::Log2Floor(uint64_t{0x40000000000}), 42); -} - -TEST(BitsTest, Log2Random) { - absl::BitGen random; - - const int kNumIterations = 10000; - for (int i = 0; i < kNumIterations; i++) { - int maxbit = -1; - uint32_t n = 0; - while (!absl::Bernoulli(random, 1.0 / 32)) { - int bit = absl::Uniform(random, 0, 32); - n |= (1U << bit); - maxbit = std::max(bit, maxbit); - } - EXPECT_EQ(maxbit, Bits::Log2Floor(n)); - } -} - -TEST(BitsTest, IsZeroOrPow2) { - EXPECT_TRUE(Bits::IsZeroOrPow2(0u)); - EXPECT_TRUE(Bits::IsZeroOrPow2(1u)); - EXPECT_TRUE(Bits::IsZeroOrPow2(2u)); - EXPECT_FALSE(Bits::IsZeroOrPow2(3u)); - EXPECT_TRUE(Bits::IsZeroOrPow2(4u)); - EXPECT_FALSE(Bits::IsZeroOrPow2(1337u)); - EXPECT_TRUE(Bits::IsZeroOrPow2(65536u)); - EXPECT_FALSE(Bits::IsZeroOrPow2(std::numeric_limits::max())); - EXPECT_TRUE(Bits::IsZeroOrPow2(uint32_t{1} << 31)); -} - -TEST(BitsTest, IsPow2) { - EXPECT_FALSE(Bits::IsPow2(0u)); - EXPECT_TRUE(Bits::IsPow2(1u)); - EXPECT_TRUE(Bits::IsPow2(2u)); - EXPECT_FALSE(Bits::IsPow2(3u)); - EXPECT_TRUE(Bits::IsPow2(4u)); - EXPECT_FALSE(Bits::IsPow2(1337u)); - EXPECT_TRUE(Bits::IsPow2(65536u)); - EXPECT_FALSE(Bits::IsPow2(std::numeric_limits::max())); - EXPECT_TRUE(Bits::IsPow2(uint32_t{1} << 31)); -} - -TEST(BitsTest, RoundUpToPow2) { - EXPECT_EQ(Bits::RoundUpToPow2(0u), 1); - EXPECT_EQ(Bits::RoundUpToPow2(1u), 1); - EXPECT_EQ(Bits::RoundUpToPow2(2u), 2); - EXPECT_EQ(Bits::RoundUpToPow2(3u), 4); - EXPECT_EQ(Bits::RoundUpToPow2(4u), 4); - EXPECT_EQ(Bits::RoundUpToPow2(1337u), 2048); - EXPECT_EQ(Bits::RoundUpToPow2(65536u), 65536); - EXPECT_EQ(Bits::RoundUpToPow2(65536u - 1337u), 65536); - EXPECT_EQ(Bits::RoundUpToPow2(uint64_t{0x40000000000}), - uint64_t{0x40000000000}); -} - -} // namespace -} // namespace tcmalloc_internal -} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.cc b/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.cc index 12a1709b34fb..d86ef754b34f 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.cc @@ -17,10 +17,14 @@ #include #include -#include "absl/strings/numbers.h" -#include "absl/strings/string_view.h" +#include +#include +#include + #include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/cpu_utils.h" #include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/sysinfo.h" #include "tcmalloc/internal/util.h" GOOGLE_MALLOC_SECTION_BEGIN @@ -36,51 +40,71 @@ int OpenSysfsCacheList(size_t cpu) { } } // namespace -int BuildCpuToL3CacheMap_FindFirstNumberInBuf(absl::string_view current) { - // Remove all parts coming after a dash or comma. 
- const size_t dash = current.find('-'); - if (dash != absl::string_view::npos) current = current.substr(0, dash); - const size_t comma = current.find(','); - if (comma != absl::string_view::npos) current = current.substr(0, comma); +void CacheTopology::Init() { + const auto maybe_numcpus = NumCPUsMaybe(); + if (!maybe_numcpus.has_value()) { + l3_count_ = 1; + return; + } - int first_cpu; - CHECK_CONDITION(absl::SimpleAtoi(current, &first_cpu)); - CHECK_CONDITION(first_cpu < CPU_SETSIZE); - return first_cpu; -} + cpu_count_ = *maybe_numcpus; + CpuSet cpus_to_check; + cpus_to_check.Zero(); + for (int cpu = 0; cpu < cpu_count_; ++cpu) { + cpus_to_check.Set(cpu); + } -int BuildCpuToL3CacheMap(uint8_t l3_cache_index[CPU_SETSIZE]) { - int index = 0; - // Set to a sane value. - memset(l3_cache_index, 0, CPU_SETSIZE); - for (int cpu = 0; cpu < CPU_SETSIZE; ++cpu) { + while (true) { + const int cpu = cpus_to_check.FindFirstSet(); + if (cpu == -1) { + break; + } const int fd = OpenSysfsCacheList(cpu); if (fd == -1) { // At some point we reach the number of CPU on the system, and // we should exit. We verify that there was no other problem. - CHECK_CONDITION(errno == ENOENT); - return index; + TC_CHECK_EQ(errno, ENOENT); + // For aarch64 if + // /sys/devices/system/cpu/cpu*/cache/index3/shared_cpu_list is missing + // then L3 is assumed to be shared by all CPUs. + // TODO(b/210049384): find a better replacement for shared_cpu_list in + // this case, e.g. based on numa nodes. +#ifdef __aarch64__ + if (l3_count_ == 0) { + l3_count_ = 1; + } +#endif + return; } // The file contains something like: // 0-11,22-33 - // we are looking for the first number in that file. - char buf[10]; - const size_t bytes_read = - signal_safe_read(fd, buf, 10, /*bytes_read=*/nullptr); + // Extract all CPUs from that. + + std::optional maybe_shared_cpu_list = + ParseCpulist([&](char* const buf, const size_t count) { + return signal_safe_read(fd, buf, count, /*bytes_read=*/nullptr); + }); signal_safe_close(fd); - CHECK_CONDITION(bytes_read >= 0); - const int first_cpu = - BuildCpuToL3CacheMap_FindFirstNumberInBuf({buf, bytes_read}); - CHECK_CONDITION(first_cpu < CPU_SETSIZE); - CHECK_CONDITION(first_cpu <= cpu); - if (cpu == first_cpu) { - l3_cache_index[cpu] = index++; - } else { - l3_cache_index[cpu] = l3_cache_index[first_cpu]; + TC_CHECK(maybe_shared_cpu_list.has_value()); + CpuSet& shared_cpu_list = *maybe_shared_cpu_list; + shared_cpu_list.CLR(cpu); + cpus_to_check.CLR(cpu); + + const int first_cpu = cpu; + l3_cache_index_[first_cpu] = l3_count_++; + // Set the remaining in the parsed cpu set to the l3_cache_index of + // the first one. 
+ while (true) { + int next_cpu = shared_cpu_list.FindFirstSet(); + if (next_cpu == -1) { + break; + } + shared_cpu_list.CLR(next_cpu); + cpus_to_check.CLR(next_cpu); + l3_cache_index_[next_cpu] = l3_cache_index_[first_cpu]; } } - return index; } } // namespace tcmalloc_internal diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.h b/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.h index 292f17547030..83c0f5f7046f 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2021 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,19 +16,44 @@ #ifndef TCMALLOC_INTERNAL_CACHE_TOPOLOGY_H_ #define TCMALLOC_INTERNAL_CACHE_TOPOLOGY_H_ +#include + +#include "absl/base/attributes.h" +#include "absl/strings/string_view.h" #include "tcmalloc/internal/config.h" -#include "tcmalloc/internal/util.h" +#include "tcmalloc/internal/cpu_utils.h" +#include "tcmalloc/internal/logging.h" GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { -// Build a mapping from cpuid to the index of the L3 cache used by that cpu. -// Returns the number of caches detected. -int BuildCpuToL3CacheMap(uint8_t l3_cache_index[CPU_SETSIZE]); +class CacheTopology { + public: + static CacheTopology& Instance() { + ABSL_CONST_INIT static CacheTopology instance; + return instance; + } + + constexpr CacheTopology() = default; + + void Init(); + + unsigned l3_count() const { return l3_count_; } + + unsigned GetL3FromCpuId(int cpu) const { + TC_ASSERT_GE(cpu, 0); + TC_ASSERT_LT(cpu, cpu_count_); + unsigned l3 = l3_cache_index_[cpu]; + TC_ASSERT_LT(l3, l3_count_); + return l3; + } -// Helper function exposed to permit testing it. -int BuildCpuToL3CacheMap_FindFirstNumberInBuf(absl::string_view current); + private: + unsigned cpu_count_ = 0; + unsigned l3_count_ = 0; + uint8_t l3_cache_index_[kMaxCpus] = {}; +}; } // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology_test.cc index 927ecace94e4..df795abb86b2 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology_test.cc @@ -14,38 +14,29 @@ #include "tcmalloc/internal/cache_topology.h" -#include - -#include "gmock/gmock.h" #include "gtest/gtest.h" +#include "tcmalloc/internal/sysinfo.h" +namespace tcmalloc::tcmalloc_internal { namespace { -TEST(CacheToplogy, ComputesSomethingReasonable) { +TEST(CacheTopology, ComputesSomethingReasonable) { // This test verifies that each L3 cache serves the same number of CPU. This // is not a strict requirement for the correct operation of this code, but a // sign of sanity. 
- uint8_t l3_cache_index[CPU_SETSIZE]; - const int num_nodes = - tcmalloc::tcmalloc_internal::BuildCpuToL3CacheMap(l3_cache_index); - EXPECT_EQ(absl::base_internal::NumCPUs() % num_nodes, 0); - ASSERT_GT(num_nodes, 0); + CacheTopology topology; + topology.Init(); + EXPECT_EQ(NumCPUs() % topology.l3_count(), 0); + ASSERT_GT(topology.l3_count(), 0); static const int kMaxNodes = 256 / 8; int count_per_node[kMaxNodes] = {0}; - for (int i = 0; i < absl::base_internal::NumCPUs(); ++i) { - count_per_node[l3_cache_index[i]]++; + for (int i = 0, n = NumCPUs(); i < n; ++i) { + count_per_node[topology.GetL3FromCpuId(i)]++; } - for (int i = 0; i < num_nodes; ++i) { - EXPECT_EQ(count_per_node[i], absl::base_internal::NumCPUs() / num_nodes); + for (int i = 0; i < topology.l3_count(); ++i) { + EXPECT_EQ(count_per_node[i], NumCPUs() / topology.l3_count()); } } -TEST(CacheTopology, FindFirstNumberInBuf) { - using tcmalloc::tcmalloc_internal::BuildCpuToL3CacheMap_FindFirstNumberInBuf; - EXPECT_EQ(7, BuildCpuToL3CacheMap_FindFirstNumberInBuf("7,-787")); - EXPECT_EQ(5, BuildCpuToL3CacheMap_FindFirstNumberInBuf("5")); - EXPECT_EQ(5, BuildCpuToL3CacheMap_FindFirstNumberInBuf("5-9")); - EXPECT_EQ(5, BuildCpuToL3CacheMap_FindFirstNumberInBuf("5,9")); -} - } // namespace +} // namespace tcmalloc::tcmalloc_internal diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/clock.h b/contrib/libs/tcmalloc/tcmalloc/internal/clock.h index 65c765203c90..d145040f6f3f 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/clock.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/clock.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2021 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/config.h b/contrib/libs/tcmalloc/tcmalloc/internal/config.h index 73dbab06aa83..55e4cd81eea8 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/config.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/config.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,7 +18,8 @@ #include -#include "absl/base/policy_checks.h" +#include "absl/base/attributes.h" +#include "absl/base/config.h" // TCMALLOC_HAVE_SCHED_GETCPU is defined when the system implements // sched_getcpu(3) as by glibc and it's imitators. @@ -27,23 +29,34 @@ #undef TCMALLOC_HAVE_SCHED_GETCPU #endif +#if defined(__GLIBC__) && defined(__GLIBC_MINOR__) +#define TCMALLOC_GLIBC_PREREQ(major, minor) \ + ((__GLIBC__ * 100 + __GLIBC_MINOR__) >= ((major) * 100 + (minor))) +#else +#define TCMALLOC_GLIBC_PREREQ(major, minor) 0 +#endif + // TCMALLOC_HAVE_STRUCT_MALLINFO is defined when we know that the system has // `struct mallinfo` available. // -// The FreeBSD libc, and subsequently macOS, does not provide the `mallopt` -// interfaces. We know that bionic, glibc (and variants), newlib, and uclibc do +// We know that bionic, glibc (and variants), newlib, and uclibc do // provide the `mallopt` interface. The musl libc is known to not provide the // interface, nor does it provide a macro for checking. As a result, we // conservatively state that `struct mallinfo` is only available on these // environments. 
-#if !defined(OS_FREEBSD) && !defined(OS_MACOSX) && \ - (defined(__BIONIC__) || defined(__GLIBC__) || defined(__NEWLIB__) || \ - defined(__UCLIBC__)) +#if defined(__BIONIC__) || defined(__GLIBC__) || defined(__NEWLIB__) || \ + defined(__UCLIBC__) #define TCMALLOC_HAVE_STRUCT_MALLINFO 1 #else #undef TCMALLOC_HAVE_STRUCT_MALLINFO #endif +#if TCMALLOC_GLIBC_PREREQ(2, 33) +#define TCMALLOC_HAVE_STRUCT_MALLINFO2 1 +#else +#undef TCMALLOC_HAVE_STRUCT_MALLINFO2 +#endif + // When possible, name the text section as google_malloc. This macro should not // be added to header files as that may move unrelated code to google_malloc // section. @@ -51,9 +64,20 @@ #define GOOGLE_MALLOC_SECTION_BEGIN \ _Pragma("clang section text = \"google_malloc\"") #define GOOGLE_MALLOC_SECTION_END _Pragma("clang section text = \"\"") +// For inline lambdas, which aren't covered +#define GOOGLE_MALLOC_SECTION __attribute__((section("google_malloc"))) #else #define GOOGLE_MALLOC_SECTION_BEGIN #define GOOGLE_MALLOC_SECTION_END +#define GOOGLE_MALLOC_SECTION +#endif + +// TCMALLOC_ATTRIBUTE_NO_DESTROY is defined when clang::no_destroy attribute is +// present. +#if ABSL_HAVE_CPP_ATTRIBUTE(clang::no_destroy) +#define TCMALLOC_ATTRIBUTE_NO_DESTROY [[clang::no_destroy]] +#else +#define TCMALLOC_ATTRIBUTE_NO_DESTROY #endif #if defined(__GNUC__) && !defined(__clang__) @@ -68,11 +92,19 @@ #endif #endif -#if !defined(__x86_64__) && !defined(__ppc64__) && !defined(__arm__) && \ - !defined(__aarch64__) && !defined(__riscv) +#if !defined(__i386__) && !defined(__x86_64__) && !defined(__ppc64__) && \ + !defined(__arm__) && !defined(__aarch64__) && !defined(__riscv) #error "Unsupported architecture." #endif +#ifndef ABSL_IS_LITTLE_ENDIAN +#error "TCMalloc only supports little endian architectures" +#endif + +#ifndef __linux__ +#error "TCMalloc is only supported on Linux." +#endif + #if !defined(__cplusplus) || __cplusplus < 201703L #error "TCMalloc requires C++17 or later." #else @@ -86,32 +118,44 @@ namespace tcmalloc { namespace tcmalloc_internal { #if defined __x86_64__ -// All current and planned x86_64 processors only look at the lower 48 bits -// in virtual to physical address translation. The top 16 are thus unused. +// x86_64 processors use lower 48 bits in virtual to physical address +// translation with 4-level page tables. The top 16 are thus unused. +// We don't support 5-level page tables yet. // TODO(b/134686025): Under what operating systems can we increase it safely to // 17? This lets us use smaller page maps. On first allocation, a 36-bit page // map uses only 96 KB instead of the 4.5 MB used by a 52-bit page map. -inline constexpr int kAddressBits = - (sizeof(void*) < 8 ? (8 * sizeof(void*)) : 48); +inline constexpr int kAddressBits = 48; #elif defined __powerpc64__ && defined __linux__ // Linux(4.12 and above) on powerpc64 supports 128TB user virtual address space -// by default, and up to 512TB if user space opts in by specifing hint in mmap. +// by default, and up to 512TB if user space opts in by specifying hint in mmap. // See comments in arch/powerpc/include/asm/processor.h // and arch/powerpc/mm/mmap.c. -inline constexpr int kAddressBits = - (sizeof(void*) < 8 ? (8 * sizeof(void*)) : 49); +inline constexpr int kAddressBits = 49; #elif defined __aarch64__ && defined __linux__ // According to Documentation/arm64/memory.txt of kernel 3.16, // AARCH64 kernel supports 48-bit virtual addresses for both user and kernel. -inline constexpr int kAddressBits = - (sizeof(void*) < 8 ? 
(8 * sizeof(void*)) : 48); +inline constexpr int kAddressBits = 48; #elif defined __riscv && defined __linux__ -inline constexpr int kAddressBits = - (sizeof(void *) < 8 ? (8 * sizeof(void *)) : 48); +inline constexpr int kAddressBits = 48; #else inline constexpr int kAddressBits = 8 * sizeof(void*); #endif +#ifdef TCMALLOC_INTERNAL_SELSAN +inline constexpr bool kSelSanPresent = true; +#else +inline constexpr bool kSelSanPresent = false; +#endif + +// Sanitizers constrain the memory layout which causes problems with the +// enlarged tags required to represent NUMA partitions and for SelSan. +#if defined(ABSL_HAVE_MEMORY_SANITIZER) || defined(ABSL_HAVE_THREAD_SANITIZER) +static_assert(!kSelSanPresent, "MSan/TSan are incompatible with SelSan."); +inline constexpr bool kSanitizerAddressSpace = true; +#else +inline constexpr bool kSanitizerAddressSpace = false; +#endif + #if defined(__x86_64__) // x86 has 2 MiB huge pages static constexpr size_t kHugePageShift = 21; diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/config_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/config_test.cc new file mode 100644 index 000000000000..a9c042817c7d --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/config_test.cc @@ -0,0 +1,101 @@ +// Copyright 2020 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/config.h" + +#include + +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/match.h" +#include "absl/strings/str_cat.h" +#include "google/protobuf/io/gzip_stream.h" +#include "google/protobuf/io/zero_copy_stream_impl.h" +#include "tcmalloc/internal/util.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +TEST(AddressBits, CpuVirtualBits) { + // Check that kAddressBits is as least as large as either the number of bits + // in a pointer or as the number of virtual bits handled by the processor. + // To be effective this test must be run on each processor model. 
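// A minimal sketch, assuming GCC/Clang on x86-64, of the same
// virtual-address-width query done without hand-written asm: CPUID leaf
// 0x80000008 reports physical address bits in EAX[7:0] and virtual (linear)
// address bits in EAX[15:8], and <cpuid.h> provides a helper for issuing it:
//
//   #include <cpuid.h>
//
//   // Returns the number of virtual address bits, or -1 if the CPUID leaf
//   // is unavailable.
//   int VirtualAddressBits() {
//     unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
//     if (__get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx) == 0) {
//       return -1;
//     }
//     return (eax >> 8) & 0xff;
//   }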
+#ifdef __x86_64__ + const int kPointerBits = 8 * sizeof(void*); + + // LLVM has a miscompile bug around %rbx, see + // https://bugs.llvm.org/show_bug.cgi?id=17907 + int ret; + asm("mov %%rbx, %%rdi\n" + "cpuid\n" + "xchg %%rdi, %%rbx\n" + /* inputs */ + : "=a"(ret) + /* outputs */ + : "a"(0x80000008) + /* clobbers */ + : "rdi", "ecx", "edx"); + const int kImplementedVirtualBits = (ret >> 8) & ((1 << 8) - 1); + ASSERT_GE(kAddressBits, std::min(kImplementedVirtualBits, kPointerBits)); +#elif __aarch64__ + const int kPointerBits = 8 * sizeof(void*); + + int fd = signal_safe_open("/proc/config.gz", O_RDONLY); + if (fd < 0) { + GTEST_SKIP() << "Unable to open kernel config."; + } + + google::protobuf::io::FileInputStream fs(fd); + google::protobuf::io::GzipInputStream gs(&fs, google::protobuf::io::GzipInputStream::GZIP); + + std::string config; + do { + const void* buf; + int size; + if (!gs.Next(&buf, &size)) { + break; + } + if (size < 0) { + break; + } + + absl::StrAppend( + &config, absl::string_view(reinterpret_cast(buf), size)); + } while (true); + + constexpr absl::string_view token = "CONFIG_PGTABLE_LEVELS="; + ASSERT_THAT(config, testing::HasSubstr(token)); + auto position = config.find(token); + ASSERT_NE(position, std::string::npos); + position += token.size(); + auto eol = config.find('\n', position); + ASSERT_NE(eol, std::string::npos); + ASSERT_NE(eol, position); + absl::string_view string_levels(&config[position], eol - position); + int levels; + ASSERT_TRUE(absl::SimpleAtoi(string_levels, &levels)) << string_levels; + + ASSERT_GE(levels, 3); + const int kImplementedVirtualBits = 39 + (levels - 3) * 9; + ASSERT_EQ(kAddressBits, kImplementedVirtualBits); + ASSERT_GE(kAddressBits, std::min(kImplementedVirtualBits, kPointerBits)); +#endif +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/cpu_utils.h b/contrib/libs/tcmalloc/tcmalloc/internal/cpu_utils.h new file mode 100644 index 000000000000..9d2981cf5e79 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/cpu_utils.h @@ -0,0 +1,85 @@ +#pragma clang system_header +// Copyright 2023 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_CPU_UTILS_H_ +#define TCMALLOC_INTERNAL_CPU_UTILS_H_ + +#include + +#include + +#include "absl/base/attributes.h" +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// The maximum number of CPUs supported by TCMalloc. +static constexpr int kMaxCpus = 2048; +// The size of the CPU set in bytes. 
+static constexpr int kCpuSetBytes = CPU_ALLOC_SIZE(kMaxCpus); + +class CpuSet { + public: + void Zero() { CPU_ZERO_S(kCpuSetBytes, cpu_set_.data()); } + void Set(int cpu) { CPU_SET_S(cpu, kCpuSetBytes, cpu_set_.data()); } + bool IsSet(int cpu) const { + return CPU_ISSET_S(cpu, kCpuSetBytes, cpu_set_.data()); + } + void CLR(int cpu) { CPU_CLR_S(cpu, kCpuSetBytes, cpu_set_.data()); } + int Count() const { return CPU_COUNT_S(kCpuSetBytes, cpu_set_.data()); } + + // Find the index of the first set CPU. Returns -1 if none are set. + int FindFirstSet() const { + if (Count() == 0) { + return -1; + } + int cpu = 0; + while (!IsSet(cpu)) { + ++cpu; + } + return cpu; + } + + // Sets the CPU affinity of the process with the given pid. Returns true if + // successful. If returns false, please check the global 'errno' variable to + // determine the specific error that occurred. + [[nodiscard]] bool SetAffinity(pid_t pid) { + return sched_setaffinity(pid, kCpuSetBytes, cpu_set_.data()) == 0; + } + + // Gets the CPU affinity of the process with the given pid. Return trues if + // successful. If returns false, please check the global 'errno' variable to + // determine the specific error that occurred. + [[nodiscard]] bool GetAffinity(pid_t pid) { + return sched_getaffinity(pid, kCpuSetBytes, cpu_set_.data()) == 0; + } + + const cpu_set_t* data() const { return cpu_set_.data(); } + + private: + // In the sched.h, each CPU occupies one bit. + // Declare a bit array with a size that is an integer multiple of cpu_set_t: + std::array + cpu_set_; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_CPU_UTILS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/declarations.h b/contrib/libs/tcmalloc/tcmalloc/internal/declarations.h index b82a3ce9e5e6..d234f7ea8d6c 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/declarations.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/declarations.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,8 +19,14 @@ #ifndef TCMALLOC_INTERNAL_DECLARATIONS_H_ #define TCMALLOC_INTERNAL_DECLARATIONS_H_ -#include -#include +#if !defined(__cpp_sized_deallocation) + +void operator delete(void*, std::size_t) noexcept; +void operator delete[](void*, std::size_t) noexcept; + +#endif // !defined(__cpp_sized_deallocation) + +#if !defined(__cpp_aligned_new) namespace std { enum class align_val_t : size_t; @@ -33,10 +40,15 @@ void* operator new[](std::size_t, std::align_val_t, const std::nothrow_t&) noexcept; void operator delete(void*, std::align_val_t) noexcept; -void operator delete(void*, std::size_t) noexcept; -void operator delete(void*, std::size_t, std::align_val_t) noexcept; void operator delete[](void*, std::align_val_t) noexcept; -void operator delete[](void*, std::size_t) noexcept; + +#endif // !defined(__cpp_aligned_new) + +#if !defined(__cpp_sized_deallocation) || !defined(__cpp_aligned_new) + +void operator delete(void*, std::size_t, std::align_val_t) noexcept; void operator delete[](void*, std::size_t, std::align_val_t) noexcept; +#endif // !defined(__cpp_sized_deallocation) || !defined(__cpp_aligned_new) + #endif // TCMALLOC_INTERNAL_DECLARATIONS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/environment.cc b/contrib/libs/tcmalloc/tcmalloc/internal/environment.cc index e786dd7a96dc..dd5e25120f4a 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/environment.cc +++ 
b/contrib/libs/tcmalloc/tcmalloc/internal/environment.cc @@ -15,6 +15,8 @@ #include +#include "tcmalloc/internal/config.h" + GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/environment.h b/contrib/libs/tcmalloc/tcmalloc/internal/environment.h index f54840e8d781..86023160bdd0 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/environment.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/environment.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/explicitly_constructed.h b/contrib/libs/tcmalloc/tcmalloc/internal/explicitly_constructed.h new file mode 100644 index 000000000000..0ae0e8a0a250 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/explicitly_constructed.h @@ -0,0 +1,63 @@ +#pragma clang system_header +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_EXPLICITLY_CONSTRUCTED_H_ +#define TCMALLOC_INTERNAL_EXPLICITLY_CONSTRUCTED_H_ + +#include + +#include + +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Wraps a variable whose constructor is explicitly called. It is particularly +// useful for a global variable, without its constructor and destructor run on +// start and end of the program lifetime. This circumvents the initial +// construction order fiasco, while keeping the address of the empty string a +// compile time constant. +// +// Pay special attention to the initialization state of the object. +// 1. The object is "uninitialized" to begin with. +// 2. Call Construct() only if the object is uninitialized. After the call, the +// object becomes "initialized". +// 3. Call get_mutable() only if the object is initialized. +template +class ExplicitlyConstructed { + public: + template + void Construct(Args&&... args) { + new (&union_) T(std::forward(args)...); + } + + T& get_mutable() { return reinterpret_cast(union_); } + + private: + union AlignedUnion { + constexpr AlignedUnion() = default; + alignas(T) char space[sizeof(T)]; + int64_t align_to_int64; + void* align_to_ptr; + } union_; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_EXPLICITLY_CONSTRUCTED_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/exponential_biased.h b/contrib/libs/tcmalloc/tcmalloc/internal/exponential_biased.h new file mode 100644 index 000000000000..cfe85e0b3e16 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/exponential_biased.h @@ -0,0 +1,78 @@ +#pragma clang system_header +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_EXPONENTIAL_BIASED_H_ +#define TCMALLOC_INTERNAL_EXPONENTIAL_BIASED_H_ + +#include +#include + +namespace tcmalloc { +namespace tcmalloc_internal { + +class ExponentialBiased { + public: + static uint64_t NextRandom(uint64_t rnd); + static uint32_t GetRandom(uint64_t rnd); +}; + +// Returns the next prng value. +// pRNG is: aX+b mod c with a = 0x5DEECE66D, b = 0xB, c = 1<<48 +// This is the lrand64 generator. +inline uint64_t ExponentialBiased::NextRandom(uint64_t rnd) { + const uint64_t prng_mult = UINT64_C(0x5DEECE66D); + const uint64_t prng_add = 0xB; + const uint64_t prng_mod_power = 48; + const uint64_t prng_mod_mask = + ~((~static_cast(0)) << prng_mod_power); + return (prng_mult * rnd + prng_add) & prng_mod_mask; +} + +// Extracts higher-quality random bits. +// The raw value returned from NextRandom has poor randomness low bits +// and is not directly suitable for things like 'if (rnd % 2)'. +inline uint32_t ExponentialBiased::GetRandom(uint64_t rnd) { return rnd >> 16; } + +// Convenience wrapper to initialize a seed and return a sequence of +// pseudo-random values. Thread-safety: thread safe. +class Random { + public: + constexpr explicit Random(uint64_t seed) : state_(seed) {} + + // Return the next pseudo-random value. + uint32_t Next(); + + // Reset internal state with provided seed. + void Reset(uint64_t seed); + + private: + std::atomic state_; +}; + +inline uint32_t Random::Next() { + uint64_t r = state_.load(std::memory_order_relaxed); + r = ExponentialBiased::NextRandom(r); + state_.store(r, std::memory_order_relaxed); + return ExponentialBiased::GetRandom(r); +} + +inline void Random::Reset(uint64_t seed) { + state_.store(seed, std::memory_order_relaxed); +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc + +#endif // TCMALLOC_INTERNAL_EXPONENTIAL_BIASED_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/exponential_biased_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/exponential_biased_test.cc new file mode 100755 index 000000000000..e850e20fdf6c --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/exponential_biased_test.cc @@ -0,0 +1,125 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/exponential_biased.h" + +#include +#include +#include + +#include "gtest/gtest.h" +#include "tcmalloc/testing/testutil.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +// Testing that NextRandom generates uniform +// random numbers. 
+// Applies the Anderson-Darling test for uniformity +void TestNextRandom(int n) { + uint64_t x = 1; + // This assumes that the prng returns 48 bit numbers + uint64_t max_prng_value = static_cast(1) << 48; + // Initialize + for (int i = 1; i <= 20; i++) { // 20 mimics sampler.Init() + x = ExponentialBiased::NextRandom(x); + } + std::vector int_random_sample(n); + // Collect samples + for (int i = 0; i < n; i++) { + int_random_sample[i] = x; + x = ExponentialBiased::NextRandom(x); + } + // First sort them... + std::sort(int_random_sample.begin(), int_random_sample.end()); + std::vector random_sample(n); + // Convert them to uniform randoms (in the range [0,1]) + for (int i = 0; i < n; i++) { + random_sample[i] = + static_cast(int_random_sample[i]) / max_prng_value; + } + // Now compute the Anderson-Darling statistic + double ad_pvalue = AndersonDarlingTest(random_sample); + EXPECT_GT(std::min(ad_pvalue, 1 - ad_pvalue), 0.0001) + << "prng is not uniform: n = " << n << " p = " << ad_pvalue; +} + +TEST(ExponentialBiased, TestNextRandom_MultipleValues) { + TestNextRandom(10); // Check short-range correlation + TestNextRandom(100); + TestNextRandom(1000); + TestNextRandom(10000); // Make sure there's no systematic error +} + +// Test that NextRand is in the right range. Unfortunately, this is a +// stochastic test which could miss problems. +TEST(ExponentialBiased, NextRand_range) { + uint64_t one = 1; + // The next number should be (one << 48) - 1 + uint64_t max_value = (one << 48) - 1; + uint64_t x = (one << 55); + int n = 22; // 27; + for (int i = 1; i <= (1 << n); i++) { // 20 mimics sampler.Init() + x = ExponentialBiased::NextRandom(x); + ASSERT_LE(x, max_value); + } +} + +// Tests certain arithmetic operations to make sure they compute what we +// expect them too (for testing across different platforms) +TEST(ExponentialBiased, arithmetic_1) { + uint64_t rnd; // our 48 bit random number, which we don't trust + const uint64_t prng_mod_power = 48; + uint64_t one = 1; + rnd = one; + uint64_t max_value = (one << 48) - 1; + for (int i = 1; i <= (1 << 27); i++) { // 20 mimics sampler.Init() + rnd = ExponentialBiased::NextRandom(rnd); + ASSERT_LE(rnd, max_value); + double q = (rnd >> (prng_mod_power - 26)) + 1.0; + ASSERT_GE(q, 0) << rnd << " " << prng_mod_power; + } + // Test some potentially out of bounds value for rnd + for (int i = 1; i < 64; i++) { + rnd = one << i; + double q = (rnd >> (prng_mod_power - 26)) + 1.0; + ASSERT_GE(q, 0) << " rnd=" << rnd << " i=" << i << " prng_mod_power" + << prng_mod_power; + } +} + +TEST(ExponentialBiased, CoinFlip) { + // Ensure that the low bits contain good randomness and can be as a coin flip. + for (uint64_t seed = 0; seed < 100; seed++) { + uint64_t rnd = seed; + int even = 0; + constexpr int kIters = 100; + for (int i = 0; i < 2 * kIters; i++) { + rnd = ExponentialBiased::NextRandom(rnd); + // Check that it works even if we look at every second value + // (i.e. if the rand is used twice per some operation). + // This fails without GetRandom, which caused issues for guarded page + // allocator sampling (left-right-alignment decision). 
+ if (i % 2) { + even += ExponentialBiased::GetRandom(rnd) % 2; + } + } + EXPECT_GT(even, kIters / 10) << seed; + EXPECT_LT(even, kIters / 10 * 9) << seed; + } +} +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/fake_profile.h b/contrib/libs/tcmalloc/tcmalloc/internal/fake_profile.h new file mode 100644 index 000000000000..e8b7885c2294 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/fake_profile.h @@ -0,0 +1,66 @@ +#pragma clang system_header +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_FAKE_PROFILE_H_ +#define TCMALLOC_INTERNAL_FAKE_PROFILE_H_ + +#include +#include +#include + +#include "absl/functional/function_ref.h" +#include "absl/time/time.h" +#include "tcmalloc/malloc_extension.h" + +namespace tcmalloc { +namespace tcmalloc_internal { + +class FakeProfile final : public ProfileBase { + public: + void SetSamples(std::vector samples) { + samples_ = std::move(samples); + } + + // For each sample in the profile, Iterate invokes the callback f on the + // sample. + void Iterate( + absl::FunctionRef f) const override { + for (const auto& sample : samples_) { + f(sample); + } + } + + // The type of profile (live objects, allocated, etc.). + ProfileType Type() const override { return type_; } + void SetType(ProfileType type) { type_ = type; } + + // The duration of the profile + absl::Duration Duration() const override { return duration_; } + void SetDuration(absl::Duration duration) { duration_ = duration; } + + std::optional StartTime() const override { return start_time_; } + void SetStartTime(std::optional t) { start_time_ = t; } + + private: + std::vector samples_; + ProfileType type_; + absl::Duration duration_; + std::optional start_time_; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc + +#endif // TCMALLOC_INTERNAL_FAKE_PROFILE_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions.h b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions.h deleted file mode 100644 index 514dd4a73e72..000000000000 --- a/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions.h +++ /dev/null @@ -1,252 +0,0 @@ -// Copyright 2020 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef TCMALLOC_INTERNAL_LIFETIME_PREDICTIONS_H_ -#define TCMALLOC_INTERNAL_LIFETIME_PREDICTIONS_H_ - -#include -#include -#include - -#include "absl/algorithm/container.h" -#include "absl/base/const_init.h" -#include "absl/base/internal/low_level_alloc.h" -#include "absl/base/internal/spinlock.h" -#include "absl/debugging/stacktrace.h" -#include "absl/hash/hash.h" -#include "absl/time/clock.h" -#include "absl/time/time.h" -#include "tcmalloc/internal/linked_list.h" - -GOOGLE_MALLOC_SECTION_BEGIN -namespace tcmalloc { -namespace tcmalloc_internal { - -// Counts how many times we observed objects with a particular stack trace -// that were short lived/long lived. Each LifetimeStats object is associated -// with a particular allocation site (i.e., allocation stack trace) and each -// allocation site has at most one LifetimeStats object. All accesses to -// LifetimeStats objects need to be synchronized via the page heap lock. -class LifetimeStats : public TList::Elem { - public: - enum class Certainty { kLowCertainty, kHighCertainty }; - enum class Prediction { kShortLived, kLongLived }; - - void Update(Prediction prediction) { - if (prediction == Prediction::kShortLived) { - short_lived_++; - } else { - long_lived_++; - } - } - - Prediction Predict(Certainty certainty) { - if (certainty == Certainty::kLowCertainty) { - return (short_lived_ > long_lived_) ? Prediction::kShortLived - : Prediction::kLongLived; - } else { - // If little data was collected, predict as long-lived (current behavior). - return (short_lived_ > (long_lived_ + 10)) ? Prediction::kShortLived - : Prediction::kLongLived; - } - } - - // Reference counts are protected by LifetimeDatabase::table_lock_. - - // Increments the reference count of this entry. - void IncRef() { ++refcount_; } - - // Returns true if and only if the reference count reaches 0. - bool DecRef() { return --refcount_ == 0; } - - private: - uint64_t refcount_ = 1; - uint64_t short_lived_ = 0; - uint64_t long_lived_ = 0; -}; - -// Manages stack traces and statistics about their associated lifetimes. Since -// the database can fill up, old entries are evicted. Evicted entries need to -// survive as long as the last lifetime tracker referencing them and are thus -// reference-counted. -class LifetimeDatabase { - public: - struct Key { - int depth; // Number of PC values stored in array below - void* stack[kMaxStackDepth]; - - // Statically instantiate at the start of the allocation to acquire - // the allocation stack trace. - Key() { depth = absl::GetStackTrace(stack, kMaxStackDepth, 1); } - - template - friend H AbslHashValue(H h, const Key& c) { - return H::combine(H::combine_contiguous(std::move(h), c.stack, c.depth), - c.depth); - } - - bool operator==(const Key& other) const { - if (depth != other.depth) { - return false; - } - return std::equal(stack, stack + depth, other.stack); - } - }; - - // Captures statistics associated with the low-level allocator backing the - // memory used by the database. - struct ArenaStats { - uint64_t bytes_allocated; - }; - - static constexpr int kMaxDatabaseSize = 1024; - - LifetimeDatabase() {} - ~LifetimeDatabase() {} - - // Not copyable or movable - LifetimeDatabase(const LifetimeDatabase&) = delete; - LifetimeDatabase& operator=(const LifetimeDatabase&) = delete; - - // Identifies the current stack trace and returns a handle to the lifetime - // statistics associated with this stack trace. May run outside the page heap - // lock -- we therefore need to do our own locking. 
This increments the - // reference count of the lifetime stats object and the caller is responsible - // for calling RemoveLifetimeStatsReference when finished with the object. - LifetimeStats* LookupOrAddLifetimeStats(Key* k) { - absl::base_internal::SpinLockHolder h(&table_lock_); - auto it = table_.find(*k); - LifetimeStats* s; - if (it == table_.end()) { - MaybeEvictLRU(); - // Allocate a new entry using the low-level allocator, which is safe - // to call from within TCMalloc. - s = stats_allocator_.allocate(1); - new (s) LifetimeStats(); - table_.insert(std::make_pair(*k, s)); - stats_fifo_.append(s); - } else { - s = it->second; - UpdateLRU(s); - } - s->IncRef(); - return s; - } - - void RemoveLifetimeStatsReference(LifetimeStats* s) { - absl::base_internal::SpinLockHolder h(&table_lock_); - if (s->DecRef()) { - stats_allocator_.deallocate(s, 1); - } - } - - size_t size() const { - absl::base_internal::SpinLockHolder h(&table_lock_); - return table_.size(); - } - - size_t evictions() const { - absl::base_internal::SpinLockHolder h(&table_lock_); - return n_evictions_; - } - - static ArenaStats* arena_stats() { - static ArenaStats stats = {0}; - return &stats; - } - - protected: - static const int kMaxStackDepth = 64; - - static absl::base_internal::LowLevelAlloc::Arena* GetArena() { - static absl::base_internal::LowLevelAlloc::Arena* arena = - absl::base_internal::LowLevelAlloc::NewArena(0); - return arena; - } - - static uint64_t bytes_allocated_ ABSL_GUARDED_BY(table_lock_); - - void UpdateLRU(LifetimeStats* stats) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(table_lock_) { - stats_fifo_.remove(stats); - stats_fifo_.append(stats); - } - - // If an entry is evicted, it is returned (nullptr otherwise). - void MaybeEvictLRU() ABSL_EXCLUSIVE_LOCKS_REQUIRED(table_lock_) { - if (table_.size() < kMaxDatabaseSize) { - return; - } - n_evictions_++; - LifetimeStats* evict = stats_fifo_.first(); - stats_fifo_.remove(evict); - for (auto it = table_.begin(); it != table_.end(); ++it) { - if (it->second == evict) { - table_.erase(it); - if (evict->DecRef()) { - stats_allocator_.deallocate(evict, 1); - } - return; - } - } - CHECK_CONDITION(false); // Should not happen - } - - private: - template - class MyAllocator : public std::allocator { - public: - template - struct rebind { - using other = MyAllocator; - }; - - MyAllocator() noexcept {} - - template - explicit MyAllocator(const MyAllocator&) noexcept {} - - T* allocate(size_t num_objects, const void* = nullptr) { - size_t bytes = num_objects * sizeof(T); - arena_stats()->bytes_allocated += bytes; - return static_cast(absl::base_internal::LowLevelAlloc::AllocWithArena( - bytes, GetArena())); - } - - void deallocate(T* p, size_t num_objects) { - size_t bytes = num_objects * sizeof(T); - arena_stats()->bytes_allocated -= bytes; - absl::base_internal::LowLevelAlloc::Free(p); - } - }; - - MyAllocator stats_allocator_ ABSL_GUARDED_BY(table_lock_); - mutable absl::base_internal::SpinLock table_lock_{ - absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY}; - - // Stores the current mapping from allocation site to LifetimeStats. - std::unordered_map, std::equal_to, - MyAllocator>> - table_ ABSL_GUARDED_BY(table_lock_); - - // Stores the entries ordered by how many times they have been accessed. 
- TList stats_fifo_ ABSL_GUARDED_BY(table_lock_); - size_t n_evictions_ ABSL_GUARDED_BY(table_lock_) = 0; -}; - -} // namespace tcmalloc_internal -} // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END - -#endif // TCMALLOC_INTERNAL_LIFETIME_PREDICTIONS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions_test.cc deleted file mode 100644 index 4280890afe6c..000000000000 --- a/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions_test.cc +++ /dev/null @@ -1,156 +0,0 @@ -// Copyright 2019 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tcmalloc/internal/lifetime_predictions.h" - -#include "gmock/gmock.h" -#include "gtest/gtest.h" -#include "tcmalloc/testing/testutil.h" - -namespace tcmalloc { -namespace tcmalloc_internal { -namespace { - -class LifetimeDatabaseTest : public testing::Test { - protected: - LifetimeDatabase lifetime_database_; - - ABSL_ATTRIBUTE_NOINLINE ABSL_ATTRIBUTE_NO_TAIL_CALL LifetimeStats* - AllocateA() { - LifetimeDatabase::Key key; - return lifetime_database_.LookupOrAddLifetimeStats(&key); - } - - ABSL_ATTRIBUTE_NOINLINE ABSL_ATTRIBUTE_NO_TAIL_CALL LifetimeStats* - AllocateB() { - LifetimeDatabase::Key key; - return lifetime_database_.LookupOrAddLifetimeStats(&key); - } - - ABSL_ATTRIBUTE_NOINLINE ABSL_ATTRIBUTE_NO_TAIL_CALL LifetimeStats* - AllocateWithStacktraceId(int id) { - if (id == 0) { - LifetimeDatabase::Key key; - return lifetime_database_.LookupOrAddLifetimeStats(&key); - } else if (id % 2 == 0) { - return AllocateWithStacktraceId(id / 2); - } else { - return AllocateWithStacktraceId_2(id / 2); - } - } - - // Record a sufficiently large number of short-lived allocations to make - // a prediction short-lived, absent any long-lived allocations. - void MakeShortLived(LifetimeStats* stats, bool high_certainty) { - for (int i = 0; i < (high_certainty ? 100 : 2); i++) { - stats->Update(LifetimeStats::Prediction::kShortLived); - } - } - - private: - ABSL_ATTRIBUTE_NOINLINE ABSL_ATTRIBUTE_NO_TAIL_CALL LifetimeStats* - AllocateWithStacktraceId_2(int id) { - if (id == 0) { - LifetimeDatabase::Key key; - return lifetime_database_.LookupOrAddLifetimeStats(&key); - } else if (id % 2 == 0) { - return AllocateWithStacktraceId(id / 2); - } else { - return AllocateWithStacktraceId_2(id / 2); - } - } -}; - -TEST_F(LifetimeDatabaseTest, Basic) { - PRAGMA_NO_UNROLL - for (int i = 0; i < 2; i++) { - LifetimeStats* r1 = AllocateA(); - LifetimeStats* r2 = AllocateB(); - LifetimeStats* r3 = AllocateB(); - ASSERT_NE(nullptr, r1); - ASSERT_NE(nullptr, r2); - ASSERT_NE(nullptr, r3); - - // First iteration: set short-lived count. 
- if (i == 0) { - MakeShortLived(r1, false); - MakeShortLived(r2, true); - } else { - EXPECT_EQ(LifetimeStats::Prediction::kShortLived, - r1->Predict(LifetimeStats::Certainty::kLowCertainty)); - EXPECT_EQ(LifetimeStats::Prediction::kLongLived, - r1->Predict(LifetimeStats::Certainty::kHighCertainty)); - EXPECT_EQ(LifetimeStats::Prediction::kShortLived, - r2->Predict(LifetimeStats::Certainty::kLowCertainty)); - EXPECT_EQ(LifetimeStats::Prediction::kShortLived, - r2->Predict(LifetimeStats::Certainty::kHighCertainty)); - EXPECT_EQ(LifetimeStats::Prediction::kLongLived, - r3->Predict(LifetimeStats::Certainty::kLowCertainty)); - EXPECT_EQ(LifetimeStats::Prediction::kLongLived, - r3->Predict(LifetimeStats::Certainty::kHighCertainty)); - } - - lifetime_database_.RemoveLifetimeStatsReference(r1); - lifetime_database_.RemoveLifetimeStatsReference(r2); - lifetime_database_.RemoveLifetimeStatsReference(r3); - } -} - -TEST_F(LifetimeDatabaseTest, Eviction) { - const int kEntries = 5 * LifetimeDatabase::kMaxDatabaseSize; - - std::vector refs; - - PRAGMA_NO_UNROLL - for (int i = 0; i < kEntries; i++) { - LifetimeStats* r = AllocateWithStacktraceId(i); - refs.push_back(r); - - ASSERT_NE(nullptr, r); - if (i < LifetimeDatabase::kMaxDatabaseSize) { - MakeShortLived(r, true); - } - } - - // Check that even evicted entries are still accessible due to refcounts. - for (int i = 0; i < kEntries; i++) { - if (i < LifetimeDatabase::kMaxDatabaseSize) { - EXPECT_EQ(LifetimeStats::Prediction::kShortLived, - refs[i]->Predict(LifetimeStats::Certainty::kLowCertainty)); - } else { - EXPECT_EQ(LifetimeStats::Prediction::kLongLived, - refs[i]->Predict(LifetimeStats::Certainty::kLowCertainty)); - } - } - - EXPECT_EQ(LifetimeDatabase::kMaxDatabaseSize, lifetime_database_.size()); - EXPECT_EQ(kEntries - LifetimeDatabase::kMaxDatabaseSize, - lifetime_database_.evictions()); - - uint64_t before_bytes = lifetime_database_.arena_stats()->bytes_allocated; - - // Return all of the references, which should drop the remaining refcounts. - for (int i = 0; i < kEntries; i++) { - lifetime_database_.RemoveLifetimeStatsReference(refs[i]); - } - - uint64_t after_bytes = lifetime_database_.arena_stats()->bytes_allocated; - - // Check that this freed up memory - EXPECT_LT(after_bytes, before_bytes); -} - -} // namespace -} // namespace tcmalloc_internal -} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker.h b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker.h deleted file mode 100644 index d348dbe60931..000000000000 --- a/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker.h +++ /dev/null @@ -1,172 +0,0 @@ -// Copyright 2020 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef TCMALLOC_INTERNAL_LIFETIME_TRACKER_H_ -#define TCMALLOC_INTERNAL_LIFETIME_TRACKER_H_ - -#include "absl/base/internal/cycleclock.h" -#include "absl/time/time.h" -#include "tcmalloc/internal/clock.h" -#include "tcmalloc/internal/lifetime_predictions.h" -#include "tcmalloc/internal/linked_list.h" - -GOOGLE_MALLOC_SECTION_BEGIN -namespace tcmalloc { -namespace tcmalloc_internal { - -template -class LifetimeTrackerImpl { - public: - // A tracker is attached to an individual allocation and tracks its lifetime. - // This allocation can either be in a region or in the filler. It contains - // a pointer back to the LifetimeStats of the allocation site that generated - // this allocation, so that statistics can be updated. - struct Tracker : public TList::Elem { - // The deadline after which the object is considered long-lived. - uint64_t deadline = 0; - - // If the allocation is associated with a counterfactual, this contains - // the hypothetical location in the short-lived region (null otherwise). - void* counterfactual_ptr = nullptr; - - // Lifetime statistics associated with this allocation (will be updated when - // the lifetime is known). - LifetimeStatsT* lifetime; - - // The allocation this stat belongs to was predicted short-lived. - bool predicted_short_lived = false; - - // Is this element currently tracked by the lifetime tracker? - bool is_tracked() { return deadline != 0; } - - // Reset the element (implies not tracked). - void reset() { - deadline = 0; - counterfactual_ptr = nullptr; - } - }; - - struct Stats { - uint64_t expired_lifetimes = 0; - uint64_t overestimated_lifetimes = 0; - uint64_t short_lived_predictions = 0; - uint64_t long_lived_predictions = 0; - }; - - explicit LifetimeTrackerImpl( - LifetimeDatabaseT* lifetime_database, absl::Duration timeout, - Clock clock = Clock{.now = absl::base_internal::CycleClock::Now, - .freq = absl::base_internal::CycleClock::Frequency}) - : timeout_(absl::ToDoubleSeconds(timeout) * clock.freq()), - lifetime_database_(*lifetime_database), - clock_(clock) {} - - // Registers a donated allocation with the tracker. - void AddAllocation(Tracker* tracker, LifetimeStatsT* lifetime, - bool predicted_short_lived) { - CheckForLifetimeExpirations(); - - if (predicted_short_lived) { - stats_.short_lived_predictions++; - } else { - stats_.long_lived_predictions++; - } - - ASSERT(tracker != nullptr); - ASSERT(lifetime != nullptr); - tracker->deadline = clock_.now() + timeout_; - tracker->lifetime = lifetime; - tracker->predicted_short_lived = predicted_short_lived; - list_.append(tracker); - } - - // Remove an allocation from the tracker. This will stop tracking the - // allocation and record whether it was correctly predicted. - void RemoveAllocation(Tracker* tracker) { - CheckForLifetimeExpirations(); - - // This is not tracked anymore. - if (!tracker->is_tracked()) { - return; - } - - if (!tracker->predicted_short_lived) { - stats_.overestimated_lifetimes++; - } - - if (tracker->lifetime != nullptr) { - tracker->lifetime->Update(LifetimeStatsT::Prediction::kShortLived); - lifetime_database_.RemoveLifetimeStatsReference(tracker->lifetime); - } - - tracker->reset(); - - list_.remove(tracker); - } - - // Check whether any lifetimes in the tracker have passed the threshold after - // which they are not short-lived anymore. - void CheckForLifetimeExpirations() { - // TODO(mmaas): Expirations are fairly cheap, but there is a theoretical - // possibility of having an arbitrary number of expirations at once, which - // could affect tail latency. 
We may want to limit the number of pages we - // let expire per unit time. - uint64_t now = clock_.now(); - Tracker* tracker = TryGetExpired(now); - while (tracker != nullptr) { - ASSERT(tracker->is_tracked()); - - // A page that was predicted short-lived was actually long-lived. - if (tracker->predicted_short_lived) { - stats_.expired_lifetimes++; - } - - if (tracker->lifetime != nullptr) { - tracker->lifetime->Update(LifetimeStatsT::Prediction::kLongLived); - lifetime_database_.RemoveLifetimeStatsReference(tracker->lifetime); - } - - tracker->reset(); - tracker = TryGetExpired(now); - } - } - - Stats stats() const { return stats_; } - - private: - // Returns the earliest expiring entry, or nullptr if none expired. - Tracker* TryGetExpired(uint64_t now) { - if (!list_.empty() && list_.first()->deadline < now) { - Tracker* s = list_.first(); - list_.remove(s); - return s; - } - return nullptr; - } - - const uint64_t timeout_; - - TList list_; - Stats stats_; - LifetimeDatabaseT& lifetime_database_; - Clock clock_; -}; - -using LifetimeTracker = LifetimeTrackerImpl; - -} // namespace tcmalloc_internal -} // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END - -#endif // TCMALLOC_INTERNAL_LIFETIME_TRACKER_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker_test.cc deleted file mode 100644 index 78ed38ecae45..000000000000 --- a/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker_test.cc +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright 2019 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "tcmalloc/internal/lifetime_tracker.h" - -#include "gmock/gmock.h" -#include "gtest/gtest.h" -#include "absl/time/time.h" -#include "tcmalloc/internal/lifetime_predictions.h" - -namespace tcmalloc { -namespace tcmalloc_internal { -namespace { - -class MockLifetimeStats { - public: - enum class Prediction { kShortLived, kLongLived }; - MOCK_METHOD(void, Update, (Prediction prediction), ()); -}; - -class MockLifetimeDatabase { - public: - MOCK_METHOD(void, RemoveLifetimeStatsReference, (MockLifetimeStats*), ()); -}; - -using LifetimeTrackerUnderTest = - LifetimeTrackerImpl; - -class LifetimeTrackerTest : public testing::Test { - protected: - const Clock kFakeClock = - Clock{.now = FakeClock, .freq = GetFakeClockFrequency}; - - void Advance(absl::Duration d) { - clock_ += absl::ToDoubleSeconds(d) * GetFakeClockFrequency(); - } - - private: - static int64_t FakeClock() { return clock_; } - - static double GetFakeClockFrequency() { - return absl::ToDoubleNanoseconds(absl::Seconds(2)); - } - - static int64_t clock_; -}; - -int64_t LifetimeTrackerTest::clock_{0}; - -TEST_F(LifetimeTrackerTest, Basic) { - MockLifetimeDatabase database; - LifetimeTrackerUnderTest tracker(&database, absl::Seconds(0.5), kFakeClock); - MockLifetimeStats stats; - - LifetimeTrackerUnderTest::Tracker tracker1; - tracker.AddAllocation(&tracker1, &stats, false); - Advance(absl::Seconds(1)); - - EXPECT_CALL(stats, Update(MockLifetimeStats::Prediction::kLongLived)); - EXPECT_CALL(database, RemoveLifetimeStatsReference(&stats)); - - LifetimeTrackerUnderTest::Tracker tracker2; - tracker.AddAllocation(&tracker2, &stats, false); - - EXPECT_CALL(stats, Update(MockLifetimeStats::Prediction::kShortLived)); - EXPECT_CALL(database, RemoveLifetimeStatsReference(&stats)); - - Advance(absl::Seconds(0.1)); - tracker.RemoveAllocation(&tracker2); - - EXPECT_EQ(tracker.stats().expired_lifetimes, 0); - EXPECT_EQ(tracker.stats().overestimated_lifetimes, 1); - EXPECT_EQ(tracker.stats().short_lived_predictions, 0); - EXPECT_EQ(tracker.stats().long_lived_predictions, 2); -} - -TEST_F(LifetimeTrackerTest, ExpirationLogic) { - MockLifetimeDatabase database; - LifetimeTrackerUnderTest tracker(&database, absl::Seconds(0.5), kFakeClock); - - // Create 100 trackers, all predicted short-lived. Every second tracker will - // be long-lived and therefore expire. - const int kNumTrackers = 100; - std::vector trackers(kNumTrackers); - MockLifetimeStats stats[] = {MockLifetimeStats(), MockLifetimeStats()}; - - for (int i = 0; i < kNumTrackers; ++i) { - tracker.AddAllocation(&trackers[i], &stats[i % 2], true); - Advance(absl::Milliseconds(1)); - } - - EXPECT_CALL(stats[0], Update(MockLifetimeStats::Prediction::kShortLived)) - .Times(kNumTrackers / 2); - EXPECT_CALL(database, RemoveLifetimeStatsReference(&stats[0])) - .Times(kNumTrackers / 2); - - for (int i = 0; i < kNumTrackers; i += 2) { - tracker.RemoveAllocation(&trackers[i]); - } - - // After an additional 450ms, 1/4 of the allocations should have expired. 
- EXPECT_CALL(stats[1], Update(MockLifetimeStats::Prediction::kLongLived)) - .Times(kNumTrackers / 4); - EXPECT_CALL(database, RemoveLifetimeStatsReference(&stats[1])) - .Times(kNumTrackers / 4); - - Advance(absl::Milliseconds(450)); - tracker.CheckForLifetimeExpirations(); - - EXPECT_EQ(tracker.stats().expired_lifetimes, kNumTrackers / 4); - EXPECT_EQ(tracker.stats().overestimated_lifetimes, 0); - EXPECT_EQ(tracker.stats().short_lived_predictions, kNumTrackers); - EXPECT_EQ(tracker.stats().long_lived_predictions, 0); -} - -} // namespace -} // namespace tcmalloc_internal -} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/linked_list.h b/contrib/libs/tcmalloc/tcmalloc/internal/linked_list.h index 181a48027580..5651ed0f3012 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/linked_list.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/linked_list.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -23,6 +24,7 @@ #include "absl/base/attributes.h" #include "absl/base/optimization.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" GOOGLE_MALLOC_SECTION_BEGIN @@ -59,14 +61,17 @@ inline void* SLL_Pop(void** list) { // LinkedList forms an in-place linked list with its void* elements. class LinkedList { private: - void* list_; // Linked list. - uint32_t length_; // Current length. + void* list_ = nullptr; // Linked list. + uint32_t length_ = 0; // Current length. public: - void Init() { - list_ = nullptr; - length_ = 0; - } + constexpr LinkedList() = default; + + // Not copy constructible or movable. + LinkedList(const LinkedList&) = delete; + LinkedList(LinkedList&&) = delete; + LinkedList& operator=(const LinkedList&) = delete; + LinkedList& operator=(LinkedList&&) = delete; // Return current length of list size_t length() const { return length_; } @@ -101,7 +106,7 @@ class LinkedList { // PushBatch and PopBatch do not guarantee an ordering. void PushBatch(int N, void** batch) { - ASSERT(N > 0); + TC_ASSERT_GT(N, 0); for (int i = 0; i < N - 1; ++i) { SLL_SetNext(batch[i], batch[i + 1]); } @@ -117,7 +122,7 @@ class LinkedList { p = SLL_Next(p); } list_ = p; - ASSERT(length_ >= N); + TC_ASSERT_GE(length_, N); length_ -= N; } }; @@ -146,9 +151,9 @@ class TList { // potential aliasing and does unnecessary reloads after stores. Elem* next = next_; Elem* prev = prev_; - ASSERT(prev->next_ == this); + TC_ASSERT_EQ(prev->next_, this); prev->next_ = next; - ASSERT(next->prev_ == this); + TC_ASSERT_EQ(next->prev_, this); next->prev_ = prev; #ifndef NDEBUG prev_ = nullptr; @@ -177,6 +182,12 @@ class TList { // Initialize to empty list. constexpr TList() { head_.next_ = head_.prev_ = &head_; } + // Not copy constructible/movable. + TList(const TList&) = delete; + TList(TList&&) = delete; + TList& operator=(const TList&) = delete; + TList& operator=(TList&&) = delete; + bool empty() const { return head_.next_ == &head_; } // Return the length of the linked list. O(n). @@ -190,15 +201,15 @@ class TList { // Returns first element in the list. The list must not be empty. ABSL_ATTRIBUTE_RETURNS_NONNULL T* first() const { - ASSERT(!empty()); - ASSERT(head_.next_ != nullptr); + TC_ASSERT(!empty()); + TC_ASSERT_NE(head_.next_, nullptr); return static_cast(head_.next_); } // Returns last element in the list. The list must not be empty. 
ABSL_ATTRIBUTE_RETURNS_NONNULL T* last() const { - ASSERT(!empty()); - ASSERT(head_.prev_ != nullptr); + TC_ASSERT(!empty()); + TC_ASSERT_NE(head_.prev_, nullptr); return static_cast(head_.prev_); } diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_benchmark.cc b/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_benchmark.cc index 505b1b62c222..2db3a7bcf7b6 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_benchmark.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_benchmark.cc @@ -19,6 +19,7 @@ #include "absl/random/random.h" #include "benchmark/benchmark.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/linked_list.h" #include "tcmalloc/internal/logging.h" #include "tcmalloc/internal/mock_span.h" @@ -33,7 +34,6 @@ void BM_PushPop(benchmark::State& state) { const int sequential_calls = state.range(1); LinkedList list; - list.Init(); const size_t size = pointers * sizeof(void*); std::vector v(sequential_calls); @@ -68,7 +68,6 @@ void BM_PushPopBatch(benchmark::State& state) { const int batch_size = state.range(1); LinkedList list; - list.Init(); const size_t size = pointers * sizeof(void*); const int kNumberOfObjects = 64 << 10; @@ -112,7 +111,7 @@ static void BM_AppendRemove(benchmark::State& state) { // Create MockSpans in append order for (int i = 0; i < sequential_calls; i++) { MockSpan* s = MockSpan::New(i); - CHECK_CONDITION(s != nullptr); + TC_CHECK_NE(s, nullptr); vappend[i] = s; } diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_test.cc index 3299bca8d873..66f9bcf4454a 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_test.cc @@ -14,16 +14,16 @@ #include "tcmalloc/internal/linked_list.h" +#include #include #include +#include #include #include "gtest/gtest.h" #include "absl/container/flat_hash_set.h" -#include "absl/container/node_hash_set.h" #include "absl/random/random.h" -#include "benchmark/benchmark.h" #include "tcmalloc/internal/mock_span.h" namespace tcmalloc { @@ -32,9 +32,12 @@ namespace { class LinkedListTest : public ::testing::Test { protected: - void SetUp() override { list_.Init(); } - LinkedList list_; + + static_assert(!std::is_copy_constructible::value, + "LinkedList should not be copyable"); + static_assert(!std::is_move_constructible::value, + "LinkedList should not be movable"); }; TEST_F(LinkedListTest, PushPop) { @@ -117,6 +120,11 @@ TEST_F(LinkedListTest, PushPopBatch) { class TListTest : public ::testing::Test { protected: MockSpanList list_; + + static_assert(!std::is_copy_constructible::value, + "TList should not be copyable"); + static_assert(!std::is_move_constructible::value, + "TList should not be movable"); }; TEST_F(TListTest, AppendPushPop) { diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/linux_syscall_support.h b/contrib/libs/tcmalloc/tcmalloc/internal/linux_syscall_support.h index 367c8a9e4dd6..9346b8d43584 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/linux_syscall_support.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/linux_syscall_support.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,6 +16,14 @@ #ifndef TCMALLOC_INTERNAL_LINUX_SYSCALL_SUPPORT_H_ #define TCMALLOC_INTERNAL_LINUX_SYSCALL_SUPPORT_H_ +#ifdef __linux__ +#ifdef __has_include +#if __has_include() +#include +#endif // __has_include() 
+#endif // __has_include +#endif // __linux__ + /* include/uapi/linux/rseq.h */ struct kernel_rseq { @@ -41,11 +50,6 @@ static_assert(sizeof(kernel_rseq) == (4 * sizeof(unsigned long long)), "Unexpected size for rseq structure"); struct kernel_rseq_cs { - unsigned version; - unsigned flags; - unsigned long long start_ip; - unsigned long long post_commit_offset; - unsigned long long abort_ip; // This is aligned, per upstream RSEQ specification. } __attribute__((aligned(4 * sizeof(unsigned long long)))); @@ -55,7 +59,6 @@ static_assert(sizeof(kernel_rseq_cs) == (4 * sizeof(unsigned long long)), #if !defined(__NR_rseq) #if defined(__x86_64__) #define __NR_rseq 334 -#define __NR_membarrier 324 #elif defined(__aarch64__) #define __NR_rseq 293 #elif defined(__PPC__) @@ -63,4 +66,8 @@ static_assert(sizeof(kernel_rseq_cs) == (4 * sizeof(unsigned long long)), #endif #endif +#ifndef KPF_ZERO_PAGE +#define KPF_ZERO_PAGE 24 +#endif + #endif // TCMALLOC_INTERNAL_LINUX_SYSCALL_SUPPORT_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/logging.cc b/contrib/libs/tcmalloc/tcmalloc/internal/logging.cc index 2b70bc1502a7..a53c26aa15bb 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/logging.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/logging.cc @@ -14,7 +14,7 @@ #include "tcmalloc/internal/logging.h" -#include +#include #include #include #include @@ -22,19 +22,31 @@ #include #include +#include +#include +#include #include "absl/base/attributes.h" #include "absl/base/const_init.h" #include "absl/base/internal/spinlock.h" #include "absl/base/macros.h" #include "absl/debugging/stacktrace.h" +#include "absl/strings/ascii.h" +#include "absl/strings/str_format.h" +#include "absl/strings/string_view.h" +#include "tcmalloc/internal/allocation_guard.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/environment.h" #include "tcmalloc/internal/parameter_accessors.h" -#include "tcmalloc/malloc_extension.h" GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { +ABSL_CONST_INIT SampleUserDataSupport::CreateSampleUserDataCallback* SampleUserDataSupport::create_sample_user_data_callback_ = nullptr; +ABSL_CONST_INIT SampleUserDataSupport::CopySampleUserDataCallback* SampleUserDataSupport::copy_sample_user_data_callback_ = nullptr; +ABSL_CONST_INIT SampleUserDataSupport::DestroySampleUserDataCallback* SampleUserDataSupport::destroy_sample_user_data_callback_ = nullptr; + // Variables for storing crash output. Allocated statically since we // may not be able to heap-allocate while crashing. ABSL_CONST_INIT static absl::base_internal::SpinLock crash_lock( @@ -42,7 +54,9 @@ ABSL_CONST_INIT static absl::base_internal::SpinLock crash_lock( static bool crashed = false; static const size_t kStatsBufferSize = 16 << 10; +#ifndef __APPLE__ static char stats_buffer[kStatsBufferSize] = {0}; +#endif // __APPLE__ static void WriteMessage(const char* msg, int length) { (void)::write(STDERR_FILENO, msg, length); @@ -50,91 +64,106 @@ static void WriteMessage(const char* msg, int length) { void (*log_message_writer)(const char* msg, int length) = WriteMessage; -class Logger { - public: - bool Add(const LogItem& item); - bool AddStr(const char* str, int n); - bool AddNum(uint64_t num, int base); // base must be 10 or 16. 
- - static constexpr int kBufSize = 512; - char* p_; - char* end_; - char buf_[kBufSize]; - - StackTrace trace; -}; - -static Logger FormatLog(bool with_stack, const char* filename, int line, - LogItem a, LogItem b, LogItem c, LogItem d) { - Logger state; - state.p_ = state.buf_; - state.end_ = state.buf_ + sizeof(state.buf_); - // clang-format off - state.AddStr(filename, strlen(filename)) && - state.AddStr(":", 1) && - state.AddNum(line, 10) && - state.AddStr("]", 1) && - state.Add(a) && - state.Add(b) && - state.Add(c) && - state.Add(d); - // clang-format on - - if (with_stack) { - state.trace.depth = - absl::GetStackTrace(state.trace.stack, kMaxStackDepth, 1); - state.Add(LogItem("@")); - for (int i = 0; i < state.trace.depth; i++) { - state.Add(LogItem(state.trace.stack[i])); - } - } +// If this failure occurs during "bazel test", writes a warning for Bazel to +// display. +static void RecordBazelWarning(absl::string_view type, + absl::string_view error) { + constexpr absl::string_view kHeaderSuffix = " error detected: "; + + const char* warning_file = thread_safe_getenv("TEST_WARNINGS_OUTPUT_FILE"); + if (!warning_file) return; // Not a bazel test. + + int fd = open(warning_file, O_CREAT | O_WRONLY | O_APPEND, 0644); + if (fd == -1) return; + (void)write(fd, type.data(), type.size()); + (void)write(fd, kHeaderSuffix.data(), kHeaderSuffix.size()); + (void)write(fd, error.data(), error.size()); + (void)write(fd, "\n", 1); + close(fd); +} - // Teminate with newline - if (state.p_ >= state.end_) { - state.p_ = state.end_ - 1; +// If this failure occurs during a gUnit test, writes an XML file describing the +// error type. Note that we cannot use ::testing::Test::RecordProperty() +// because it doesn't write the XML file if a test crashes (which we're about to +// do here). So we write directly to the XML file instead. +// +static void RecordTestFailure(absl::string_view detector, + absl::string_view error) { + const char* xml_file = thread_safe_getenv("XML_OUTPUT_FILE"); + if (!xml_file) return; // Not a gUnit test. + + // Record test failure for Sponge. + constexpr absl::string_view kXmlHeaderPart1 = + "" + "" + " " + " " + " " + " " + " "; + constexpr absl::string_view kXmlFooterPart2 = + " detected a memory error. See the test log for full report." + " " + ""; + + int fd = open(xml_file, O_CREAT | O_WRONLY | O_TRUNC, 0644); + if (fd == -1) return; + (void)write(fd, kXmlHeaderPart1.data(), kXmlHeaderPart1.size()); + for (char c : detector) { + c = absl::ascii_tolower(c); + (void)write(fd, &c, 1); } - *state.p_ = '\n'; - state.p_++; - - return state; + (void)write(fd, kXmlHeaderPart2.data(), kXmlHeaderPart2.size()); + (void)write(fd, error.data(), error.size()); + (void)write(fd, kXmlFooterPart1.data(), kXmlFooterPart1.size()); + (void)write(fd, detector.data(), detector.size()); + (void)write(fd, kXmlFooterPart2.data(), kXmlFooterPart2.size()); + close(fd); } - -ABSL_ATTRIBUTE_NOINLINE -void Log(LogMode mode, const char* filename, int line, LogItem a, LogItem b, - LogItem c, LogItem d) { - Logger state = FormatLog(mode == kLogWithStack, filename, line, a, b, c, d); - int msglen = state.p_ - state.buf_; - (*log_message_writer)(state.buf_, msglen); +// +// If this crash occurs in a test, records test failure summaries. +// +// detector is the bug detector or tools that found the error +// error contains the type of error to record. 
+void RecordCrash(absl::string_view detector, absl::string_view error) { + TC_ASSERT(!detector.empty()); + TC_ASSERT(!error.empty()); + + RecordBazelWarning(detector, error); + RecordTestFailure(detector, error); } ABSL_ATTRIBUTE_NOINLINE -void Crash(CrashMode mode, const char* filename, int line, LogItem a, LogItem b, - LogItem c, LogItem d) { - Logger state = FormatLog(true, filename, line, a, b, c, d); - - int msglen = state.p_ - state.buf_; +ABSL_ATTRIBUTE_NORETURN +static void Crash(const char* filename, int line, const char* msg, + size_t msglen, bool oom) { + StackTrace trace; + trace.depth = absl::GetStackTrace(trace.stack, kMaxStackDepth, 1); // FailureSignalHandler mallocs for various logging attempts. // We might be crashing holding tcmalloc locks. // We're substantially less likely to try to take those locks // (and thus deadlock until the alarm timer fires) if we disable sampling. #ifndef __APPLE__ - if (&TCMalloc_Internal_SetProfileSamplingRate != nullptr) { - TCMalloc_Internal_SetProfileSamplingRate(0); + if (&TCMalloc_Internal_SetProfileSamplingInterval != nullptr) { + TCMalloc_Internal_SetProfileSamplingInterval(0); } #endif // __APPLE__ bool first_crash = false; { - absl::base_internal::SpinLockHolder l(&crash_lock); + AllocationGuardSpinLockHolder l(&crash_lock); if (!crashed) { crashed = true; first_crash = true; } } - (*log_message_writer)(state.buf_, msglen); - if (first_crash && mode == kCrashWithStats) { + (*log_message_writer)(msg, msglen); + if (first_crash && oom) { #ifndef __APPLE__ if (&TCMalloc_Internal_GetStats != nullptr) { size_t n = TCMalloc_Internal_GetStats(stats_buffer, kStatsBufferSize); @@ -146,128 +175,107 @@ void Crash(CrashMode mode, const char* filename, int line, LogItem a, LogItem b, abort(); } -bool Logger::Add(const LogItem& item) { - // Separate real items with spaces - if (item.tag_ != LogItem::kEnd && p_ < end_) { - *p_ = ' '; - p_++; - } +ABSL_ATTRIBUTE_NORETURN void CheckFailed(const char* file, int line, + const char* msg, int msglen) { + Crash(file, line, msg, msglen, false); +} - switch (item.tag_) { - case LogItem::kStr: - return AddStr(item.u_.str, strlen(item.u_.str)); - case LogItem::kUnsigned: - return AddNum(item.u_.unum, 10); - case LogItem::kSigned: - if (item.u_.snum < 0) { - // The cast to uint64_t is intentionally before the negation - // so that we do not attempt to negate -2^63. - return AddStr("-", 1) && - AddNum(-static_cast(item.u_.snum), 10); - } else { - return AddNum(static_cast(item.u_.snum), 10); - } - case LogItem::kPtr: - return AddStr("0x", 2) && - AddNum(reinterpret_cast(item.u_.ptr), 16); - default: - return false; - } +void CrashWithOOM(size_t alloc_size) { + char buf[512]; + int n = absl::SNPrintF(buf, sizeof(buf), + "Unable to allocate %zu (new failed)", alloc_size); + Crash("tcmalloc", 0, buf, n, true); } -bool Logger::AddStr(const char* str, int n) { - ptrdiff_t remaining = end_ - p_; - if (remaining < n) { - // Try to log a truncated message if there is some space. 
- static constexpr absl::string_view kDots = "..."; - if (remaining > kDots.size() + 1) { - int truncated = remaining - kDots.size(); - memcpy(p_, str, truncated); - p_ += truncated; - memcpy(p_, kDots.data(), kDots.size()); - p_ += kDots.size(); - - return true; - } - return false; - } else { - memcpy(p_, str, n); - p_ += n; - return true; +void PrintStackTrace(void* const* stack_frames, size_t depth) { + for (size_t i = 0; i < depth; ++i) { + TC_LOG(" @ %p", stack_frames[i]); } } -bool Logger::AddNum(uint64_t num, int base) { - static const char kDigits[] = "0123456789abcdef"; - char space[22]; // more than enough for 2^64 in smallest supported base (10) - char* end = space + sizeof(space); - char* pos = end; - do { - pos--; - *pos = kDigits[num % base]; - num /= base; - } while (num > 0 && pos > space); - return AddStr(pos, end - pos); +void PrintStackTraceFromSignalHandler(void* context) { + void* stack_frames[kMaxStackDepth]; + size_t depth = absl::GetStackTraceWithContext(stack_frames, kMaxStackDepth, + 1, + context, nullptr); + PrintStackTrace(stack_frames, depth); } -PbtxtRegion::PbtxtRegion(Printer* out, PbtxtRegionType type, int indent) - : out_(out), type_(type), indent_(indent) { +PbtxtRegion::PbtxtRegion(Printer& out, PbtxtRegionType type) + : out_(&out), type_(type) { switch (type_) { case kTop: break; case kNested: - out_->printf("{"); + out_->Append("{"); break; } - ++indent_; } PbtxtRegion::~PbtxtRegion() { - --indent_; - out_->printf("\n"); - for (int i = 0; i < indent_; i++) { - out_->printf(" "); - } switch (type_) { case kTop: break; case kNested: - out_->printf("}"); + out_->Append("}"); break; } } -void PbtxtRegion::NewLineAndIndent() { - out_->printf("\n"); - for (int i = 0; i < indent_; i++) { - out_->printf(" "); - } -} +#ifndef NDEBUG +static std::atomic injected_i64; +static std::atomic injected_d; +static std::atomic injected_b; +#endif // NDEBUG void PbtxtRegion::PrintI64(absl::string_view key, int64_t value) { - NewLineAndIndent(); - out_->printf("%s: %" PRIi64, key, value); +#ifndef NDEBUG + int64_t* ptr = injected_i64.load(std::memory_order_acquire); + if (ptr) { + value = *ptr; + } +#endif + + out_->Append(" ", key, ": ", value); } void PbtxtRegion::PrintDouble(absl::string_view key, double value) { - NewLineAndIndent(); - out_->printf("%s: %f", key, value); +#ifndef NDEBUG + double* ptr = injected_d.load(std::memory_order_acquire); + if (ptr) { + value = *ptr; + } +#endif + + out_->Append(" ", key, ": ", value); } void PbtxtRegion::PrintBool(absl::string_view key, bool value) { - NewLineAndIndent(); - out_->printf("%s: %s", key, value ? "true" : "false"); +#ifndef NDEBUG + bool* ptr = injected_b.load(std::memory_order_acquire); + if (ptr) { + value = *ptr; + } +#endif + + out_->Append(" ", key, value ? 
": true" : ": false"); } void PbtxtRegion::PrintRaw(absl::string_view key, absl::string_view value) { - NewLineAndIndent(); - out_->printf("%s: %s", key, value); + out_->Append(" ", key, ": ", value); +} + +#ifndef NDEBUG +void PbtxtRegion::InjectValues(int64_t* i64, double* d, bool* b) { + injected_i64.store(i64, std::memory_order_release); + injected_d.store(d, std::memory_order_release); + injected_b.store(b, std::memory_order_release); } +#endif // NDEBUG PbtxtRegion PbtxtRegion::CreateSubRegion(absl::string_view key) { - NewLineAndIndent(); - out_->printf("%s ", key); - PbtxtRegion sub(out_, kNested, indent_); + out_->Append(" ", key, " "); + PbtxtRegion sub(*out_, kNested); return sub; } diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/logging.h b/contrib/libs/tcmalloc/tcmalloc/internal/logging.h index 4d42aa40a9fe..e41732dc4254 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/logging.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/logging.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -20,11 +21,24 @@ #include #include -#include "absl/base/internal/per_thread_tls.h" +#include +#include +#include +#include +#include +#include +#include + +#include "absl/base/attributes.h" +#include "absl/base/internal/sysinfo.h" #include "absl/base/optimization.h" +#include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "absl/strings/string_view.h" +#include "absl/time/time.h" +#include "tcmalloc/internal/allocation_guard.h" #include "tcmalloc/internal/config.h" +#include "tcmalloc/malloc_extension.h" //------------------------------------------------------------------- // Utility routines @@ -33,19 +47,103 @@ // Safe logging helper: we write directly to the stderr file // descriptor and avoid FILE buffering because that may invoke // malloc(). 
-// -// Example: -// Log(kLog, __FILE__, __LINE__, "error", bytes); GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { +class SampleUserDataSupport { +public: + using CreateSampleUserDataCallback = void*(); + using CopySampleUserDataCallback = void*(void*); + using DestroySampleUserDataCallback = void(void*); + + class UserData { + public: + static UserData Make() { + return UserData{CreateSampleUserData()}; + } + + constexpr UserData() noexcept : ptr_(nullptr) {} + + UserData(const UserData& that) noexcept : ptr_(CopySampleUserData(that.ptr_)) {} + UserData& operator=(const UserData& that) noexcept { + DestroySampleUserData(ptr_); + ptr_ = CopySampleUserData(that.ptr_); + return *this; + } + + UserData(UserData&& that) noexcept : ptr_(that.ptr_) { + that.ptr_ = nullptr; + } + UserData& operator=(UserData&& that) noexcept { + if (this == &that) { + return *this; + } + DestroySampleUserData(ptr_); + ptr_ = that.ptr_; + that.ptr_ = nullptr; + return *this; + } + void Reset() { + DestroySampleUserData(ptr_); + ptr_ = nullptr; + } + + ~UserData() { + DestroySampleUserData(ptr_); + } + + void* Get() const { return ptr_; } + private: + UserData(void* ptr) noexcept : ptr_(ptr) {} + private: + void* ptr_; + }; + + static void Enable(CreateSampleUserDataCallback create, + CopySampleUserDataCallback copy, + DestroySampleUserDataCallback destroy) { + create_sample_user_data_callback_ = create; + copy_sample_user_data_callback_ = copy; + destroy_sample_user_data_callback_ = destroy; + } +private: + static void* CreateSampleUserData() { + if (create_sample_user_data_callback_ != nullptr) { + return create_sample_user_data_callback_(); + } + return nullptr; + } + + static void* CopySampleUserData(void* ptr) noexcept { + if (copy_sample_user_data_callback_ != nullptr) { + return copy_sample_user_data_callback_(ptr); + } + return nullptr; + } + + static void DestroySampleUserData(void* ptr) noexcept { + if (destroy_sample_user_data_callback_ != nullptr) { + destroy_sample_user_data_callback_(ptr); + } + } + ABSL_CONST_INIT static CreateSampleUserDataCallback* create_sample_user_data_callback_; + ABSL_CONST_INIT static CopySampleUserDataCallback* copy_sample_user_data_callback_; + ABSL_CONST_INIT static DestroySampleUserDataCallback* destroy_sample_user_data_callback_; +}; + static constexpr int kMaxStackDepth = 64; +// An opaque handle type used to identify allocations. +using AllocHandle = int64_t; + // size/depth are made the same size as a pointer so that some generic // code below can conveniently cast them back and forth to void*. struct StackTrace { + // An opaque handle used by allocator to uniquely identify the sampled + // memory block. + AllocHandle sampled_alloc_handle; // For small sampled objects, we allocate a full span to hold the // sampled object. 
However to avoid disturbing fragmentation @@ -59,113 +157,201 @@ struct StackTrace { uintptr_t requested_size; uintptr_t requested_alignment; uintptr_t allocated_size; // size after sizeclass/page rounding + bool requested_size_returning; - uintptr_t depth; // Number of PC values stored in array below - void* stack[kMaxStackDepth]; + uint8_t access_hint; + bool cold_allocated; // weight is the expected number of *bytes* that were requested // between the previous sample and this one size_t weight; - void* user_data; + SampleUserDataSupport::UserData user_data; - template - friend H AbslHashValue(H h, const StackTrace& t) { - // As we use StackTrace as a key-value node in StackTraceTable, we only - // produce a hasher for the fields used as keys. - return H::combine(H::combine_contiguous(std::move(h), t.stack, t.depth), - t.depth, t.requested_size, t.requested_alignment, - t.allocated_size - ); - } -}; + // Timestamp of allocation. + absl::Time allocation_time; -enum LogMode { - kLog, // Just print the message - kLogWithStack, // Print the message and a stack trace -}; + Profile::Sample::GuardedStatus guarded_status; -class Logger; + // How the memory was allocated (new/malloc/etc.) + Profile::Sample::AllocationType allocation_type; -// A LogItem holds any of the argument types that can be passed to Log() -class LogItem { - public: - LogItem() : tag_(kEnd) {} - LogItem(const char* v) : tag_(kStr) { u_.str = v; } - LogItem(int v) : tag_(kSigned) { u_.snum = v; } - LogItem(long v) : tag_(kSigned) { u_.snum = v; } - LogItem(long long v) : tag_(kSigned) { u_.snum = v; } - LogItem(unsigned int v) : tag_(kUnsigned) { u_.unum = v; } - LogItem(unsigned long v) : tag_(kUnsigned) { u_.unum = v; } - LogItem(unsigned long long v) : tag_(kUnsigned) { u_.unum = v; } - LogItem(const void* v) : tag_(kPtr) { u_.ptr = v; } + // If not nullptr, this is the start address of the span corresponding to this + // sampled allocation. This may be nullptr for cases where it is not useful + // for residency analysis such as for peakheapz. + void* span_start_address = nullptr; - private: - friend class Logger; - enum Tag { kStr, kSigned, kUnsigned, kPtr, kEnd }; - Tag tag_; - union { - const char* str; - const void* ptr; - int64_t snum; - uint64_t unum; - } u_; + uintptr_t depth; // Number of PC values stored in array below + // Place stack as last member because it might not all be accessed. + void* stack[kMaxStackDepth]; }; -extern void Log(LogMode mode, const char* filename, int line, LogItem a, - LogItem b = LogItem(), LogItem c = LogItem(), - LogItem d = LogItem()); +#define TC_LOG(msg, ...) \ + tcmalloc::tcmalloc_internal::LogImpl("%d %s:%d] " msg "\n", __FILE__, \ + __LINE__, ##__VA_ARGS__) + +void RecordCrash(absl::string_view detector, absl::string_view error); +ABSL_ATTRIBUTE_NORETURN void CrashWithOOM(size_t alloc_size); +ABSL_ATTRIBUTE_NORETURN void CheckFailed(const char* file, int line, + const char* msg, int msglen); + +template +ABSL_ATTRIBUTE_NORETURN ABSL_ATTRIBUTE_NOINLINE void CheckFailed( + const char* func, const char* file, int line, + const absl::FormatSpec& format, + const Args&... 
args) { + AllocationGuard no_allocations; + char buf[512]; + int n = + absl::SNPrintF(buf, sizeof(buf), format, absl::base_internal::GetTID(), + file, line, func, args...); + buf[sizeof(buf) - 1] = 0; + CheckFailed(file, line, buf, std::min(n, sizeof(buf) - 1)); +} + +void PrintStackTrace(void* const* stack_frames, size_t depth); +void PrintStackTraceFromSignalHandler(void* context); + +// Tests can override this function to collect logging messages. +extern void (*log_message_writer)(const char* msg, int length); -enum CrashMode { - kCrash, // Print the message and crash - kCrashWithStats // Print the message, some stats, and crash +template +ABSL_ATTRIBUTE_NOINLINE void LogImpl( + const absl::FormatSpec& format, const Args&... args) { + char buf[512]; + int n; + { + AllocationGuard no_allocations; + n = absl::SNPrintF(buf, sizeof(buf), format, absl::base_internal::GetTID(), + args...); + } + buf[sizeof(buf) - 1] = 0; + (*log_message_writer)(buf, std::min(n, sizeof(buf) - 1)); +} + +// TC_BUG unconditionally aborts the program with the message. +#define TC_BUG(msg, ...) \ + tcmalloc::tcmalloc_internal::CheckFailed(__FUNCTION__, __FILE__, __LINE__, \ + "%d %s:%d] CHECK in %s: " msg "\n", \ + ##__VA_ARGS__) + +// TC_CHECK* check the given condition in both debug and release builds, +// and abort the program if the condition is false. +// Macros accept an additional optional formatted message, for example: +// TC_CHECK_EQ(a, b); +// TC_CHECK_EQ(a, b, "ptr=%p flags=%d", ptr, flags); +#define TC_CHECK(a, ...) TCMALLOC_CHECK_IMPL(a, #a, "" __VA_ARGS__) +#define TC_CHECK_EQ(a, b, ...) \ + TCMALLOC_CHECK_OP((a), ==, (b), #a, #b, "" __VA_ARGS__) +#define TC_CHECK_NE(a, b, ...) \ + TCMALLOC_CHECK_OP((a), !=, (b), #a, #b, "" __VA_ARGS__) +#define TC_CHECK_LT(a, b, ...) \ + TCMALLOC_CHECK_OP((a), <, (b), #a, #b, "" __VA_ARGS__) +#define TC_CHECK_LE(a, b, ...) \ + TCMALLOC_CHECK_OP((a), <=, (b), #a, #b, "" __VA_ARGS__) +#define TC_CHECK_GT(a, b, ...) \ + TCMALLOC_CHECK_OP((a), >, (b), #a, #b, "" __VA_ARGS__) +#define TC_CHECK_GE(a, b, ...) \ + TCMALLOC_CHECK_OP((a), >=, (b), #a, #b, "" __VA_ARGS__) + +// TC_ASSERT* are debug-only versions of TC_CHECK*. +#ifndef NDEBUG +#define TC_ASSERT TC_CHECK +#define TC_ASSERT_EQ TC_CHECK_EQ +#define TC_ASSERT_NE TC_CHECK_NE +#define TC_ASSERT_LT TC_CHECK_LT +#define TC_ASSERT_LE TC_CHECK_LE +#define TC_ASSERT_GT TC_CHECK_GT +#define TC_ASSERT_GE TC_CHECK_GE +#else // #ifndef NDEBUG +#define TC_ASSERT(a, ...) TC_CHECK(true || (a), ##__VA_ARGS__) +#define TC_ASSERT_EQ(a, b, ...) TC_ASSERT((a) == (b), ##__VA_ARGS__) +#define TC_ASSERT_NE(a, b, ...) TC_ASSERT((a) == (b), ##__VA_ARGS__) +#define TC_ASSERT_LT(a, b, ...) TC_ASSERT((a) == (b), ##__VA_ARGS__) +#define TC_ASSERT_LE(a, b, ...) TC_ASSERT((a) == (b), ##__VA_ARGS__) +#define TC_ASSERT_GT(a, b, ...) TC_ASSERT((a) == (b), ##__VA_ARGS__) +#define TC_ASSERT_GE(a, b, ...) TC_ASSERT((a) == (b), ##__VA_ARGS__) +#endif // #ifndef NDEBUG + +#define TCMALLOC_CHECK_IMPL(condition, str, msg, ...) \ + ({ \ + ABSL_PREDICT_TRUE((condition)) \ + ? (void)0 : TC_BUG("%s (false) " msg, str, ##__VA_ARGS__); \ + }) + +#define TCMALLOC_CHECK_OP(c1, op, c2, cs1, cs2, msg, ...) 
\ + ({ \ + const auto& cc1 = (c1); \ + const auto& cc2 = (c2); \ + if (ABSL_PREDICT_FALSE(!(cc1 op cc2))) { \ + TC_BUG("%s " #op " %s (%v " #op " %v) " msg, cs1, cs2, \ + tcmalloc::tcmalloc_internal::FormatConvert(cc1), \ + tcmalloc::tcmalloc_internal::FormatConvert(cc2), ##__VA_ARGS__); \ + } \ + (void)0; \ + }) + +// absl::SNPrintF rejects to print pointers with %v, +// so we need this little dance to convenience it. +struct PtrFormatter { + const volatile void* val; + template + friend void AbslStringify(Sink& sink, const PtrFormatter& p) { + absl::Format(&sink, "%p", p.val); + } }; -ABSL_ATTRIBUTE_NORETURN -void Crash(CrashMode mode, const char* filename, int line, LogItem a, - LogItem b = LogItem(), LogItem c = LogItem(), LogItem d = LogItem()); +template +PtrFormatter FormatConvert(T* v) { + return PtrFormatter{v}; +} -// Tests can override this function to collect logging messages. -extern void (*log_message_writer)(const char* msg, int length); +inline PtrFormatter FormatConvert(std::nullptr_t v) { return PtrFormatter{v}; } + +template +struct OptionalFormatter { + const T* val; + template + friend void AbslStringify(Sink& sink, const OptionalFormatter& p) { + if (p.val != nullptr) { + absl::Format(&sink, "%v", *p.val); + } else { + absl::Format(&sink, "???"); + } + } +}; -// Like assert(), but executed even in NDEBUG mode -#undef CHECK_CONDITION -#define CHECK_CONDITION(cond) \ - (ABSL_PREDICT_TRUE(cond) ? (void)0 \ - : (::tcmalloc::tcmalloc_internal::Crash( \ - ::tcmalloc::tcmalloc_internal::kCrash, \ - __FILE__, __LINE__, #cond))) +template +OptionalFormatter FormatConvert(const std::optional& v) { + return {v.has_value() ? &*v : nullptr}; +} -// Our own version of assert() so we can avoid hanging by trying to do -// all kinds of goofy printing while holding the malloc lock. -#ifndef NDEBUG -#define ASSERT(cond) CHECK_CONDITION(cond) -#else -#define ASSERT(cond) ((void)0) -#endif +template +const T& FormatConvert(const T& v) { + return v; +} // Print into buffer class Printer { private: - char* buf_; // Where should we write next - int left_; // Space left in buffer (including space for \0) - int required_; // Space we needed to complete all printf calls up to this - // point + char* buf_; // Where should we write next + size_t left_; // Space left in buffer (including space for \0) + size_t required_; // Space we needed to complete all printf calls up to this + // point public: // REQUIRES: "length > 0" - Printer(char* buf, int length) : buf_(buf), left_(length), required_(0) { - ASSERT(length > 0); + Printer(char* buf, size_t length) : buf_(buf), left_(length), required_(0) { + TC_ASSERT_GT(length, 0); buf[0] = '\0'; } + Printer(const Printer&) = delete; + Printer(Printer&&) = default; + template void printf(const absl::FormatSpec& format, const Args&... args) { - ASSERT(left_ >= 0); - if (left_ <= 0) { - return; - } - + AllocationGuard enforce_no_alloc; const int r = absl::SNPrintF(buf_, left_, format, args...); if (r < 0) { left_ = 0; @@ -181,7 +367,37 @@ class Printer { } } - int SpaceRequired() const { return required_; } + template + void Append(const Args&... 
args) { + AllocationGuard enforce_no_alloc; + AppendPieces({static_cast(args).Piece()...}); + } + + size_t SpaceRequired() const { return required_; } + + private: + void AppendPieces(std::initializer_list pieces) { + size_t total_size = 0; + for (const absl::string_view piece : pieces) total_size += piece.size(); + + required_ += total_size; + if (left_ < total_size) { + left_ = 0; + return; + } + + for (const absl::string_view& piece : pieces) { + const size_t this_size = piece.size(); + if (this_size == 0) { + continue; + } + + memcpy(buf_, piece.data(), this_size); + buf_ += this_size; + } + + left_ -= total_size; + } }; enum PbtxtRegionType { kTop, kNested }; @@ -191,7 +407,7 @@ enum PbtxtRegionType { kTop, kNested }; // brackets). class PbtxtRegion { public: - PbtxtRegion(Printer* out, PbtxtRegionType type, int indent); + PbtxtRegion(Printer& out ABSL_ATTRIBUTE_LIFETIME_BOUND, PbtxtRegionType type); ~PbtxtRegion(); PbtxtRegion(const PbtxtRegion&) = delete; @@ -205,14 +421,16 @@ class PbtxtRegion { void PrintRaw(absl::string_view key, absl::string_view value); // Prints 'key subregion'. Return the created subregion. - PbtxtRegion CreateSubRegion(absl::string_view key); + PbtxtRegion CreateSubRegion(absl::string_view key) + ABSL_ATTRIBUTE_LIFETIME_BOUND; - private: - void NewLineAndIndent(); +#ifndef NDEBUG + static void InjectValues(int64_t* i64, double* d, bool* b); +#endif + private: Printer* out_; PbtxtRegionType type_; - int indent_; }; } // namespace tcmalloc_internal diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/logging_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/logging_test.cc index c7b58de40fc8..0928d856e251 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/logging_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/logging_test.cc @@ -14,28 +14,26 @@ #include "tcmalloc/internal/logging.h" +#include #include +#include +#include +#include +#include #include +#include #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "absl/flags/flag.h" +#include "absl/base/attributes.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" namespace tcmalloc { namespace tcmalloc_internal { namespace { -static std::string* log_buffer; - -static void RecordLogMessage(const char* msg, int length) { - // Make tests less brittle by trimming trailing whitespace - while (length > 0 && (msg[length - 1] == ' ' || msg[length - 1] == '\n')) { - length--; - } - log_buffer->assign(msg, length); -} - TEST(InternalLogging, MessageFormatting) { std::string long_string; for (int i = 0; i < 100; i++) { @@ -43,75 +41,218 @@ TEST(InternalLogging, MessageFormatting) { } // Arrange to intercept Log() output - log_buffer = new std::string(); - void (*old_writer)(const char*, int) = log_message_writer; - log_message_writer = RecordLogMessage; + auto old_writer = log_message_writer; + static std::string* log_buffer = new std::string(); + log_message_writer = [](const char* msg, int length) { + log_buffer->assign(msg, length); + }; - Log(kLog, "foo.cc", 100, "Hello"); - EXPECT_EQ("foo.cc:100] Hello", *log_buffer); + TC_LOG("Hello int=%d str=%s", 42, "bar"); + EXPECT_THAT(*log_buffer, + testing::MatchesRegex( + "[0-9]+ .*tcmalloc\\/internal/logging_test\\.cc:[0-9]+\\] " + "Hello int=42 str=bar\\n")); - Log(kLog, "foo.cc", 100, 123u, -456, 0); - EXPECT_EQ("foo.cc:100] 123 -456 0", *log_buffer); + TC_LOG("Long string: %s", long_string.c_str()); + EXPECT_THAT(*log_buffer, + testing::MatchesRegex( + "[0-9]+ .*tcmalloc\\/internal/logging_test\\.cc:[0-9]+\\] " + "Long string: 
the quick brown fox jumped over the lazy " + "dogthe quick brown fox jumped over the lazy dog.*")); - Log(kLog, "foo.cc", 100, 123u, std::numeric_limits::min()); - EXPECT_EQ("foo.cc:100] 123 -9223372036854775808", *log_buffer); + log_message_writer = old_writer; +} - Log(kLog, "foo.cc", 2, - reinterpret_cast(static_cast(1025))); - EXPECT_EQ("foo.cc:2] 0x401", *log_buffer); +TEST(Printer, RequiredSpace) { + constexpr absl::string_view kChunk = "0123456789"; + std::string expected; - Log(kLog, "foo.cc", 10, "hello", long_string.c_str()); - EXPECT_THAT(*log_buffer, - testing::StartsWith( - "foo.cc:10] hello the quick brown fox jumped over the lazy " - "dogthe quick brown fox jumped over the lazy dog")); + for (int i = 0; i < 10; i++) { + int length = kChunk.size() * i + 1; + std::unique_ptr buf(new char[length]); + Printer printer(buf.get(), length); - Log(kLogWithStack, "foo.cc", 10, "stk"); - EXPECT_TRUE(strstr(log_buffer->c_str(), "stk @ 0x") != nullptr) - << *log_buffer; + auto get_buf = [&]() { + return absl::string_view( + buf.get(), std::min(length - 1, printer.SpaceRequired())); + }; - log_message_writer = old_writer; - delete log_buffer; -} + for (int j = 0; j < i; j++) { + printer.printf("%s", kChunk); + } + EXPECT_EQ(get_buf(), expected); + EXPECT_EQ(length - 1, printer.SpaceRequired()); -TEST(InternalLogging, Assert) { - CHECK_CONDITION((2 + 2) == 4); + // Go past the end of the buffer. This should not overrun or affect the + // existing contents of buf, but we should see SpaceRequired tick up. + printer.printf("%s", kChunk); + EXPECT_EQ(get_buf(), expected); + EXPECT_EQ(length - 1 + kChunk.size(), printer.SpaceRequired()); - if (false) - CHECK_CONDITION(false); - else - CHECK_CONDITION(true); + printer.printf("%s", kChunk); + EXPECT_EQ(get_buf(), expected); + EXPECT_EQ(length - 1 + 2 * kChunk.size(), printer.SpaceRequired()); - ASSERT_DEATH(CHECK_CONDITION((2 + 2) == 5), - ".*tcmalloc\\/internal/logging_test\\.cc:[0-9]+\\] " - "\\(2 \\+ 2\\) == 5 @( 0x[0-9a-f]+)+"); + expected.append(kChunk); + } } -TEST(Printer, RequiredSpace) { - const char kChunk[] = "0123456789"; +TEST(Printer, RequiredSpacePieces) { + constexpr absl::string_view kChunk = "0123456789"; std::string expected; for (int i = 0; i < 10; i++) { - int length = strlen(kChunk) * i + 1; + int length = 2 * kChunk.size() * i + 1; std::unique_ptr buf(new char[length]); Printer printer(buf.get(), length); + auto get_buf = [&]() { + return absl::string_view( + buf.get(), std::min(length - 1, printer.SpaceRequired())); + }; + for (int j = 0; j < i; j++) { - printer.printf("%s", kChunk); + printer.Append(kChunk, kChunk); } - EXPECT_EQ(buf.get(), expected); + EXPECT_EQ(get_buf(), expected); EXPECT_EQ(length - 1, printer.SpaceRequired()); // Go past the end of the buffer. This should not overrun or affect the // existing contents of buf, but we should see SpaceRequired tick up. 
- printer.printf("%s", kChunk); - EXPECT_EQ(buf.get(), expected); - EXPECT_EQ(length - 1 + strlen(kChunk), printer.SpaceRequired()); + printer.Append(kChunk, kChunk); + EXPECT_EQ(get_buf(), expected); + EXPECT_EQ(length - 1 + 2 * kChunk.size(), printer.SpaceRequired()); + printer.Append(kChunk, kChunk); + EXPECT_EQ(get_buf(), expected); + EXPECT_EQ(length - 1 + 4 * kChunk.size(), printer.SpaceRequired()); + + expected.append(kChunk); expected.append(kChunk); } } +TEST(Check, OK) { + TC_CHECK(true); + TC_CHECK_EQ(1, 1); + TC_CHECK_NE(1, 2); + TC_CHECK_GT(2, 1); + TC_CHECK_GE(2, 1); + TC_CHECK_GE(2, 2); + TC_CHECK_LT(1, 2); + TC_CHECK_LE(-1, 1); + TC_CHECK_LE(2, 2); + + void* ptr1 = &ptr1; + void** ptr2 = &ptr1; + TC_CHECK_EQ(ptr1, ptr2); + TC_CHECK_NE(ptr1, nullptr); + + TC_ASSERT(true); + TC_ASSERT_EQ(1, 1); + TC_ASSERT_NE(1, 2); + TC_ASSERT_GT(2, 1); + TC_ASSERT_GE(2, 1); + TC_ASSERT_GE(2, 2); + TC_ASSERT_LT(1, 2); + TC_ASSERT_LE(-1, 1); + TC_ASSERT_LE(2, 2); + + ABSL_ATTRIBUTE_UNUSED int unused[] = { + (TC_CHECK(true), 1), + (TC_CHECK_EQ(1, 1), 2), + (TC_ASSERT(true), 3), + (TC_ASSERT_EQ(1, 1), 4), + }; +} + +TEST(Check, UnusedVars) { + int a = 1, b = 1; + TC_ASSERT_EQ(a, b); +} + +TEST(Check, DebugCheck) { + int eval1 = 0, eval2 = 0; + TC_CHECK_EQ([&]() { return ++eval1; }(), [&]() { return ++eval2; }()); + ASSERT_EQ(eval1, 1); + ASSERT_EQ(eval2, 1); +} + +TEST(Check, DebugAssert) { + int eval1 = 0, eval2 = 0; + TC_ASSERT_EQ([&]() { return ++eval1; }(), [&]() { return ++eval2; }(), + "val=%d", 1); +#ifdef NDEBUG + ASSERT_EQ(eval1, 0); + ASSERT_EQ(eval2, 0); +#else + ASSERT_EQ(eval1, 1); + ASSERT_EQ(eval2, 1); +#endif +} + +TEST(Check, Message) { + bool my_false = false; + EXPECT_DEATH(TC_CHECK(my_false, "ptr=%p foo=%d str=%s", &my_false, 42, "bar"), + "[0-9]+ .*tcmalloc\\/internal/logging_test\\.cc:[0-9]+\\] CHECK " + "in TestBody: my_false " + "\\(false\\) ptr=0x[0-9a-f]+ foo=42 str=bar"); + + int x = -1, y = 1; + EXPECT_DEATH(TC_CHECK_GE(x, y), + "[0-9]+ .*tcmalloc\\/internal/logging_test\\.cc:[0-9]+\\] CHECK " + "in TestBody: x >= y \\(-1 >= 1\\)"); + + int64_t a = -1, b = 1; + EXPECT_DEATH(TC_CHECK_EQ(a, b, "ptr=%p foo=%d str=%s", &my_false, 42, "bar"), + "[0-9]+ .*tcmalloc\\/internal/logging_test\\.cc:[0-9]+\\] CHECK " + "in TestBody: a == b \\(-1 " + "== 1\\) ptr=0x[0-9a-f]+ foo=42 str=bar"); + + enum class Something : unsigned { + kFoo = 1, + kBar = 2, + }; + auto bar = []() { return Something::kBar; }; + EXPECT_DEATH(TC_CHECK_EQ(bar(), Something::kFoo), + "bar\\(\\) == Something::kFoo \\(2 == 1\\)"); + + EXPECT_DEATH(TC_BUG("bad: foo=%d bar=%s", 42, "str"), + "[0-9]+ .*tcmalloc\\/internal/logging_test\\.cc:[0-9]+\\] CHECK " + "in TestBody: bad: foo=42 bar=str"); + + int s = 1; + // Ensure %s in the expression won't confuse the macro. 
+ // clang-format off + EXPECT_DEATH(TC_CHECK_EQ(0%s, 1), "0%s == 1 \\(0 == 1\\)"); + TC_ASSERT_NE(0%s, 1); + // clang-format on + +#ifndef NDEBUG + EXPECT_DEATH(TC_ASSERT(false, "foo=%d", 42), "false \\(false\\) foo=42"); +#endif +} + +TEST(Check, DoubleEvaluation) { + int eval1 = 0, eval2 = 0; + auto f1 = [&]() { return ++eval1; }; + auto f2 = [&]() { return ++eval2; }; + EXPECT_DEATH(TC_CHECK_NE(f1(), f2()), + "CHECK in TestBody: f1\\(\\) != f2\\(\\) \\(1 != 1\\)"); +} + +TEST(Check, Optional) { + std::optional opt1(1); + std::optional opt2(2); + std::optional noopt; + TC_CHECK_EQ(opt1, opt1); + TC_CHECK_NE(opt1, opt2); + TC_CHECK_NE(opt1, noopt); + TC_CHECK_NE(noopt, 1); + EXPECT_DEATH(TC_CHECK_EQ(opt1, opt2), "opt1 == opt2 \\(1 == 2\\)"); + EXPECT_DEATH(TC_CHECK_EQ(opt1, noopt), "opt1 == noopt \\(1 == \\?\\?\\?\\)"); +} + } // namespace } // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.cc b/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.cc index 71591834d411..c3b38f369341 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.cc @@ -19,9 +19,14 @@ #include #include +#include +#include + #include "absl/strings/numbers.h" +#include "absl/strings/string_view.h" #include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/page_size.h" #include "tcmalloc/internal/util.h" GOOGLE_MALLOC_SECTION_BEGIN @@ -49,21 +54,21 @@ bool GetMemoryStats(MemoryStats* stats) { FDCloser fd; fd.fd = signal_safe_open("/proc/self/statm", O_RDONLY | O_CLOEXEC); - ASSERT(fd.fd >= 0); + TC_ASSERT_GE(fd.fd, 0); if (fd.fd < 0) { return false; } char buf[1024]; ssize_t rc = signal_safe_read(fd.fd, buf, sizeof(buf), nullptr); - ASSERT(rc >= 0); - ASSERT(rc < sizeof(buf)); - if (rc < 0 || rc >= sizeof(buf)) { + TC_ASSERT_GE(rc, 0); + TC_ASSERT_LT(rc, static_cast(sizeof(buf))); + if (rc < 0 || rc >= static_cast(sizeof(buf))) { return false; } buf[rc] = '\0'; - const size_t pagesize = getpagesize(); + const size_t pagesize = GetPageSize(); absl::string_view contents(buf, rc); absl::string_view::size_type start = 0; int index = 0; diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.h b/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.h index a65f5b03d36d..133f73e9f02a 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats_test.cc index 176c712734cb..51929879acd5 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats_test.cc @@ -14,7 +14,8 @@ #include "tcmalloc/internal/memory_stats.h" -#include +#include +#include #include "gtest/gtest.h" diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/memory_tag.cc b/contrib/libs/tcmalloc/tcmalloc/internal/memory_tag.cc new file mode 100644 index 000000000000..30d873ba5db3 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/memory_tag.cc @@ -0,0 +1,44 @@ +// Copyright 2024 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/memory_tag.h" + +#include "absl/strings/string_view.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/optimization.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc::tcmalloc_internal { + +absl::string_view MemoryTagToLabel(MemoryTag tag) { + switch (tag) { + case MemoryTag::kNormal: + return "NORMAL"; + case MemoryTag::kNormalP1: + return "NORMAL_P1"; + case MemoryTag::kSampled: + return "SAMPLED"; + case MemoryTag::kSelSan: + return "SELSAN"; + case MemoryTag::kCold: + return "COLD"; + case MemoryTag::kMetadata: + return "METADATA"; + } + + ASSUME(false); +} + +} // namespace tcmalloc::tcmalloc_internal +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/memory_tag.h b/contrib/libs/tcmalloc/tcmalloc/internal/memory_tag.h new file mode 100644 index 000000000000..d8d04f200313 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/memory_tag.h @@ -0,0 +1,90 @@ +#pragma clang system_header +// Copyright 2024 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_MEMORY_TAG_H_ +#define TCMALLOC_INTERNAL_MEMORY_TAG_H_ + +#include +#include + +#include "absl/strings/string_view.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/logging.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc::tcmalloc_internal { + +enum class MemoryTag : uint8_t { + // Sampled, infrequently allocated + kSampled = 0x0, + // Normal memory, NUMA partition 0 + kNormalP0 = kSanitizerAddressSpace ? 0x1 : 0x4, + // Normal memory, NUMA partition 1 + kNormalP1 = kSanitizerAddressSpace ? 0xff : 0x6, + // Normal memory + kNormal = kNormalP0, + // Cold + kCold = 0x2, + // Metadata + kMetadata = 0x3, + // SelSan sampled spans, kept separately because we need to quickly + // distinguish them from the rest during delete and they also consume + // shadow memory. 0xfe is an arbitrary value that shouldn't be used. + kSelSan = kSelSanPresent ? 0x1 : 0xfe, +}; + +inline constexpr uintptr_t kTagShift = std::min(kAddressBits - 4, 42); +inline constexpr uintptr_t kTagMask = + uintptr_t{kSanitizerAddressSpace ? 0x3 : 0x7} << kTagShift; + +inline MemoryTag GetMemoryTag(const void* ptr) { + return static_cast((reinterpret_cast(ptr) & kTagMask) >> + kTagShift); +} + +inline bool IsNormalMemory(const void* ptr) { + // This is slightly faster than checking kNormalP0/P1 separetly. 
+ static_assert((static_cast(MemoryTag::kNormalP0) & + (static_cast(MemoryTag::kSampled) | + static_cast(MemoryTag::kCold))) == 0); + bool res = (static_cast(GetMemoryTag(ptr)) & + static_cast(MemoryTag::kNormal)) != 0; + TC_ASSERT(res == (GetMemoryTag(ptr) == MemoryTag::kNormalP0 || + GetMemoryTag(ptr) == MemoryTag::kNormalP1), + "ptr=%p res=%d tag=%d", ptr, res, + static_cast(GetMemoryTag(ptr))); + return res; +} + +inline bool IsSelSanMemory(const void* ptr) { + // This is a faster way to check for SelSan memory provided we already know + // it's not a normal memory, and assuming it's not kMetadata (both assumptions + // are checked by the assert below). A straightforward comparison with kSelSan + // leads to extraction/check of 2 bits (these use 2 8-byte immediates); + // this check can be done with a single BT instruction. + // kSelSanPresent part allows to optimize away branches in non SelSan build. + bool res = + kSelSanPresent && (static_cast(GetMemoryTag(ptr)) & + static_cast(MemoryTag::kSelSan)) != 0; + TC_ASSERT_EQ(res, GetMemoryTag(ptr) == MemoryTag::kSelSan); + return res; +} + +absl::string_view MemoryTagToLabel(MemoryTag tag); + +} // namespace tcmalloc::tcmalloc_internal +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_MEMORY_TAG_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/mincore.cc b/contrib/libs/tcmalloc/tcmalloc/internal/mincore.cc index e4120bcf5ae5..b4443312d6e4 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/mincore.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/mincore.cc @@ -18,9 +18,11 @@ #include #include +#include #include #include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/page_size.h" GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { @@ -44,7 +46,7 @@ size_t MInCore::residence_impl(void* addr, size_t size, return 0; } unsigned char res[kArrayLength]; - const size_t kPageSize = getpagesize(); + const size_t kPageSize = GetPageSize(); uintptr_t uaddr = reinterpret_cast(addr); // Round address down to get the start of the page containing the data. @@ -61,7 +63,10 @@ size_t MInCore::residence_impl(void* addr, size_t size, // then handle the case where the object spans more than one page. if (remainingPages == kPageSize) { // Find out whether the first page is resident. - mincore->mincore(reinterpret_cast(basePage), remainingPages, res); + if (mincore->mincore(reinterpret_cast(basePage), remainingPages, + res) != 0) { + return 0; + } // Residence info is returned in LSB, other bits are undefined. 
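    // For example, a byte of 0x01 (or any value with the low bit set, such as
    // 0x81, since the higher bits are undefined) means the page is resident,
    // while 0x00 or 0x80 means it is not; hence the explicit (res[0] & 1)
    // masking below rather than a comparison of the whole byte.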
if ((res[0] & 1) == 1) { return size; diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/mincore.h b/contrib/libs/tcmalloc/tcmalloc/internal/mincore.h index c353bdac8708..45a74fd8263f 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/mincore.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/mincore.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/mincore_benchmark.cc b/contrib/libs/tcmalloc/tcmalloc/internal/mincore_benchmark.cc index 02c8ead48d0c..13b8c8029685 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/mincore_benchmark.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/mincore_benchmark.cc @@ -16,14 +16,14 @@ #include #include +#include #include #include -#include -#include "absl/memory/memory.h" #include "benchmark/benchmark.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" -#include "tcmalloc/internal/mincore.h" +#include "tcmalloc/internal/page_size.h" GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { @@ -38,10 +38,10 @@ void BM_mincore(benchmark::State& state) { // If we want to place the array on the stack then the maximum frame size is // 16KiB. So there is no point in benchmarking sizes larger than this. const int kMaxArraySize = 16 * 1024; - CHECK_CONDITION(size <= kMaxArraySize); - auto resident = absl::make_unique(kMaxArraySize); + TC_CHECK_LE(size, kMaxArraySize); + auto resident = std::make_unique(kMaxArraySize); - const size_t kPageSize = getpagesize(); + const size_t kPageSize = tcmalloc_internal::GetPageSize(); // We want to scan the same amount of memory in all cases const size_t regionSize = 1 * 1024 * 1024 * 1024; for (auto s : state) { diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/mincore_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/mincore_test.cc index daa1178b2563..1f1f9b769e34 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/mincore_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/mincore_test.cc @@ -14,18 +14,18 @@ #include "tcmalloc/internal/mincore.h" +#include +#include +#include #include -#include -#include #include -#include #include +#include "benchmark/benchmark.h" #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "benchmark/benchmark.h" -#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/page_size.h" namespace tcmalloc { namespace tcmalloc_internal { @@ -41,7 +41,7 @@ class MInCoreMock : public MInCoreInterface { // Implementation of minCore that reports presence based on provided array. int mincore(void* addr, size_t length, unsigned char* result) override { - const size_t kPageSize = getpagesize(); + const size_t kPageSize = GetPageSize(); uintptr_t uAddress = reinterpret_cast(addr); // Check that we only pass page aligned addresses into mincore(). EXPECT_THAT(uAddress & (kPageSize - 1), Eq(0)); @@ -87,9 +87,9 @@ namespace { using ::testing::Eq; -TEST(StaticVarsTest, TestResidence) { +TEST(MInCoreTest, TestResidence) { MInCoreTest mct; - const size_t kPageSize = getpagesize(); + const size_t kPageSize = GetPageSize(); // Set up a pattern with a few resident pages. // page 0 not mapped @@ -141,10 +141,10 @@ TEST(StaticVarsTest, TestResidence) { } // Test whether we are correctly handling multiple calls to mincore. 
-TEST(StaticVarsTest, TestLargeResidence) { +TEST(MInCoreTest, TestLargeResidence) { MInCoreTest mct; uintptr_t uAddress = 0; - const size_t kPageSize = getpagesize(); + const size_t kPageSize = GetPageSize(); // Set up a pattern covering 6 * page size * MInCore::kArrayLength to // allow us to test for situations where the region we're checking // requires multiple calls to mincore(). @@ -164,8 +164,8 @@ TEST(StaticVarsTest, TestLargeResidence) { } } -TEST(StaticVarsTest, UnmappedMemory) { - const size_t kPageSize = getpagesize(); +TEST(MInCoreTest, UnmappedMemory) { + const size_t kPageSize = GetPageSize(); const int kNumPages = 16; // Overallocate kNumPages of memory, so we can munmap the page before and @@ -181,10 +181,18 @@ TEST(StaticVarsTest, UnmappedMemory) { memset(q, 0, kNumPages * kPageSize); ::benchmark::DoNotOptimize(q); + EXPECT_EQ(0, MInCore::residence(nullptr, kPageSize)); + EXPECT_EQ(0, MInCore::residence(p, kPageSize)); for (int i = 0; i <= kNumPages; i++) { EXPECT_EQ(i * kPageSize, MInCore::residence(q, i * kPageSize)); } + // Note we can only query regions that are entirely mapped, but we should also + // test the edge case of incomplete pages. + EXPECT_EQ((kNumPages - 1) * kPageSize, + MInCore::residence(reinterpret_cast(q) + 7, + (kNumPages - 1) * kPageSize)); + ASSERT_EQ(munmap(q, kNumPages * kPageSize), 0); } diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/mismatched_delete_state.h b/contrib/libs/tcmalloc/tcmalloc/internal/mismatched_delete_state.h new file mode 100644 index 000000000000..3b7403e39b46 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/mismatched_delete_state.h @@ -0,0 +1,122 @@ +#pragma clang system_header +// Copyright 2024 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef TCMALLOC_INTERNAL_MISMATCHED_DELETE_STATE_H_ +#define TCMALLOC_INTERNAL_MISMATCHED_DELETE_STATE_H_ + +#include +#include +#include +#include + +#include "absl/types/span.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/logging.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +class MismatchedDeleteState { + public: + constexpr MismatchedDeleteState() = default; + + bool triggered() const { return triggered_; } + + std::optional> AllocationStack() const { + TC_ASSERT(triggered_); + + if (!allocation_stack_depth_.has_value()) { + return std::nullopt; + } + + return absl::MakeSpan(allocation_stack_, *allocation_stack_depth_); + } + + std::optional> DeallocationStack() const { + TC_ASSERT(triggered_); + if (!deallocation_stack_depth_.has_value()) { + return std::nullopt; + } + + return absl::MakeSpan(deallocation_stack_, *deallocation_stack_depth_); + } + + size_t provided_min() const { + TC_ASSERT(triggered_); + return provided_min_; + } + size_t provided_max() const { + TC_ASSERT(triggered_); + return provided_max_; + } + + size_t minimum_size() const { + TC_ASSERT(triggered_); + return minimum_; + } + + size_t maximum_size() const { + TC_ASSERT(triggered_); + return maximum_; + } + + void Record(size_t provided_min, size_t provided_max, size_t minimum, + size_t maximum, + std::optional> allocation_stack, + std::optional> deallocation_stack) { + triggered_ = true; + + provided_min_ = provided_min; + provided_max_ = provided_max; + minimum_ = minimum; + maximum_ = maximum; + + if (allocation_stack.has_value()) { + size_t allocation_stack_depth = + std::min(kMaxStackDepth, allocation_stack->size()); + memcpy(allocation_stack_, allocation_stack->data(), + sizeof(void*) * allocation_stack_depth); + allocation_stack_depth_ = allocation_stack_depth; + } else { + allocation_stack_depth_ = std::nullopt; + } + + if (deallocation_stack.has_value()) { + size_t deallocation_stack_depth = + std::min(kMaxStackDepth, deallocation_stack->size()); + memcpy(deallocation_stack_, deallocation_stack->data(), + sizeof(void*) * deallocation_stack_depth); + deallocation_stack_depth_ = deallocation_stack_depth; + } else { + deallocation_stack_depth_ = std::nullopt; + } + } + + private: + bool triggered_ = false; + size_t provided_min_ = 0, provided_max_ = 0, minimum_ = 0, maximum_ = 0; + + void* allocation_stack_[kMaxStackDepth] = {}; + std::optional allocation_stack_depth_ = std::nullopt; + void* deallocation_stack_[kMaxStackDepth] = {}; + std::optional deallocation_stack_depth_ = std::nullopt; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_MISMATCHED_DELETE_STATE_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/mock_span.h b/contrib/libs/tcmalloc/tcmalloc/internal/mock_span.h index 10922c48bdba..a43ecef55175 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/mock_span.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/mock_span.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/numa.cc b/contrib/libs/tcmalloc/tcmalloc/internal/numa.cc index 1639bd1b6d89..6e65e125155d 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/numa.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/numa.cc @@ -16,24 +16,23 @@ #include #include -#include #include #include #include #include #include -#include #include +#include 
#include "absl/base/attributes.h" -#include "absl/base/internal/sysinfo.h" #include "absl/functional/function_ref.h" -#include "absl/strings/numbers.h" -#include "absl/strings/string_view.h" #include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/cpu_utils.h" #include "tcmalloc/internal/environment.h" #include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/percpu.h" +#include "tcmalloc/internal/sysinfo.h" #include "tcmalloc/internal/util.h" GOOGLE_MALLOC_SECTION_BEGIN @@ -52,62 +51,9 @@ int OpenSysfsCpulist(size_t node) { return signal_safe_open(path, O_RDONLY | O_CLOEXEC); } -cpu_set_t ParseCpulist(absl::FunctionRef read) { - cpu_set_t set; - CPU_ZERO(&set); - - std::array buf; - size_t carry_over = 0; - int cpu_from = -1; - - while (true) { - const ssize_t rc = read(buf.data() + carry_over, buf.size() - carry_over); - CHECK_CONDITION(rc >= 0); - - const absl::string_view current(buf.data(), carry_over + rc); - - // If we have no more data to parse & couldn't read any then we've reached - // the end of the input & are done. - if (current.empty() && rc == 0) { - break; - } - - size_t consumed; - const size_t dash = current.find('-'); - const size_t comma = current.find(','); - if (dash != absl::string_view::npos && dash < comma) { - CHECK_CONDITION(absl::SimpleAtoi(current.substr(0, dash), &cpu_from)); - consumed = dash + 1; - } else if (comma != absl::string_view::npos || rc == 0) { - int cpu; - CHECK_CONDITION(absl::SimpleAtoi(current.substr(0, comma), &cpu)); - if (comma == absl::string_view::npos) { - consumed = current.size(); - } else { - consumed = comma + 1; - } - if (cpu_from != -1) { - for (int c = cpu_from; c <= cpu; c++) { - CPU_SET(c, &set); - } - cpu_from = -1; - } else { - CPU_SET(cpu, &set); - } - } else { - consumed = 0; - } - - carry_over = current.size() - consumed; - memmove(buf.data(), buf.data() + consumed, carry_over); - } - - return set; -} - -bool InitNumaTopology(size_t cpu_to_scaled_partition[CPU_SETSIZE], - uint64_t *const partition_to_nodes, - NumaBindMode *const bind_mode, +bool InitNumaTopology(size_t cpu_to_scaled_partition[kMaxCpus], + uint64_t* const partition_to_nodes, + NumaBindMode* const bind_mode, const size_t num_partitions, const size_t scale_by, absl::FunctionRef open_node_cpulist) { // Node 0 will always map to partition 0; record it here in case the system @@ -115,10 +61,6 @@ bool InitNumaTopology(size_t cpu_to_scaled_partition[CPU_SETSIZE], // either case we'll record nothing in the loop below. partition_to_nodes[NodeToPartition(0, num_partitions)] |= 1 << 0; - // If we only compiled in support for one partition then we're trivially - // done; NUMA awareness is unavailable. - if (num_partitions == 1) return false; - // We rely on rseq to quickly obtain a CPU ID & lookup the appropriate // partition in NumaTopology::GetCurrentPartition(). If rseq is unavailable, // disable NUMA awareness. @@ -133,11 +75,14 @@ bool InitNumaTopology(size_t cpu_to_scaled_partition[CPU_SETSIZE], // cpu_to_scaled_partition & partition_to_nodes arrays are zero initialized // we're trivially done - CPUs all map to partition 0, which contains only // CPU 0 added above. - const char *e = + const char* e = tcmalloc::tcmalloc_internal::thread_safe_getenv("TCMALLOC_NUMA_AWARE"); + bool enabled = true; if (e == nullptr) { // Enable NUMA awareness iff default_want_numa_aware(). 
- if (!default_want_numa_aware()) return false; + if (!default_want_numa_aware()) { + enabled = false; + } } else if (!strcmp(e, "no-binding")) { // Enable NUMA awareness with no memory binding behavior. *bind_mode = NumaBindMode::kNone; @@ -149,18 +94,18 @@ bool InitNumaTopology(size_t cpu_to_scaled_partition[CPU_SETSIZE], *bind_mode = NumaBindMode::kStrict; } else if (!strcmp(e, "0")) { // Disable NUMA awareness. - return false; + enabled = false; } else { - Crash(kCrash, __FILE__, __LINE__, "bad TCMALLOC_NUMA_AWARE env var", e); + TC_BUG("bad TCMALLOC_NUMA_AWARE env var '%s'", e); } // The cpu_to_scaled_partition array has a fixed size so that we can // statically allocate it & avoid the need to check whether it has been - // allocated prior to lookups. It has CPU_SETSIZE entries which ought to be + // allocated prior to lookups. It has kMaxCpus entries which ought to be // sufficient, but sanity check that indexing it by CPU number shouldn't // exceed its bounds. - int num_cpus = absl::base_internal::NumCPUs(); - CHECK_CONDITION(num_cpus <= CPU_SETSIZE); + int num_cpus = NumCPUs(); + TC_CHECK_LE(num_cpus, kMaxCpus); // We could just always report that we're NUMA aware, but if a NUMA-aware // binary runs on a system that doesn't include multiple NUMA nodes then our @@ -175,7 +120,7 @@ bool InitNumaTopology(size_t cpu_to_scaled_partition[CPU_SETSIZE], if (fd == -1) { // We expect to encounter ENOENT once node surpasses the actual number of // nodes present in the system. Any other error is a problem. - CHECK_CONDITION(errno == ENOENT); + TC_CHECK_EQ(errno, ENOENT); break; } @@ -191,28 +136,32 @@ bool InitNumaTopology(size_t cpu_to_scaled_partition[CPU_SETSIZE], } // Parse the cpulist file to determine which CPUs are local to this node. - const cpu_set_t node_cpus = - ParseCpulist([&](char *const buf, const size_t count) { + const std::optional node_cpus = + ParseCpulist([&](char* const buf, const size_t count) { return signal_safe_read(fd, buf, count, /*bytes_read=*/nullptr); }); + // We are on the same side of an airtight hatchway as the kernel, but we + // want to know if we can no longer parse the values the kernel is + // providing. + TC_CHECK(node_cpus.has_value()); // Assign local CPUs to the appropriate partition. - for (size_t cpu = 0; cpu < CPU_SETSIZE; cpu++) { - if (CPU_ISSET(cpu, &node_cpus)) { + for (size_t cpu = 0; cpu < kMaxCpus; cpu++) { + if (node_cpus->IsSet(cpu)) { cpu_to_scaled_partition[cpu + kNumaCpuFudge] = partition * scale_by; } } // If we observed any CPUs for this node then we've now got CPUs assigned // to a non-zero partition; report that we're NUMA aware. 
- if (CPU_COUNT(&node_cpus) != 0) { + if (node_cpus->Count() != 0) { numa_aware = true; } signal_safe_close(fd); } - return numa_aware; + return enabled && numa_aware; } } // namespace tcmalloc_internal diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/numa.h b/contrib/libs/tcmalloc/tcmalloc/internal/numa.h index bf04c65c21b7..3a4151624982 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/numa.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/numa.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2021 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -19,8 +20,13 @@ #include #include +#include +#include +#include + #include "absl/functional/function_ref.h" #include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/cpu_utils.h" #include "tcmalloc/internal/percpu.h" GOOGLE_MALLOC_SECTION_BEGIN @@ -45,8 +51,8 @@ enum class NumaBindMode { kStrict, }; -// We use the result of RseqCpuId() in GetCurrentPartition() to avoid branching -// in the fast path, but this means that the CPU number we look up in +// We use the result of GetRealCpuUnsafe() in GetCurrentPartition() to avoid +// branching in the fast path, but this means that the CPU number we look up in // cpu_to_scaled_partition_ might equal kCpuIdUninitialized or // kCpuIdUnsupported. We add this fudge factor to the value to compensate, // ensuring that our accesses to the cpu_to_scaled_partition_ array are always @@ -69,7 +75,14 @@ static constexpr size_t kNumaCpuFudge = -subtle::percpu::kCpuIdUnsupported; // may incur a performance hit, but allows us to at least run on any system. template class NumaTopology { + // To give ourselves non-trivial data even when NUMA support is compiled out + // of the allocation path, we enable >1 partition. + static constexpr size_t kNumInternalPartitions = + std::max(2, NumPartitions); + public: + static constexpr size_t kNumPartitions = NumPartitions; + // Trivially zero initialize data members. constexpr NumaTopology() = default; @@ -94,7 +107,9 @@ class NumaTopology { // partitions that other parts of TCMalloc need to concern themselves with. // Checking this rather than using kNumaPartitions allows users to avoid work // on non-zero partitions when NUMA awareness is disabled. - size_t active_partitions() const { return numa_aware() ? NumPartitions : 1; } + size_t active_partitions() const { + return numa_aware() ? kNumInternalPartitions : 1; + } // Return a value indicating how we should behave with regards to binding // memory regions to NUMA nodes. @@ -110,7 +125,9 @@ class NumaTopology { // ScaleBy. size_t GetCurrentScaledPartition() const; - // Return the NUMA partition number to which `cpu` belongs. + // Return the NUMA partition number to which `cpu` belongs. This partition + // number may exceed NumPartitions as part of providing an unconditional NUMA + // partition. // // It is valid for cpu to equal subtle::percpu::kCpuIdUninitialized or // subtle::percpu::kCpuIdUnsupported. In either case partition 0 will be @@ -126,32 +143,39 @@ class NumaTopology { uint64_t GetPartitionNodes(int partition) const; private: - // Maps from CPU number (plus kNumaCpuFudge) to NUMA partition. - size_t cpu_to_scaled_partition_[CPU_SETSIZE + kNumaCpuFudge] = {0}; // Maps from NUMA partition to a bitmap of NUMA nodes within the partition. - uint64_t partition_to_nodes_[NumPartitions] = {0}; + uint64_t partition_to_nodes_[kNumInternalPartitions] = {0}; // Indicates whether NUMA awareness is available & enabled. 
bool numa_aware_ = false; // Desired memory binding behavior. NumaBindMode bind_mode_ = NumaBindMode::kAdvisory; + + // We maintain two sets of CPU-to-partition information. One is + // unconditionally available in cpu_to_scaled_partition_. + // + // The other is used by GetCurrent...Partition methods, which are used on the + // allocation fastpath. + // * If NUMA support is not compiled in, these methods short-circuit and + // return '0'. + // * If NUMA support is not enabled at runtime, gated_cpu_to_scaled_partition_ + // is left zero initialized. + + static constexpr size_t kCpuMapSize = kMaxCpus + kNumaCpuFudge; + std::array cpu_to_scaled_partition_ = {}; + // Maps from CPU number (plus kNumaCpuFudge) to NUMA partition. + // If NUMA awareness is not enabled, allocate array of 0 size to not waste + // space, we shouldn't access it. Place it as the last member, so that ASan + // warns about any unintentional accesses. This is checked by the + // static_assert in Init. + static constexpr size_t kGatedCpuMapSize = + NumPartitions > 1 ? kMaxCpus + kNumaCpuFudge : 0; + std::array gated_cpu_to_scaled_partition_ = {}; }; // Opens a /sys/devices/system/node/nodeX/cpulist file for read only access & // returns the file descriptor. int OpenSysfsCpulist(size_t node); -// Parse a CPU list in the format used by -// /sys/devices/system/node/nodeX/cpulist files - that is, individual CPU -// numbers or ranges in the format - inclusive all joined by comma -// characters. -// -// The read function is expected to operate much like the read syscall. It -// should read up to `count` bytes into `buf` and return the number of bytes -// actually read. If an error occurs during reading it should return -1 with -// errno set to an appropriate error code. -cpu_set_t ParseCpulist( - absl::FunctionRef read); - // Initialize the data members of a NumaTopology<> instance. // // This function must only be called once per NumaTopology<> instance, and @@ -162,8 +186,8 @@ cpu_set_t ParseCpulist( // // Returns true if we're actually NUMA aware; i.e. if we have CPUs mapped to // multiple partitions. 
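// A minimal consumption sketch for NumaTopology<> (illustrative only: the
// two-partition instantiation and ExampleCurrentNumaPartition() are
// assumptions for this note, not declarations added by the patch).
// Initialization must happen exactly once per instance, per the comment
// above; a function-local static preserves that property here.
inline size_t ExampleCurrentNumaPartition() {
  static NumaTopology<2> topology = [] {
    NumaTopology<2> nt;
    nt.Init();
    return nt;
  }();
  // Yields 0 whenever NUMA awareness is compiled out or disabled at runtime
  // (for example via TCMALLOC_NUMA_AWARE=0); otherwise it is the partition of
  // the CPU the caller is currently running on.
  return topology.GetCurrentPartition();
}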
-bool InitNumaTopology(size_t cpu_to_scaled_partition[CPU_SETSIZE], - uint64_t *partition_to_nodes, NumaBindMode *bind_mode, +bool InitNumaTopology(size_t cpu_to_scaled_partition[kMaxCpus], + uint64_t* partition_to_nodes, NumaBindMode* bind_mode, size_t num_partitions, size_t scale_by, absl::FunctionRef open_node_cpulist); @@ -174,31 +198,48 @@ inline size_t NodeToPartition(const size_t node, const size_t num_partitions) { template inline void NumaTopology::Init() { - numa_aware_ = - InitNumaTopology(cpu_to_scaled_partition_, partition_to_nodes_, - &bind_mode_, NumPartitions, ScaleBy, OpenSysfsCpulist); + static_assert(offsetof(NumaTopology, gated_cpu_to_scaled_partition_) + + sizeof(gated_cpu_to_scaled_partition_) + + sizeof(*gated_cpu_to_scaled_partition_.data()) >= + sizeof(NumaTopology), + "cpu_to_scaled_partition_ is not the last field"); + numa_aware_ = InitNumaTopology( + cpu_to_scaled_partition_.data(), partition_to_nodes_, &bind_mode_, + kNumInternalPartitions, ScaleBy, OpenSysfsCpulist); + if constexpr (NumPartitions > 1) { + if (numa_aware_) { + gated_cpu_to_scaled_partition_ = cpu_to_scaled_partition_; + } + } } template inline void NumaTopology::InitForTest( absl::FunctionRef open_node_cpulist) { - numa_aware_ = - InitNumaTopology(cpu_to_scaled_partition_, partition_to_nodes_, - &bind_mode_, NumPartitions, ScaleBy, open_node_cpulist); + numa_aware_ = InitNumaTopology( + cpu_to_scaled_partition_.data(), partition_to_nodes_, &bind_mode_, + kNumInternalPartitions, ScaleBy, open_node_cpulist); + if constexpr (NumPartitions > 1) { + if (numa_aware_) { + gated_cpu_to_scaled_partition_ = cpu_to_scaled_partition_; + } + } } template inline size_t NumaTopology::GetCurrentPartition() const { if constexpr (NumPartitions == 1) return 0; - return GetCpuPartition(subtle::percpu::RseqCpuId()); + const int cpu = subtle::percpu::GetRealCpuUnsafe(); + return gated_cpu_to_scaled_partition_[cpu + kNumaCpuFudge] / ScaleBy; } template inline size_t NumaTopology::GetCurrentScaledPartition() const { if constexpr (NumPartitions == 1) return 0; - return GetCpuScaledPartition(subtle::percpu::RseqCpuId()); + const int cpu = subtle::percpu::GetRealCpuUnsafe(); + return gated_cpu_to_scaled_partition_[cpu + kNumaCpuFudge]; } template @@ -210,7 +251,6 @@ inline size_t NumaTopology::GetCpuPartition( template inline size_t NumaTopology::GetCpuScaledPartition( const int cpu) const { - if constexpr (NumPartitions == 1) return 0; return cpu_to_scaled_partition_[cpu + kNumaCpuFudge]; } diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/numa_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/numa_test.cc index bbd86a3f7dee..5a0ee339211e 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/numa_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/numa_test.cc @@ -29,19 +29,23 @@ #include #include +#include "gmock/gmock.h" #include "gtest/gtest.h" #include "absl/random/random.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" #include "absl/strings/string_view.h" +#include "absl/types/optional.h" #include "absl/types/span.h" #include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/percpu.h" +#include "tcmalloc/internal/sysinfo.h" namespace tcmalloc { namespace tcmalloc_internal { namespace { -int memfd_create(const char *name, unsigned int flags) { +int memfd_create(const char* name, unsigned int flags) { #ifdef __NR_memfd_create return syscall(__NR_memfd_create, name, flags); #else @@ -55,25 +59,24 @@ class SyntheticCpuList { public: explicit SyntheticCpuList(const absl::string_view 
content) { fd_ = memfd_create("cpulist", MFD_CLOEXEC); - CHECK_CONDITION(fd_ != -1); + TC_CHECK_NE(fd_, -1); - CHECK_CONDITION(write(fd_, content.data(), content.size()) == - content.size()); - CHECK_CONDITION(write(fd_, "\n", 1) == 1); - CHECK_CONDITION(lseek(fd_, 0, SEEK_SET) == 0); + TC_CHECK_EQ(write(fd_, content.data(), content.size()), content.size()); + TC_CHECK_EQ(write(fd_, "\n", 1), 1); + TC_CHECK_EQ(lseek(fd_, 0, SEEK_SET), 0); } ~SyntheticCpuList() { close(fd_); } // Disallow copies, which would make require reference counting to know when // we should close fd_. - SyntheticCpuList(const SyntheticCpuList &) = delete; - SyntheticCpuList &operator=(const SyntheticCpuList &) = delete; + SyntheticCpuList(const SyntheticCpuList&) = delete; + SyntheticCpuList& operator=(const SyntheticCpuList&) = delete; // Moves are fine - only one instance at a time holds the fd. - SyntheticCpuList(SyntheticCpuList &&other) + SyntheticCpuList(SyntheticCpuList&& other) : fd_(std::exchange(other.fd_, -1)) {} - SyntheticCpuList &operator=(SyntheticCpuList &&other) { + SyntheticCpuList& operator=(SyntheticCpuList&& other) { new (this) SyntheticCpuList(std::move(other)); return *this; } @@ -104,10 +107,10 @@ class NumaTopologyTest : public ::testing::Test { } }; -template -NumaTopology CreateNumaTopology( +template +NumaTopology CreateNumaTopology( const absl::Span cpu_lists) { - NumaTopology nt; + NumaTopology nt; nt.InitForTest([&](const size_t node) { if (node >= cpu_lists.size()) { errno = ENOENT; @@ -129,6 +132,7 @@ TEST_F(NumaTopologyTest, NoCompileTimeNuma) { EXPECT_EQ(nt.numa_aware(), false); EXPECT_EQ(nt.active_partitions(), 1); + EXPECT_EQ(nt.GetCurrentPartition(), 0); } // Ensure that if we run on a system with no NUMA support at all (i.e. no @@ -139,6 +143,7 @@ TEST_F(NumaTopologyTest, NoRunTimeNuma) { EXPECT_EQ(nt.numa_aware(), false); EXPECT_EQ(nt.active_partitions(), 1); + EXPECT_EQ(nt.GetCurrentPartition(), 0); } // Ensure that if we run on a system with only 1 node then we disable NUMA @@ -172,6 +177,26 @@ TEST_F(NumaTopologyTest, TwoNode) { } } +// Confirm that an empty node parses correctly (b/212827142). +TEST_F(NumaTopologyTest, EmptyNode) { + std::vector nodes; + nodes.emplace_back("0-5"); + nodes.emplace_back(""); + nodes.emplace_back("6-11"); + + const auto nt = CreateNumaTopology<3>(nodes); + + EXPECT_EQ(nt.numa_aware(), true); + EXPECT_EQ(nt.active_partitions(), 3); + + for (int cpu = 0; cpu <= 5; cpu++) { + EXPECT_EQ(nt.GetCpuPartition(cpu), 0); + } + for (int cpu = 6; cpu <= 11; cpu++) { + EXPECT_EQ(nt.GetCpuPartition(cpu), 2); + } +} + // Test that cpulists too long to fit into the 16 byte buffer used by // InitNumaTopology() parse successfully. TEST_F(NumaTopologyTest, LongCpuLists) { @@ -214,68 +239,13 @@ TEST_F(NumaTopologyTest, Host) { NumaTopology<4> nt; nt.Init(); + const size_t active_partitions = nt.active_partitions(); + // We don't actually know anything about the host, so there's not much more // we can do beyond checking that we didn't crash. -} - -// Ensure that we can parse randomized cpulists correctly. -TEST(ParseCpulistTest, Random) { - absl::BitGen gen; - - static constexpr int kIterations = 100; - for (int i = 0; i < kIterations; i++) { - cpu_set_t reference; - CPU_ZERO(&reference); - - // Set a random number of CPUs within the reference set. 
- const double density = absl::Uniform(gen, 0.0, 1.0); - for (int cpu = 0; cpu < CPU_SETSIZE; cpu++) { - if (absl::Bernoulli(gen, density)) { - CPU_SET(cpu, &reference); - } - } - - // Serialize the reference set into a cpulist-style string. - std::vector components; - for (int cpu = 0; cpu < CPU_SETSIZE; cpu++) { - if (!CPU_ISSET(cpu, &reference)) continue; - - const int start = cpu; - int next = cpu + 1; - while (next < CPU_SETSIZE && CPU_ISSET(next, &reference)) { - cpu = next; - next = cpu + 1; - } - - if (cpu == start) { - components.push_back(absl::StrCat(cpu)); - } else { - components.push_back(absl::StrCat(start, "-", cpu)); - } - } - const std::string serialized = absl::StrJoin(components, ","); - - // Now parse that string using our ParseCpulist function, randomizing the - // amount of data we provide to it from each read. - absl::string_view remaining(serialized); - const cpu_set_t parsed = - ParseCpulist([&](char *const buf, const size_t count) -> ssize_t { - // Calculate how much data we have left to provide. - const size_t max = std::min(count, remaining.size()); - - // If none, we have no choice but to provide nothing. - if (max == 0) return 0; - - // If we do have data, return a randomly sized subset of it to stress - // the logic around reading partial values. - const size_t copy = absl::Uniform(gen, static_cast(1), max); - memcpy(buf, remaining.data(), copy); - remaining.remove_prefix(copy); - return copy; - }); - - // We ought to have parsed the same set of CPUs that we serialized. - EXPECT_TRUE(CPU_EQUAL(&parsed, &reference)); + for (int cpu = 0, n = NumCPUs(); cpu < n; ++cpu) { + size_t partition = nt.GetCpuPartition(cpu); + EXPECT_LT(partition, active_partitions) << cpu; } } diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/optimization.h b/contrib/libs/tcmalloc/tcmalloc/internal/optimization.h index 6380183a5004..edede9a81468 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/optimization.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/optimization.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2020 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,16 +16,22 @@ #ifndef TCMALLOC_INTERNAL_OPTIMIZATION_H_ #define TCMALLOC_INTERNAL_OPTIMIZATION_H_ +#include "absl/base/attributes.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + // Our wrapper for __builtin_assume, allowing us to check the assumption on // debug builds. #ifndef NDEBUG #ifdef __clang__ -#define ASSUME(cond) CHECK_CONDITION(cond), __builtin_assume(cond) +#define ASSUME(cond) TC_CHECK(cond), __builtin_assume(cond) #else #define ASSUME(cond) \ - CHECK_CONDITION(cond), (!(cond) ? __builtin_unreachable() : (void)0) + TC_CHECK(cond), (!(cond) ? __builtin_unreachable() : (void)0) #endif #else #ifdef __clang__ @@ -42,4 +49,21 @@ #define TCMALLOC_ATTRIBUTE_CONST #endif +// Can be applied to a return statement to tell the compiler to generate +// a tail call. 
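// For example (illustrative; NeedsSlowPath() and DeallocateSlowSketch() are
// hypothetical helpers, the latter sharing the caller's signature, and are
// not TCMalloc functions):
//
//   void DeallocateSketch(void* ptr) {
//     if (NeedsSlowPath(ptr)) {
//       TCMALLOC_MUSTTAIL return DeallocateSlowSketch(ptr);
//     }
//     ...
//   }
//
// Where [[clang::musttail]] is available the compiler must emit the call as a
// tail call, so the slow path reuses the caller's stack frame; otherwise the
// macro expands to nothing and this compiles as an ordinary call followed by
// a return.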
+#if ABSL_HAVE_CPP_ATTRIBUTE(clang::musttail) +#define TCMALLOC_MUSTTAIL [[clang::musttail]] +#else +#define TCMALLOC_MUSTTAIL +#endif + +inline void* AssumeNotNull(void* p) { + ASSUME(p != nullptr); + return p; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + #endif // TCMALLOC_INTERNAL_OPTIMIZATION_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/noruntime_size_classes.cc b/contrib/libs/tcmalloc/tcmalloc/internal/overflow.h similarity index 58% rename from contrib/libs/tcmalloc/tcmalloc/noruntime_size_classes.cc rename to contrib/libs/tcmalloc/tcmalloc/internal/overflow.h index c6dc90adcc94..970a88a501bc 100644 --- a/contrib/libs/tcmalloc/tcmalloc/noruntime_size_classes.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/overflow.h @@ -1,4 +1,5 @@ -// Copyright 2019 The TCMalloc Authors +#pragma clang system_header +// Copyright 2022 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,22 +13,29 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "absl/base/attributes.h" -#include "tcmalloc/runtime_size_classes.h" -#include "tcmalloc/size_class_info.h" +#ifndef TCMALLOC_INTERNAL_OVERFLOW_H_ +#define TCMALLOC_INTERNAL_OVERFLOW_H_ + +#include + +#include "absl/base/config.h" +#include "tcmalloc/internal/config.h" GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { -// Default implementation doesn't load runtime size classes. -// To enable runtime size classes, link with :runtime_size_classes. -// This is in a separate library so that it doesn't get inlined inside common.cc -ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE int MaybeSizeClassesFromEnv( - int max_size, int max_classes, SizeClassInfo* parsed) { - return -1; +inline bool MultiplyOverflow(size_t a, size_t b, size_t* out) { +#if ABSL_HAVE_BUILTIN(__builtin_mul_overflow) + return __builtin_mul_overflow(a, b, out); +#else + *out = a * b; + return b != 0 && *out / b != a; +#endif } } // namespace tcmalloc_internal } // namespace tcmalloc GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_OVERFLOW_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/page_size.cc b/contrib/libs/tcmalloc/tcmalloc/internal/page_size.cc new file mode 100644 index 000000000000..0a3468601474 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/page_size.cc @@ -0,0 +1,45 @@ +// Copyright 2022 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
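// A short sketch of the MultiplyOverflow() usage pattern from overflow.h
// above (illustrative only: AllocArraySketch and its use of std::malloc are
// assumptions for this note, not TCMalloc code). The helper returns true on
// overflow, so the product is trusted only when it returns false.
#include <cstddef>
#include <cstdlib>

#include "tcmalloc/internal/overflow.h"

inline void* AllocArraySketch(std::size_t n, std::size_t elem_size) {
  std::size_t bytes;
  if (tcmalloc::tcmalloc_internal::MultiplyOverflow(n, elem_size, &bytes)) {
    return nullptr;  // n * elem_size does not fit in size_t.
  }
  return std::malloc(bytes);
}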
+ +#include "tcmalloc/internal/page_size.h" + +#include + +#include + +#include "absl/base/attributes.h" +#include "absl/base/call_once.h" +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +size_t GetPageSize() { + ABSL_CONST_INIT static size_t page_size; + ABSL_CONST_INIT static absl::once_flag flag; + + absl::base_internal::LowLevelCallOnce(&flag, [&]() { +#if defined(__wasm__) || defined(__asmjs__) + page_size = static_cast(getpagesize()); +#else + page_size = static_cast(sysconf(_SC_PAGESIZE)); +#endif + }); + return page_size; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/want_hpaa_subrelease.cc b/contrib/libs/tcmalloc/tcmalloc/internal/page_size.h similarity index 75% rename from contrib/libs/tcmalloc/tcmalloc/want_hpaa_subrelease.cc rename to contrib/libs/tcmalloc/tcmalloc/internal/page_size.h index 323cce40edc7..7ea86da220ae 100644 --- a/contrib/libs/tcmalloc/tcmalloc/want_hpaa_subrelease.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/page_size.h @@ -1,4 +1,5 @@ -// Copyright 2019 The TCMalloc Authors +#pragma clang system_header +// Copyright 2022 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,6 +13,11 @@ // See the License for the specific language governing permissions and // limitations under the License. +#ifndef TCMALLOC_INTERNAL_PAGE_SIZE_H_ +#define TCMALLOC_INTERNAL_PAGE_SIZE_H_ + +#include + #include "absl/base/attributes.h" #include "tcmalloc/internal/config.h" @@ -19,12 +25,10 @@ GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { -// This -if linked into a binary - overrides page_allocator.cc and forces HPAA -// on/subrelease on. -ABSL_ATTRIBUTE_UNUSED int default_want_hpaa() { return 1; } - -ABSL_ATTRIBUTE_UNUSED int default_subrelease() { return 1; } +ABSL_ATTRIBUTE_PURE_FUNCTION size_t GetPageSize(); } // namespace tcmalloc_internal } // namespace tcmalloc GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_PAGE_SIZE_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/pageflags.cc b/contrib/libs/tcmalloc/tcmalloc/internal/pageflags.cc new file mode 100644 index 000000000000..653f78378ff9 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/pageflags.cc @@ -0,0 +1,368 @@ +// Copyright 2023 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "tcmalloc/internal/pageflags.h" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "absl/base/optimization.h" +#include "absl/cleanup/cleanup.h" +#include "absl/status/status.h" +#include "absl/strings/numbers.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/util.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { +// From include/uapi/linux/kernel-page-flags.h +#define KPF_COMPOUND_HEAD 15 +#define KPF_COMPOUND_TAIL 16 +#define KPF_THP 22 + +#ifndef KPF_HUGE +#define KPF_HUGE 17 +#endif + +#ifndef KPF_MLOCKED +#define KPF_MLOCKED 33 +#endif +#define KPF_STALE 44 + +// If a page is Head or Tail it is a compound page. It cannot be both, but it +// can be neither, in which case it's just a native page and no special handling +// needs to be done. +constexpr bool PageHead(uint64_t flags) { + constexpr uint64_t kPageHead = (1UL << KPF_COMPOUND_HEAD); + return (flags & kPageHead) == kPageHead; +} +constexpr bool PageTail(uint64_t flags) { + constexpr uint64_t kPageTail = (1UL << KPF_COMPOUND_TAIL); + return (flags & kPageTail) == kPageTail; +} +constexpr bool PageThp(uint64_t flags) { + constexpr uint64_t kPageThp = (1UL << KPF_THP); + return (flags & kPageThp) == kPageThp; +} +constexpr bool PageHugetlbfs(uint64_t flags) { + constexpr uint64_t kPageHuge = (1UL << KPF_HUGE); + return (flags & kPageHuge) == kPageHuge; +} +constexpr bool IsHugepage(uint64_t flags) { + return PageThp(flags) || PageHugetlbfs(flags); +} +constexpr bool PageStale(uint64_t flags) { + constexpr uint64_t kPageStale = (1UL << KPF_STALE); + return (flags & kPageStale) == kPageStale; +} +constexpr bool PageLocked(uint64_t flags) { + constexpr uint64_t kPageMlocked = (1UL << KPF_MLOCKED); + // Locked pages are often "unevictable." KPF_LOCKED has a different meaning. + constexpr uint64_t kPageUnevictable = (1UL << KPF_UNEVICTABLE); + return (flags & (kPageMlocked | kPageUnevictable)) != 0; +} + +void MaybeAddToStats(PageStats& stats, const uint64_t flags, + const size_t delta) { + if (PageStale(flags)) stats.bytes_stale += delta; + if (PageLocked(flags)) stats.bytes_locked += delta; +} + +} // namespace + +PageFlags::PageFlags() + : fd_(signal_safe_open("/proc/self/pageflags", O_RDONLY)) {} + +PageFlags::PageFlags(const char* const alternate_filename) + : fd_(signal_safe_open(alternate_filename, O_RDONLY)) { + if (fd_ == -1) { + TC_LOG("Could not open %s (errno %d)", alternate_filename, errno); + } +} + +PageFlags::~PageFlags() { + if (fd_ >= 0) { + signal_safe_close(fd_); + } +} + +size_t PageFlags::GetOffset(const uintptr_t vaddr) { + TC_ASSERT_EQ(vaddr % kPageSize, 0); + return vaddr / kPageSize * kPagemapEntrySize; +} + +absl::StatusCode PageFlags::Seek(const uintptr_t vaddr) { + size_t offset = GetOffset(vaddr); + // Note: lseek can't be interrupted. 
+ off_t status = ::lseek(fd_, offset, SEEK_SET); + if (status != offset) { + return absl::StatusCode::kUnavailable; + } + return absl::StatusCode::kOk; +} + +absl::StatusCode PageFlags::MaybeReadOne(uintptr_t vaddr, uint64_t& flags, + bool& is_huge) { + if (auto res = Seek(vaddr); res != absl::StatusCode::kOk) return res; + static_assert(sizeof(flags) == kPagemapEntrySize); + auto status = signal_safe_read(fd_, reinterpret_cast(&flags), + kPagemapEntrySize, nullptr); + if (status != kPagemapEntrySize) { + return absl::StatusCode::kUnavailable; + } + + if (ABSL_PREDICT_FALSE((PageHead(flags) || PageTail(flags)) && + !PageThp(flags))) { + TC_LOG("PageFlags asked for information on non-THP hugepage??"); + return absl::StatusCode::kFailedPrecondition; + } + + if (PageTail(flags)) { + if (auto res = Seek(vaddr & kHugePageMask); res != absl::StatusCode::kOk) { + return res; + } + auto status = signal_safe_read(fd_, reinterpret_cast(&flags), + kPagemapEntrySize, nullptr); + if (status != kPagemapEntrySize) { + return absl::StatusCode::kUnavailable; + } + if (ABSL_PREDICT_FALSE(PageTail(flags))) { + TC_LOG("Somehow still at tail page even after seeking?"); + return absl::StatusCode::kFailedPrecondition; + } + // NOMUTANTS--Efficiency improvement that's not visible + is_huge = true; + } else { + // The current page is not a tail page, but it could still be the very first + // page of a hugepage. If this is the case, also plumb the information + // upward so we don't waste time re-reading the next 511 tail pages. + // NOMUTANTS--Efficiency improvement that's not visible + is_huge = PageHead(flags); + } + + return absl::StatusCode::kOk; +} + +absl::StatusCode PageFlags::ReadMany(int64_t num_pages, PageStats& output) { + while (num_pages > 0) { + const size_t batch_size = std::min(kEntriesInBuf, num_pages); + const size_t to_read = kPagemapEntrySize * batch_size; + + // We read continuously. For the first read, this starts at wherever the + // first ReadOne ended. See above note for the reinterpret_cast. + auto status = + signal_safe_read(fd_, reinterpret_cast(buf_), to_read, nullptr); + if (status != to_read) { + return absl::StatusCode::kUnavailable; + } + for (int i = 0; i < batch_size; ++i) { + if (PageHead(buf_[i])) { + last_head_read_ = buf_[i]; + } + + if (PageTail(buf_[i])) { + if (ABSL_PREDICT_FALSE(last_head_read_ == -1)) { + TC_LOG("Did not see head page before tail page (i=%v, buf=%v)", i, + buf_[i]); + return absl::StatusCode::kFailedPrecondition; + } + auto last_read = last_head_read_; + MaybeAddToStats(output, last_read, kPageSize); + } else { + MaybeAddToStats(output, buf_[i], kPageSize); + } + } + num_pages -= batch_size; + } + return absl::StatusCode::kOk; +} + +bool PageFlags::IsHugepageBacked(const void* const addr) { + if (fd_ < 0) { + return false; + } + + uint64_t flags = 0; + uintptr_t uaddr = reinterpret_cast(addr); + // Round address down to get the start of the first page that has any bytes + // corresponding to the span [addr, addr+size). + uintptr_t basePage = uaddr & ~(kPageSize - 1); + // Seek into fd. + if (auto res = Seek(basePage); res != absl::StatusCode::kOk) return false; + // Read entry + static_assert(sizeof(flags) == kPagemapEntrySize); + auto status = signal_safe_read(fd_, reinterpret_cast(&flags), + kPagemapEntrySize, nullptr); + if (status != kPagemapEntrySize) { + return false; + } + // pass entry to check if its hugepage backed. 
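  // For example, a THP-backed page typically reports KPF_THP (bit 22)
  // together with KPF_COMPOUND_HEAD (bit 15) or KPF_COMPOUND_TAIL (bit 16),
  // so IsHugepage(flags) is true; an ordinary 4 KiB anonymous page sets
  // neither KPF_THP nor KPF_HUGE (bit 17) and the call below returns false.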
+ return IsHugepage(flags); +} + +std::optional PageFlags::Get(const void* const addr, + const size_t size) { + if (fd_ < 0) { + return std::nullopt; + } + last_head_read_ = -1; + + PageStats ret; + if (size == 0) return ret; + uint64_t result_flags = 0; + bool is_huge = false; + + uintptr_t uaddr = reinterpret_cast(addr); + // Round address down to get the start of the first page that has any bytes + // corresponding to the span [addr, addr+size). + uintptr_t basePage = uaddr & ~(kPageSize - 1); + // Round end address up to get the start of the first page that does not + // have any bytes corresponding to the span [addr, addr+size). + // The span is a subset of [basePage, endPage). + uintptr_t endPage = (uaddr + size + kPageSize - 1) & ~(kPageSize - 1); + + int64_t remainingPages = (endPage - basePage) / kPageSize; + + if (remainingPages == 1) { + if (auto res = MaybeReadOne(basePage, result_flags, is_huge); + res != absl::StatusCode::kOk) { + return std::nullopt; + } + MaybeAddToStats(ret, result_flags, size); + if (ret.bytes_stale > 0) { + ret.stale_scan_seconds = MaybeReadStaleScanSeconds(); + } + return ret; + } + + // Since the input address might not be page-aligned (it can possibly point + // to an arbitrary object), we read staleness about the first page separately + // with ReadOne, then read the complete pages with ReadMany, and then read the + // last page with ReadOne again if needed. + + // Handle the first page. + if (auto res = MaybeReadOne(basePage, result_flags, is_huge); + res != absl::StatusCode::kOk) { + return std::nullopt; + } + size_t firstPageSize = kPageSize - (uaddr - basePage); + if (is_huge) { + // The object starts in the middle of a native page, but the entire page + // might be stale. So the situation looks like, simplifying to four native + // pages per hugepage to make the diagram fit, an entire hugepage that looks + // like (where X is the span of interest): + // . basePage + // [....|..XX|XXXX|XXXX] + // ^^^^^^^ some other stale object(s) + // ^^ firstPageSize + // ^^^^^^^^^^^^^^ `pages_represented` pages, each of kPageSize + // The remainingPages <= 0 case covers the situation where the span ends + // before the hugepage. + const uint64_t base_page_offset = basePage & (kHugePageSize - 1); + const uint64_t base_page_index = base_page_offset / kPageSize; + const int64_t pages_represented = kPagesInHugePage - base_page_index; + remainingPages -= pages_represented; + if (remainingPages <= 0) { + // This hugepage represents every single page that this object is on; + // we're done. + MaybeAddToStats(ret, result_flags, size); + + if (ret.bytes_stale > 0) { + ret.stale_scan_seconds = MaybeReadStaleScanSeconds(); + } + return ret; + } + + // pages_represented - 1 is the number of full pages represented (see + // diagram) + MaybeAddToStats(ret, result_flags, + firstPageSize + (pages_represented - 1) * kPageSize); + + // We've read one uint64_t about a single page, but it represents 512 small + // pages. So the next page that is of interest is one hugepage away -- seek + // to make sure the next read doesn't double-count the native pages in + // between the two head pages. + if (auto res = Seek((basePage & kHugePageMask) + kHugePageSize); + res != absl::StatusCode::kOk) { + return std::nullopt; + } + } else { + remainingPages--; + MaybeAddToStats(ret, result_flags, firstPageSize); + } + + // Handle all pages but the last page. + if (auto res = ReadMany(remainingPages - 1, ret); + res != absl::StatusCode::kOk) { + return std::nullopt; + } + + // Check final page. 
It doesn't really matter if is_huge; we just want the + // statistics about the page that has the last byte of the object. + size_t lastPageSize = kPageSize - (endPage - uaddr - size); + if (auto res = MaybeReadOne(endPage - kPageSize, result_flags, is_huge); + res != absl::StatusCode::kOk) { + return std::nullopt; + } + MaybeAddToStats(ret, result_flags, lastPageSize); + + if (ret.bytes_stale > 0) { + ret.stale_scan_seconds = MaybeReadStaleScanSeconds(); + } + return ret; +} + +uint64_t PageFlags::MaybeReadStaleScanSeconds(const char* filename) { + if (cached_scan_seconds_ != 0) return cached_scan_seconds_; + + int fd = signal_safe_open(filename, O_RDONLY); + if (fd == -1) { + TC_LOG("could not open %s (errno %d)", filename, errno); + return 0; + } + absl::Cleanup closer([fd] { signal_safe_close(fd); }); + char buf[32]; + int read = signal_safe_read(fd, buf, 32, /*bytes_read=*/nullptr); + if (read == -1) { + TC_LOG("could not read %s (errno %d)", filename, errno); + return 0; + } + if (read >= 32) { + buf[31] = '\0'; + TC_LOG("read nonsense from %s (%s)", filename, buf); + return 0; + } + buf[read] = '\0'; + if (!absl::SimpleAtoi(buf, &cached_scan_seconds_)) { + TC_LOG("read nonsense from %s (%s)", filename, buf); + cached_scan_seconds_ = 0; + } + + return cached_scan_seconds_; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/pageflags.h b/contrib/libs/tcmalloc/tcmalloc/internal/pageflags.h new file mode 100644 index 000000000000..2034c4dcfca5 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/pageflags.h @@ -0,0 +1,154 @@ +#pragma clang system_header +// Copyright 2023 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// /proc/self/pageflags is not available without kernel patches such as +// https://patchwork.kernel.org/project/linux-mm/patch/20211028205854.830200-1-almasrymina@google.com/ +// The pageflags that we look at are subject to change. + +#ifndef TCMALLOC_INTERNAL_PAGEFLAGS_H_ +#define TCMALLOC_INTERNAL_PAGEFLAGS_H_ + +#include +#include +#include + +#include + +#include "absl/status/status.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/page_size.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +struct PageStats { + size_t bytes_stale = 0; + size_t bytes_locked = 0; + // The number of seconds that must elapse (at minimum) for a page to be + // considered "stale". 0 indicates that kstaled is disabled or we weren't + // able to read from the scan_seconds sysfs control. (See b/111239799 + // regarding machines that disable kstaled). + // + // This isn't set if bytes_stale is zero because there are no bytes + // that it would refer to, and Get(nullptr, 0) will return an all-zero + // object as expected. + uint64_t stale_scan_seconds = 0; + + // This is currently used only by tests. 
It'll be good to convert this to + // C++20 "= default" when we increase the baseline compiler requirement. + bool operator==(const PageStats& rhs) const { + return bytes_stale == rhs.bytes_stale && bytes_locked == rhs.bytes_locked && + stale_scan_seconds == rhs.stale_scan_seconds; + } + + bool operator!=(const PageStats& rhs) const { return !(*this == rhs); } +}; + +// Base pageflags class that may be mocked for testing. +class PageFlagsBase { + public: + PageFlagsBase() = default; + virtual ~PageFlagsBase() = default; + PageFlagsBase(const PageFlagsBase&) = delete; + PageFlagsBase(PageFlagsBase&&) = delete; + PageFlagsBase& operator=(const PageFlagsBase&) = delete; + PageFlagsBase& operator=(PageFlagsBase&&) = delete; + virtual std::optional Get(const void* addr, size_t size) = 0; +}; + +// PageFlags offers a look at kernel page flags to identify pieces of memory as +// stale. This class is very similar to Residency but has some substantial +// differences to be hugepage aware. +// +// Specifically, if a page is huge, KPF_STALE is set only on the head native +// page of a hugepage and means that the entire hugepage is stale. Thus, when +// encountering tail pages, we must rewind to find the head page to get the +// information related to them. Native pages have KPF_STALE set as normal; no +// special handling needs to be done for them. +class PageFlags final : public PageFlagsBase { + public: + // This class keeps an open file handle to procfs. Destroy the object to + // reclaim it. + PageFlags(); + ~PageFlags() override; + + // Query a span of memory starting from `addr` for `size` bytes. The memory + // span must consist of only native-size pages and THP hugepages; the behavior + // is undefined if we encounter other hugepages (such as hugetlbfs). We try to + // bail out if we find hugetlbfs immediately, but in esoteric cases like a + // hugetlbfs in the middle of another mapping, this won't work. + // + // We use std::optional for return value as std::optional guarantees that no + // dynamic memory allocation would happen. In contrast, absl::StatusOr may + // dynamically allocate memory when needed. Using std::optional allows us to + // use the function in places where memory allocation is prohibited. + std::optional Get(const void* addr, size_t size) override; + bool IsHugepageBacked(const void* addr); + + private: + // Returns the offset in the pageflags file for the given virtual address. + size_t GetOffset(uintptr_t vaddr); + + // This helper seeks the internal file to the correct location for the given + // virtual address. + [[nodiscard]] absl::StatusCode Seek(uintptr_t vaddr); + + // Tries to read staleness information about the page that contains vaddr. + // Possibly seeks backwards in an effort to find head hugepages. + absl::StatusCode MaybeReadOne(uintptr_t vaddr, uint64_t& flags, + bool& is_huge); + // This helper reads staleness information for `num_pages` worth of _full_ + // pages and puts the results into `output`. It continues the read from the + // last Seek() or last Read operation. + absl::StatusCode ReadMany(int64_t num_pages, PageStats& output); + + static constexpr const char* kKstaledScanSeconds = + "/sys/kernel/mm/kstaled/scan_seconds"; + uint64_t MaybeReadStaleScanSeconds( + const char* filename = kKstaledScanSeconds); + + // For testing. + friend class PageFlagsFriend; + explicit PageFlags(const char* alternate_filename); + + // Size of the buffer used to gather results. 
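  // A typical caller pattern for Get() above (sketch; `obj` and `size` stand
  // for whatever allocation the caller wants to inspect):
  //
  //   PageFlags pageflags;
  //   if (std::optional<PageStats> stats = pageflags.Get(obj, size)) {
  //     if (stats->bytes_stale > 0) {
  //       // stats->stale_scan_seconds is the minimum age, in seconds, that
  //       // "stale" implies here.
  //     }
  //   } else {
  //     // Pageflags data is unavailable, e.g. the kernel does not expose
  //     // /proc/self/pageflags.
  //   }
  //
  // As noted on Get(), the std::optional return keeps this path free of
  // dynamic allocation.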
+ static constexpr int kBufferLength = 4096; + static constexpr int kPagemapEntrySize = 8; + static constexpr int kEntriesInBuf = kBufferLength / kPagemapEntrySize; + + const size_t kPageSize = GetPageSize(); + // You can technically not hard-code this but it would involve many more + // queries to figure out the size of every page. It's a lot easier to just + // assume any compound pages are 2 MB. + static constexpr int kHugePageSize = (2 << 20); + static constexpr uintptr_t kHugePageMask = ~(kHugePageSize - 1); + const size_t kPagesInHugePage = kHugePageSize / kPageSize; + + uint64_t buf_[kEntriesInBuf]; + // Information about the previous head page. For any future-encountered tail + // pages, we use the information from this page to determine staleness of the + // tail page. + uint64_t last_head_read_ = -1; + // Scan seconds. If zero, unknown / disabled. + uint64_t cached_scan_seconds_ = 0; + const int fd_; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_PAGEFLAGS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/pageflags_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/pageflags_test.cc new file mode 100644 index 000000000000..cb9b21c8b2ba --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/pageflags_test.cc @@ -0,0 +1,571 @@ +// Copyright 2023 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/pageflags.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "benchmark/benchmark.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/flags/flag.h" +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "absl/random/distributions.h" +#include "absl/random/random.h" +#include "absl/status/status.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" +#include "absl/strings/string_view.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "tcmalloc/internal/allocation_guard.h" +#include "tcmalloc/internal/util.h" + +ABSL_FLAG(bool, check_staleness, false, + "If true, actually wait around for memory to go stale."); + +namespace tcmalloc { +namespace tcmalloc_internal { + +class PageFlagsFriend { + public: + explicit PageFlagsFriend() = default; + explicit PageFlagsFriend(absl::string_view filename) : r_(filename.data()) {} + + template + decltype(auto) Get(Args&&... 
args) { + return r_.Get(std::forward(args)...); + } + + decltype(auto) MaybeReadStaleScanSeconds(absl::string_view filename) { + return r_.MaybeReadStaleScanSeconds(filename.data()); + } + + decltype(auto) CachedScanSeconds() { return r_.cached_scan_seconds_; } + + decltype(auto) IsHugepageBacked(const void* const addr) { + return r_.IsHugepageBacked(addr); + } + + void SetCachedScanSeconds( + decltype(PageFlags::cached_scan_seconds_) scan_seconds) { + r_.cached_scan_seconds_ = scan_seconds; + } + + private: + PageFlags r_; +}; + +std::ostream& operator<<(std::ostream& os, const PageStats& s) { + return os << "{ stale = " << s.bytes_stale << ", locked = " << s.bytes_locked + << ", stale_scan_seconds = " << s.stale_scan_seconds << "}"; +} + +namespace { + +using ::testing::FieldsAre; +using ::testing::Optional; + +constexpr uint64_t kPageHead = (1UL << 15); +constexpr uint64_t kPageTail = (1UL << 16); +constexpr uint64_t kPageThp = (1UL << 22); +constexpr uint64_t kPageHuge = (1UL << 17); +constexpr uint64_t kPageStale = (1UL << 44); + +constexpr size_t kPagemapEntrySize = 8; +constexpr size_t kHugePageSize = 2 << 20; +constexpr size_t kHugePageMask = ~(kHugePageSize - 1); + +// Write the given content into the given filename. Suitable only for tests. +void SetContents(absl::string_view filename, absl::string_view content) { + int fd = + signal_safe_open(filename.data(), O_CREAT | O_WRONLY | O_TRUNC, 0644); + CHECK_NE(fd, -1) << errno << " while writing to " << filename; + int written = + signal_safe_write(fd, content.data(), content.length(), nullptr); + CHECK_EQ(written, content.length()) << errno; + CHECK_EQ(signal_safe_close(fd), 0) << errno; +} + +TEST(PageFlagsTest, Smoke) { + GTEST_SKIP() << "pageflags not commonly available"; + auto res = PageFlags{}.Get(nullptr, 0); + EXPECT_THAT(res, Optional(PageStats{})); +} + +TEST(PageFlagsTest, Stack) { + GTEST_SKIP() << "pageflags not commonly available"; + + char buf[256]; + std::fill(std::begin(buf), std::end(buf), 12); + ::benchmark::DoNotOptimize(buf); + + PageFlags s; + EXPECT_THAT(s.Get(reinterpret_cast(buf), sizeof(buf)), + Optional(PageStats{})); +} + +TEST(PageFlagsTest, Alignment) { + GTEST_SKIP() << "pageflags not commonly available"; + + const size_t kPageSize = getpagesize(); + const int kNumPages = 6 * kHugePageSize / kPageSize; + for (auto mmap_hint : std::initializer_list{ + nullptr, reinterpret_cast(0x00007BADDE000000), + reinterpret_cast(0x00007BADDF001000)}) { + void* p = mmap( + mmap_hint, kNumPages * kPageSize, PROT_READ | PROT_WRITE, + (mmap_hint == nullptr ? 0 : MAP_FIXED) | MAP_ANONYMOUS | MAP_PRIVATE, + -1, 0); + ASSERT_NE(p, MAP_FAILED) << errno; + ASSERT_EQ(madvise(p, kPageSize * kNumPages, MADV_HUGEPAGE), 0) << errno; + + PageFlags s; + EXPECT_THAT(s.Get(p, kPageSize * kNumPages), Optional(PageStats{})) << p; + munmap(p, kNumPages * kPageSize); + } +} + +// Write an alternate "pageflags" file comprising all stale pages at the path +// indicated by `filename`. The actual pageflags are copied from the pages that +// obj spans, but we add KPF_STALE. The alternate pageflags file starts at 0, so +// we return a pointer to `obj` in the alternate virtual memory space. If `obj` +// is page-aligned, this is a zero pointer (not to be confused with a null +// pointer). 
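+// As a rough worked example (illustrative numbers only, assuming 2 MiB
+// hugepages): for obj = 0x7BADDE001234 the containing hugepage starts at
+// 0x7BADDE000000, so the value handed back for use against the alternate
+// file is (void*)0x1234 -- the same offset into the hugepage, but measured
+// from file offset zero.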
+void* GenerateAllStaleTest(absl::string_view filename, void* obj, size_t size) { + const size_t kPageSize = getpagesize(); + + uintptr_t ptr = reinterpret_cast(obj); + uintptr_t pages_start = ptr & kHugePageMask; + uintptr_t new_offset = ptr - pages_start; + + off_t file_read_offset = pages_start / kPageSize * kPagemapEntrySize; + int read_fd = signal_safe_open("/proc/self/pageflags", O_RDONLY); + CHECK_NE(read_fd, -1) + << strerror(errno) + << " while reading pageflags; does your kernel support it?"; + int write_fd = signal_safe_open(filename.data(), O_CREAT | O_WRONLY, S_IRUSR); + CHECK_NE(write_fd, -1) << errno; + + CHECK_EQ(::lseek(read_fd, file_read_offset, SEEK_SET), file_read_offset); + std::array buf; + for (int i = 0; i < size / kHugePageSize + 3; ++i) { + CHECK_EQ(signal_safe_read(read_fd, reinterpret_cast(buf.data()), + kHugePageSize, nullptr), + kHugePageSize); + for (uint64_t& page : buf) { + if ((page & kPageHead) == kPageHead || (page & kPageTail) != kPageTail) { + page |= kPageStale; + } + } + CHECK_EQ(write(write_fd, buf.data(), kHugePageSize), kHugePageSize); + } + CHECK_EQ(close(read_fd), 0) << errno; + CHECK_EQ(close(write_fd), 0) << errno; + return reinterpret_cast(new_offset); +} + +TEST(PageFlagsTest, Stale) { + GTEST_SKIP() << "pageflags not commonly available"; + + constexpr size_t kPageSize = 4096; + constexpr int kNumPages = 6 * kHugePageSize / kPageSize; + // This is hardcoded because we need to know number of pages in a hugepage. + ASSERT_EQ(getpagesize(), kPageSize); + char* p = reinterpret_cast( + mmap(reinterpret_cast(0x00007BADDE001000), kNumPages * kPageSize, + PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)); + ASSERT_NE(p, MAP_FAILED) << errno; + absl::BitGen rng; + for (int i = 0; i < kNumPages * kPageSize; ++i) { + p[i] = absl::Uniform(rng, 0, 256); + } + + // TODO(patrickx): Use MADV_COLLAPSE when broadly available. + // four hugepages + // nohugepage hugepage hugepage hugepage nohugepage hugepage + ASSERT_EQ(madvise(p, kHugePageSize, MADV_NOHUGEPAGE), 0) << errno; + ASSERT_EQ(madvise(p + kHugePageSize, 3 * kHugePageSize, MADV_HUGEPAGE), 0) + << errno; + ASSERT_EQ(madvise(p + 4 * kHugePageSize, kHugePageSize, MADV_NOHUGEPAGE), 0) + << errno; + ASSERT_EQ(madvise(p + 5 * kHugePageSize, kHugePageSize, MADV_HUGEPAGE), 0) + << errno; + PageFlags s; + ASSERT_THAT(s.Get(p, kPageSize * kNumPages), Optional(PageStats{})); + + // This doesn't work within a short test timeout. But if you have your own + // machine with appropriate patches, you can try it out! + if (absl::GetFlag(FLAGS_check_staleness)) { + absl::Time start = absl::Now(); + bool ok = false; + do { + auto res = s.Get(p, kPageSize * kNumPages); + ASSERT_TRUE(res.has_value()); + if (res->bytes_stale > kNumPages * kPageSize / 2) { + LOG(INFO) << absl::StrFormat("Got %ld bytes stale, pointer is at %p", + res->bytes_stale, p); + ok = true; + break; + } + LOG(INFO) << "still waiting; stale = " << res->bytes_stale; + absl::SleepFor(absl::Seconds(5)); + } while (absl::Now() - start < absl::Seconds(600)); + EXPECT_TRUE(ok) << "Failed to get enough stale memory."; + } else { + std::string fake_pageflags = + absl::StrCat(testing::TempDir(), "/fake_pageflags"); + void* fake_p = + GenerateAllStaleTest(fake_pageflags, p, kNumPages * kPageSize); + // fake_p is likely already aligned, but might as well make sure. This is + // likely a zero pointer (not to be confused with nullptr). 
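+    // For illustration (and assuming the kernel honored the mmap hint
+    // above): p sits at offset 0x1000 into its hugepage, so
+    // GenerateAllStaleTest typically returns fake_p == (void*)0x1000, which
+    // is already page-aligned and is therefore unchanged by the rounding
+    // below.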
+ void* base_p = reinterpret_cast(reinterpret_cast(fake_p) & + ~(kPageSize - 1)); + PageFlagsFriend mocks(fake_pageflags); + constexpr uint64_t kSetScanSeconds = 63; + mocks.SetCachedScanSeconds(kSetScanSeconds); + for (int num_pages = 0; num_pages < kNumPages; ++num_pages) { + for (int offset = -1; offset <= 1; ++offset) { + if (num_pages == 0 && offset == -1) continue; + // Messing around with scan_seconds is kind of confusing here but not as + // much overhead as adding a custom matcher. But if you add yet another + // field here it's time to write one. + uint64_t scan_seconds = kSetScanSeconds; + if (num_pages * kPageSize + offset == 0) scan_seconds = 0; + // CAUTION: If you think this test is very flaky, it's possible it's + // only passing when the machine you get scheduled on is out of + // hugepages. + EXPECT_THAT(mocks.Get(base_p, num_pages * kPageSize + offset), + Optional(FieldsAre(num_pages * kPageSize + offset, 0, + scan_seconds))) + << num_pages << "," << offset; + + EXPECT_THAT( + mocks.Get((char*)fake_p - offset, num_pages * kPageSize + offset), + Optional( + FieldsAre(num_pages * kPageSize + offset, 0, scan_seconds))) + << num_pages << "," << offset; + + EXPECT_THAT(mocks.Get(fake_p, num_pages * kPageSize + offset), + Optional(FieldsAre(num_pages * kPageSize + offset, 0, + scan_seconds))) + << num_pages << "," << offset; + + EXPECT_THAT( + mocks.Get((char*)fake_p + offset, num_pages * kPageSize + offset), + Optional( + FieldsAre(num_pages * kPageSize + offset, 0, scan_seconds))) + << num_pages << "," << offset; + + scan_seconds = kSetScanSeconds; + if (num_pages == 0) scan_seconds = 0; + EXPECT_THAT( + mocks.Get((char*)kHugePageSize + offset, num_pages * kPageSize), + Optional(FieldsAre(num_pages * kPageSize, 0, scan_seconds))) + << num_pages << "," << offset; + } + } + + EXPECT_THAT(mocks.Get(reinterpret_cast(2 * kHugePageSize + + 16 * kPageSize + 2), + kHugePageSize * 3), + Optional(FieldsAre(kHugePageSize * 3, 0, kSetScanSeconds))); + } + + ASSERT_EQ(munmap(p, kNumPages * kPageSize), 0) << errno; +} + +TEST(PageFlagsTest, Locked) { + GTEST_SKIP() << "pageflags not commonly available"; + +#if ABSL_HAVE_ADDRESS_SANITIZER || ABSL_HAVE_MEMORY_SANITIZER || \ + ABSL_HAVE_THREAD_SANITIZER + GTEST_SKIP() << "Skipped under sanitizers."; +#endif + + constexpr size_t kPageSize = 4096; + constexpr int kNumPages = 6 * kHugePageSize / kPageSize; + // This is hardcoded because we need to know number of pages in a hugepage. + ASSERT_EQ(getpagesize(), kPageSize); + char* p = reinterpret_cast( + mmap(reinterpret_cast(0x00007BADDE000000), kNumPages * kPageSize, + PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)); + ASSERT_NE(p, MAP_FAILED) << errno; + absl::BitGen rng; + for (int i = 0; i < kNumPages * kPageSize; ++i) { + p[i] = absl::Uniform(rng, 0, 256); + } + + PageFlags s; + ASSERT_THAT(s.Get(p, kPageSize * kNumPages), Optional(PageStats{})); + + ASSERT_EQ(madvise(p, kHugePageSize, MADV_NOHUGEPAGE), 0) << errno; + ASSERT_EQ(madvise(p + kHugePageSize, 3 * kHugePageSize, MADV_HUGEPAGE), 0) + << errno; + ASSERT_EQ(madvise(p + 4 * kHugePageSize, kHugePageSize, MADV_NOHUGEPAGE), 0) + << errno; + ASSERT_EQ(madvise(p + 5 * kHugePageSize, kHugePageSize, MADV_HUGEPAGE), 0) + << errno; + + ASSERT_THAT(s.Get(p, kPageSize * kNumPages), Optional(PageStats{})); + + ASSERT_EQ(mlock(p, kPageSize * kNumPages), 0) << errno; + + // Wait until the kernel has had time to propagate flags. 
+ absl::Time start = absl::Now(); + do { + auto res = s.Get(p, kPageSize * kNumPages); + ASSERT_TRUE(res.has_value()); + if (res->bytes_locked > kNumPages * kPageSize / 2) { + LOG(INFO) << "Got " << res->bytes_locked + << " bytes locked, pointer is at " << (uintptr_t)p; + + if (res->bytes_locked == kNumPages * kPageSize) { + break; + } + } + LOG(INFO) << "still waiting; locked = " << res->bytes_locked; + absl::SleepFor(absl::Seconds(5)); + } while (absl::Now() - start < absl::Seconds(60)); + + auto res = s.Get(p, kPageSize * kNumPages); + ASSERT_TRUE(res.has_value()); + ASSERT_EQ(res->bytes_locked, kPageSize * kNumPages); + + ASSERT_EQ(munmap(p, kNumPages * kPageSize), 0) << errno; +} + +TEST(PageFlagsTest, OnlyTails) { + const size_t kPageSize = getpagesize(); + std::vector data(5 * kHugePageSize / kPageSize); + for (auto& page : data) { + page |= kPageTail; + page |= kPageThp; + } + std::string file_path = absl::StrCat(testing::TempDir(), "/only-tails"); + int write_fd = + signal_safe_open(file_path.c_str(), O_CREAT | O_WRONLY, S_IRUSR); + ASSERT_NE(write_fd, -1) << errno; + + size_t bytes_to_write = data.size() * sizeof(data[0]); + ASSERT_EQ(write(write_fd, data.data(), bytes_to_write), bytes_to_write) + << errno; + ASSERT_EQ(close(write_fd), 0) << errno; + + PageFlagsFriend s(file_path); + ASSERT_EQ(s.Get(reinterpret_cast(kHugePageSize), kHugePageSize), + std::nullopt); +} + +// Queries and checks the pageflags if the pages are hugepage-backed or not. +// TODO(b/28093874): Check to see if we can add a real pageflags test (e.g. +// using MADV_COLLAPSE) to confirm the hugepage status using pageflags. +TEST(PageFlagsTest, IsHugepageBacked) { + const auto test_hugepage_status = [&](uint64_t flags, bool expected) { + const size_t kPageSize = getpagesize(); + const size_t kPagesPerHugepage = kHugePageSize / kPageSize; + + std::vector data(kPagesPerHugepage); + for (auto& page : data) { + page |= flags; + } + std::string file_path = + absl::StrCat(testing::TempDir(), "/hugepage_backed_", flags); + int write_fd = + signal_safe_open(file_path.c_str(), O_CREAT | O_WRONLY, S_IRUSR); + ASSERT_NE(write_fd, -1) << errno; + + size_t bytes_to_write = data.size() * sizeof(data[0]); + ASSERT_EQ(write(write_fd, data.data(), bytes_to_write), bytes_to_write) + << errno; + ASSERT_EQ(close(write_fd), 0) << errno; + + PageFlagsFriend s(file_path); + for (int page = 0; page < kPagesPerHugepage; ++page) { + ASSERT_EQ(s.IsHugepageBacked(reinterpret_cast(page)), expected); + } + CHECK_EQ(signal_safe_close(write_fd), 0) << errno; + }; + + test_hugepage_status(kPageHuge, /*expected=*/true); + test_hugepage_status(kPageHuge | kPageThp, /*expected=*/true); + test_hugepage_status(kPageThp, /*expected=*/true); + test_hugepage_status(/*flags=*/0, /*expected=*/false); +} + +TEST(PageFlagsTest, TooManyTails) { + const size_t kPageSize = getpagesize(); + std::vector data(7 * kHugePageSize / kPageSize); + for (auto& page : data) { + page |= kPageTail; + page |= kPageThp; + } + data[kHugePageSize / kPageSize] = kPageHead | kPageThp; + data[2 * kHugePageSize / kPageSize] = kPageHead | kPageThp; + data[3 * kHugePageSize / kPageSize] = kPageHead | kPageThp; + data[5 * kHugePageSize / kPageSize] = kPageHead | kPageThp; + + std::string file_path = absl::StrCat(testing::TempDir(), "/too-many-tails"); + int write_fd = + signal_safe_open(file_path.c_str(), O_CREAT | O_WRONLY, S_IRUSR); + ASSERT_NE(write_fd, -1) << errno; + + size_t bytes_to_write = data.size() * sizeof(data[0]); + ASSERT_EQ(write(write_fd, data.data(), 
bytes_to_write), bytes_to_write) + << errno; + ASSERT_EQ(close(write_fd), 0) << errno; + + PageFlagsFriend s(file_path); + EXPECT_THAT(s.Get(reinterpret_cast(kHugePageSize), kHugePageSize), + Optional(PageStats{})); + EXPECT_THAT(s.Get(reinterpret_cast(kHugePageSize), 3 * kHugePageSize), + Optional(PageStats{})); + + EXPECT_THAT(s.Get(reinterpret_cast(3 * kHugePageSize), kHugePageSize), + Optional(PageStats{})); + EXPECT_THAT( + s.Get(reinterpret_cast(3 * kHugePageSize), 2 * kHugePageSize), + std::nullopt); + EXPECT_THAT( + s.Get(reinterpret_cast(3 * kHugePageSize), 3 * kHugePageSize), + std::nullopt); +} + +TEST(PageFlagsTest, NotThp) { + const size_t kPageSize = getpagesize(); + std::vector data(3 * kHugePageSize / kPageSize); + for (auto& page : data) { + page |= kPageHead; + } + + std::string file_path = absl::StrCat(testing::TempDir(), "/not-thp"); + int write_fd = + signal_safe_open(file_path.c_str(), O_CREAT | O_WRONLY, S_IRUSR); + ASSERT_NE(write_fd, -1) << errno; + + size_t bytes_to_write = data.size() * sizeof(data[0]); + ASSERT_EQ(write(write_fd, data.data(), bytes_to_write), bytes_to_write) + << errno; + ASSERT_EQ(close(write_fd), 0) << errno; + + PageFlagsFriend s(file_path); + EXPECT_THAT(s.Get(nullptr, kHugePageSize), std::nullopt); +} + +TEST(PageFlagsTest, CannotOpen) { + PageFlagsFriend s("/tmp/a667ba48-18ba-4523-a8a7-b49ece3a6c2b"); + EXPECT_FALSE(s.Get(nullptr, 1).has_value()); +} + +TEST(PageFlagsTest, CannotRead) { + PageFlagsFriend s("/dev/null"); + EXPECT_FALSE(s.Get(nullptr, 1).has_value()); +} + +TEST(PageFlagsTest, CannotSeek) { + PageFlagsFriend s("/dev/null"); + EXPECT_FALSE(s.Get(&s, 1).has_value()); +} + +// For this and the following tests, the EXPECT_* macros allocate memory so +// that's why we have a strange dance with resetting the AllocationGuard. +TEST(StaleSeconds, Read) { + GTEST_SKIP() << "pageflags not commonly available"; + // Allocate at least a couple hugepages or pageflags might have a short read. 
+ auto alloc = std::make_unique>(); + + std::string fake_pageflags = + absl::StrCat(testing::TempDir(), "/fake_pageflags2"); + void* fake_p = GenerateAllStaleTest(fake_pageflags, &*alloc, 1); + std::string fake_stale_seconds = + absl::StrCat(testing::TempDir(), "/fake_stale_seconds"); + SetContents(fake_stale_seconds, "123"); + + std::optional g; + g.emplace(); + PageFlagsFriend s(fake_pageflags); + auto read_seconds = s.MaybeReadStaleScanSeconds(fake_stale_seconds); + auto result = s.Get(fake_p, 1); + g.reset(); + + EXPECT_THAT(read_seconds, 123); + EXPECT_THAT(result, Optional(FieldsAre(1, 0, 123))); +} + +void ExpectStaleSecondsFailedReadFrom(absl::string_view filename) { + std::optional g; + g.emplace(); + PageFlagsFriend s; + auto read_seconds = s.MaybeReadStaleScanSeconds(filename); + auto cached_seconds = s.CachedScanSeconds(); + g.reset(); + + EXPECT_EQ(read_seconds, cached_seconds); + EXPECT_EQ(cached_seconds, 0); +} + +TEST(StaleSeconds, NotFound) { + ExpectStaleSecondsFailedReadFrom(absl::StrCat( + testing::TempDir(), "/nonexistent-f52a3d06-ee84-42c1-a298-a93a4b164ff0")); +} + +TEST(StaleSeconds, Bad) { + std::string fake_stale_seconds = + absl::StrCat(testing::TempDir(), "/fake_stale_seconds2"); + SetContents(fake_stale_seconds, "[always]"); + + ExpectStaleSecondsFailedReadFrom(fake_stale_seconds); +} + +TEST(StaleSeconds, IntOutOfBounds) { + std::string fake_stale_seconds = + absl::StrCat(testing::TempDir(), "/fake_stale_seconds3"); + SetContents(fake_stale_seconds, "-1"); + + ExpectStaleSecondsFailedReadFrom(fake_stale_seconds); +} + +TEST(StaleSeconds, TextOverflow) { + std::string fake_stale_seconds = + absl::StrCat(testing::TempDir(), "/fake_stale_seconds4"); + std::string contents(1 << 22, '9'); + SetContents(fake_stale_seconds, contents); + + ExpectStaleSecondsFailedReadFrom(fake_stale_seconds); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/parameter_accessors.h b/contrib/libs/tcmalloc/tcmalloc/internal/parameter_accessors.h index f14798fe7468..6616ec6b9a89 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/parameter_accessors.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/parameter_accessors.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,8 +16,12 @@ #ifndef TCMALLOC_INTERNAL_PARAMETER_ACCESSORS_H_ #define TCMALLOC_INTERNAL_PARAMETER_ACCESSORS_H_ +#include +#include + #include "absl/base/attributes.h" #include "absl/time/time.h" +#include "tcmalloc/malloc_extension.h" extern "C" { @@ -25,32 +30,92 @@ ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetBackgroundReleaseRate( ABSL_ATTRIBUTE_WEAK uint64_t TCMalloc_Internal_GetHeapSizeHardLimit(); ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetHPAASubrelease(); ABSL_ATTRIBUTE_WEAK void -TCMalloc_Internal_GetHugePageFillerSkipSubreleaseInterval(absl::Duration* v); -ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetShufflePerCpuCachesEnabled(); -ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetReclaimIdlePerCpuCachesEnabled(); -ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetLazyPerCpuCachesEnabled(); +TCMalloc_Internal_GetHugePageFillerSkipSubreleaseShortInterval( + absl::Duration* v); +ABSL_ATTRIBUTE_WEAK void +TCMalloc_Internal_GetHugePageFillerSkipSubreleaseLongInterval( + absl::Duration* v); +ABSL_ATTRIBUTE_WEAK void +TCMalloc_Internal_GetHugeCacheDemandReleaseShortInterval(absl::Duration* v); +ABSL_ATTRIBUTE_WEAK void 
+TCMalloc_Internal_GetHugeCacheDemandReleaseLongInterval(absl::Duration* v); +ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetReleasePartialAllocPagesEnabled(); +ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetHugeCacheDemandBasedRelease(); +ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetHugeRegionDemandBasedRelease(); +ABSL_ATTRIBUTE_WEAK bool +TCMalloc_Internal_GetReleasePagesFromHugeRegionEnabled(); +ABSL_ATTRIBUTE_WEAK bool +TCMalloc_Internal_GetResizeSizeClassMaxCapacityEnabled(); +ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetPrioritizeSpansEnabled(); ABSL_ATTRIBUTE_WEAK double TCMalloc_Internal_GetPeakSamplingHeapGrowthFraction(); ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetPerCpuCachesEnabled(); ABSL_ATTRIBUTE_WEAK size_t TCMalloc_Internal_GetStats(char* buffer, size_t buffer_length); -ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetGuardedSamplingRate(int64_t v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetGuardedSamplingInterval( + int64_t v); +ABSL_ATTRIBUTE_WEAK int TCMalloc_Internal_GetSelSanPercent(); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetSelSanPercent(int v); ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetHeapSizeHardLimit(uint64_t v); ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetHPAASubrelease(bool v); -ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetShufflePerCpuCachesEnabled( +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetReleasePartialAllocPagesEnabled( bool v); -ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetReclaimIdlePerCpuCachesEnabled( +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetHugeCacheDemandBasedRelease( bool v); -ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetLazyPerCpuCachesEnabled(bool v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetHugeRegionDemandBasedRelease( + bool v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetReleasePagesFromHugeRegionEnabled( + bool v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetResizeSizeClassMaxCapacityEnabled( + bool v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetPrioritizeSpansEnabled(bool v); ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetMaxPerCpuCacheSize(int32_t v); ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetMaxTotalThreadCacheBytes( int64_t v); ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetPeakSamplingHeapGrowthFraction( double v); ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetPerCpuCachesEnabled(bool v); -ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetProfileSamplingRate(int64_t v); ABSL_ATTRIBUTE_WEAK void -TCMalloc_Internal_SetHugePageFillerSkipSubreleaseInterval(absl::Duration v); +TCMalloc_Internal_SetPerCpuCachesEnabledNoBuildRequirement(bool v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetProfileSamplingInterval( + int64_t v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetBackgroundProcessActionsEnabled( + bool v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetBackgroundProcessSleepInterval( + absl::Duration v); +ABSL_ATTRIBUTE_WEAK void +TCMalloc_Internal_SetHugePageFillerSkipSubreleaseShortInterval( + absl::Duration v); +ABSL_ATTRIBUTE_WEAK void +TCMalloc_Internal_SetHugePageFillerSkipSubreleaseLongInterval(absl::Duration v); +ABSL_ATTRIBUTE_WEAK void +TCMalloc_Internal_SetHugeCacheDemandReleaseShortInterval(absl::Duration v); +ABSL_ATTRIBUTE_WEAK void +TCMalloc_Internal_SetHugeCacheDemandReleaseLongInterval(absl::Duration v); +ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetMadviseColdRegionsNoHugepage(); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetMadviseColdRegionsNoHugepage( + bool v); +ABSL_ATTRIBUTE_WEAK uint8_t TCMalloc_Internal_GetMinHotAccessHint(); +ABSL_ATTRIBUTE_WEAK void 
TCMalloc_Internal_SetMinHotAccessHint(uint8_t v); +[[maybe_unused]] ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_PossiblyCold( + const void* ptr); +ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetPerCpuCachesDynamicSlabEnabled(); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetPerCpuCachesDynamicSlabEnabled( + bool v); +ABSL_ATTRIBUTE_WEAK double +TCMalloc_Internal_GetPerCpuCachesDynamicSlabGrowThreshold(); +ABSL_ATTRIBUTE_WEAK void +TCMalloc_Internal_SetPerCpuCachesDynamicSlabGrowThreshold(double v); +ABSL_ATTRIBUTE_WEAK double +TCMalloc_Internal_GetPerCpuCachesDynamicSlabShrinkThreshold(); +ABSL_ATTRIBUTE_WEAK void +TCMalloc_Internal_SetPerCpuCachesDynamicSlabShrinkThreshold(double v); +ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetMadviseFree(); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetMadviseFree(bool v); +ABSL_ATTRIBUTE_WEAK tcmalloc::tcmalloc_internal::MadvisePreference +TCMalloc_Internal_GetMadvise(); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetMadvise( + tcmalloc::tcmalloc_internal::MadvisePreference v); } #endif // TCMALLOC_INTERNAL_PARAMETER_ACCESSORS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu.cc b/contrib/libs/tcmalloc/tcmalloc/internal/percpu.cc index f8706f0f2120..ef5699f920a4 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/percpu.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu.cc @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -22,13 +23,20 @@ #include #include +#include +#include +#include +#include #include "absl/base/attributes.h" #include "absl/base/call_once.h" // IWYU pragma: keep -#include "absl/base/internal/sysinfo.h" +#include "absl/base/optimization.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/cpu_utils.h" #include "tcmalloc/internal/linux_syscall_support.h" #include "tcmalloc/internal/logging.h" #include "tcmalloc/internal/optimization.h" +#include "tcmalloc/internal/sysinfo.h" #include "tcmalloc/internal/util.h" GOOGLE_MALLOC_SECTION_BEGIN @@ -43,37 +51,6 @@ namespace percpu { // Restartable Sequence (RSEQ) -extern "C" { -// We provide a per-thread value (defined in percpu_.c) which both tracks -// thread-local initialization state and (with RSEQ) provides an atomic -// in-memory reference for this thread's execution CPU. This value is only -// valid when the thread is currently executing -// Possible values: -// Unavailable/uninitialized: -// { kCpuIdUnsupported, kCpuIdUninitialized } -// Initialized, available: -// [0, NumCpus()) (Always updated at context-switch) -ABSL_PER_THREAD_TLS_KEYWORD ABSL_ATTRIBUTE_WEAK volatile kernel_rseq - __rseq_abi = { - 0, static_cast(kCpuIdUninitialized), 0, 0, - {0, 0}, {{kCpuIdUninitialized, kCpuIdUninitialized}}, -}; - -#ifdef __ppc__ -// On PPC, we have two cases for accessing the __rseq_abi TLS variable: -// * For initial-exec TLS, we write the raw assembly for accessing the memory -// with the appropriate relocations and offsets. On optimized builds, this is -// the use case that matters. -// * For non-initial-exec TLS, access is far more involved. We call this helper -// function from percpu_rseq_ppc.S to leave the initialization and access to -// the compiler. 
-ABSL_ATTRIBUTE_UNUSED ABSL_ATTRIBUTE_NOINLINE void* tcmalloc_tls_fetch_pic() { - return const_cast(&__rseq_abi); -} -#endif - -} // extern "C" - enum PerCpuInitStatus { kFastMode, kSlowMode, @@ -81,47 +58,112 @@ enum PerCpuInitStatus { ABSL_CONST_INIT static PerCpuInitStatus init_status = kSlowMode; ABSL_CONST_INIT static absl::once_flag init_per_cpu_once; -#if TCMALLOC_PERCPU_USE_RSEQ +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ ABSL_CONST_INIT static std::atomic using_upstream_fence{false}; -#endif // TCMALLOC_PERCPU_USE_RSEQ +#endif // TCMALLOC_INTERNAL_PERCPU_USE_RSEQ -// Is this thread's __rseq_abi struct currently registered with the kernel? -static bool ThreadRegistered() { return RseqCpuId() >= kCpuIdInitialized; } +extern "C" thread_local char tcmalloc_sampler ABSL_ATTRIBUTE_INITIAL_EXEC; static bool InitThreadPerCpu() { // If we're already registered, there's nothing further for us to do. - if (ThreadRegistered()) { + if (IsFastNoInit()) { return true; } -#ifdef __NR_rseq + // Mask signals and double check thread registration afterwards. If we + // encounter a signal between ThreadRegistered() above and rseq() and that + // signal initializes per-CPU, rseq() here will fail with EBUSY. + ScopedSigmask mask; + + if (IsFastNoInit()) { + return true; + } + +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ && defined(__NR_rseq) return 0 == syscall(__NR_rseq, &__rseq_abi, sizeof(__rseq_abi), 0, TCMALLOC_PERCPU_RSEQ_SIGNATURE); #endif // __NR_rseq return false; } -bool UsingFlatVirtualCpus() { +bool UsingRseqVirtualCpus() { return false; } +static int UserVirtualCpuId() { + TC_BUG("initialized unsupported vCPU mode"); +} + +int VirtualCpu::Synchronize() { +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ + int vcpu = kCpuIdUninitialized; + + if (TestSynchronize) { + vcpu = TestSynchronize(); + if (vcpu >= kCpuIdInitialized) { + tcmalloc_cached_vcpu = vcpu; + return vcpu; + } + } + + if (UsingVirtualCpus()) { + if (UsingRseqVirtualCpus()) + vcpu = __rseq_abi.vcpu_id; + else + vcpu = UserVirtualCpuId(); + } else { + vcpu = GetRealCpuUnsafe(); + } + + TC_CHECK_GE(vcpu, kCpuIdInitialized); + tcmalloc_cached_vcpu = vcpu; + return vcpu; +#else // TCMALLOC_INTERNAL_PERCPU_USE_RSEQ + TC_BUG("unsupported without rseq"); +#endif // TCMALLOC_INTERNAL_PERCPU_USE_RSEQ +} + static void InitPerCpu() { - CHECK_CONDITION(absl::base_internal::NumCPUs() <= - std::numeric_limits::max()); + const auto maybe_numcpus = NumCPUsMaybe(); + if (!maybe_numcpus.has_value()) { + init_status = kSlowMode; + return; + } + TC_CHECK(*maybe_numcpus <= std::numeric_limits::max()); // Based on the results of successfully initializing the first thread, mark // init_status to initialize all subsequent threads. if (InitThreadPerCpu()) { init_status = kFastMode; -#if TCMALLOC_PERCPU_USE_RSEQ +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ + // See the comment about data layout in percpu.h for details. + auto sampler_addr = reinterpret_cast(&tcmalloc_sampler); + // Have to use volatile because C++ compiler rejects to believe that + // objects can overlap. + volatile auto slabs_addr = reinterpret_cast(&tcmalloc_slabs); + auto rseq_abi_addr = reinterpret_cast(&__rseq_abi); + // Ensure __rseq_abi alignment required by ABI. + TC_CHECK_EQ(rseq_abi_addr % 32, 0); + // Ensure that all our TLS data is in a single cache line. + TC_CHECK_EQ(rseq_abi_addr / 64, slabs_addr / 64); + TC_CHECK_EQ(rseq_abi_addr / 64, + (sampler_addr + TCMALLOC_SAMPLER_HOT_OFFSET) / 64); + // Ensure that tcmalloc_slabs partially overlap with + // __rseq_abi.cpu_id_start as we expect. 
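+    // Concretely: with TCMALLOC_RSEQ_SLABS_OFFSET == -4, tcmalloc_slabs
+    // occupies [rseq_abi_addr - 4, rseq_abi_addr + 4), so on a little-endian
+    // target its top four bytes alias __rseq_abi.cpu_id_start, which the
+    // kernel rewrites on every reschedule (see the layout comment in
+    // percpu.h).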
+ TC_CHECK_EQ(slabs_addr, rseq_abi_addr + TCMALLOC_RSEQ_SLABS_OFFSET); + // Ensure Sampler is properly aligned. + TC_CHECK_EQ(sampler_addr % TCMALLOC_SAMPLER_ALIGN, 0); + // Ensure that tcmalloc_sampler is located before tcmalloc_slabs. + TC_CHECK_LE(sampler_addr + TCMALLOC_SAMPLER_SIZE, slabs_addr); + constexpr int kMEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ = (1 << 8); // It is safe to make the syscall below multiple times. using_upstream_fence.store( - 0 == syscall(__NR_membarrier, - kMEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0), + 0 == syscall(__NR_membarrier, + kMEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0), std::memory_order_relaxed); -#endif // TCMALLOC_PERCPU_USE_RSEQ +#endif // TCMALLOC_INTERNAL_PERCPU_USE_RSEQ } } @@ -130,20 +172,45 @@ static void InitPerCpu() { // completed then only the thread-level will be completed. A return of false // indicates that initialization failed and RSEQ is unavailable. bool InitFastPerCpu() { - absl::base_internal::LowLevelCallOnce(&init_per_cpu_once, InitPerCpu); + // On the first trip through this function do the necessary process-wide + // initialization work. + // + // We do this with all signals disabled so that we don't deadlock due to + // re-entering from a signal handler. + // + // We use a global atomic to record an 'initialized' state as a fast path + // check, which allows us to avoid the signal mask syscall that we must + // use to prevent nested initialization during a signal deadlocking on + // LowLevelOnceInit, before we can enter the 'init once' logic. + ABSL_CONST_INIT static std::atomic initialized(false); + if (!initialized.load(std::memory_order_acquire)) { + ScopedSigmask mask; + + absl::base_internal::LowLevelCallOnce(&init_per_cpu_once, [&] { + InitPerCpu(); + + // Set `initialized` to true after all initialization has completed. + // The below store orders with the load acquire further up, i.e., all + // initialization and side effects thereof are visible to any thread + // observing a true value in the fast path check. + initialized.store(true, std::memory_order_release); + }); + } // Once we've decided fast-cpu support is available, initialization for all // subsequent threads must succeed for consistency. - if (init_status == kFastMode && RseqCpuId() == kCpuIdUninitialized) { - CHECK_CONDITION(InitThreadPerCpu()); + if (init_status == kFastMode && GetRealCpuUnsafe() == kCpuIdUninitialized) { + TC_CHECK(InitThreadPerCpu()); } +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ // If we've decided to use slow mode, set the thread-local CPU ID to // __rseq_abi.cpu_id so that IsFast doesn't call this function again for // this thread. if (init_status == kSlowMode) { __rseq_abi.cpu_id = kCpuIdUnsupported; } +#endif return init_status == kFastMode; } @@ -153,32 +220,31 @@ bool InitFastPerCpu() { // ---------------------------------------------------------------------------- static bool SetAffinityOneCpu(int cpu) { - cpu_set_t set; - CPU_ZERO(&set); - CPU_SET(cpu, &set); - if (0 == sched_setaffinity(0, sizeof(cpu_set_t), &set)) { + CpuSet set; + set.Zero(); + set.Set(cpu); + if (set.SetAffinity(0)) { return true; } - CHECK_CONDITION(errno == EINVAL); + TC_CHECK_EQ(errno, EINVAL); return false; } -// We're being asked to fence against the mask , but a NULL mask +// We're being asked to fence against the mask , but a -1 mask // means every CPU. Do we need ? 
-static bool NeedCpu(int cpu, const cpu_set_t* cpus) { - if (cpus == nullptr) return true; - return CPU_ISSET(cpu, cpus); +static bool NeedCpu(const int cpu, const int target) { + return target == -1 || target == cpu; } -static void SlowFence(const cpu_set_t* cpus) { +static void SlowFence(int target) { // Necessary, so the point in time mentioned below has visibility // of our writes. std::atomic_thread_fence(std::memory_order_seq_cst); // First, save our cpumask (the user may want it back.) - cpu_set_t old; - CPU_ZERO(&old); - CHECK_CONDITION(0 == sched_getaffinity(0, sizeof(cpu_set_t), &old)); + CpuSet old; + old.Zero(); + TC_CHECK(old.GetAffinity(0)); // Here's the basic idea: if we run on every CPU, then every thread // that runs after us has certainly seen every store we've made up @@ -189,8 +255,8 @@ static void SlowFence(const cpu_set_t* cpus) { // side, if we are unable to run on a particular CPU, the same is true for our // siblings (up to some races, dealt with below), so we don't need to. - for (int cpu = 0; cpu < absl::base_internal::NumCPUs(); ++cpu) { - if (!NeedCpu(cpu, cpus)) { + for (int cpu = 0, n = NumCPUs(); cpu < n; ++cpu) { + if (!NeedCpu(cpu, target)) { // unnecessary -- user doesn't care about synchronization on this cpu continue; } @@ -239,33 +305,32 @@ static void SlowFence(const cpu_set_t* cpus) { using tcmalloc::tcmalloc_internal::signal_safe_open; using tcmalloc::tcmalloc_internal::signal_safe_read; int fd = signal_safe_open("/proc/self/cpuset", O_RDONLY); - CHECK_CONDITION(fd >= 0); + TC_CHECK_GE(fd, 0); char c; - CHECK_CONDITION(1 == signal_safe_read(fd, &c, 1, nullptr)); - - CHECK_CONDITION(0 == signal_safe_close(fd)); + TC_CHECK_EQ(1, signal_safe_read(fd, &c, 1, nullptr)); + TC_CHECK_EQ(0, signal_safe_close(fd)); // Try to go back to what we originally had before Fence. - if (0 != sched_setaffinity(0, sizeof(cpu_set_t), &old)) { - CHECK_CONDITION(EINVAL == errno); + if (!old.SetAffinity(0)) { + TC_CHECK_EQ(EINVAL, errno); // The original set is no longer valid, which should only happen if // cpuset.cpus was changed at some point in Fence. If that happened and we // didn't fence, our control plane would have rewritten our affinity mask to // everything in cpuset.cpus, so do that. 
- cpu_set_t set; - CPU_ZERO(&set); - for (int i = 0; i < absl::base_internal::NumCPUs(); ++i) { - CPU_SET(i, &set); + CpuSet set; + set.Zero(); + for (int i = 0, n = NumCPUs(); i < n; ++i) { + set.Set(i); } - CHECK_CONDITION(0 == sched_setaffinity(0, sizeof(cpu_set_t), &set)); + TC_CHECK(set.SetAffinity(0)); } } -#if TCMALLOC_PERCPU_USE_RSEQ +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ static void UpstreamRseqFenceCpu(int cpu) { - ABSL_RAW_CHECK(using_upstream_fence.load(std::memory_order_relaxed), - "upstream fence unavailable."); + TC_CHECK(using_upstream_fence.load(std::memory_order_relaxed) && + "upstream fence unavailable."); constexpr int kMEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ = (1 << 7); constexpr int kMEMBARRIER_CMD_FLAG_CPU = (1 << 0); @@ -273,76 +338,63 @@ static void UpstreamRseqFenceCpu(int cpu) { int64_t res = syscall(__NR_membarrier, kMEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, kMEMBARRIER_CMD_FLAG_CPU, cpu); - ABSL_RAW_CHECK(res == 0 || res == -ENXIO /* missing CPU */, - "Upstream fence failed."); + TC_CHECK(res == 0 || res == -ENXIO /* missing CPU */, + "Upstream fence failed."); } -#endif // TCMALLOC_PERCPU_USE_RSEQ +#endif // TCMALLOC_INTERNAL_PERCPU_USE_RSEQ -// Interrupt every concurrently running sibling thread on any cpu in -// "cpus", and guarantee our writes up til now are visible to every -// other CPU. (cpus == NULL is equivalent to all CPUs.) -static void FenceInterruptCPUs(const cpu_set_t* cpus) { - CHECK_CONDITION(IsFast()); +// Interrupt every concurrently running sibling thread on "cpu", and guarantee +// our writes up til now are visible to every other CPU. (cpu == -1 is +// equivalent to all CPUs.) +static void FenceInterruptCPU(int cpu) { + TC_CHECK(IsFast()); // TODO(b/149390298): Provide an upstream extension for sys_membarrier to // interrupt ongoing restartable sequences. - SlowFence(cpus); -} - -void Fence() { - CompilerBarrier(); - - // Other operations (or all in RSEQ mode) might just be running on another - // CPU. Do something about that: use RSEQ::Fence() to just send interrupts - // and restart any such operation. -#if TCMALLOC_PERCPU_USE_RSEQ - if (using_upstream_fence.load(std::memory_order_relaxed)) { - UpstreamRseqFenceCpu(-1); - return; - } -#endif // TCMALLOC_PERCPU_USE_RSEQ - - FenceInterruptCPUs(nullptr); + SlowFence(cpu); } -void FenceCpu(int cpu, const size_t virtual_cpu_id_offset) { +void FenceCpu(int vcpu) { // Prevent compiler re-ordering of code below. In particular, the call to - // GetCurrentCpu must not appear in assembly program order until after any + // GetRealCpu must not appear in assembly program order until after any // code that comes before FenceCpu in C++ program order. CompilerBarrier(); // A useful fast path: nothing needs doing at all to order us with respect // to our own CPU. - if (GetCurrentVirtualCpu(virtual_cpu_id_offset) == cpu) { + if (ABSL_PREDICT_TRUE(IsFastNoInit()) && VirtualCpu::Synchronize() == vcpu) { return; } - if (virtual_cpu_id_offset == offsetof(kernel_rseq, vcpu_id)) { + if (UsingRseqVirtualCpus()) { ASSUME(false); // With virtual CPUs, we cannot identify the true physical core we need to // interrupt. 
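+    // Fall back to interrupting every CPU instead.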
-#if TCMALLOC_PERCPU_USE_RSEQ - if (using_upstream_fence.load(std::memory_order_relaxed)) { - UpstreamRseqFenceCpu(-1); - return; - } -#endif // TCMALLOC_PERCPU_USE_RSEQ - FenceInterruptCPUs(nullptr); + FenceAllCpus(); return; } -#if TCMALLOC_PERCPU_USE_RSEQ + int real_cpu = vcpu; + +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ if (using_upstream_fence.load(std::memory_order_relaxed)) { - UpstreamRseqFenceCpu(cpu); + UpstreamRseqFenceCpu(real_cpu); return; } -#endif // TCMALLOC_PERCPU_USE_RSEQ +#endif // TCMALLOC_INTERNAL_PERCPU_USE_RSEQ - cpu_set_t set; - CPU_ZERO(&set); - CPU_SET(cpu, &set); - FenceInterruptCPUs(&set); + FenceInterruptCPU(real_cpu); +} + +void FenceAllCpus() { +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ + if (using_upstream_fence.load(std::memory_order_relaxed)) { + UpstreamRseqFenceCpu(-1); + return; + } +#endif // TCMALLOC_INTERNAL_PERCPU_USE_RSEQ + FenceInterruptCPU(-1); } } // namespace percpu diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu.h b/contrib/libs/tcmalloc/tcmalloc/internal/percpu.h index ad2124e0d186..b289eb643994 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/percpu.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,12 +16,23 @@ #ifndef TCMALLOC_INTERNAL_PERCPU_H_ #define TCMALLOC_INTERNAL_PERCPU_H_ -#define TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT 18 +// sizeof(Sampler) +#define TCMALLOC_SAMPLER_SIZE 32 +// alignof(Sampler) +#define TCMALLOC_SAMPLER_ALIGN 8 +// Sampler::HotDataOffset() +#define TCMALLOC_SAMPLER_HOT_OFFSET 24 + +// Offset from __rseq_abi to the cached slabs address. +#define TCMALLOC_RSEQ_SLABS_OFFSET -4 + +// The bit denotes that tcmalloc_rseq.slabs contains valid slabs offset. +#define TCMALLOC_CACHED_SLABS_BIT 63 +#define TCMALLOC_CACHED_SLABS_MASK (1ul << TCMALLOC_CACHED_SLABS_BIT) // TCMALLOC_PERCPU_RSEQ_SUPPORTED_PLATFORM defines whether or not we have an // implementation for the target OS and architecture. -#if defined(__linux__) && \ - (defined(__x86_64__) || defined(__PPC64__) || defined(__aarch64__)) +#if defined(__linux__) && (defined(__x86_64__) || defined(__aarch64__)) #define TCMALLOC_PERCPU_RSEQ_SUPPORTED_PLATFORM 1 #else #define TCMALLOC_PERCPU_RSEQ_SUPPORTED_PLATFORM 0 @@ -30,8 +42,6 @@ #define TCMALLOC_PERCPU_RSEQ_FLAGS 0x0 #if defined(__x86_64__) #define TCMALLOC_PERCPU_RSEQ_SIGNATURE 0x53053053 -#elif defined(__ppc__) -#define TCMALLOC_PERCPU_RSEQ_SIGNATURE 0x0FE5000B #elif defined(__aarch64__) #define TCMALLOC_PERCPU_RSEQ_SIGNATURE 0xd428bc00 #else @@ -51,25 +61,22 @@ #include #include -#include "absl/base/dynamic_annotations.h" -#include "absl/base/internal/per_thread_tls.h" -#include "absl/base/macros.h" +#include "absl/base/attributes.h" #include "absl/base/optimization.h" -#include "tcmalloc/internal/atomic_danger.h" #include "tcmalloc/internal/config.h" #include "tcmalloc/internal/linux_syscall_support.h" #include "tcmalloc/internal/logging.h" -// TCMALLOC_PERCPU_USE_RSEQ defines whether TCMalloc support for RSEQ on the -// target architecture exists. We currently only provide RSEQ for 64-bit x86 and -// PPC binaries. -#if !defined(TCMALLOC_PERCPU_USE_RSEQ) -#if (ABSL_PER_THREAD_TLS == 1) && (TCMALLOC_PERCPU_RSEQ_SUPPORTED_PLATFORM == 1) -#define TCMALLOC_PERCPU_USE_RSEQ 1 +// TCMALLOC_INTERNAL_PERCPU_USE_RSEQ defines whether TCMalloc support for RSEQ +// on the target architecture exists. 
We currently only provide RSEQ for 64-bit +// x86, Arm binaries. +#if !defined(TCMALLOC_INTERNAL_PERCPU_USE_RSEQ) +#if TCMALLOC_PERCPU_RSEQ_SUPPORTED_PLATFORM == 1 +#define TCMALLOC_INTERNAL_PERCPU_USE_RSEQ 1 #else -#define TCMALLOC_PERCPU_USE_RSEQ 0 +#define TCMALLOC_INTERNAL_PERCPU_USE_RSEQ 0 #endif -#endif // !defined(TCMALLOC_PERCPU_USE_RSEQ) +#endif // !defined(TCMALLOC_INTERNAL_PERCPU_USE_RSEQ) GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { @@ -79,170 +86,200 @@ namespace percpu { inline constexpr int kRseqUnregister = 1; -// Internal state used for tracking initialization of RseqCpuId() +// Internal state used for tracking initialization of GetRealCpuUnsafe() inline constexpr int kCpuIdUnsupported = -2; inline constexpr int kCpuIdUninitialized = -1; inline constexpr int kCpuIdInitialized = 0; -#if TCMALLOC_PERCPU_USE_RSEQ -extern "C" ABSL_PER_THREAD_TLS_KEYWORD volatile kernel_rseq __rseq_abi; - -static inline int RseqCpuId() { return __rseq_abi.cpu_id; } - -static inline int VirtualRseqCpuId(const size_t virtual_cpu_id_offset) { -#ifdef __x86_64__ - ASSERT(virtual_cpu_id_offset == offsetof(kernel_rseq, cpu_id) || - virtual_cpu_id_offset == offsetof(kernel_rseq, vcpu_id)); - return *reinterpret_cast(reinterpret_cast(&__rseq_abi) + - virtual_cpu_id_offset); -#else - ASSERT(virtual_cpu_id_offset == offsetof(kernel_rseq, cpu_id)); - return RseqCpuId(); -#endif -} -#else // !TCMALLOC_PERCPU_USE_RSEQ -static inline int RseqCpuId() { return kCpuIdUnsupported; } - -static inline int VirtualRseqCpuId(const size_t virtual_cpu_id_offset) { - return kCpuIdUnsupported; -} +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ +// We provide a per-thread value (defined in percpu_rseq_asm.S) which both +// tracks thread-local initialization state and (with RSEQ) provides an atomic +// in-memory reference for this thread's execution CPU. This value is only +// valid when the thread is currently executing. +// Possible values: +// Unavailable/uninitialized: +// { kCpuIdUnsupported, kCpuIdUninitialized } +// Initialized, available: +// [0, NumCpus()) (Always updated at context-switch) +// +// CPU slabs region address caching. +// Calculation of the address of the current CPU slabs region is needed for +// allocation/deallocation fast paths, but is quite expensive. Due to variable +// shift and experimental support for "virtual CPUs", the calculation involves +// several additional loads and dependent calculations. Pseudo-code for the +// address calculation is as follows: +// +// cpu_offset = TcmallocSlab.virtual_cpu_id_offset_; +// cpu = *(&__rseq_abi + virtual_cpu_id_offset_); +// slabs_and_shift = TcmallocSlab.slabs_and_shift_; +// shift = slabs_and_shift & kShiftMask; +// shifted_cpu = cpu << shift; +// slabs = slabs_and_shift & kSlabsMask; +// slabs += shifted_cpu; +// +// To remove this calculation from fast paths, we cache the slabs address +// for the current CPU in thread local storage. However, when a thread is +// rescheduled to another CPU, we somehow need to understand that the cached +// address is not valid anymore. To achieve this, we overlap the top 4 bytes +// of the cached address with __rseq_abi.cpu_id_start. When a thread is +// rescheduled the kernel overwrites cpu_id_start with the current CPU number, +// which gives us the signal that the cached address is not valid anymore. +// To distinguish the high part of the cached address from the CPU number, +// we set the top bit in the cached address, real CPU numbers (<2^31) do not +// have this bit set. 
+// +// With these arrangements, slabs address calculation on allocation/deallocation +// fast paths reduces to load and check of the cached address: +// +// slabs = __rseq_abi[-4]; +// if ((slabs & (1 << 63)) == 0) goto slowpath; +// slabs &= ~(1 << 63); +// +// Note: here we assume little-endian byte order (which is the case for our +// supported architectures). On a little-endian arch, reading 8 bytes starting +// at __rseq_abi-4 gives __rseq_abi[-4...3]. So the tag bit (1<<63) is +// therefore from __rseq_abi[3]. That's also the most significant byte of +// __rseq_abi.cpu_id_start, hence real CPU numbers can't have this bit set +// (assuming <2^31 CPUs). +// +// The slow path does full slabs address calculation and caches it. +// +// Note: this makes __rseq_abi.cpu_id_start unusable for its original purpose. +// +// Since we need to export the __rseq_abi variable (as part of rseq ABI), +// we arrange overlapping of __rseq_abi and the preceding cached slabs +// address in percpu_rseq_asm.S (C++ is not capable of expressing that). +// __rseq_abi must be aligned to 32 bytes as per ABI. We want the cached slabs +// address to be contained within a single cache line (64 bytes), rather than +// split 2 cache lines. To achieve that we locate __rseq_abi in the second +// part of a cache line. +// For performance reasons we also collocate tcmalloc_sampler with __rseq_abi +// in the same cache line. +// InitPerCpu contains checks that the resulting data layout is as expected. + +// Top 4 bytes of this variable overlap with __rseq_abi.cpu_id_start. +extern "C" ABSL_CONST_INIT thread_local volatile uintptr_t tcmalloc_slabs + ABSL_ATTRIBUTE_INITIAL_EXEC; +extern "C" ABSL_CONST_INIT thread_local volatile kernel_rseq __rseq_abi + ABSL_ATTRIBUTE_INITIAL_EXEC; +extern "C" ABSL_CONST_INIT thread_local volatile int tcmalloc_cached_vcpu + ABSL_ATTRIBUTE_INITIAL_EXEC; + +// Provide weak definitions here to enable more efficient codegen. +// If compiler sees only extern declaration when generating accesses, +// then even with initial-exec model and -fno-PIE compiler has to assume +// that the definition may come from a dynamic library and has to use +// GOT access. When compiler sees even a weak definition, it knows the +// declaration will be in the current module and can generate direct accesses. +ABSL_CONST_INIT thread_local volatile uintptr_t tcmalloc_slabs + ABSL_ATTRIBUTE_WEAK = {}; +ABSL_CONST_INIT thread_local volatile kernel_rseq __rseq_abi + ABSL_ATTRIBUTE_WEAK = { + 0, static_cast(kCpuIdUninitialized), 0, 0, + {0, 0}, {{kCpuIdUninitialized, kCpuIdUninitialized}}, +}; +ABSL_CONST_INIT thread_local volatile int tcmalloc_cached_vcpu + ABSL_ATTRIBUTE_WEAK = kCpuIdUninitialized; + +inline int GetRealCpuUnsafe() { return __rseq_abi.cpu_id; } +#else // !TCMALLOC_INTERNAL_PERCPU_USE_RSEQ +inline int GetRealCpuUnsafe() { return kCpuIdUnsupported; } #endif -typedef int (*OverflowHandler)(int cpu, size_t cl, void *item); -typedef void *(*UnderflowHandler)(int cpu, size_t cl); - // Functions below are implemented in the architecture-specific percpu_rseq_*.S // files. 
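+//
+// A minimal sketch of the cached-slabs fast-path check described in the big
+// comment above, written as ordinary C++ using the macros from this header
+// (the helper name is made up for illustration; the real fast paths live in
+// TcmallocSlab and the percpu_rseq_*.S files mentioned above):
+//
+//   inline void* TryGetCachedSlabs() {
+//     uintptr_t val = tcmalloc_slabs;  // one thread-local load
+//     if ((val & TCMALLOC_CACHED_SLABS_MASK) == 0) {
+//       return nullptr;  // preempted or never cached: take the slow path
+//     }
+//     return reinterpret_cast<void*>(val & ~TCMALLOC_CACHED_SLABS_MASK);
+//   }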
extern "C" { -int TcmallocSlab_Internal_PerCpuCmpxchg64(int target_cpu, intptr_t *p, - intptr_t old_val, intptr_t new_val); - -#ifndef __x86_64__ -int TcmallocSlab_Internal_Push(void *ptr, size_t cl, void *item, size_t shift, - OverflowHandler f); -int TcmallocSlab_Internal_Push_FixedShift(void *ptr, size_t cl, void *item, - OverflowHandler f); -void *TcmallocSlab_Internal_Pop(void *ptr, size_t cl, UnderflowHandler f, - size_t shift); -void *TcmallocSlab_Internal_Pop_FixedShift(void *ptr, size_t cl, - UnderflowHandler f); -#endif // __x86_64__ - -// Push a batch for a slab which the Shift equal to -// TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT -size_t TcmallocSlab_Internal_PushBatch_FixedShift(void *ptr, size_t cl, - void **batch, size_t len); - -// Pop a batch for a slab which the Shift equal to -// TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT -size_t TcmallocSlab_Internal_PopBatch_FixedShift(void *ptr, size_t cl, - void **batch, size_t len); - -#ifdef __x86_64__ -int TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU(int target_cpu, intptr_t *p, - intptr_t old_val, - intptr_t new_val); -size_t TcmallocSlab_Internal_PushBatch_FixedShift_VCPU(void *ptr, size_t cl, - void **batch, - size_t len); -size_t TcmallocSlab_Internal_PopBatch_FixedShift_VCPU(void *ptr, size_t cl, - void **batch, size_t len); -#endif -} +size_t TcmallocSlab_Internal_PushBatch(size_t size_class, void** batch, + size_t len); +size_t TcmallocSlab_Internal_PopBatch(size_t size_class, void** batch, + size_t len, + std::atomic* begin_ptr); +} // extern "C" // NOTE: We skirt the usual naming convention slightly above using "_" to // increase the visibility of functions embedded into the root-namespace (by // virtue of C linkage) in the supported case. -// Return whether we are using flat virtual CPUs. -bool UsingFlatVirtualCpus(); - -inline int GetCurrentCpuUnsafe() { -// On PowerPC, Linux maintains the current CPU in the bottom 12 bits of special -// purpose register SPRG3, which is readable from user mode. References: -// -// https://github.com/torvalds/linux/blob/164c09978cebebd8b5fc198e9243777dbaecdfa0/arch/powerpc/kernel/vdso.c#L727 -// https://github.com/torvalds/linux/blob/dfb945473ae8528fd885607b6fa843c676745e0c/arch/powerpc/include/asm/reg.h#L966 -// https://github.com/torvalds/linux/blob/dfb945473ae8528fd885607b6fa843c676745e0c/arch/powerpc/include/asm/reg.h#L593 -// https://lists.ozlabs.org/pipermail/linuxppc-dev/2012-July/099011.html -// -// This is intended for VDSO syscalls, but is much faster if we simply inline it -// here, presumably due to the function call and null-check overheads of the -// VDSO version. As of 2014-07 the CPU time costs are something like 1.2 ns for -// the inline version vs 12 ns for VDSO. -#if defined(__PPC64__) && defined(__linux__) - uint64_t spr; - - // Mark the asm as volatile, so that it is not hoisted out of loops. - asm volatile("mfspr %0, 0x103;" : "=r"(spr)); +enum class RseqVcpuMode { kNone }; +inline RseqVcpuMode GetRseqVcpuMode() { return RseqVcpuMode::kNone; } - return spr & 0xfff; -#else - // Elsewhere, use the rseq mechanism. - return RseqCpuId(); -#endif +// Return whether we are using any kind of virtual CPUs. +inline bool UsingVirtualCpus() { + return GetRseqVcpuMode() != RseqVcpuMode::kNone; } -inline int GetCurrentCpu() { - // We can't use the unsafe version unless we have the appropriate version of - // the rseq extension. This also allows us a convenient escape hatch if the - // kernel changes the way it uses special-purpose registers for CPU IDs. 
- int cpu = GetCurrentCpuUnsafe(); +// Return whether we are using flat virtual CPUs (provided by kernel RSEQ). +bool UsingRseqVirtualCpus(); + +inline int GetRealCpu() { + // The "unsafe" variant strongly depends on RSEQ. + int cpu = GetRealCpuUnsafe(); // We open-code the check for fast-cpu availability since we do not want to - // force initialization in the first-call case. This so done so that we can - // use this in places where it may not always be safe to initialize and so - // that it may serve in the future as a proxy for callers such as - // CPULogicalId() without introducing an implicit dependence on the fast-path - // extensions. Initialization is also simply unneeded on some platforms. + // force initialization in the first-call case. This is done so that we can + // use this in places where it may not always be safe to initialize. + // Initialization is also simply unneeded on some platforms. if (ABSL_PREDICT_TRUE(cpu >= kCpuIdInitialized)) { return cpu; } #ifdef TCMALLOC_HAVE_SCHED_GETCPU cpu = sched_getcpu(); - ASSERT(cpu >= 0); + TC_ASSERT_GE(cpu, 0); #endif // TCMALLOC_HAVE_SCHED_GETCPU return cpu; } -inline int GetCurrentVirtualCpuUnsafe(const size_t virtual_cpu_id_offset) { - return VirtualRseqCpuId(virtual_cpu_id_offset); -} - -inline int GetCurrentVirtualCpu(const size_t virtual_cpu_id_offset) { - // We can't use the unsafe version unless we have the appropriate version of - // the rseq extension. This also allows us a convenient escape hatch if the - // kernel changes the way it uses special-purpose registers for CPU IDs. - int cpu = VirtualRseqCpuId(virtual_cpu_id_offset); +// Static accessors functions for any kind of vCPU IDs, which transparently +// choose the right vCPU source based on the initialized mode. Wrapping it into +// a class helps to restrict access and avoid accidental misuse. +class VirtualCpu { + public: + // Returns the last vCPU ID since the last synchronization point. This may be + // used where the vCPU ID is not used to derive RSEQ-validated state. Returns + // kCpuIdUninitialized if this thread has never synchronized with a vCPU ID, + // or kCpuIdUnsupported if RSEQ is not used. + // + // This is safe, because without a RSEQ critical section to detect thread + // preemption, a thread may be preempted at any point and the virtual (or + // real) CPU may change. + static int get() { +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ + return tcmalloc_cached_vcpu; +#else // TCMALLOC_INTERNAL_PERCPU_USE_RSEQ + return kCpuIdUnsupported; +#endif // TCMALLOC_INTERNAL_PERCPU_USE_RSEQ + } - // We open-code the check for fast-cpu availability since we do not want to - // force initialization in the first-call case. This so done so that we can - // use this in places where it may not always be safe to initialize and so - // that it may serve in the future as a proxy for callers such as - // CPULogicalId() without introducing an implicit dependence on the fast-path - // extensions. Initialization is also simply unneeded on some platforms. - if (ABSL_PREDICT_TRUE(cpu >= kCpuIdInitialized)) { - return cpu; + // Returns the last vCPU ID since the last synchronization point. + // REQUIRES: Synchronize() has been called by this thread + static int GetAfterSynchronize() { + const int ret = get(); + TC_ASSERT_GE(ret, kCpuIdInitialized); + return ret; } -#ifdef TCMALLOC_HAVE_SCHED_GETCPU - cpu = sched_getcpu(); - ASSERT(cpu >= 0); -#endif // TCMALLOC_HAVE_SCHED_GETCPU + // Returns the current vCPU ID. 
Use to synchronize RSEQ-validated state that + // depends on the (per-CPU mutually exclusive) vCPU ID with the current vCPU + // ID after a thread preemption was detected. This function may be expensive, + // so it should only be called on slow paths. + static int Synchronize(); - return cpu; -} + private: + // The return value of Synchronize() may be overridden by tests if they define + // VirtualCpu::TestSynchronize(). + ABSL_ATTRIBUTE_WEAK static int TestSynchronize(); +}; bool InitFastPerCpu(); inline bool IsFast() { - if (!TCMALLOC_PERCPU_USE_RSEQ) { + if (!TCMALLOC_INTERNAL_PERCPU_USE_RSEQ) { return false; } - int cpu = RseqCpuId(); + int cpu = GetRealCpuUnsafe(); if (ABSL_PREDICT_TRUE(cpu >= kCpuIdInitialized)) { return true; @@ -258,10 +295,10 @@ inline bool IsFast() { // As IsFast(), but if this thread isn't already initialized, will not // attempt to do so. inline bool IsFastNoInit() { - if (!TCMALLOC_PERCPU_USE_RSEQ) { + if (!TCMALLOC_INTERNAL_PERCPU_USE_RSEQ) { return false; } - int cpu = RseqCpuId(); + int cpu = GetRealCpuUnsafe(); return ABSL_PREDICT_TRUE(cpu >= kCpuIdInitialized); } @@ -276,61 +313,47 @@ inline void CompilerBarrier() { // Internal tsan annotations, do not use externally. // Required as tsan does not natively understand RSEQ. -#ifdef THREAD_SANITIZER +#ifdef ABSL_HAVE_THREAD_SANITIZER extern "C" { -void __tsan_acquire(void *addr); -void __tsan_release(void *addr); +void __tsan_acquire(void* addr); +void __tsan_release(void* addr); } #endif // TSAN relies on seeing (and rewriting) memory accesses. It can't -// get at the memory acccesses we make from RSEQ assembler sequences, +// get at the memory accesses we make from RSEQ assembler sequences, // which means it doesn't know about the semantics our sequences // enforce. So if we're under TSAN, add barrier annotations. -inline void TSANAcquire(void *p) { -#ifdef THREAD_SANITIZER +inline void TSANAcquire(void* p) { +#ifdef ABSL_HAVE_THREAD_SANITIZER __tsan_acquire(p); #endif } -inline void TSANRelease(void *p) { -#ifdef THREAD_SANITIZER - __tsan_release(p); +inline void TSANAcquireBatch(void** batch, int n) { +#ifdef ABSL_HAVE_THREAD_SANITIZER + for (int i = 0; i < n; i++) { + __tsan_acquire(batch[i]); + } #endif } -inline void TSANMemoryBarrierOn(void *p) { - TSANAcquire(p); - TSANRelease(p); +inline void TSANRelease(void* p) { +#ifdef ABSL_HAVE_THREAD_SANITIZER + __tsan_release(p); +#endif } -// These methods may *only* be called if IsFast() has been called by the current -// thread (and it returned true). 
-inline int CompareAndSwapUnsafe(int target_cpu, std::atomic *p, - intptr_t old_val, intptr_t new_val, - const size_t virtual_cpu_id_offset) { - TSANMemoryBarrierOn(p); -#if TCMALLOC_PERCPU_USE_RSEQ - switch (virtual_cpu_id_offset) { - case offsetof(kernel_rseq, cpu_id): - return TcmallocSlab_Internal_PerCpuCmpxchg64( - target_cpu, tcmalloc_internal::atomic_danger::CastToIntegral(p), - old_val, new_val); -#ifdef __x86_64__ - case offsetof(kernel_rseq, vcpu_id): - return TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU( - target_cpu, tcmalloc_internal::atomic_danger::CastToIntegral(p), - old_val, new_val); -#endif // __x86_64__ - default: - __builtin_unreachable(); +inline void TSANReleaseBatch(void** batch, int n) { +#ifdef ABSL_HAVE_THREAD_SANITIZER + for (int i = 0; i < n; i++) { + __tsan_release(batch[i]); } -#else // !TCMALLOC_PERCPU_USE_RSEQ - __builtin_unreachable(); -#endif // !TCMALLOC_PERCPU_USE_RSEQ +#endif } -void FenceCpu(int cpu, const size_t virtual_cpu_id_offset); +void FenceCpu(int vcpu); +void FenceAllCpus(); } // namespace percpu } // namespace subtle diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_early_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_early_test.cc new file mode 100644 index 000000000000..6d6603ac2154 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_early_test.cc @@ -0,0 +1,39 @@ +// Copyright 2024 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "gtest/gtest.h" +#include "absl/base/attributes.h" +#include "tcmalloc/internal/percpu.h" + +namespace tcmalloc::tcmalloc_internal::subtle::percpu { +namespace { + +ABSL_CONST_INIT std::optional success; + +TEST(PerCpu, IsRegistered) { + // Verify preinit ran. Its success should be identical to running it after + // main has started. + EXPECT_TRUE(success.has_value()); + EXPECT_EQ(success, IsFast()); +} + +void register_rseq() { success = IsFast(); } + +__attribute__((section(".preinit_array"), + used)) void (*__local_install_factory_preinit)() = register_rseq; + +} // namespace +} // namespace tcmalloc::tcmalloc_internal::subtle::percpu diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_aarch64.S b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_aarch64.S index 3cdaf17835d0..d6a684c130f0 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_aarch64.S +++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_aarch64.S @@ -90,8 +90,7 @@ // distinct from label_start, as the return IP must be "signed" (see // SIGN_ABORT()). // -// TODO(b/141629158): __rseq_cs only needs to be writeable to allow for -// relocations, but could be read-only for non-PIE builds. +// __rseq_cs only needs to be writeable to allow for relocations. 
#define DEFINE_UPSTREAM_CS(label) \ .pushsection __rseq_cs, "aw"; \ .balign 32; \ @@ -114,10 +113,10 @@ .globl label##_trampoline; \ .type label##_trampoline, @function; \ label##_trampoline: \ - .cfi_startproc; \ + CFI(.cfi_startproc); \ BTI_C; \ b .L##label##_abort; \ - .cfi_endproc; \ + CFI(.cfi_endproc); \ .size label##_trampoline, . - label##_trampoline; \ .popsection; @@ -141,8 +140,8 @@ label##_trampoline: \ #endif /* FETCH_CPU assumes &__rseq_abi is in x5. */ -#define FETCH_CPU(dest) \ - ldr dest, [x5, #4] /* cpuid is 32-bits */ +#define FETCH_CPU(dest, offset) ldrh dest, [x5, offset] +#define FETCH_SLABS(dest) ldr dest, [x5, TCMALLOC_RSEQ_SLABS_OFFSET] /* With PIE have initial-exec TLS, even in the presence of position independent code. */ @@ -198,309 +197,148 @@ label##_trampoline: \ /* start of atomic restartable sequences */ -/* - * int TcmallocSlab_Internal_PerCpuCmpxchg64(int target_cpu, long *p, - * long old_val, long new_val) - * w0: target_cpu - * x1: p - * x2: old_val - * x3: new_val - */ - .p2align 6 /* aligns to 2^6 with NOP filling */ - .globl TcmallocSlab_Internal_PerCpuCmpxchg64 - .type TcmallocSlab_Internal_PerCpuCmpxchg64, @function -TcmallocSlab_Internal_PerCpuCmpxchg64: - .cfi_startproc - BTI_C - START_RSEQ(TcmallocSlab_Internal_PerCpuCmpxchg64) - FETCH_CPU(w4) - cmp w0, w4 /* check cpu vs current_cpu */ - bne .LTcmallocSlab_Internal_PerCpuCmpxchg64_commit - ldr x6, [x1] - cmp x6, x2 /* verify *p == old */ - bne .LTcmallocSlab_Internal_PerCpuCmpxchg64_mismatch - str x3, [x1] -.LTcmallocSlab_Internal_PerCpuCmpxchg64_commit: - mov x0, x4 - ret /* return current cpu, indicating mismatch OR success */ -.LTcmallocSlab_Internal_PerCpuCmpxchg64_mismatch: - mov x0, #-1 /* mismatch versus "old" or "check", return -1 */ - ret - .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_PerCpuCmpxchg64) -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PerCpuCmpxchg64) - -/* size_t TcmallocSlab_Internal_PushBatch_FixedShift( - * void *ptr (x0), - * size_t cl (w1), - * void** batch (x2), - * size_t len (w3) { - * uint64_t r8 = __rseq_abi.cpu_id - * uint64_t* r8 = CpuMemoryStart(x0, r8) - * Header* hdr = r8 + w1 * 8 +/* size_t TcmallocSlab_Internal_PushBatch( + * size_t size_class (x0), + * void** batch (x1), + * size_t len (x2)) { + * uint64_t* r8 = tcmalloc_rseq.slabs; + * if ((r8 & TCMALLOC_CACHED_SLABS_BIT) == 0) return 0; + * r8 &= ~TCMALLOC_CACHED_SLABS_BIT; + * Header* hdr = r8 + r0 * 8 * uint64_t r9 = hdr->current (zero-extend 16bit) * uint64_t r10 = hdr->end (zero-extend 16bit) * if (r9 >= r10) return 0 - * r11 = r3 + * r11 = r2 * r10 = r9 + min(len, r10 - r9) * r13 = r9 + r10 * r9 = r8 + r9 * 8 * r14 = r8 + r13 * 8 * loop: - * r12 = *(r11-=8) (pre-index) Pop from Batch - * *(r9+=8) = r12 (post-index) Push to Slab + * r12 = *(r11 -= 8) (pre-index) Pop from Batch + * *(r9 += 8) = r12 (post-index) Push to Slab * if (r9 != r14) goto loop * hdr->current = r13 (16bit store) * return r10 * } */ .p2align 6 /* aligns to 2^6 with NOP filling */ - .globl TcmallocSlab_Internal_PushBatch_FixedShift - .type TcmallocSlab_Internal_PushBatch_FixedShift, @function -TcmallocSlab_Internal_PushBatch_FixedShift: - .cfi_startproc + .globl TcmallocSlab_Internal_PushBatch + .type TcmallocSlab_Internal_PushBatch, @function +TcmallocSlab_Internal_PushBatch: + CFI(.cfi_startproc) BTI_C - START_RSEQ(TcmallocSlab_Internal_PushBatch_FixedShift) - FETCH_CPU(w8) - lsl x8, x8, #TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT /* multiply cpu by 256k */ - add x8, x0, x8 - add x4, x8, x1, LSL #3 /* r4 = hdr */ - ldrh w9, [x4] 
/* r9 = current */ - ldrh w10, [x4, #6] /* r10 = end */ + START_RSEQ(TcmallocSlab_Internal_PushBatch) + FETCH_SLABS(x8) + tbz x8, #TCMALLOC_CACHED_SLABS_BIT, .LTcmallocSlab_Internal_PushBatch_no_capacity + and x8, x8, #~TCMALLOC_CACHED_SLABS_MASK + add x15, x8, x0, LSL #2 /* r15 = hdr */ + ldrh w9, [x15] /* r9 = current */ + ldrh w10, [x15, #2] /* r10 = end */ cmp w9, w10 - bge .LTcmallocSlab_Internal_PushBatch_FixedShift_no_capacity - add x11, x2, x3, LSL #3 /* r11 = batch + len * 8 */ + bge .LTcmallocSlab_Internal_PushBatch_no_capacity + add x11, x1, x2, LSL #3 /* r11 = batch + len * 8 */ sub w10, w10, w9 /* r10 = free capacity */ - cmp w3, w10 - csel w10, w3, w10, ls /* r10 = min(len, free capacity), amount we are + cmp w2, w10 + csel w10, w2, w10, ls /* r10 = min(len, free capacity), amount we are pushing */ add x13, x9, x10 /* r13 = current + amount we are pushing. */ add x9, x8, x9, LSL #3 /* r9 = current cpu slab stack */ add x14, x8, x13, LSL #3 /* r14 = new current address */ -.LTcmallocSlab_Internal_PushBatch_FixedShift_loop: + tst w10, #1 + beq .LTcmallocSlab_Internal_PushBatch_loop ldr x12, [x11, #-8]! /* r12 = [--r11] */ str x12, [x9], #8 /* [r9++] = r12 */ + cmp w10, #1 + beq .LTcmallocSlab_Internal_PushBatch_store +.LTcmallocSlab_Internal_PushBatch_loop: + ldr q4, [x11, #-16]! /* q4 = [r11 - 2], r11 -= 2 */ + str q4, [x9], #16 /* [r9 += 2] = q4 */ cmp x9, x14 /* if current cpu slab address == new current address */ - bne .LTcmallocSlab_Internal_PushBatch_FixedShift_loop - strh w13, [x4] /* store new current index */ -.LTcmallocSlab_Internal_PushBatch_FixedShift_commit: + bne .LTcmallocSlab_Internal_PushBatch_loop +.LTcmallocSlab_Internal_PushBatch_store: + strh w13, [x15] /* store new current index */ +.LTcmallocSlab_Internal_PushBatch_commit: mov x0, x10 ret -.LTcmallocSlab_Internal_PushBatch_FixedShift_no_capacity: +.LTcmallocSlab_Internal_PushBatch_no_capacity: mov x0, #0 ret - .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_PushBatch_FixedShift) -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PushBatch_FixedShift) - -/* size_t TcmallocSlab_Internal_PopBatch_FixedShift( - * void *ptr (x0), - * size_t cl (w1), - * void** batch (x2), - * size_t len (w3) { - * uint64_t r8 = __rseq_abi.cpu_id - * uint64_t* r8 = CpuMemoryStart(ptr, r8) - * Header* hdr = GetHeader(r8, cl) + CFI(.cfi_endproc) +ENCODE_SIZE(TcmallocSlab_Internal_PushBatch) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PushBatch) + +/* size_t TcmallocSlab_Internal_PopBatch( + * size_t size_class (x0), + * void** batch (x1), + * size_t len (x2), + * std::atomic* begin_ptr (x3)) { + * uint64_t* r8 = tcmalloc_rseq.slabs; + * if ((r8 & TCMALLOC_CACHED_SLABS_BIT) == 0) return 0; + * r8 &= ~TCMALLOC_CACHED_SLABS_BIT; + * Header* hdr = GetHeader(r8, size_class) * uint64_t r9 = hdr->current - * uint64_t r10 = hdr->begin + * uint64_t r10 = *begin_ptr * if (r9 <= r10) return 0 * r11 = min(len, r9 - r10) * r13 = r8 + r9 * 8 * r9 = r9 - r11 - * r12 = r2 - * r14 = r2 + r11 * 8 + * r12 = r1 + * r14 = r1 + r11 * 8 * loop: * r10 = *(r13 -= 8) (pre-index) Pop from slab - * *(r12+=8) = r10 (post-index) Push to Batch + * *(r12 += 8) = r10 (post-index) Push to Batch * if (r12 != r14) goto loop * hdr->current = r9 * return r11 * } */ .p2align 6 /* aligns to 2^6 with NOP filling */ - .globl TcmallocSlab_Internal_PopBatch_FixedShift - .type TcmallocSlab_Internal_PopBatch_FixedShift, @function -TcmallocSlab_Internal_PopBatch_FixedShift: - .cfi_startproc + .globl TcmallocSlab_Internal_PopBatch + .type TcmallocSlab_Internal_PopBatch, @function 
+TcmallocSlab_Internal_PopBatch: + CFI(.cfi_startproc) BTI_C - START_RSEQ(TcmallocSlab_Internal_PopBatch_FixedShift) - FETCH_CPU(w8) - lsl x8, x8, #TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT /* multiply cpu by 256k */ - add x8, x0, x8 - add x4, x8, x1, LSL #3 - ldrh w9, [x4] /* current */ - ldrh w10, [x4, #4] /* begin */ + START_RSEQ(TcmallocSlab_Internal_PopBatch) + FETCH_SLABS(x8) + tbz x8, #TCMALLOC_CACHED_SLABS_BIT, .LTcmallocSlab_Internal_PopBatch_no_items + and x8, x8, #~TCMALLOC_CACHED_SLABS_MASK + add x15, x8, x0, LSL #2 + ldrh w9, [x15] /* current */ + ldrh w10, [x3] /* begin */ cmp w10, w9 - bhs .LTcmallocSlab_Internal_PopBatch_FixedShift_no_items + bhs .LTcmallocSlab_Internal_PopBatch_no_items sub w11, w9, w10 /* r11 = available items */ - cmp w3, w11 - csel w11, w3, w11, ls /* r11 = min(len, available items), amount we are + cmp w2, w11 + csel w11, w2, w11, ls /* r11 = min(len, available items), amount we are popping */ add x13, x8, x9, LSL #3 /* r13 = current cpu slab stack */ sub x9, x9, x11 /* update new current */ - mov x12, x2 /* r12 = batch */ - add x14, x2, x11, LSL #3 /* r14 = batch + amount we are popping*8 */ -.LTcmallocSlab_Internal_PopBatch_FixedShift_loop: + mov x12, x1 /* r12 = batch */ + add x14, x1, x11, LSL #3 /* r14 = batch + amount we are popping*8 */ + tst w11, #1 + beq .LTcmallocSlab_Internal_PopBatch_loop ldr x10, [x13, #-8]! /* r10 = [--r13] */ str x10, [x12], #8 /* [r12++] = r10 */ + cmp w11, #1 + beq .LTcmallocSlab_Internal_PopBatch_store +.LTcmallocSlab_Internal_PopBatch_loop: + ldr q4, [x13, #-16]! /* q4 = [r13 - 2], r13 -= 2 */ + str q4, [x12], #16 /* [r12 += 2] = q4 */ cmp x12, x14 /* if current batch == batch + amount we are popping */ - bne .LTcmallocSlab_Internal_PopBatch_FixedShift_loop - strh w9, [x4] /* store new current */ -.LTcmallocSlab_Internal_PopBatch_FixedShift_commit: + bne .LTcmallocSlab_Internal_PopBatch_loop +.LTcmallocSlab_Internal_PopBatch_store: + strh w9, [x15] /* store new current */ +.LTcmallocSlab_Internal_PopBatch_commit: mov x0, x11 ret -.LTcmallocSlab_Internal_PopBatch_FixedShift_no_items: +.LTcmallocSlab_Internal_PopBatch_no_items: mov x0, #0 ret - .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_PopBatch_FixedShift) -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PopBatch_FixedShift) - - .globl TcmallocSlab_Internal_Push - .type TcmallocSlab_Internal_Push, @function -TcmallocSlab_Internal_Push: -.LTcmallocSlab_Internal_Push_entry: - .cfi_startproc - // Arguments use: - // * x0: (Argument: Slabs*) cpu_0_slab_ptr - // * x1: (Argument: uintptr_t) cl - // * x2: (Argument: uintptr_t) p - // * w3: (Argument: size_t) shift - // * x4: (Argument: uintptr_t) f - // Return value: current CPU - // Available x5-x15 - - BTI_C - START_RSEQ(TcmallocSlab_Internal_Push) - FETCH_CPU(w8) - lsl x9, x8, x3 - add x9, x0, x9 - add x10, x9, x1, LSL #3 - ldrh w12, [x10] /* current */ - ldrh w11, [x10, #6] /* end */ - cmp w11, w12 - ble .LTcmallocSlab_Internal_Push_no_capacity - str x2, [x9, x12, LSL #3] - add w12, w12, #1 - strh w12, [x10] -.LTcmallocSlab_Internal_Push_commit: - mov x0, x8 - ret -.LTcmallocSlab_Internal_Push_no_capacity: - mov x0, x8 - TAILCALL(x4) -.LTcmallocSlab_Internal_Push_region3: - .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_Push) -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Push) - - - .globl TcmallocSlab_Internal_Push_FixedShift - .type TcmallocSlab_Internal_Push_FixedShift, @function -TcmallocSlab_Internal_Push_FixedShift: - .cfi_startproc - // Arguments use: - // * x0: (Argument: Slabs*) cpu_0_slab_ptr - // * x1: (Argument: 
uintptr_t) cl - // * x2: (Argument: uintptr_t) p - // * x3: (Argument: uintptr_t) f - // Return value: current CPU - // Available x4-x15 - - BTI_C - START_RSEQ(TcmallocSlab_Internal_Push_FixedShift) - FETCH_CPU(w8) - lsl x9, x8, #TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT - add x9, x0, x9 - add x10, x9, x1, LSL #3 - ldrh w12, [x10] /* current */ - ldrh w11, [x10, #6] /* end */ - cmp w11, w12 - ble .LTcmallocSlab_Internal_Push_FixedShift_no_capacity - str x2, [x9, x12, LSL #3] - add w12, w12, #1 - strh w12, [x10] -.LTcmallocSlab_Internal_Push_FixedShift_commit: - mov x0, x8 - ret -.LTcmallocSlab_Internal_Push_FixedShift_no_capacity: - mov x0, x8 - TAILCALL(x3) - .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_Push_FixedShift) -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Push_FixedShift) - - .globl TcmallocSlab_Internal_Pop_FixedShift - .type TcmallocSlab_Internal_Pop_FixedShift, @function -TcmallocSlab_Internal_Pop_FixedShift: - .cfi_startproc - // Arguments use: - // * x0: (Argument: Slabs*) cpu_0_slab_ptr - // * x1: (Argument: uintptr_t) cl - // * x2: (Argument: uintptr_t) f - // Return value: current CPU - // Available x3-x15 - - BTI_C - START_RSEQ(TcmallocSlab_Internal_Pop_FixedShift) - FETCH_CPU(w8) /* r8 = CPU */ - lsl x9, x8, #TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT - /* r9 = CPU shifted */ - add x9, x0, x9 /* r9 = start of CPU region */ - add x10, x9, x1, LSL #3 /* r10 = start of slab header */ - ldrh w12, [x10] /* r12 = current index */ - ldrh w11, [x10, #4] /* r11 = begin index */ - cmp w11, w12 /* if begin >= current */ - bge .LTcmallocSlab_Internal_Pop_FixedShift_no_items - sub w12, w12, #1 /* r12 = current-- */ - ldr x3, [x9, x12, LSL #3] /* r3 = [start + current * 8] */ - strh w12, [x10] /* store new current index */ -.LTcmallocSlab_Internal_Pop_FixedShift_commit: - mov x0, x3 /* return popped item */ - ret -.LTcmallocSlab_Internal_Pop_FixedShift_no_items: - mov x0, x8 /* call overflow handler with CPU ID */ - TAILCALL(x2) - .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_Pop_FixedShift) -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Pop_FixedShift) - - .globl TcmallocSlab_Internal_Pop - .type TcmallocSlab_Internal_Pop, @function -TcmallocSlab_Internal_Pop: - .cfi_startproc - // Arguments use: - // * x0: (Argument: Slabs*) cpu_0_slab_ptr - // * x1: (Argument: uintptr_t) cl - // * x2: (Argument: uintptr_t) f - // * w3: (Argument: size_t) shift - // Return value: Value - // Available x4-x15 - - BTI_C - START_RSEQ(TcmallocSlab_Internal_Pop) - FETCH_CPU(w8) /* r8 = CPU ID */ - lsl x9, x8, x3 /* x9 = CPU shifted by (r3) */ - add x9, x0, x9 /* x9 = start of this CPU region */ - add x10, x9, x1, LSL #3 /* r10 = slab header addr */ - ldrh w12, [x10] /* r12 = current index */ - ldrh w11, [x10, #4] /* x11 = begin index */ - cmp w11, w12 /* if begin >= current */ - bge .LTcmallocSlab_Internal_Pop_no_items - sub w12, w12, #1 /* r12 = current-- */ - ldr x4, [x9, x12, LSL #3] /* r4 = [start + current * 8] */ - strh w12, [x10] /* update current index */ -.LTcmallocSlab_Internal_Pop_commit: - mov x0, x4 /* return popped item */ - ret -.LTcmallocSlab_Internal_Pop_no_items: - mov x0, x8 /* call overflow handler with CPU ID */ - TAILCALL(x2) - .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_Pop) -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Pop) + CFI(.cfi_endproc) +ENCODE_SIZE(TcmallocSlab_Internal_PopBatch) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PopBatch) .section .note.GNU-stack,"",@progbits diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_asm.S 
b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_asm.S index 0219a2760a06..1219f8a89238 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_asm.S +++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_asm.S @@ -16,16 +16,51 @@ #include "tcmalloc/internal/percpu.h" +#ifdef __GCC_HAVE_DWARF2_CFI_ASM +#define CFI(...) __VA_ARGS__ +#else +#define CFI(...) +#endif + #if TCMALLOC_PERCPU_RSEQ_SUPPORTED_PLATFORM #if defined(__x86_64__) #include "tcmalloc/internal/percpu_rseq_x86_64.S" -#elif defined(__ppc__) -#include "tcmalloc/internal/percpu_rseq_ppc.S" #elif defined(__aarch64__) #include "tcmalloc/internal/percpu_rseq_aarch64.S" #else #error "RSEQ support expected, but not found." #endif + +// See the comment about data layout in percpu.h for details. +.type tcmalloc_sampler, @object +.type tcmalloc_cached_vcpu, @object +.type tcmalloc_slabs, @object +.type __rseq_abi, @object +.section .tdata, "awT", @progbits +.globl tcmalloc_sampler +.globl tcmalloc_cached_vcpu +.globl tcmalloc_slabs +.globl __rseq_abi +.p2align 6 +.zero 64 + 32 - TCMALLOC_SAMPLER_SIZE - 8 +tcmalloc_sampler: +.zero TCMALLOC_SAMPLER_SIZE +tcmalloc_cached_vcpu: +.long 0xffffffff // cpu_id (kCpuIdUninitialized) +tcmalloc_slabs: +.long 0 +__rseq_abi: +.long 0 // cpu_id_start +.long 0xffffffff // cpu_id (kCpuIdUninitialized) +.quad 0 // rseq_cs +.long 0 // flags +.quad 0 // padding +.short 0xffff // numa_node_id (kCpuIdUninitialized) +.short 0xffff // vcpu_id (kCpuIdUninitialized) +.size __rseq_abi, 32 +.size tcmalloc_sampler, TCMALLOC_SAMPLER_SIZE +.size tcmalloc_slabs, 8 + #endif // TCMALLOC_PERCPU_RSEQ_SUPPORTED_PLATFORM // We do not need an executable stack. Put this outside the @@ -33,9 +68,9 @@ // .note.GNU-stack section implies executable stack" errors. // // Cf. http://en.chys.info/2010/12/note-gnu-stack/ -#if defined(__arm__) || defined(__PPC64__) +#if defined(__arm__) .section .note.GNU-stack, "", %progbits #else .section .note.GNU-stack, "", @progbits -#endif // __arm__ || __PPC64__ +#endif // __arm__ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_ppc.S b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_ppc.S deleted file mode 100644 index 234f28c2e7ea..000000000000 --- a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_ppc.S +++ /dev/null @@ -1,606 +0,0 @@ -/* - * Copyright 2019 The TCMalloc Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Rseq critical section functions and restart handlers. -// -// They must also avoid writing the nonvolatile and reserved general purpose -// registers defined by the Power Architecture 64-Bit ELF V2 ABI -// -// * r1-r2 -// * r13 -// * r14-r31 -// -// Finally, note that the restart handler reserves the right to clobber -// condition registers. This means that critical section functions must not -// explicitly or implicitly read condition registers outside of their -// [start, limit) critical regions. 
- -#ifndef __ppc__ -#error "percpu_rseq_ppc.S should only be included for PPC builds" -#endif - -#include "tcmalloc/internal/percpu.h" - -// Use the ELFv2 ABI. -.abiversion 2 -.section google_malloc, "ax" - -//////////////////////////////////////////////////////////////////////// -// Macros -//////////////////////////////////////////////////////////////////////// - -/* - * Provide a directive to specify the size of symbol "label", relative to the - * current location and its start. - */ -#define ENCODE_SIZE(label) .size label, . - label; - -// Place the CPU number into the bottom 12 bits of dst. The upper 52 bits are -// unspecified. -// -// See GetCurrentCpu() for notes on the implementation. -#define GET_CPU_UNMASKED(dst) \ - mfspr dst, 259 - -// Given an unmasked CPU number, put the interesting parts into dst. -#define MASK_CPU(dst, src) \ - clrldi dst, src, 52 - -// Like GET_CPU_UNMASKED, but guarantees that the upper bits are cleared. May -// be slower than the unmasked version. -#define GET_CPU(dst) \ - GET_CPU_UNMASKED(dst); \ - MASK_CPU(dst, dst) - -// This is part of the upstream rseq ABI. The 4 bytes prior to the abort IP -// must match TCMALLOC_PERCPU_RSEQ_SIGNATURE (as configured by our rseq -// syscall's signature parameter). This signature is used to annotate valid -// abort IPs (since rseq_cs could live in a user-writable segment). -#define SIGN_ABORT() \ - .long TCMALLOC_PERCPU_RSEQ_SIGNATURE; - -// DEFINE_UPSTREAM_CS triggers the generation of rseq_cs table (the triple of -// start, commit, abort IPs) and a trampoline function. -// -// Upstream API Exposition: -// -// START_RSEQ() // vvvvv emits a bunch of things -// global entry point: -// TOC setup -// METHOD_critical_abort: -// local entry point: -// store rseq_cs to __rseq_abi.rseq_cs, starting restartable sequence -// METHOD_start: // Emitted as part of START_RSEQ() -// // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -// -// GET_CPU...() // Reads current CPU -// ... -// single store // Commits sequence -// METHOD_critical_limit: -// ...return... -// -// START_RSEQ does several things: -// * We need to set up the TOC pointer for global entry points. -// * When restarting, we return to the local entry point, since the TOC pointer -// is left intact from the restart. METHOD_critical_abort and local entry -// point are therefore the same address. -// * It stores to the TLS to register that we're in a restartable sequence with -// the kernel. -// -// This process is assisted by the DEFINE_UPSTREAM_CS macro, which encodes a -// (rodata) constant table, whose address is used to start the critical -// section, and the abort trampoline. -// -// The trampoline is used because: -// 1. Restarts are expected to be rare, so the extra jump when restarting is -// expected to be infrequent. -// 2. The upstream restartable sequence implementation expects the trailing 4 -// bytes of the abort PC to be "signed" (to prevent manipulation of the PC -// to an arbitrary choice). For us, this is -// TCMALLOC_PERCPU_RSEQ_SIGNATURE. This value is passed to the kernel -// during configuration of the rseq syscall. This would either need to be -// encoded as a nop* at the start of every restartable sequence, increasing -// instruction cache pressure, or placed directly before the entry point. -// -// * The upstream rseq protocol appears to be converging on using a trap -// instruction (twui), so we cannot allow it to appear anywhere in our -// actual executed path. 
-// -// Upon restart, the (upstream) kernel API clears the per-thread restartable -// sequence state. We return to METHOD_abort (rather than METHOD_start), as we -// need to reinitialize this value. - -// This macro defines a relocation associated with the provided label to keep -// section GC from discarding it independently of label. -#if !defined(__clang_major__) || __clang_major__ >= 9 -#define PINSECTION(label) .reloc 0, R_PPC64_NONE, label -#else -#define PINSECTION(label) -#endif - -// TODO(b/141629158): __rseq_cs only needs to be writeable to allow for -// relocations, but could be read-only for non-PIE builds. -#define DEFINE_UPSTREAM_CS(label) \ - .pushsection __rseq_cs, "aw"; \ - .balign 32; \ - .protected __rseq_cs_##label; \ - .type __rseq_cs_##label,@object; \ - .size __rseq_cs_##label,32; \ - __rseq_cs_##label: \ - .long TCMALLOC_PERCPU_RSEQ_VERSION, TCMALLOC_PERCPU_RSEQ_FLAGS; \ - .quad .L##label##_critical_start; \ - .quad .L##label##_critical_limit - .L##label##_critical_start; \ - .quad label##_trampoline; \ - PINSECTION(.L##label##array); \ - .popsection; \ - .pushsection __rseq_cs_ptr_array, "aw"; \ - .L##label##array: \ - .quad __rseq_cs_##label; \ - .popsection; \ - .pushsection rseq_trampoline, "ax"; \ - SIGN_ABORT(); \ - .globl label##_trampoline; \ - .type label##_trampoline, @function; \ -label##_trampoline: \ - .cfi_startproc; \ - b .L##label##_critical_abort; \ - .cfi_endproc; \ - .size label##_trampoline, . - label##_trampoline; \ - .popsection - -// With PIE: We have initial-exec TLS, even in the presence of position -// independent code. -#if !defined(__PIC__) || defined(__PIE__) - -#define START_RSEQ(label) \ - .L##label##_gep0: \ - addis %r2, %r12, .TOC.-.L##label##_gep0@ha; \ - addi %r2, %r2, .TOC.-.L##label##_gep0@l; \ - .L##label##_critical_abort: \ - .L##label##_lep0: \ - .localentry label,.-label; \ - addis %r9, %r2, __rseq_cs_##label@toc@ha; \ - addi %r9, %r9, __rseq_cs_##label@toc@l; \ - addis %r10, %r13, __rseq_abi@tprel@ha; \ - addi %r10, %r10, __rseq_abi@tprel@l; \ - std %r9, 8(%r10); \ - .L##label##_critical_start: - -#else /* !defined(__PIC__) || defined(__PIE__) */ - -// Handle non-initial exec TLS. When performance matters, we should be using -// initial-exec TLS. -// -// We need to caller-save r3-r8, as they are our arguments to the actual -// restartable sequence code. 
- -#define START_RSEQ(label) \ - .L##label##_gep0: \ - addis %r2, %r12, .TOC.-.L##label##_gep0@ha; \ - addi %r2, %r2, .TOC.-.L##label##_gep0@l; \ - .L##label##_critical_abort: \ - .L##label##_lep0: \ - .localentry label,.-label; \ - mflr 0; \ - std %r0, 0x10(1); \ - std %r3, -0x10(1); \ - std %r4, -0x18(1); \ - std %r5, -0x20(1); \ - std %r6, -0x28(1); \ - std %r7, -0x30(1); \ - std %r8, -0x38(1); \ - stdu %r1, -0x200(1); \ - bl tcmalloc_tls_fetch_pic; \ - nop; \ - mr %r10, %r3; \ - addi %r1, %r1, 0x200; \ - ld %r8, -0x38(1); \ - ld %r7, -0x30(1); \ - ld %r6, -0x28(1); \ - ld %r5, -0x20(1); \ - ld %r4, -0x18(1); \ - ld %r3, -0x10(1); \ - ld %r0, 0x10(1); \ - mtlr 0; \ - addis %r9, %r2, __rseq_cs_##label@toc@ha; \ - addi %r9, %r9, __rseq_cs_##label@toc@l; \ - std %r9, 8(%r10); \ - .L##label##_critical_start: - -#endif - -//////////////////////////////////////////////////////////////////////// -// TcmallocSlab_Internal_PerCpuCmpxchg64 -//////////////////////////////////////////////////////////////////////// - -.globl TcmallocSlab_Internal_PerCpuCmpxchg64 -.type TcmallocSlab_Internal_PerCpuCmpxchg64, @function -TcmallocSlab_Internal_PerCpuCmpxchg64: -.LTcmallocSlab_Internal_PerCpuCmpxchg64_entry: - .cfi_startproc - // Register use: - // - // * r3: (Argument: int64) target_cpu - // * r4: (Argument: intptr_t*) p - // * r5: (Argument: intptr_t) old_val - // * r6: (Argument: intptr_t) new_val - // * r7: The current CPU number. - // * r8: The current value of *p. - // - - START_RSEQ(TcmallocSlab_Internal_PerCpuCmpxchg64) - - // Are we running on the target CPU? - GET_CPU(%r7) - cmpd %r7, %r3 - bne .LCAS_wrong_cpu - - // Load the current value of *p. - ld %r8, 0(%r4) - - // Is the value up to date? - cmpd %r8, %r5 - bne .LCAS_wrong_value - - // Store the new value, committing the operation. - std %r6, 0(%r4) -.LTcmallocSlab_Internal_PerCpuCmpxchg64_critical_limit: - - // Return the target CPU, which is already in r3. - blr - -.LCAS_wrong_cpu: - // Return the current CPU. - mr %r3, %r7 - blr - -.LCAS_wrong_value: - // Return -1. - li %r3, -1 - blr - -.LTcmallocSlab_Internal_PerCpuCmpxchg64_function_limit: - .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_PerCpuCmpxchg64); -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PerCpuCmpxchg64); - - -//////////////////////////////////////////////////////////////////////// -// TcmallocSlab_Internal_Push -//////////////////////////////////////////////////////////////////////// - -.globl TcmallocSlab_Internal_Push -.type TcmallocSlab_Internal_Push, @function -TcmallocSlab_Internal_Push: -.LTcmallocSlab_Internal_Push_entry: - .cfi_startproc - // Arguments use: - // * r3: (Argument: Slabs*) cpu_0_slab_ptr - // * r4: (Argument: uintptr_t) cl - // * r5: (Argument: uintptr_t) p - // * r6: (Argument: size_t) shift - // * r7: (Argument: uintptr_t) f - // Return value: current CPU - // Available r8 r9 r10 r11 r12 - // Note that r12 may be overwritten in rseq_restart_address_internal so - // cannot be relied upon across restartable sequence boundaries. 
- - START_RSEQ(TcmallocSlab_Internal_Push) - - GET_CPU(%r8) // r8 = current CPU, includes MASK operation - sld %r9, %r8, %r6 // r9 = r8 << shift (r6) - add %r9, %r3, %r9 // r9 = start of this CPU region - rldicr %r10, %r4, 3, 60 // r10 = header offset for class size cl (r4) - add %r10, %r9, %r10 // r10 = slab header addr (class offset + CPU base) - lhz %r12, 0(%r10) // r12 = current index - lhz %r11, 6(%r10) // r11 = length - cmpld %cr7, %r11, %r12 // compare current index with length - ble %cr7, .LTcmallocSlab_Internal_Push_no_capacity - rldicr %r11, %r12, 3, 60 // r11 = offset of current index - addi %r12, %r12, 1 // current index += 1 - stdx %r5, %r9, %r11 // store pointer p (r5) into current offset - sth %r12, 0(%r10) // update current index - -.LTcmallocSlab_Internal_Push_critical_limit: - mr %r3, %r8 // Return current CPU in r3 - blr - -.LTcmallocSlab_Internal_Push_no_capacity: - mr %r3, %r8 // Place current CPU in r3 - // r7 already contains target function - b .LPushOverflowTrampoline - -.LTcmallocSlab_Internal_Push_function_limit: - .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_Push); -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Push); - -//////////////////////////////////////////////////////////////////////// -// TcmallocSlab_Internal_Push_FixedShift -//////////////////////////////////////////////////////////////////////// - -.globl TcmallocSlab_Internal_Push_FixedShift -.type TcmallocSlab_Internal_Push_FixedShift, @function -TcmallocSlab_Internal_Push_FixedShift: -.LTcmallocSlab_Internal_Push_FixedShift_entry: - .cfi_startproc - // Arguments use: - // * r3: (Argument: Slabs*) cpu_0_slab_ptr - // * r4: (Argument: uintptr_t) cl - // * r5: (Argument: uintptr_t) p - // * r6: (Argument: uintptr_t) f - - START_RSEQ(TcmallocSlab_Internal_Push_FixedShift) - - GET_CPU_UNMASKED(%r7) // r7 = unmasked CPU - // Mask upper 52 bits of %r7 and shift left in single - // operation. Removes the need to have a separate - // MASK operation on the critical path. 
- clrlsldi %r8, %r7, 52, TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT - add %r8, %r3, %r8 // r8 = start of this CPU region - rldicr %r9, %r4, 3, 60 // r9 = start of header - add %r9, %r8, %r9 // r9 = slab header addr - lhz %r10, 0(%r9) // r10 = current index - lhz %r11, 6(%r9) // r11 = end index - cmpld %cr7, %r11, %r10 // Check for space - ble %cr7, .LTcmallocSlab_Internal_Push_FixedShift_no_capacity - rldicr %r11, %r10, 3, 60 // r11 = offset of current index - addi %r10, %r10, 1 // current index ++ - stdx %r5, %r8, %r11 // store the item (from r5) - sth %r10, 0(%r9) // store current index - -.LTcmallocSlab_Internal_Push_FixedShift_critical_limit: - MASK_CPU(%r3, %r7) // Return and mask CPU into %r3 - blr - -.LTcmallocSlab_Internal_Push_FixedShift_no_capacity: - MASK_CPU(%r3, %r7) // Move and mask CPU into %r3 - mr %r7, %r6 // Move target function into r7 - b .LPushOverflowTrampoline - -.LTcmallocSlab_Internal_Push_FixedShift_function_limit: - .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_Push_FixedShift); -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Push_FixedShift); - - -//////////////////////////////////////////////////////////////////////// -// TcmallocSlab_Internal_Pop -//////////////////////////////////////////////////////////////////////// - -.globl TcmallocSlab_Internal_Pop -.type TcmallocSlab_Internal_Pop, @function -TcmallocSlab_Internal_Pop: -.LTcmallocSlab_Internal_Pop_entry: - .cfi_startproc - // Arguments use: - // * r3: (Argument: Slabs*) cpu_0_slab_ptr - // * r4: (Argument: uintptr_t) cl - // * r5: (Argument: uintptr_t) f - // * r6: (Argument: size_t) shift - // Available r7 r8 r9 r10 r11 - // r12 can be used as a temporary within rseq - - START_RSEQ(TcmallocSlab_Internal_Pop) - - GET_CPU(%r7) // r7 = CPU, includes mask operation - sld %r12, %r7, %r6 // r12 = CPU shifted by shift (r6) - add %r12, %r3, %r12 // r12 = start of this CPU region - rldicr %r8, %r4, 3, 60 // r8 = offset to class size - add %r8, %r12, %r8 // r8 = slab header addr for class size - lhz %r9, 0(%r8) // r9 = current index - lhz %r10, 4(%r8) // r10 = begin - cmpld %cr7, %r10, %r9 // Check that we have items to pop - bge %cr7, .LTcmallocSlab_Internal_Pop_no_item - subi %r9, %r9, 1 // r9 = current index -- - rldicr %r10, %r9, 3, 60 // r10 = offset to current item - ldx %r11, %r12, %r10 // load the item from base + index - sth %r9, 0(%r8) // store current index - -.LTcmallocSlab_Internal_Pop_critical_limit: - // Move the item into r3, now that it's safe to do so. 
- mr %r3, %r11 - blr - -.LTcmallocSlab_Internal_Pop_no_item: - mr %r3, %r7 // Place CPU into r3 - b .LPopUnderflowTrampoline - -.LTcmallocSlab_Internal_Pop_function_limit: - .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_Pop); -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Pop); - -//////////////////////////////////////////////////////////////////////// -// TcmallocSlab_Internal_Pop_FixedShift -//////////////////////////////////////////////////////////////////////// - -.globl TcmallocSlab_Internal_Pop_FixedShift -.type TcmallocSlab_Internal_Pop_FixedShift, @function -TcmallocSlab_Internal_Pop_FixedShift: -.LTcmallocSlab_Internal_Pop_FixedShift_entry: - .cfi_startproc - // Arguments use: - // * r3: (Argument: Slabs*) cpu_0_slab_ptr - // * r4: (Argument: uintptr_t) cl - // * r5: (Argument: uintptr_t) f - - START_RSEQ(TcmallocSlab_Internal_Pop_FixedShift) - - GET_CPU_UNMASKED(%r6) // r6 = current CPU - // Following instruction combines mask and shift - clrlsldi %r7, %r6, 52, TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT - // r7 = header offset - add %r7, %r3, %r7 // r7 = start of this CPU region - rldicr %r8, %r4, 3, 60 // r8 = offset of size class - add %r8, %r7, %r8 // r8 = slab header addr - lhz %r9, 0(%r8) // r9 = current index - lhz %r10, 4(%r8) // r10 = begin index - cmpld %cr7, %r10, %r9 // Check that there are elements available - bge %cr7, .LTcmallocSlab_Internal_Pop_FixedShift_no_item - subi %r9, %r9, 1 // current index -- - rldicr %r10, %r9, 3, 60 // r10 = offset of current index - ldx %r11, %r7, %r10 // r11 = load the item - sth %r9, 0(%r8) // update current index - -.LTcmallocSlab_Internal_Pop_FixedShift_critical_limit: - // Move the item into r3, now that it's safe to do so. - mr %r3, %r11 - blr - -.LTcmallocSlab_Internal_Pop_FixedShift_no_item: - MASK_CPU(%r3, %r6) // Extract CPU from unmasked value in %r6 - b .LPopUnderflowTrampoline - -.LTcmallocSlab_Internal_Pop_FixedShift_function_limit: - .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_Pop_FixedShift); -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Pop_FixedShift); - -//////////////////////////////////////////////////////////////////////// -// TcmallocSlab_Internal_PushBatch_FixedShift -//////////////////////////////////////////////////////////////////////// - -.globl TcmallocSlab_Internal_PushBatch_FixedShift -.type TcmallocSlab_Internal_PushBatch_FixedShift, @function -TcmallocSlab_Internal_PushBatch_FixedShift: -.LTcmallocSlab_Internal_PushBatch_FixedShift_entry: - .cfi_startproc - // Arguments use: - // * r3: (Argument: Slabs*) cpu_0_slab_ptr - // * r4: (Argument: uintptr_t) cl - // * r5: (Argument: uintptr_t) batch - // * r6: (Argument: uintptr_t) len - - START_RSEQ(TcmallocSlab_Internal_PushBatch_FixedShift) - - GET_CPU_UNMASKED(%r7) - clrlsldi %r8, %r7, 52, TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT - add %r8, %r3, %r8 // r8 - start of this CPU region - sldi %r9, %r4, 3 - add %r9, %r8, %r9 // r9 - slab header addr - lhz %r10, 0(%r9) // r10 - current - lhz %r11, 6(%r9) // r11 - end - sldi %r7, %r6, 3 // r7 - len * 8 - cmpld %cr7, %r11, %r10 // current < end? 
- ble %cr7, .LTcmallocSlab_Internal_PushBatch_FixedShift_critical_limit - sub %r11, %r11, %r10 // r11 - available capacity - // r11 = min(r11, r6) - cmpld %cr7, %r6, %r11 - bge %cr7, .LTcmallocSlab_Internal_PushBatch_FixedShift_min - mr %r11, %r6 -.LTcmallocSlab_Internal_PushBatch_FixedShift_min: - add %r11, %r10, %r11 - sldi %r11, %r11, 3 - sldi %r10, %r10, 3 - - // At this point: - // r5 - batch, r7 - offset in the batch - // r8 - cpu region, r10 - offset into the cpu region, r11 - limit of offset -.LTcmallocSlab_Internal_PushBatch_FixedShift_loop: - subi %r7, %r7, 8 - ldx %r12, %r5, %r7 // load the item - stdx %r12, %r8, %r10 // store the item - addi %r10, %r10, 8 - cmpld %cr7, %r10, %r11 - bne %cr7, .LTcmallocSlab_Internal_PushBatch_FixedShift_loop - rotrdi %r10, %r10, 3 - sth %r10, 0(%r9) // update current - -.LTcmallocSlab_Internal_PushBatch_FixedShift_critical_limit: - // return r6 - r7 / 8 - rotrdi %r7, %r7, 3 - sub %r3, %r6, %r7 - blr - -.LTcmallocSlab_Internal_PushBatch_FixedShift_function_limit: - .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_PushBatch_FixedShift); -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PushBatch_FixedShift); - -//////////////////////////////////////////////////////////////////////// -// TcmallocSlab_Internal_PopBatch_FixedShift -//////////////////////////////////////////////////////////////////////// - -.globl TcmallocSlab_Internal_PopBatch_FixedShift -.type TcmallocSlab_Internal_PopBatch_FixedShift, @function -TcmallocSlab_Internal_PopBatch_FixedShift: -.LTcmallocSlab_Internal_PopBatch_FixedShift_entry: - .cfi_startproc - // Arguments use: - // * r3: (Argument: Slabs*) cpu_0_slab_ptr - // * r4: (Argument: uintptr_t) cl - // * r5: (Argument: uintptr_t) batch - // * r6: (Argument: uintptr_t) len - - START_RSEQ(TcmallocSlab_Internal_PopBatch_FixedShift) - - GET_CPU_UNMASKED(%r7) - clrlsldi %r7, %r7, 52, TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT - add %r7, %r3, %r7 // r7 - start of this CPU region - sldi %r8, %r4, 3 - add %r8, %r7, %r8 // r8 - slab header addr - lhz %r9, 0(%r8) // r9 - current - lhz %r10, 4(%r8) // r10 - begin - li %r11, 0 // current position in batch - cmpld %cr7, %r10, %r9 - bge %cr7, .LTcmallocSlab_Internal_PopBatch_FixedShift_critical_limit - sub %r10, %r9, %r10 // r10 - available items - // r10 = min(r10, r6) - cmpld %cr7, %r6, %r10 - bge %cr7, .LTcmallocSlab_Internal_PopBatch_FixedShift_min - mr %r10, %r6 -.LTcmallocSlab_Internal_PopBatch_FixedShift_min: - sub %r10, %r9, %r10 - sldi %r10, %r10, 3 - sldi %r9, %r9, 3 - - // At this point: - // r5 - batch, r11 - offset in the batch - // r7 - cpu region, r9 - offset into the cpu region, r10 - limit of offset -.LTcmallocSlab_Internal_PopBatch_FixedShift_loop: - subi %r9, %r9, 8 - ldx %r12, %r7, %r9 // load the item - stdx %r12, %r5, %r11 // store the item - addi %r11, %r11, 8 - cmpld %cr7, %r9, %r10 - bne %cr7, .LTcmallocSlab_Internal_PopBatch_FixedShift_loop - rotrdi %r9, %r9, 3 - sth %r9, 0(%r8) // update current - -.LTcmallocSlab_Internal_PopBatch_FixedShift_critical_limit: - rotrdi %r3, %r11, 3 - blr - -.LTcmallocSlab_Internal_PopBatch_FixedShift_function_limit: - .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_PopBatch_FixedShift); -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PopBatch_FixedShift); - - // Input: r7 points to the function to tail call. r3...r6 are args for it. -.LPushOverflowTrampoline: - mtctr %r7 - mr %r12, %r7 // Callee expects r12 to point to its first instruction. - bctr - - // Input: r5 points to the function to tail call. r3...r4 are args for it. 
-.LPopUnderflowTrampoline: - mtctr %r5 - mr %r12, %r5 // Callee expects r12 to point to its first instruction. - bctr - -.section .note.GNU-stack,"",%progbits - diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_unsupported.cc b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_unsupported.cc index 1438d8c3d826..249157d06749 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_unsupported.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_unsupported.cc @@ -15,7 +15,6 @@ // Provides skeleton RSEQ functions which raise a hard error in the case of // being erroneously called on an unsupported platform. -#include "tcmalloc/internal/logging.h" #include "tcmalloc/internal/percpu.h" #if !TCMALLOC_PERCPU_RSEQ_SUPPORTED_PLATFORM @@ -27,53 +26,31 @@ namespace subtle { namespace percpu { static void Unsupported() { - Crash(kCrash, __FILE__, __LINE__, - "RSEQ function called on unsupported platform."); + TC_BUG("RSEQ function called on unsupported platform."); } -int TcmallocSlab_Internal_PerCpuCmpxchg64(int target_cpu, intptr_t *p, - intptr_t old_val, intptr_t new_val) { +int TcmallocSlab_Internal_PerCpuCmpxchg64(int target_cpu, intptr_t* p, + intptr_t old_val, intptr_t new_val, + size_t virtual_cpu_id_offset) { Unsupported(); return -1; } -int TcmallocSlab_Internal_Push(void *ptr, size_t cl, void *item, size_t shift, - OverflowHandler f) { - Unsupported(); - return -1; -} - -int TcmallocSlab_Internal_Push_FixedShift(void *ptr, size_t cl, void *item, - OverflowHandler f) { - Unsupported(); - return -1; -} - -void *TcmallocSlab_Internal_Pop(void *ptr, size_t cl, UnderflowHandler f, - size_t shift) { - Unsupported(); - return nullptr; -} - -void *TcmallocSlab_Internal_Pop_FixedShift(void *ptr, size_t cl, - UnderflowHandler f) { - Unsupported(); - return nullptr; -} - -size_t TcmallocSlab_Internal_PushBatch_FixedShift(void *ptr, size_t cl, - void **batch, size_t len) { +size_t TcmallocSlab_Internal_PushBatch(size_t size_class, void** batch, + size_t len, uintptr_t slabs_and_shift, + size_t virtual_cpu_id_offset) { Unsupported(); return 0; } -size_t TcmallocSlab_Internal_PopBatch_FixedShift(void *ptr, size_t cl, - void **batch, size_t len) { +size_t TcmallocSlab_Internal_PopBatch(size_t size_class, void** batch, + size_t len, uintptr_t slabs_and_shift, + size_t virtual_cpu_id_offset) { Unsupported(); return 0; } -int PerCpuReadCycleCounter(int64_t *cycles) { +int PerCpuReadCycleCounter(int64_t* cycles) { Unsupported(); return -1; } diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_x86_64.S b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_x86_64.S index 866f4f90ca10..797fec5572fd 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_x86_64.S +++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_x86_64.S @@ -75,8 +75,7 @@ // distinct from label_start, as the return IP must be "signed" (see // SIGN_ABORT()). // -// TODO(b/141629158): __rseq_cs only needs to be writeable to allow for -// relocations, but could be read-only for non-PIE builds. +// __rseq_cs only needs to be writeable to allow for relocations. #define DEFINE_UPSTREAM_CS(label) \ .pushsection __rseq_cs, "aw"; \ .balign 32; \ @@ -98,9 +97,9 @@ .globl label##_trampoline; \ .type label##_trampoline, @function; \ label##_trampoline: \ - .cfi_startproc; \ + CFI(.cfi_startproc); \ jmp .L##label##_abort; \ - .cfi_endproc; \ + CFI(.cfi_endproc); \ .size label##_trampoline, . - label##_trampoline; // This is part of the upstream rseq ABI. 
The 4 bytes prior to the abort IP @@ -134,13 +133,15 @@ label##_trampoline: \ /* With PIE; have initial-exec TLS, even in the presence of position independent code. */ #if !defined(__PIC__) || defined(__PIE__) -#define FETCH_CPU(dest) movl %fs:__rseq_abi@TPOFF+4, dest; -#define FETCH_VCPU(dest) movzwl %fs:__rseq_abi@TPOFF+30, dest; -#define START_RSEQ(src) \ - .L##src##_abort: \ - leaq __rseq_cs_##src(%rip), %rax; \ - movq %rax, %fs:__rseq_abi@TPOFF+8; \ - .L##src##_start: +#define FETCH_CPU(dest, offset) \ + movzwl %fs:__rseq_abi@TPOFF(offset), dest; +#define FETCH_SLABS(dest) \ + movq %fs:__rseq_abi@TPOFF + TCMALLOC_RSEQ_SLABS_OFFSET, dest +#define START_RSEQ(src) \ + .L##src##_abort: \ + leaq __rseq_cs_##src(%rip), %rax; \ + movq %rax, %fs:__rseq_abi@TPOFF+8; \ + .L##src##_start: #else /* !defined(__PIC__) || defined(__PIE__) */ @@ -151,16 +152,14 @@ label##_trampoline: \ * tcmalloc_tls_fetch_pic does not appear in the restartable sequence's address * range. */ -#define FETCH_CPU(dest) \ - movl 4(%rax), dest; /* cpuid is 32-bits */ -#define FETCH_VCPU(dest) \ - movzwl 30(%rax), dest; /* vcpu_id is 16-bits */ +#define FETCH_CPU(dest, offset) movzwl (%rax, offset), dest; +#define FETCH_SLABS(dest) movq TCMALLOC_RSEQ_SLABS_OFFSET(%rax), dest #define START_RSEQ(src) \ .L##src##_abort: \ call tcmalloc_internal_tls_fetch_pic@PLT; \ leaq __rseq_cs_##src(%rip), %r11; \ movq %r11, 8(%rax); \ - .L##src##_start: + .L##src##_start: /* * We can safely call this function from within an RSEQ section as it only @@ -170,12 +169,12 @@ label##_trampoline: \ .local tcmalloc_internal_tls_fetch_pic .type tcmalloc_internal_tls_fetch_pic, @function tcmalloc_internal_tls_fetch_pic: - .cfi_startproc + CFI(.cfi_startproc) push %rbp - .cfi_def_cfa_offset 16 - .cfi_offset 6, -16 + CFI(.cfi_def_cfa_offset 16) + CFI(.cfi_offset 6, -16) mov %rsp, %rbp - .cfi_def_cfa_register 6 + CFI(.cfi_def_cfa_register 6) sub $0x30, %rsp mov %rsi, -0x08(%rbp) /* atypical abi: tcmalloc_tls_fetch_pic preserves regs */ mov %rdi, -0x10(%rbp) @@ -201,10 +200,10 @@ tcmalloc_internal_tls_fetch_pic: mov -0x30(%rbp), %r9 add $0x30, %rsp leave - .cfi_def_cfa_register 7 - .cfi_def_cfa_offset 8 + CFI(.cfi_def_cfa_register 7) + CFI(.cfi_def_cfa_offset 8) ret; /* &__rseq_abi in %rax */ - .cfi_endproc + CFI(.cfi_endproc) ENCODE_SIZE(tcmalloc_internal_tls_fetch_pic) #endif /* !defined(__PIC__) || defined(__PIE__) */ @@ -212,73 +211,19 @@ ENCODE_SIZE(tcmalloc_internal_tls_fetch_pic) /* start of atomic restartable sequences */ -/* - * NOTE: We don't use cmpxchgq in the following functions since this would - make checking the success of our commit operation dependent on flags (which - * are in turn clobbered by the restart region) -- furthermore we can't just - * retry to fill in the flags since the restarted cmpxchg may have actually - * succeeded; spuriously failing subsequent attempts. 
- */ - -/* - * int TcmallocSlab_Internal_PerCpuCmpxchg64(int target_cpu, long *p, - * long old_val, long new_val) - */ - .p2align 6; /* aligns to 2^6 with NOP filling */ - .globl TcmallocSlab_Internal_PerCpuCmpxchg64 - .type TcmallocSlab_Internal_PerCpuCmpxchg64, @function -TcmallocSlab_Internal_PerCpuCmpxchg64: - .cfi_startproc - START_RSEQ(TcmallocSlab_Internal_PerCpuCmpxchg64); - FETCH_CPU(%eax); - cmp %eax, %edi; /* check cpu vs current_cpu */ - jne .LTcmallocSlab_Internal_PerCpuCmpxchg64_commit; - cmp %rdx, (%rsi); /* verify *p == old */ - jne .LTcmallocSlab_Internal_PerCpuCmpxchg64_value_mismatch; - mov %rcx, (%rsi); -.LTcmallocSlab_Internal_PerCpuCmpxchg64_commit: - ret; /* return current cpu, indicating mismatch OR success */ -.LTcmallocSlab_Internal_PerCpuCmpxchg64_value_mismatch: - mov $-1, %eax; /* mismatch versus "old" or "check", return -1 */ - ret; - .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_PerCpuCmpxchg64) -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PerCpuCmpxchg64) - - .p2align 6; /* aligns to 2^6 with NOP filling */ - .globl TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU - .type TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU, @function -TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU: - .cfi_startproc - START_RSEQ(TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU); - FETCH_VCPU(%eax); - cmp %eax, %edi; /* check cpu vs current_cpu */ - jne .LTcmallocSlab_Internal_PerCpuCmpxchg64_VCPU_commit; - cmp %rdx, (%rsi); /* verify *p == old */ - jne .LTcmallocSlab_Internal_PerCpuCmpxchg64_VCPU_value_mismatch; - mov %rcx, (%rsi); -.LTcmallocSlab_Internal_PerCpuCmpxchg64_VCPU_commit: - ret; /* return current cpu, indicating mismatch OR success */ -.LTcmallocSlab_Internal_PerCpuCmpxchg64_VCPU_value_mismatch: - mov $-1, %eax; /* mismatch versus "old" or "check", return -1 */ - ret; - .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU) -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU) - -/* size_t TcmallocSlab_Internal_PushBatch_FixedShift( - * void *ptr (%rdi), - * size_t cl (%rsi), - * void** batch (%rdx), - * size_t len (%rcx) { - * uint64_t r8 = __rseq_abi.cpu_id; - * uint64_t* r8 = CpuMemoryStart(rdi, r8); - * Header* hdr = r8 + rsi * 8; +/* size_t TcmallocSlab_Internal_PushBatch( + * size_t size_class (%rdi), + * void** batch (%rsi), + * size_t len (%rdx)) { + * uint64_t* r8 = tcmalloc_rseq.slabs; + * if ((r8 & TCMALLOC_CACHED_SLABS_BIT) == 0) return 0; + * r8 &= ~TCMALLOC_CACHED_SLABS_BIT; + * Header* hdr = r8 + rdi * 8; * uint64_t r9 = hdr->current; * uint64_t r10 = hdr->end; * if (r9 >= r10) return 0; - * r11 = rcx; - * r10 = r9 + min(rcx, r10 - r9); + * r11 = rdx; + * r10 = r9 + min(rdx, r10 - r9); * loop: * r11--; * rax = batch[r11]; @@ -286,97 +231,59 @@ DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU) * r9++; * if (r9 != r10) goto loop; * hdr->current = r9; - * return rcx - r11; + * return rdx - r11; * } */ .p2align 6; /* aligns to 2^6 with NOP filling */ - .globl TcmallocSlab_Internal_PushBatch_FixedShift - .type TcmallocSlab_Internal_PushBatch_FixedShift, @function -TcmallocSlab_Internal_PushBatch_FixedShift: - .cfi_startproc - START_RSEQ(TcmallocSlab_Internal_PushBatch_FixedShift); - FETCH_CPU(%r8d); - shl $TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT, %r8; - /* multiply cpu by 256k */ - lea (%rdi, %r8), %r8; - movzwq (%r8, %rsi, 8), %r9; /* current */ - movzwq 6(%r8, %rsi, 8), %r10; /* end */ + .globl TcmallocSlab_Internal_PushBatch + .type TcmallocSlab_Internal_PushBatch, @function +TcmallocSlab_Internal_PushBatch: + CFI(.cfi_startproc) + 
START_RSEQ(TcmallocSlab_Internal_PushBatch); + FETCH_SLABS(%r8); + btrq $TCMALLOC_CACHED_SLABS_BIT, %r8; + jnc .LTcmallocSlab_Internal_PushBatch_full; + movzwq (%r8, %rdi, 4), %r9; /* current */ + movzwq 2(%r8, %rdi, 4), %r10; /* end */ cmpq %r10, %r9; - jae .LTcmallocSlab_Internal_PushBatch_FixedShift_full; - movq %rcx, %r11; /* r11 = copy of len */ + jae .LTcmallocSlab_Internal_PushBatch_full; + movq %rdx, %r11; /* r11 = copy of len */ subq %r9, %r10; /* r10 = free capacity */ - cmpq %rcx, %r10; - cmovaq %rcx, %r10; /* r10 = min(len, free capacity) */ + cmpq %rdx, %r10; + cmovaq %rdx, %r10; /* r10 = min(len, free capacity) */ addq %r9, %r10; -.LTcmallocSlab_Internal_PushBatch_FixedShift_loop: +.LTcmallocSlab_Internal_PushBatch_loop: decq %r11; - movq (%rdx, %r11, 8), %rax; + movq (%rsi, %r11, 8), %rax; movq %rax, (%r8, %r9, 8); incq %r9; cmpq %r9, %r10; - jne .LTcmallocSlab_Internal_PushBatch_FixedShift_loop - movw %r9w, (%r8, %rsi, 8); -.LTcmallocSlab_Internal_PushBatch_FixedShift_commit: - movq %rcx, %rax; + jne .LTcmallocSlab_Internal_PushBatch_loop + movw %r9w, (%r8, %rdi, 4); +.LTcmallocSlab_Internal_PushBatch_commit: + movq %rdx, %rax; subq %r11, %rax; ret; -.LTcmallocSlab_Internal_PushBatch_FixedShift_full: +.LTcmallocSlab_Internal_PushBatch_full: xor %rax, %rax; ret; - .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_PushBatch_FixedShift) -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PushBatch_FixedShift) + CFI(.cfi_endproc) +ENCODE_SIZE(TcmallocSlab_Internal_PushBatch) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PushBatch) - .p2align 6; /* aligns to 2^6 with NOP filling */ - .globl TcmallocSlab_Internal_PushBatch_FixedShift_VCPU - .type TcmallocSlab_Internal_PushBatch_FixedShift_VCPU, @function -TcmallocSlab_Internal_PushBatch_FixedShift_VCPU: - .cfi_startproc - START_RSEQ(TcmallocSlab_Internal_PushBatch_FixedShift_VCPU); - FETCH_VCPU(%r8d); - shl $TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT, %r8; - /* multiply cpu by 256k */ - lea (%rdi, %r8), %r8; - movzwq (%r8, %rsi, 8), %r9; /* current */ - movzwq 6(%r8, %rsi, 8), %r10; /* end */ - cmpq %r10, %r9; - jae .LTcmallocSlab_Internal_PushBatch_FixedShift_VCPU_full; - movq %rcx, %r11; /* r11 = copy of len */ - subq %r9, %r10; /* r10 = free capacity */ - cmpq %rcx, %r10; - cmovaq %rcx, %r10; /* r10 = min(len, free capacity) */ - addq %r9, %r10; -.LTcmallocSlab_Internal_PushBatch_FixedShift_VCPU_loop: - decq %r11; - movq (%rdx, %r11, 8), %rax; - movq %rax, (%r8, %r9, 8); - incq %r9; - cmpq %r9, %r10; - jne .LTcmallocSlab_Internal_PushBatch_FixedShift_VCPU_loop - movw %r9w, (%r8, %rsi, 8); -.LTcmallocSlab_Internal_PushBatch_FixedShift_VCPU_commit: - movq %rcx, %rax; - subq %r11, %rax; - ret; -.LTcmallocSlab_Internal_PushBatch_FixedShift_VCPU_full: - xor %rax, %rax; - ret; - .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_PushBatch_FixedShift_VCPU) -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PushBatch_FixedShift_VCPU) - -/* size_t TcmallocSlab_Internal_PopBatch_FixedShift( - * void *ptr (%rdi), - * size_t cl (%rsi), - * void** batch (%rdx), - * size_t len (%rcx) { - * uint64_t r8 = __rseq_abi.cpu_id; - * uint64_t* r8 = CpuMemoryStart(rdi, r8); - * Header* hdr = GetHeader(rdi, rax, cl); +/* size_t TcmallocSlab_Internal_PopBatch( + * size_t size_class (%rdi), + * void** batch (%rsi), + * size_t len (%rdx), + * std::atomic* begin_ptr (%rcx)) { + * uint64_t* r8 = tcmalloc_rseq.slabs; + * if ((r8 & TCMALLOC_CACHED_SLABS_BIT) == 0) return 0; + * r8 &= ~TCMALLOC_CACHED_SLABS_BIT; + * Header* hdr = GetHeader(rdi, rax, size_class); * uint64_t r9 = 
hdr->current; - * uint64_t r10 = hdr->begin; + * uint64_t r10 = *begin_ptr; * if (r9 <= r10) return 0; - * r11 = min(rcx, r9 - r10); + * r11 = min(rdx, r9 - r10); * rax = 0; * loop: * r9--; @@ -389,75 +296,35 @@ DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PushBatch_FixedShift_VCPU) * } */ .p2align 6; /* aligns to 2^6 with NOP filling */ - .globl TcmallocSlab_Internal_PopBatch_FixedShift - .type TcmallocSlab_Internal_PopBatch_FixedShift, @function -TcmallocSlab_Internal_PopBatch_FixedShift: - .cfi_startproc - START_RSEQ(TcmallocSlab_Internal_PopBatch_FixedShift); - FETCH_CPU(%r8d); - shl $TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT, %r8; - /* multiply cpu by 256k */ - lea (%rdi, %r8), %r8; - movzwq (%r8, %rsi, 8), %r9; /* current */ - movzwq 4(%r8, %rsi, 8), %r10; /* begin */ - cmp %r10, %r9; - jbe .LTcmallocSlab_Internal_PopBatch_FixedShift_empty; - movq %r9, %r11; - subq %r10, %r11; /* r11 = available items */ - cmpq %rcx, %r11; - cmovaq %rcx, %r11; /* r11 = min(len, available items) */ + .globl TcmallocSlab_Internal_PopBatch + .type TcmallocSlab_Internal_PopBatch, @function +TcmallocSlab_Internal_PopBatch: + CFI(.cfi_startproc) + START_RSEQ(TcmallocSlab_Internal_PopBatch); + FETCH_SLABS(%r8); xorq %rax, %rax; -.LTcmallocSlab_Internal_PopBatch_FixedShift_loop: - decq %r9; - movq (%r8, %r9, 8), %r10; - movq %r10, (%rdx, %rax, 8); - incq %rax; - cmpq %rax, %r11; - jne .LTcmallocSlab_Internal_PopBatch_FixedShift_loop - movw %r9w, (%r8, %rsi, 8); -.LTcmallocSlab_Internal_PopBatch_FixedShift_commit: - ret; -.LTcmallocSlab_Internal_PopBatch_FixedShift_empty: - xor %rax, %rax; - ret; - .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_PopBatch_FixedShift) -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PopBatch_FixedShift) - - .p2align 6; /* aligns to 2^6 with NOP filling */ - .globl TcmallocSlab_Internal_PopBatch_FixedShift_VCPU - .type TcmallocSlab_Internal_PopBatch_FixedShift_VCPU, @function -TcmallocSlab_Internal_PopBatch_FixedShift_VCPU: - .cfi_startproc - START_RSEQ(TcmallocSlab_Internal_PopBatch_FixedShift_VCPU); - FETCH_VCPU(%r8d); - shl $TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT, %r8; - /* multiply cpu by 256k */ - lea (%rdi, %r8), %r8; - movzwq (%r8, %rsi, 8), %r9; /* current */ - movzwq 4(%r8, %rsi, 8), %r10; /* begin */ + btrq $TCMALLOC_CACHED_SLABS_BIT, %r8; + jnc .LTcmallocSlab_Internal_PopBatch_commit; + movzwq (%r8, %rdi, 4), %r9; /* current */ + movzwq (%rcx), %r10; /* begin */ cmp %r10, %r9; - jbe .LTcmallocSlab_Internal_PopBatch_FixedShift_VCPU_empty; + jbe .LTcmallocSlab_Internal_PopBatch_commit; movq %r9, %r11; subq %r10, %r11; /* r11 = available items */ - cmpq %rcx, %r11; - cmovaq %rcx, %r11; /* r11 = min(len, available items) */ - xorq %rax, %rax; -.LTcmallocSlab_Internal_PopBatch_FixedShift_VCPU_loop: + cmpq %rdx, %r11; + cmovaq %rdx, %r11; /* r11 = min(len, available items) */ +.LTcmallocSlab_Internal_PopBatch_loop: decq %r9; movq (%r8, %r9, 8), %r10; - movq %r10, (%rdx, %rax, 8); + movq %r10, (%rsi, %rax, 8); incq %rax; cmpq %rax, %r11; - jne .LTcmallocSlab_Internal_PopBatch_FixedShift_VCPU_loop - movw %r9w, (%r8, %rsi, 8); -.LTcmallocSlab_Internal_PopBatch_FixedShift_VCPU_commit: - ret; -.LTcmallocSlab_Internal_PopBatch_FixedShift_VCPU_empty: - xor %rax, %rax; + jne .LTcmallocSlab_Internal_PopBatch_loop + movw %r9w, (%r8, %rdi, 4); +.LTcmallocSlab_Internal_PopBatch_commit: ret; - .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_PopBatch_FixedShift_VCPU) -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PopBatch_FixedShift_VCPU) + CFI(.cfi_endproc) 
+ENCODE_SIZE(TcmallocSlab_Internal_PopBatch) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PopBatch) .section .note.GNU-stack,"",@progbits diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc.h b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc.h index 91d15ba9088d..ea396b775d54 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,49 +16,82 @@ #ifndef TCMALLOC_INTERNAL_PERCPU_TCMALLOC_H_ #define TCMALLOC_INTERNAL_PERCPU_TCMALLOC_H_ +#if defined(__linux__) +#include +#else +#include +#endif +#include + +#include +#include #include +#include #include +#include +#include +#include "absl/base/attributes.h" #include "absl/base/casts.h" #include "absl/base/dynamic_annotations.h" -#include "absl/base/internal/sysinfo.h" +#include "absl/base/optimization.h" +#include "absl/functional/function_ref.h" +#include "absl/numeric/bits.h" +#include "tcmalloc/internal/logging.h" #include "tcmalloc/internal/mincore.h" +#include "tcmalloc/internal/optimization.h" #include "tcmalloc/internal/percpu.h" +#include "tcmalloc/internal/prefetch.h" +#include "tcmalloc/internal/sysinfo.h" -#if defined(TCMALLOC_PERCPU_USE_RSEQ) -#if !defined(__clang__) -#define TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO 1 -#elif __clang_major__ >= 9 && !__has_feature(speculative_load_hardening) -// asm goto requires the use of Clang 9 or newer: -// https://releases.llvm.org/9.0.0/tools/clang/docs/ReleaseNotes.html#c-language-changes-in-clang -// -// SLH (Speculative Load Hardening) builds do not support asm goto. We can -// detect these compilation modes since -// https://github.com/llvm/llvm-project/commit/379e68a763097bed55556c6dc7453e4b732e3d68. -#define TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO 1 #if __clang_major__ >= 11 -#define TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT 1 -#endif - -#else -#define TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO 0 -#endif -#else -#define TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO 0 +#define TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT 1 #endif GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { +struct PerSizeClassMaxCapacity { + size_t size_class; + size_t max_capacity; +}; + struct PerCPUMetadataState { size_t virtual_size; size_t resident_size; }; +struct ResizeSlabsInfo { + void* old_slabs; + size_t old_slabs_size; +}; + namespace subtle { namespace percpu { +enum class Shift : uint8_t; +constexpr uint8_t ToUint8(Shift shift) { return static_cast(shift); } +constexpr Shift ToShiftType(size_t shift) { + TC_ASSERT_EQ(ToUint8(static_cast(shift)), shift); + return static_cast(shift); +} + +// The allocation size for the slabs array. +inline size_t GetSlabsAllocSize(Shift shift, int num_cpus) { + return static_cast(num_cpus) << ToUint8(shift); +} + +// Since we lazily initialize our slab, we expect it to be mmap'd and not +// resident. We align it to a page size so neighboring allocations (from +// TCMalloc's internal arena) do not necessarily cause the metadata to be +// faulted in. +// +// We prefer a small page size (EXEC_PAGESIZE) over the anticipated huge page +// size to allow small-but-slow to allocate the slab in the tail of its +// existing Arena block. +static constexpr std::align_val_t kPhysicalPageAlign{EXEC_PAGESIZE}; + // Tcmalloc slab for per-cpu caching mode. 
// Conceptually it is equivalent to an array of NumClasses PerCpuSlab's, // and in fallback implementation it is implemented that way. But optimized @@ -69,1204 +103,1322 @@ namespace percpu { template class TcmallocSlab { public: + using DrainHandler = absl::FunctionRef; + using ShrinkHandler = + absl::FunctionRef; + + // We use a single continuous region of memory for all slabs on all CPUs. + // This region is split into NumCPUs regions of a power-of-2 size + // (32/64/128/256/512k). + // First NumClasses words of each CPU region are occupied by slab + // headers (Header struct). The remaining memory contain slab arrays. + // struct Slabs { + // std::atomic header[NumClasses]; + // void* mem[]; + // }; + constexpr TcmallocSlab() = default; // Init must be called before any other methods. - // is memory allocation callback (e.g. malloc). - // callback returns max capacity for size class . - // indicates that per-CPU slabs should be populated on demand + // is memory for the slabs with size corresponding to . + // callback returns max capacity for size class . // indicates the number of bits to shift the CPU ID in order to - // obtain the location of the per-CPU slab. If this parameter matches - // TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT as set in - // percpu_intenal.h then the assembly language versions of push/pop - // batch can be used; otherwise batch operations are emulated. + // obtain the location of the per-CPU slab. // // Initial capacity is 0 for all slabs. - void Init(void*(alloc)(size_t size), size_t (*capacity)(size_t cl), bool lazy, - size_t shift); - - // Only may be called if Init(..., lazy = true) was used. - void InitCPU(int cpu, size_t (*capacity)(size_t cl)); - - // For tests. - void Destroy(void(free)(void*)); + void Init(absl::FunctionRef alloc, + void* slabs, absl::FunctionRef capacity, + Shift shift); - // Number of elements in cpu/cl slab. - size_t Length(int cpu, size_t cl) const; + void InitSlabs(void* slabs, Shift shift, + absl::FunctionRef capacity); - // Number of elements (currently) allowed in cpu/cl slab. - size_t Capacity(int cpu, size_t cl) const; - - // If running on cpu, increment the cpu/cl slab's capacity to no greater than - // min(capacity+len, max_cap) and return the increment applied. Otherwise - // return 0. Note: max_cap must be the same as returned by capacity callback - // passed to Init. - size_t Grow(int cpu, size_t cl, size_t len, size_t max_cap); - - // If running on cpu, decrement the cpu/cl slab's capacity to no less than - // max(capacity-len, 0) and return the actual decrement applied. Otherwise - // return 0. - size_t Shrink(int cpu, size_t cl, size_t len); + // Lazily initializes the slab for a specific cpu. + // callback returns max capacity for size class . + // + // Prior to InitCpu being called on a particular `cpu`, non-const operations + // other than Push/Pop/PushBatch/PopBatch are invalid. + void InitCpu(int cpu, absl::FunctionRef capacity); + + // Update maximum capacities allocated to each size class. + // Build and initialize so as to use new maximum capacities + // provided by callback for the . + // updates capacities for the with the new + // once the slabs are initialized. + // provides an array of new maximum capacities to be + // updated for size classes. + // provides the number of size classes for which the + // capacity needs to be updated. + // callback drains the old slab. 
+ [[nodiscard]] ResizeSlabsInfo UpdateMaxCapacities( + void* new_slabs, absl::FunctionRef capacity, + absl::FunctionRef update_capacity, + absl::FunctionRef populated, DrainHandler drain_handler, + PerSizeClassMaxCapacity* new_max_capacity, int classes_to_resize); + + // Grows or shrinks the size of the slabs to use the value. First + // we initialize , then lock all headers on the old slabs, + // atomically update to use the new slabs, and teardown the old slabs. Returns + // a pointer to old slabs to be madvised away along with the size of the old + // slabs and the number of bytes that were reused. + // + // is memory allocation callback (e.g. malloc). + // callback returns max capacity for size class . + // returns whether the corresponding cpu has been populated. + // + // Caller must ensure that there are no concurrent calls to InitCpu, + // ShrinkOtherCache, or Drain. + [[nodiscard]] ResizeSlabsInfo ResizeSlabs( + Shift new_shift, void* new_slabs, + absl::FunctionRef capacity, + absl::FunctionRef populated, DrainHandler drain_handler); + + // For tests. Returns the freed slabs pointer. + void* Destroy(absl::FunctionRef free); + + // Number of elements in cpu/size_class slab. + size_t Length(int cpu, size_t size_class) const; + + // Number of elements (currently) allowed in cpu/size_class slab. + size_t Capacity(int cpu, size_t size_class) const; + + // If running on cpu, increment the cpu/size_class slab's capacity to no + // greater than min(capacity+len, max_capacity()) and return the + // increment applied. Otherwise return 0. + // is a callback that takes the current slab shift as input and + // returns the max capacity of for that shift value - this is in + // order to ensure that the shift value used is consistent with the one used + // in the rest of this function call. Note: max_capacity must be the same as + // returned by capacity callback passed to Init. + size_t Grow(int cpu, size_t size_class, size_t len, + absl::FunctionRef max_capacity); // Add an item (which must be non-zero) to the current CPU's slab. Returns - // true if add succeeds. Otherwise invokes and returns false (assuming - // that returns negative value). - bool Push(size_t cl, void* item, OverflowHandler f); + // true if add succeeds. Otherwise invokes and returns + // false (assuming that returns negative value). + bool Push(size_t size_class, void* item); // Remove an item (LIFO) from the current CPU's slab. If the slab is empty, - // invokes and returns its result. - void* Pop(size_t cl, UnderflowHandler f); + // invokes and returns its result. + [[nodiscard]] void* Pop(size_t size_class); // Add up to items to the current cpu slab from the array located at // . Returns the number of items that were added (possibly 0). All - // items not added will be returned at the start of . Items are only - // not added if there is no space on the current cpu. + // items not added will be returned at the start of . Items are not + // added if there is no space on the current cpu, or if the thread was + // re-scheduled since last Push/Pop. // REQUIRES: len > 0. - size_t PushBatch(size_t cl, void** batch, size_t len); + size_t PushBatch(size_t size_class, void** batch, size_t len); // Pop up to items from the current cpu slab and return them in . - // Returns the number of items actually removed. + // Returns the number of items actually removed. If the thread was + // re-scheduled since last Push/Pop, the function returns 0. // REQUIRES: len > 0. 
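Both batch calls may complete only partially, so callers are expected to handle the remainder themselves. The following is a usage sketch only: kNumClasses, the function names, and the surrounding policy are invented for illustration; just the PushBatch/PopBatch signatures and their partial-completion behavior are taken from the interface described here.

template <size_t kNumClasses>
size_t FlushSketch(TcmallocSlab<kNumClasses>& slab, size_t size_class,
                   void** batch, size_t len) {
  size_t pushed = slab.PushBatch(size_class, batch, len);
  // Items that were not pushed remain at the start of `batch`; the caller
  // would return them to the central free list.
  return len - pushed;  // number of leftover items
}

template <size_t kNumClasses>
size_t RefillSketch(TcmallocSlab<kNumClasses>& slab, size_t size_class,
                    void** batch, size_t want) {
  // May return fewer than `want`, or 0 if the slab is empty or the slab
  // pointer is not currently cached for this thread.
  return slab.PopBatch(size_class, batch, want);
}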
- size_t PopBatch(size_t cl, void** batch, size_t len); - - // Decrements the cpu/cl slab's capacity to no less than max(capacity-len, 0) - // and returns the actual decrement applied. It attempts to shrink any - // unused capacity (i.e end-current) in cpu/cl's slab; if it does not have - // enough unused items, it pops up to items from cpu/cl slab and then - // shrinks the freed capacity. + size_t PopBatch(size_t size_class, void** batch, size_t len); + + // Caches the current cpu slab offset in tcmalloc_slabs if it wasn't + // cached and the cpu is not stopped. Returns the current cpu and the flag + // if the offset was previously uncached and is now cached. If the cpu + // is stopped, returns {-1, true}. + std::pair CacheCpuSlab(); + + // Uncaches the slab offset for the current thread, so that the next Push/Pop + // operation will return false. + void UncacheCpuSlab(); + + // Synchronization protocol between local and remote operations. + // This class supports a set of cpu local operations (Push/Pop/ + // PushBatch/PopBatch/Grow), and a set of remote operations that + // operate on non-current cpu's slab (GrowOtherCache/ShrinkOtherCache/ + // Drain/Resize). Local operations always use a restartable sequence + // that aborts if the slab pointer (tcamlloc_slab) is uncached. + // Caching of the slab pointer after rescheduling checks if + // stopped_[cpu] is unset. Remote operations set stopped_[cpu] + // and then execute Fence, this ensures that any local operation + // on the cpu will abort without changing any state and that the + // slab pointer won't be cached on the cpu. This part uses relaxed atomic + // operations on stopped_[cpu] because the Fence provides all necessary + // synchronization between remote and local threads. When a remote operation + // finishes, it unsets stopped_[cpu] using release memory ordering. + // This ensures that any new local operation on the cpu that observes + // unset stopped_[cpu] with acquire memory ordering, will also see all + // side-effects of the remote operation, and won't interfere with it. + // StopCpu/StartCpu implement the corresponding parts of the remote + // synchronization protocol. + void StopCpu(int cpu); + void StartCpu(int cpu); + + // Grows the cpu/size_class slab's capacity to no greater than + // min(capacity+len, max_capacity()) and returns the increment + // applied. + // is a callback that takes the current slab shift as input and + // returns the max capacity of for that shift value - this is in + // order to ensure that the shift value used is consistent with the one used + // in the rest of this function call. Note: max_capacity must be the same as + // returned by capacity callback passed to Init. + // This may be called from another processor, not just the . + size_t GrowOtherCache(int cpu, size_t size_class, size_t len, + absl::FunctionRef max_capacity); + + // Decrements the cpu/size_class slab's capacity to no less than + // max(capacity-len, 0) and returns the actual decrement applied. It attempts + // to shrink any unused capacity (i.e end-current) in cpu/size_class's slab; + // if it does not have enough unused items, it pops up to items from + // cpu/size_class slab and then shrinks the freed capacity. // // May be called from another processor, not just the . // REQUIRES: len > 0. 
- typedef void (*ShrinkHandler)(void* arg, size_t cl, void** batch, size_t n); - size_t ShrinkOtherCache(int cpu, size_t cl, size_t len, void* shrink_ctx, - ShrinkHandler f); + size_t ShrinkOtherCache(int cpu, size_t size_class, size_t len, + ShrinkHandler shrink_handler); // Remove all items (of all classes) from 's slab; reset capacity for all // classes to zero. Then, for each sizeclass, invoke - // DrainHandler(drain_ctx, cl, , ); + // DrainHandler(size_class, , ); // // It is invalid to concurrently execute Drain() for the same CPU; calling // Push/Pop/Grow/Shrink concurrently (even on the same CPU) is safe. - typedef void (*DrainHandler)(void* drain_ctx, size_t cl, void** batch, - size_t n, size_t cap); - void Drain(int cpu, void* drain_ctx, DrainHandler f); + void Drain(int cpu, DrainHandler drain_handler); PerCPUMetadataState MetadataMemoryUsage() const; - // We use a single continuous region of memory for all slabs on all CPUs. - // This region is split into NumCPUs regions of size kPerCpuMem (256k). - // First NumClasses words of each CPU region are occupied by slab - // headers (Header struct). The remaining memory contain slab arrays. - struct Slabs { - std::atomic header[NumClasses]; - void* mem[]; - }; - - inline int GetCurrentVirtualCpuUnsafe() { - return VirtualRseqCpuId(virtual_cpu_id_offset_); + // Gets the current shift of the slabs. Intended for use by the thread that + // calls ResizeSlabs(). + uint8_t GetShift() const { + return ToUint8(GetSlabsAndShift(std::memory_order_relaxed).second); } private: - // Slab header (packed, atomically updated 64-bit). + // In order to support dynamic slab metadata sizes, we need to be able to + // atomically update both the slabs pointer and the shift value so we store + // both together in an atomic SlabsAndShift, which manages the bit operations. + class SlabsAndShift { + public: + // These masks allow for distinguishing the shift bits from the slabs + // pointer bits. The maximum shift value is less than kShiftMask and + // kShiftMask is less than kPhysicalPageAlign. + static constexpr size_t kShiftMask = 0xFF; + static constexpr size_t kSlabsMask = ~kShiftMask; + + constexpr explicit SlabsAndShift() noexcept : raw_(0) {} + SlabsAndShift(const void* slabs, Shift shift) + : raw_(reinterpret_cast(slabs) | ToUint8(shift)) { + TC_ASSERT_EQ(raw_ & kShiftMask, ToUint8(shift)); + TC_ASSERT_EQ(reinterpret_cast(raw_ & kSlabsMask), slabs); + } + + std::pair Get() const { + static_assert(kShiftMask >= 0 && kShiftMask <= UCHAR_MAX, + "kShiftMask must fit in a uint8_t"); + // Avoid expanding the width of Shift else the compiler will insert an + // additional instruction to zero out the upper bits on the critical path + // of alloc / free. Not zeroing out the bits is safe because both ARM and + // x86 only use the lowest byte for shift count in variable shifts. + return {reinterpret_cast(raw_ & kSlabsMask), + static_cast(raw_ & kShiftMask)}; + } + + bool operator!=(const SlabsAndShift& other) const { + return raw_ != other.raw_; + } + + private: + uintptr_t raw_; + }; + + // Slab header (packed, atomically updated 32-bit). + // Current and end are pointer offsets from per-CPU region start. + // The slot array is prefixed with an item that has low bit set and ends + // at end, and the occupied slots are up to current. struct Header { - // All values are word offsets from per-CPU region start. - // The array is [begin, end). + // The end offset of the currently occupied slots. uint16_t current; - // Copy of end. 
Updated by Shrink/Grow, but is not overwritten by Drain. - uint16_t end_copy; - // Lock updates only begin and end with a 32-bit write. - union { - struct { - uint16_t begin; - uint16_t end; - }; - uint32_t lock_update; - }; - - // Lock is used by Drain to stop concurrent mutations of the Header. - // Lock sets begin to 0xffff and end to 0, which makes Push and Pop fail - // regardless of current value. - bool IsLocked() const; - void Lock(); + // The end offset of the slot array for this size class. + uint16_t end; }; - // We cast Header to std::atomic. - static_assert(sizeof(Header) == sizeof(std::atomic), - "bad Header size"); - - Slabs* slabs_ = nullptr; - size_t shift_ = 0; - // This is in units of bytes. - size_t virtual_cpu_id_offset_ = offsetof(kernel_rseq, cpu_id); - - Slabs* CpuMemoryStart(int cpu) const; - std::atomic* GetHeader(int cpu, size_t cl) const; - static Header LoadHeader(std::atomic* hdrp); - static void StoreHeader(std::atomic* hdrp, Header hdr); - static int CompareAndSwapHeader(int cpu, std::atomic* hdrp, - Header old, Header hdr, - size_t virtual_cpu_id_offset); + using AtomicHeader = std::atomic; + + // We cast Header to AtomicHeader. + static_assert(sizeof(Header) == sizeof(AtomicHeader)); + + // We mark the pointer that's stored right before size class object range + // in the slabs array with this mask. When we reach pointer marked with this + // mask when popping, we understand that we reached the beginning of the + // range (the slab is empty). The pointer is also a valid pointer for + // prefetching, so it allows us to always prefetch the previous element + // when popping. + static constexpr uintptr_t kBeginMark = 1; + + // It's important that we use consistent values for slabs/shift rather than + // loading from the atomic repeatedly whenever we use one of the values. + [[nodiscard]] std::pair GetSlabsAndShift( + std::memory_order order) const { + return slabs_and_shift_.load(order).Get(); + } + + static void* CpuMemoryStart(void* slabs, Shift shift, int cpu); + static AtomicHeader* GetHeader(void* slabs, Shift shift, int cpu, + size_t size_class); + static Header LoadHeader(AtomicHeader* hdrp); + static void StoreHeader(AtomicHeader* hdrp, Header hdr); + void DrainCpu(void* slabs, Shift shift, int cpu, DrainHandler drain_handler); + void DrainOldSlabs(void* slabs, Shift shift, int cpu, + const std::array& old_begins, + DrainHandler drain_handler); + + // Implementation of InitCpu() allowing for reuse in ResizeSlabs(). + void InitCpuImpl(void* slabs, Shift shift, int cpu, + absl::FunctionRef capacity); + + std::pair CacheCpuSlabSlow(); + + // We store both a pointer to the array of slabs and the shift value together + // so that we can atomically update both with a single store. + std::atomic slabs_and_shift_{}; + // Remote Cpu operation (Resize/Drain/Grow/Shrink) is running so any local + // operations (Push/Pop) should fail. + std::atomic* stopped_ = nullptr; + // begins_[size_class] is offset of the size_class region in the slabs area. + std::atomic* begins_ = nullptr; }; +// RAII for StopCpu/StartCpu. template -inline size_t TcmallocSlab::Length(int cpu, size_t cl) const { - Header hdr = LoadHeader(GetHeader(cpu, cl)); - return hdr.IsLocked() ? 
0 : hdr.current - hdr.begin; -} +class ScopedSlabCpuStop { + public: + ScopedSlabCpuStop(TcmallocSlab& slab, int cpu) + : slab_(slab), cpu_(cpu) { + slab_.StopCpu(cpu_); + } + + ~ScopedSlabCpuStop() { slab_.StartCpu(cpu_); } + + private: + TcmallocSlab& slab_; + const int cpu_; + + ScopedSlabCpuStop(const ScopedSlabCpuStop&) = delete; + ScopedSlabCpuStop& operator=(const ScopedSlabCpuStop&) = delete; +}; template -inline size_t TcmallocSlab::Capacity(int cpu, size_t cl) const { - Header hdr = LoadHeader(GetHeader(cpu, cl)); - return hdr.IsLocked() ? 0 : hdr.end - hdr.begin; +inline size_t TcmallocSlab::Length(int cpu, + size_t size_class) const { + const auto [slabs, shift] = GetSlabsAndShift(std::memory_order_relaxed); + Header hdr = LoadHeader(GetHeader(slabs, shift, cpu, size_class)); + uint16_t begin = begins_[size_class].load(std::memory_order_relaxed); + // We can read inconsistent hdr/begin during Resize, to avoid surprising + // callers return 0 instead of overflows values. + return std::max(0, hdr.current - begin); } template -inline size_t TcmallocSlab::Grow(int cpu, size_t cl, size_t len, - size_t max_cap) { - const size_t virtual_cpu_id_offset = virtual_cpu_id_offset_; - std::atomic* hdrp = GetHeader(cpu, cl); - for (;;) { - Header old = LoadHeader(hdrp); - if (old.IsLocked() || old.end - old.begin == max_cap) { - return 0; - } - uint16_t n = std::min(len, max_cap - (old.end - old.begin)); - Header hdr = old; - hdr.end += n; - hdr.end_copy += n; - const int ret = - CompareAndSwapHeader(cpu, hdrp, old, hdr, virtual_cpu_id_offset); - if (ret == cpu) { - return n; - } else if (ret >= 0) { - return 0; - } - } +inline size_t TcmallocSlab::Capacity(int cpu, + size_t size_class) const { + const auto [slabs, shift] = GetSlabsAndShift(std::memory_order_relaxed); + Header hdr = LoadHeader(GetHeader(slabs, shift, cpu, size_class)); + uint16_t begin = begins_[size_class].load(std::memory_order_relaxed); + return std::max(0, hdr.end - begin); } -template -inline size_t TcmallocSlab::Shrink(int cpu, size_t cl, size_t len) { - const size_t virtual_cpu_id_offset = virtual_cpu_id_offset_; - std::atomic* hdrp = GetHeader(cpu, cl); - for (;;) { - Header old = LoadHeader(hdrp); - if (old.IsLocked() || old.current == old.end) { - return 0; - } - uint16_t n = std::min(len, old.end - old.current); - Header hdr = old; - hdr.end -= n; - hdr.end_copy -= n; - const int ret = - CompareAndSwapHeader(cpu, hdrp, old, hdr, virtual_cpu_id_offset); - if (ret == cpu) { - return n; - } else if (ret >= 0) { - return 0; - } +#if defined(__x86_64__) +#define TCMALLOC_RSEQ_RELOC_TYPE "R_X86_64_NONE" +#define TCMALLOC_RSEQ_JUMP "jmp" +#if !defined(__PIC__) && !defined(__PIE__) +#define TCMALLOC_RSEQ_SET_CS(name) \ + "movq $__rseq_cs_" #name "_%=, %[rseq_cs_addr]\n" +#else +#define TCMALLOC_RSEQ_SET_CS(name) \ + "lea __rseq_cs_" #name \ + "_%=(%%rip), %[scratch]\n" \ + "movq %[scratch], %[rseq_cs_addr]\n" +#endif + +#elif defined(__aarch64__) +// The trampoline uses a non-local branch to restart critical sections. +// The trampoline is located in the .text.unlikely section, and the maximum +// distance of B and BL branches in ARM64 is limited to 128MB. If the linker +// detects the distance being too large, it injects a thunk which may clobber +// the x16 or x17 register according to the ARMv8 ABI standard. +// The actual clobbering is hard to trigger in a test, so instead of waiting +// for clobbering to happen in production binaries, we proactively always +// clobber x16 and x17 to shake out bugs earlier. 
+// RSEQ critical section asm blocks should use TCMALLOC_RSEQ_CLOBBER +// in the clobber list to account for this. +#ifndef NDEBUG +#define TCMALLOC_RSEQ_TRAMPLINE_SMASH \ + "mov x16, #-2097\n" \ + "mov x17, #-2099\n" +#else +#define TCMALLOC_RSEQ_TRAMPLINE_SMASH +#endif +#define TCMALLOC_RSEQ_CLOBBER "x16", "x17" +#define TCMALLOC_RSEQ_RELOC_TYPE "R_AARCH64_NONE" +#define TCMALLOC_RSEQ_JUMP "b" +#define TCMALLOC_RSEQ_SET_CS(name) \ + TCMALLOC_RSEQ_TRAMPLINE_SMASH \ + "adrp %[scratch], __rseq_cs_" #name \ + "_%=\n" \ + "add %[scratch], %[scratch], :lo12:__rseq_cs_" #name \ + "_%=\n" \ + "str %[scratch], %[rseq_cs_addr]\n" +#endif + +#if !defined(__clang_major__) || __clang_major__ >= 9 +#define TCMALLOC_RSEQ_RELOC ".reloc 0, " TCMALLOC_RSEQ_RELOC_TYPE ", 1f\n" +#else +#define TCMALLOC_RSEQ_RELOC +#endif + +// Common rseq asm prologue. +// It uses labels 1-4 and assumes the critical section ends with label 5. +// The prologue assumes there is [scratch] input with a scratch register. +#define TCMALLOC_RSEQ_PROLOGUE(name) \ + /* __rseq_cs only needs to be writeable to allow for relocations.*/ \ + ".pushsection __rseq_cs, \"aw?\"\n" \ + ".balign 32\n" \ + ".local __rseq_cs_" #name \ + "_%=\n" \ + ".type __rseq_cs_" #name \ + "_%=,@object\n" \ + ".size __rseq_cs_" #name \ + "_%=,32\n" \ + "__rseq_cs_" #name \ + "_%=:\n" \ + ".long 0x0\n" \ + ".long 0x0\n" \ + ".quad 4f\n" \ + ".quad 5f - 4f\n" \ + ".quad 2f\n" \ + ".popsection\n" TCMALLOC_RSEQ_RELOC \ + ".pushsection __rseq_cs_ptr_array, \"aw?\"\n" \ + "1:\n" \ + ".balign 8\n" \ + ".quad __rseq_cs_" #name \ + "_%=\n" /* Force this section to be retained. \ + It is for debugging, but is otherwise not referenced. */ \ + ".popsection\n" \ + ".pushsection .text.unlikely, \"ax?\"\n" /* This is part of the upstream \ + rseq ABI. The 4 bytes prior to \ + the abort IP must match \ + TCMALLOC_PERCPU_RSEQ_SIGNATURE \ + (as configured by our rseq \ + syscall's signature parameter). \ + This signature is used to \ + annotate valid abort IPs (since \ + rseq_cs could live in a \ + user-writable segment). */ \ + ".long %c[rseq_sig]\n" \ + ".local " #name \ + "_trampoline_%=\n" \ + ".type " #name \ + "_trampoline_%=,@function\n" \ + "" #name \ + "_trampoline_%=:\n" \ + "2:\n" TCMALLOC_RSEQ_JUMP \ + " 3f\n" \ + ".size " #name "_trampoline_%=, . - " #name \ + "_trampoline_%=\n" \ + ".popsection\n" /* Prepare */ \ + "3:\n" TCMALLOC_RSEQ_SET_CS(name) /* Start */ \ + "4:\n" + +#define TCMALLOC_RSEQ_INPUTS \ + [rseq_cs_addr] "m"(__rseq_abi.rseq_cs), \ + [rseq_slabs_addr] "m"(*reinterpret_cast( \ + reinterpret_cast(&__rseq_abi) + \ + TCMALLOC_RSEQ_SLABS_OFFSET)), \ + [rseq_sig] "n"( \ + TCMALLOC_PERCPU_RSEQ_SIGNATURE), /* Also pass common consts, there \ + is no cost to passing unused \ + consts. */ \ + [cached_slabs_bit] "n"(TCMALLOC_CACHED_SLABS_BIT), \ + [cached_slabs_mask_neg] "n"(~TCMALLOC_CACHED_SLABS_MASK) + +// Store v to p (*p = v) if the current thread wasn't rescheduled +// (still has the slab pointer cached). Otherwise returns false. 
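The intended semantics of this conditional store can be stated as a plain C++ model before the architecture-specific rseq blocks. This is illustrative only: the real StoreCurrentCpu performs the bit test and the store inside a single restartable sequence so a reschedule between them aborts the store, and the mask value below is a placeholder for the TCMALLOC_CACHED_SLABS_BIT/MASK constants from percpu.h.

#include <cstdint>

inline constexpr uintptr_t kCachedSlabsMaskModel = uintptr_t{1} << 63;  // placeholder value

template <typename T>
bool StoreCurrentCpuModel(uintptr_t tcmalloc_slabs_word, volatile T* p, T v) {
  if ((tcmalloc_slabs_word & kCachedSlabsMaskModel) == 0) {
    return false;  // slab pointer not cached: the thread was rescheduled
  }
  *p = v;  // in the real code this store is the commit point of the rseq CS
  return true;
}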
+template +inline ABSL_ATTRIBUTE_ALWAYS_INLINE bool StoreCurrentCpu(volatile void* p, + T v) { + uintptr_t scratch = 0; +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ && defined(__x86_64__) + asm(TCMALLOC_RSEQ_PROLOGUE(TcmallocSlab_Internal_StoreCurrentCpu) + R"( + xorq %[scratch], %[scratch] + btq $%c[cached_slabs_bit], %[rseq_slabs_addr] + jnc 5f + movl $1, %k[scratch] + mov %[v], %[p] + 5 :)" + : [scratch] "=&r"(scratch) + : TCMALLOC_RSEQ_INPUTS, [p] "m"(*static_cast(p)), + [v] "r"(v) + : "cc", "memory"); +#elif TCMALLOC_INTERNAL_PERCPU_USE_RSEQ && defined(__aarch64__) + uintptr_t tmp; + // Aarch64 requires different argument references for different sizes + // for the STR instruction (%[v] vs %w[v]), so we have to duplicate + // the asm block. + if constexpr (sizeof(T) == sizeof(uint64_t)) { + asm(TCMALLOC_RSEQ_PROLOGUE(TcmallocSlab_Internal_StoreCurrentCpu) + R"( + mov %[scratch], #0 + ldr %[tmp], %[rseq_slabs_addr] + tbz %[tmp], #%c[cached_slabs_bit], 5f + mov %[scratch], #1 + str %[v], %[p] + 5 :)" + : [scratch] "=&r"(scratch), [tmp] "=&r"(tmp) + : TCMALLOC_RSEQ_INPUTS, [p] "m"(*static_cast(p)), + [v] "r"(v) + : TCMALLOC_RSEQ_CLOBBER, "cc", "memory"); + } else { + static_assert(sizeof(T) == sizeof(uint32_t)); + asm(TCMALLOC_RSEQ_PROLOGUE(TcmallocSlab_Internal_StoreCurrentCpu) + R"( + mov %[scratch], #0 + ldr %[tmp], %[rseq_slabs_addr] + tbz %[tmp], #%c[cached_slabs_bit], 5f + mov %[scratch], #1 + str %w[v], %[p] + 5 :)" + : [scratch] "=&r"(scratch), [tmp] "=&r"(tmp) + : TCMALLOC_RSEQ_INPUTS, [p] "m"(*static_cast(p)), + [v] "r"(v) + : TCMALLOC_RSEQ_CLOBBER, "cc", "memory"); } +#endif + return scratch; } -#if defined(__x86_64__) -template -static inline ABSL_ATTRIBUTE_ALWAYS_INLINE int TcmallocSlab_Internal_Push( - typename TcmallocSlab::Slabs* slabs, size_t cl, void* item, - const size_t shift, OverflowHandler f, const size_t virtual_cpu_id_offset) { -#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO +// Prefetch slabs memory for the case of repeated pushes/pops. +// Note: this prefetch slows down micro-benchmarks, but provides ~0.1-0.5% +// speedup for larger real applications. +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void PrefetchSlabMemory(uintptr_t ptr) { + PrefetchWT0(reinterpret_cast(ptr)); +} + +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ && defined(__x86_64__) +// Note: These helpers must be "static inline" to avoid ODR violations due to +// different labels emitted in TCMALLOC_RSEQ_PROLOGUE. +static inline ABSL_ATTRIBUTE_ALWAYS_INLINE bool TcmallocSlab_Internal_Push( + size_t size_class, void* item) { + uintptr_t scratch, current; +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT asm goto( #else bool overflow; asm volatile( #endif - // TODO(b/141629158): __rseq_cs only needs to be writeable to allow for - // relocations, but could be read-only for non-PIE builds. 
- ".pushsection __rseq_cs, \"aw?\"\n" - ".balign 32\n" - ".local __rseq_cs_TcmallocSlab_Internal_Push_%=\n" - ".type __rseq_cs_TcmallocSlab_Internal_Push_%=,@object\n" - ".size __rseq_cs_TcmallocSlab_Internal_Push_%=,32\n" - "__rseq_cs_TcmallocSlab_Internal_Push_%=:\n" - ".long 0x0\n" - ".long 0x0\n" - ".quad 4f\n" - ".quad 5f - 4f\n" - ".quad 2f\n" - ".popsection\n" -#if !defined(__clang_major__) || __clang_major__ >= 9 - ".reloc 0, R_X86_64_NONE, 1f\n" + TCMALLOC_RSEQ_PROLOGUE(TcmallocSlab_Internal_Push) + // scratch = tcmalloc_slabs; + "movq %[rseq_slabs_addr], %[scratch]\n" + // if (scratch & TCMALLOC_CACHED_SLABS_MASK>) goto overflow_label; + // scratch &= ~TCMALLOC_CACHED_SLABS_MASK; + "btrq $%c[cached_slabs_bit], %[scratch]\n" +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + "jnc %l[overflow_label]\n" +#else + "jae 5f\n" // ae==c #endif - ".pushsection __rseq_cs_ptr_array, \"aw?\"\n" - "1:\n" - ".balign 8;" - ".quad __rseq_cs_TcmallocSlab_Internal_Push_%=\n" - // Force this section to be retained. It is for debugging, but is - // otherwise not referenced. - ".popsection\n" - ".pushsection .text.unlikely, \"ax?\"\n" - ".byte 0x0f, 0x1f, 0x05\n" - ".long %c[rseq_sig]\n" - ".local TcmallocSlab_Internal_Push_trampoline_%=\n" - ".type TcmallocSlab_Internal_Push_trampoline_%=,@function\n" - "TcmallocSlab_Internal_Push_trampoline_%=:\n" - "2:\n" - "jmp 3f\n" - ".size TcmallocSlab_Internal_Push_trampoline_%=, . - " - "TcmallocSlab_Internal_Push_trampoline_%=;\n" - ".popsection\n" - // Prepare - // - // TODO(b/151503411): Pending widespread availability of LLVM's asm - // goto with output contraints - // (https://github.com/llvm/llvm-project/commit/23c2a5ce33f0), we can - // return the register allocations to the compiler rather than using - // explicit clobbers. Prior to this, blocks which use asm goto cannot - // also specify outputs. - // - // r10: Scratch - // r11: Current - "3:\n" - "lea __rseq_cs_TcmallocSlab_Internal_Push_%=(%%rip), %%r10\n" - "mov %%r10, %c[rseq_cs_offset](%[rseq_abi])\n" - // Start - "4:\n" - // scratch = __rseq_abi.cpu_id; - "movzwl (%[rseq_abi], %[rseq_cpu_offset]), %%r10d\n" - // scratch = slabs + scratch - "shlq %b[shift], %%r10\n" - "add %[slabs], %%r10\n" - // r11 = slabs->current; - "movzwq (%%r10, %[cl], 8), %%r11\n" - // if (ABSL_PREDICT_FALSE(r11 >= slabs->end)) { goto overflow; } - "cmp 6(%%r10, %[cl], 8), %%r11w\n" -#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO + // current = slabs->current; + "movzwq (%[scratch], %[size_class], 4), %[current]\n" + // if (ABSL_PREDICT_FALSE(current >= slabs->end)) { goto overflow_label; } + "cmp 2(%[scratch], %[size_class], 4), %w[current]\n" +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT "jae %l[overflow_label]\n" #else "jae 5f\n" // Important! code below this must not affect any flags (i.e.: ccae) // If so, the above code needs to explicitly set a ccae return value. 
#endif - "mov %[item], (%%r10, %%r11, 8)\n" - "lea 1(%%r11), %%r11\n" - "mov %%r11w, (%%r10, %[cl], 8)\n" + "mov %[item], (%[scratch], %[current], 8)\n" + "lea 1(%[current]), %[current]\n" + "mov %w[current], (%[scratch], %[size_class], 4)\n" // Commit "5:\n" : -#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO - [overflow] "=@ccae"(overflow) +#if !TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + [overflow] "=@ccae"(overflow), #endif - : [rseq_abi] "r"(&__rseq_abi), - [rseq_cs_offset] "n"(offsetof(kernel_rseq, rseq_cs)), - [rseq_cpu_offset] "r"(virtual_cpu_id_offset), - [rseq_sig] "in"(TCMALLOC_PERCPU_RSEQ_SIGNATURE), [shift] "c"(shift), - [slabs] "r"(slabs), [cl] "r"(cl), [item] "r"(item) - : "cc", "memory", "r10", "r11" -#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO + [scratch] "=&r"(scratch), [current] "=&r"(current) + : TCMALLOC_RSEQ_INPUTS, [size_class] "r"(size_class), [item] "r"(item) + : "cc", "memory" +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT : overflow_label #endif ); -#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO +#if !TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT if (ABSL_PREDICT_FALSE(overflow)) { goto overflow_label; } #endif - return 0; + // Current now points to the slot we are going to push to next. + PrefetchSlabMemory(scratch + current * sizeof(void*)); + return true; overflow_label: - // As of 3/2020, LLVM's asm goto (even with output constraints) only provides - // values for the fallthrough path. The values on the taken branches are - // undefined. - int cpu = VirtualRseqCpuId(virtual_cpu_id_offset); - return f(cpu, cl, item); + return false; } #endif // defined(__x86_64__) -#if defined(__aarch64__) - -template -static inline ABSL_ATTRIBUTE_ALWAYS_INLINE int TcmallocSlab_Internal_Push( - typename TcmallocSlab::Slabs* slabs, size_t cl, void* item, - const size_t shift, OverflowHandler f, const size_t virtual_cpu_id_offset) { - void* region_start; - uint64_t cpu_id; - void* end_ptr; - uintptr_t current; - uintptr_t end; - // Multiply cl by the bytesize of each header - size_t cl_lsl3 = cl * 8; -#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ && defined(__aarch64__) +static inline ABSL_ATTRIBUTE_ALWAYS_INLINE bool TcmallocSlab_Internal_Push( + size_t size_class, void* item) { + uintptr_t region_start, scratch, end_ptr, end; +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT asm goto( #else bool overflow; asm volatile( #endif - // TODO(b/141629158): __rseq_cs only needs to be writeable to allow for - // relocations, but could be read-only for non-PIE builds. 
- ".pushsection __rseq_cs, \"aw?\"\n" - ".balign 32\n" - ".local __rseq_cs_TcmallocSlab_Internal_Push_%=\n" - ".type __rseq_cs_TcmallocSlab_Internal_Push_%=,@object\n" - ".size __rseq_cs_TcmallocSlab_Internal_Push_%=,32\n" - "__rseq_cs_TcmallocSlab_Internal_Push_%=:\n" - ".long 0x0\n" - ".long 0x0\n" - ".quad 4f\n" - ".quad 5f - 4f\n" - ".quad 2f\n" - ".popsection\n" -#if !defined(__clang_major__) || __clang_major__ >= 9 - ".reloc 0, R_AARCH64_NONE, 1f\n" + TCMALLOC_RSEQ_PROLOGUE(TcmallocSlab_Internal_Push) + // region_start = tcmalloc_slabs; + "ldr %[region_start], %[rseq_slabs_addr]\n" + // if (region_start & TCMALLOC_CACHED_SLABS_MASK) goto overflow_label; + // region_start &= ~TCMALLOC_CACHED_SLABS_MASK; +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + "tbz %[region_start], #%c[cached_slabs_bit], %l[overflow_label]\n" + "and %[region_start], %[region_start], #%c[cached_slabs_mask_neg]\n" +#else + "subs %[region_start], %[region_start], %[cached_slabs_mask]\n" + "b.ls 5f\n" #endif - ".pushsection __rseq_cs_ptr_array, \"aw?\"\n" - "1:\n" - ".balign 8;" - ".quad __rseq_cs_TcmallocSlab_Internal_Push_%=\n" - // Force this section to be retained. It is for debugging, but is - // otherwise not referenced. - ".popsection\n" - ".pushsection .text.unlikely, \"ax?\"\n" - ".long %c[rseq_sig]\n" - ".local TcmallocSlab_Internal_Push_trampoline_%=\n" - ".type TcmallocSlab_Internal_Push_trampoline_%=,@function\n" - "TcmallocSlab_Internal_Push_trampoline_%=:\n" - "2:\n" - "b 3f\n" - ".popsection\n" - // Prepare - // - // TODO(b/151503411): Pending widespread availability of LLVM's asm - // goto with output contraints - // (https://github.com/llvm/llvm-project/commit/23c2a5ce33f0), we can - // return the register allocations to the compiler rather than using - // explicit clobbers. Prior to this, blocks which use asm goto cannot - // also specify outputs. - "3:\n" - // Use current as scratch here to hold address of this function's - // critical section - "adrp %[current], __rseq_cs_TcmallocSlab_Internal_Push_%=\n" - "add %[current], %[current], " - ":lo12:__rseq_cs_TcmallocSlab_Internal_Push_%=\n" - "str %[current], [%[rseq_abi], %c[rseq_cs_offset]]\n" - // Start - "4:\n" - // cpu_id = __rseq_abi.cpu_id; - "ldr %w[cpu_id], [%[rseq_abi], %[rseq_cpu_offset]]\n" - // region_start = Start of cpu region - "lsl %[region_start], %[cpu_id], %[shift]\n" - "add %[region_start], %[region_start], %[slabs]\n" // end_ptr = &(slab_headers[0]->end) - "add %[end_ptr], %[region_start], #6\n" - // current = slab_headers[cl]->current (current index) - "ldrh %w[current], [%[region_start], %[cl_lsl3]]\n" - // end = slab_headers[cl]->end (end index) - "ldrh %w[end], [%[end_ptr], %[cl_lsl3]]\n" - // if (ABSL_PREDICT_FALSE(current >= end)) { goto overflow; } - "cmp %[end], %[current]\n" -#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO - "b.le %l[overflow_label]\n" + "add %[end_ptr], %[region_start], #2\n" + // scratch = slab_headers[size_class]->current (current index) + "ldrh %w[scratch], [%[region_start], %[size_class_lsl2]]\n" + // end = slab_headers[size_class]->end (end index) + "ldrh %w[end], [%[end_ptr], %[size_class_lsl2]]\n" + // if (ABSL_PREDICT_FALSE(end <= scratch)) { goto overflow_label; } + "cmp %[end], %[scratch]\n" +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + "b.ls %l[overflow_label]\n" #else - "b.le 5f\n" - // Important! code below this must not affect any flags (i.e.: ccae) - // If so, the above code needs to explicitly set a ccae return value. + "b.ls 5f\n" + // Important! 
code below this must not affect any flags (i.e.: ccls) + // If so, the above code needs to explicitly set a ccls return value. #endif - "str %[item], [%[region_start], %[current], LSL #3]\n" - "add %w[current], %w[current], #1\n" - "strh %w[current], [%[region_start], %[cl_lsl3]]\n" + "str %[item], [%[region_start], %[scratch], LSL #3]\n" + "add %w[scratch], %w[scratch], #1\n" + "strh %w[scratch], [%[region_start], %[size_class_lsl2]]\n" // Commit "5:\n" - : [end_ptr] "=&r"(end_ptr), [cpu_id] "=&r"(cpu_id), - [current] "=&r"(current), [end] "=&r"(end), + : [end_ptr] "=&r"(end_ptr), [scratch] "=&r"(scratch), [end] "=&r"(end), +#if !TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + [overflow] "=@ccls"(overflow), +#endif [region_start] "=&r"(region_start) - -#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO - , - [overflow] "=@ccae"(overflow) + : TCMALLOC_RSEQ_INPUTS, +#if !TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + [cached_slabs_mask] "r"(TCMALLOC_CACHED_SLABS_MASK), #endif - : [rseq_cpu_offset] "r"(virtual_cpu_id_offset), [slabs] "r"(slabs), - [cl_lsl3] "r"(cl_lsl3), [item] "r"(item), [rseq_abi] "r"(&__rseq_abi), - [shift] "r"(shift), - // Constants - [rseq_cs_offset] "n"(offsetof(kernel_rseq, rseq_cs)), - [rseq_sig] "in"(TCMALLOC_PERCPU_RSEQ_SIGNATURE) - : "cc", "memory" -#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO + [size_class_lsl2] "r"(size_class << 2), [item] "r"(item) + : TCMALLOC_RSEQ_CLOBBER, "memory" +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + , + "cc" : overflow_label #endif ); -#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO +#if !TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT if (ABSL_PREDICT_FALSE(overflow)) { goto overflow_label; } #endif - return 0; + return true; overflow_label: -#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT - // As of 3/2020, LLVM's asm goto (even with output constraints) only provides - // values for the fallthrough path. The values on the taken branches are - // undefined. - int cpu = VirtualRseqCpuId(virtual_cpu_id_offset); -#else - // With asm goto--without output constraints--the value of scratch is - // well-defined by the compiler and our implementation. As an optimization on - // this case, we can avoid looking up cpu_id again, by undoing the - // transformation of cpu_id to the value of scratch. - int cpu = cpu_id; -#endif - return f(cpu, cl, item); + return false; } #endif // defined (__aarch64__) template inline ABSL_ATTRIBUTE_ALWAYS_INLINE bool TcmallocSlab::Push( - size_t cl, void* item, OverflowHandler f) { - ASSERT(item != nullptr); -#if defined(__x86_64__) || defined(__aarch64__) - return TcmallocSlab_Internal_Push(slabs_, cl, item, shift_, f, - virtual_cpu_id_offset_) >= 0; + size_t size_class, void* item) { + TC_ASSERT_NE(size_class, 0); + TC_ASSERT_NE(item, nullptr); + TC_ASSERT_EQ(reinterpret_cast(item) & kBeginMark, 0); + // Speculatively annotate item as released to TSan. We may not succeed in + // pushing the item, but if we wait for the restartable sequence to succeed, + // it may become visible to another thread before we can trigger the + // annotation. 
+ TSANRelease(item); +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ + return TcmallocSlab_Internal_Push(size_class, item); #else - if (shift_ == TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT) { - return TcmallocSlab_Internal_Push_FixedShift(slabs_, cl, item, f) >= 0; - } else { - return TcmallocSlab_Internal_Push(slabs_, cl, item, shift_, f) >= 0; - } + return false; #endif } -#if defined(__x86_64__) +// PrefetchNextObject provides a common code path across architectures for +// generating a prefetch of the next object. +// +// It is in a distinct, always-lined method to make its cost more transparent +// when profiling with debug information. +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void PrefetchNextObject( + void* prefetch_target) { + // A note about prefetcht0 in Pop: While this prefetch may appear costly, + // trace analysis shows the target is frequently used (b/70294962). Stalling + // on a TLB miss at the prefetch site (which has no deps) and prefetching the + // line async is better than stalling at the use (which may have deps) to fill + // the TLB and the cache miss. + // + // See "Beyond malloc efficiency to fleet efficiency" + // (https://research.google/pubs/pub50370/), section 6.4 for additional + // details. + // + // TODO(b/214608320): Evaluate prefetch for write. + __builtin_prefetch(prefetch_target, 0, 3); +} + +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ && defined(__x86_64__) template -static inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* TcmallocSlab_Internal_Pop( - typename TcmallocSlab::Slabs* slabs, size_t cl, - UnderflowHandler f, const size_t shift, - const size_t virtual_cpu_id_offset) { +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* TcmallocSlab::Pop( + size_t size_class) { + TC_ASSERT_NE(size_class, 0); + void* next; void* result; - void* scratch; - uintptr_t current; -#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT - asm goto + uintptr_t scratch, current; + +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + asm goto( #else bool underflow; - asm + asm( #endif - ( - // TODO(b/141629158): __rseq_cs only needs to be writeable to allow - // for relocations, but could be read-only for non-PIE builds. - ".pushsection __rseq_cs, \"aw?\"\n" - ".balign 32\n" - ".local __rseq_cs_TcmallocSlab_Internal_Pop_%=\n" - ".type __rseq_cs_TcmallocSlab_Internal_Pop_%=,@object\n" - ".size __rseq_cs_TcmallocSlab_Internal_Pop_%=,32\n" - "__rseq_cs_TcmallocSlab_Internal_Pop_%=:\n" - ".long 0x0\n" - ".long 0x0\n" - ".quad 4f\n" - ".quad 5f - 4f\n" - ".quad 2f\n" - ".popsection\n" -#if !defined(__clang_major__) || __clang_major__ >= 9 - ".reloc 0, R_X86_64_NONE, 1f\n" + TCMALLOC_RSEQ_PROLOGUE(TcmallocSlab_Internal_Pop) + // scratch = tcmalloc_slabs; + "movq %[rseq_slabs_addr], %[scratch]\n" + // if (scratch & TCMALLOC_CACHED_SLABS_MASK) goto overflow_label; + // scratch &= ~TCMALLOC_CACHED_SLABS_MASK; + "btrq $%c[cached_slabs_bit], %[scratch]\n" +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + "jnc %l[underflow_path]\n" +#else + "cmc\n" + "jc 5f\n" #endif - ".pushsection __rseq_cs_ptr_array, \"aw?\"\n" - "1:\n" - ".balign 8;" - ".quad __rseq_cs_TcmallocSlab_Internal_Pop_%=\n" - // Force this section to be retained. It is for debugging, but is - // otherwise not referenced. 
- ".popsection\n" - ".pushsection .text.unlikely, \"ax?\"\n" - ".byte 0x0f, 0x1f, 0x05\n" - ".long %c[rseq_sig]\n" - ".local TcmallocSlab_Internal_Pop_trampoline_%=\n" - ".type TcmallocSlab_Internal_Pop_trampoline_%=,@function\n" - "TcmallocSlab_Internal_Pop_trampoline_%=:\n" - "2:\n" - "jmp 3f\n" - ".size TcmallocSlab_Internal_Pop_trampoline_%=, . - " - "TcmallocSlab_Internal_Pop_trampoline_%=;\n" - ".popsection\n" - // Prepare - "3:\n" - "lea __rseq_cs_TcmallocSlab_Internal_Pop_%=(%%rip), %[scratch];\n" - "mov %[scratch], %c[rseq_cs_offset](%[rseq_abi])\n" - // Start - "4:\n" - // scratch = __rseq_abi.cpu_id; - "movzwl (%[rseq_abi], %[rseq_cpu_offset]), %k[scratch]\n" - // scratch = slabs + scratch - "shlq %b[shift], %[scratch]\n" - "add %[slabs], %[scratch]\n" - // current = scratch->header[cl].current; - "movzwq (%[scratch], %[cl], 8), %[current]\n" - // if (ABSL_PREDICT_FALSE(scratch->header[cl].begin > current)) - "cmp 4(%[scratch], %[cl], 8), %w[current]\n" -#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT - "jbe %l[underflow_path]\n" + // current = scratch->header[size_class].current; + "movzwq (%[scratch], %[size_class], 4), %[current]\n" + "movq -8(%[scratch], %[current], 8), %[result]\n" +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + "testb $%c[begin_mark_mask], %b[result]\n" + "jnz %l[underflow_path]\n" #else - "jbe 5f\n" - // Important! code below this must not affect any flags (i.e.: ccbe) - // If so, the above code needs to explicitly set a ccbe return value. + "btq $%c[begin_mark_bit], %[result]\n" + "jc 5f\n" + // Important! code below this must not affect any flags (i.e.: ccc) + // If so, the above code needs to explicitly set a ccc return value. #endif - "mov -16(%[scratch], %[current], 8), %[result]\n" - // A note about prefetcht0 in Pop: While this prefetch may appear - // costly, trace analysis shows the target is frequently used - // (b/70294962). Stalling on a TLB miss at the prefetch site (which - // has no deps) and prefetching the line async is better than stalling - // at the use (which may have deps) to fill the TLB and the cache - // miss. 
- "prefetcht0 (%[result])\n" - "movq -8(%[scratch], %[current], 8), %[result]\n" - "lea -1(%[current]), %[current]\n" - "mov %w[current], (%[scratch], %[cl], 8)\n" - // Commit - "5:\n" - : [result] "=&r"(result), -#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT - [underflow] "=@ccbe"(underflow), + "movq -16(%[scratch], %[current], 8), %[next]\n" + "lea -1(%[current]), %[current]\n" + "movw %w[current], (%[scratch], %[size_class], 4)\n" + // Commit + "5:\n" + : [result] "=&r"(result), +#if !TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + [underflow] "=@ccc"(underflow), #endif - [scratch] "=&r"(scratch), [current] "=&r"(current) - : [rseq_abi] "r"(&__rseq_abi), - [rseq_cs_offset] "n"(offsetof(kernel_rseq, rseq_cs)), - [rseq_cpu_offset] "r"(virtual_cpu_id_offset), - [rseq_sig] "n"(TCMALLOC_PERCPU_RSEQ_SIGNATURE), [shift] "c"(shift), - [slabs] "r"(slabs), [cl] "r"(cl) - : "cc", "memory" -#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT - : underflow_path + [scratch] "=&r"(scratch), [current] "=&r"(current), [next] "=&r"(next) + : TCMALLOC_RSEQ_INPUTS, +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + [begin_mark_mask] "n"(kBeginMark), +#else + [begin_mark_bit] "n"(absl::countr_zero(kBeginMark)), +#endif + [size_class] "r"(size_class) + : "cc", "memory" +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + : underflow_path #endif - ); -#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + ); +#if !TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT if (ABSL_PREDICT_FALSE(underflow)) { goto underflow_path; } #endif - - return result; + TC_ASSERT(next); + TC_ASSERT(result); + TSANAcquire(result); + + // The next pop will be from current-1, but because we prefetch the previous + // element we've already just read that, so prefetch current-2. + PrefetchSlabMemory(scratch + (current - 2) * sizeof(void*)); + PrefetchNextObject(next); + return AssumeNotNull(result); underflow_path: -#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT - // As of 3/2020, LLVM's asm goto (even with output constraints) only provides - // values for the fallthrough path. The values on the taken branches are - // undefined. - int cpu = VirtualRseqCpuId(virtual_cpu_id_offset); -#else - // With asm goto--without output constraints--the value of scratch is - // well-defined by the compiler and our implementation. As an optimization on - // this case, we can avoid looking up cpu_id again, by undoing the - // transformation of cpu_id to the value of scratch. 
- int cpu = - (reinterpret_cast(scratch) - reinterpret_cast(slabs)) >> - shift; -#endif - return f(cpu, cl); + return nullptr; } #endif // defined(__x86_64__) -#if defined(__aarch64__) +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ && defined(__aarch64__) template -static inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* TcmallocSlab_Internal_Pop( - typename TcmallocSlab::Slabs* slabs, size_t cl, - UnderflowHandler f, const size_t shift, - const size_t virtual_cpu_id_offset) { +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* TcmallocSlab::Pop( + size_t size_class) { + TC_ASSERT_NE(size_class, 0); void* result; void* region_start; - uint64_t cpu_id; - void* begin_ptr; - uintptr_t current; - uintptr_t new_current; - uintptr_t begin; - // Multiply cl by the bytesize of each header - size_t cl_lsl3 = cl * 8; -#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT - asm goto + void* prefetch; + uintptr_t scratch; + uintptr_t previous; +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + asm goto( #else bool underflow; - asm + asm( #endif - ( - // TODO(b/141629158): __rseq_cs only needs to be writeable to allow - // for relocations, but could be read-only for non-PIE builds. - ".pushsection __rseq_cs, \"aw?\"\n" - ".balign 32\n" - ".local __rseq_cs_TcmallocSlab_Internal_Pop_%=\n" - ".type __rseq_cs_TcmallocSlab_Internal_Pop_%=,@object\n" - ".size __rseq_cs_TcmallocSlab_Internal_Pop_%=,32\n" - "__rseq_cs_TcmallocSlab_Internal_Pop_%=:\n" - ".long 0x0\n" - ".long 0x0\n" - ".quad 4f\n" - ".quad 5f - 4f\n" - ".quad 2f\n" - ".popsection\n" -#if !defined(__clang_major__) || __clang_major__ >= 9 - ".reloc 0, R_AARCH64_NONE, 1f\n" + TCMALLOC_RSEQ_PROLOGUE(TcmallocSlab_Internal_Pop) + // region_start = tcmalloc_slabs; + "ldr %[region_start], %[rseq_slabs_addr]\n" + // if (region_start & TCMALLOC_CACHED_SLABS_MASK) goto overflow_label; + // region_start &= ~TCMALLOC_CACHED_SLABS_MASK; +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + "tbz %[region_start], #%c[cached_slabs_bit], %l[underflow_path]\n" +#else + "tst %[region_start], %[cached_slabs_mask]\n" + "b.eq 5f\n" #endif - ".pushsection __rseq_cs_ptr_array, \"aw?\"\n" - "1:\n" - ".balign 8;" - ".quad __rseq_cs_TcmallocSlab_Internal_Pop_%=\n" - // Force this section to be retained. It is for debugging, but is - // otherwise not referenced. 
- ".popsection\n" - ".pushsection .text.unlikely, \"ax?\"\n" - ".long %c[rseq_sig]\n" - ".local TcmallocSlab_Internal_Pop_trampoline_%=\n" - ".type TcmallocSlab_Internal_Pop_trampoline_%=,@function\n" - "TcmallocSlab_Internal_Pop_trampoline_%=:\n" - "2:\n" - "b 3f\n" - ".popsection\n" - // Prepare - "3:\n" - // Use current as scratch here to hold address of this function's - // critical section - "adrp %[current], __rseq_cs_TcmallocSlab_Internal_Pop_%=\n" - "add %[current], %[current], " - ":lo12:__rseq_cs_TcmallocSlab_Internal_Pop_%=\n" - "str %[current], [%[rseq_abi], %c[rseq_cs_offset]]\n" - // Start - "4:\n" - // cpu_id = __rseq_abi.cpu_id; - "ldr %w[cpu_id], [%[rseq_abi], %[rseq_cpu_offset]]\n" - // region_start = Start of cpu region - "lsl %[region_start], %[cpu_id], %[shift]\n" - "add %[region_start], %[region_start], %[slabs]\n" - // begin_ptr = &(slab_headers[0]->begin) - "add %[begin_ptr], %[region_start], #4\n" - // current = slab_headers[cl]->current (current index) - "ldrh %w[current], [%[region_start], %[cl_lsl3]]\n" - // begin = slab_headers[cl]->begin (begin index) - "ldrh %w[begin], [%[begin_ptr], %[cl_lsl3]]\n" - // if (ABSL_PREDICT_FALSE(begin >= current)) { goto overflow; } - "cmp %w[begin], %w[current]\n" - "sub %w[new_current], %w[current], #1\n" -#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT - "b.ge %l[underflow_path]\n" + "and %[region_start], %[region_start], #%c[cached_slabs_mask_neg]\n" + // scratch = slab_headers[size_class]->current (current index) + "ldrh %w[scratch], [%[region_start], %[size_class_lsl2]]\n" + // scratch-- + "sub %w[scratch], %w[scratch], #1\n" + "ldr %[result], [%[region_start], %[scratch], LSL #3]\n" +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + "tbnz %[result], #%c[begin_mark_bit], %l[underflow_path]\n" #else - "b.ge 5f\n" - // Important! code below this must not affect any flags (i.e.: ccbe) - // If so, the above code needs to explicitly set a ccbe return value. + // Temporary use %[previous] to store %[result] with inverted mark bit. + "eor %[previous], %[result], #%c[begin_mark_mask]\n" + "tst %[previous], #%c[begin_mark_mask]\n" + "b.eq 5f\n" + // Important! code below this must not affect any flags (i.e.: cceq) + // If so, the above code needs to explicitly set a cceq return value. 
+#endif + "sub %w[previous], %w[scratch], #1\n" + "ldr %[prefetch], [%[region_start], %[previous], LSL #3]\n" + "strh %w[scratch], [%[region_start], %[size_class_lsl2]]\n" + // Commit + "5:\n" + : +#if !TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + [underflow] "=@cceq"(underflow), #endif - // current-- - "ldr %[result], [%[region_start], %[new_current], LSL #3]\n" - "strh %w[new_current], [%[region_start], %[cl_lsl3]]\n" - // Commit - "5:\n" - : -#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT - [underflow] "=@ccbe"(underflow), + [result] "=&r"(result), [prefetch] "=&r"(prefetch), + // Temps + [region_start] "=&r"(region_start), [previous] "=&r"(previous), + [scratch] "=&r"(scratch) + // Real inputs + : TCMALLOC_RSEQ_INPUTS, +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + [begin_mark_bit] "n"(absl::countr_zero(kBeginMark)), +#else + [cached_slabs_mask] "r"(TCMALLOC_CACHED_SLABS_MASK), #endif - [result] "=&r"(result), - // Temps - [cpu_id] "=&r"(cpu_id), [region_start] "=&r"(region_start), - [begin] "=&r"(begin), [current] "=&r"(current), - [new_current] "=&r"(new_current), [begin_ptr] "=&r"(begin_ptr) - // Real inputs - : [rseq_cpu_offset] "r"(virtual_cpu_id_offset), [slabs] "r"(slabs), - [cl_lsl3] "r"(cl_lsl3), [rseq_abi] "r"(&__rseq_abi), - [shift] "r"(shift), - // constants - [rseq_cs_offset] "in"(offsetof(kernel_rseq, rseq_cs)), - [rseq_sig] "in"(TCMALLOC_PERCPU_RSEQ_SIGNATURE) - : "cc", "memory" -#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT - : underflow_path + [begin_mark_mask] "n"(kBeginMark), [size_class] "r"(size_class), + [size_class_lsl2] "r"(size_class << 2) + : TCMALLOC_RSEQ_CLOBBER, "memory" +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + , + "cc" + : underflow_path #endif - ); -#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + ); +#if !TCMALLOC_INTERNAL_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT if (ABSL_PREDICT_FALSE(underflow)) { goto underflow_path; } #endif - - return result; + TSANAcquire(result); + PrefetchNextObject(prefetch); + return AssumeNotNull(result); underflow_path: -#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT - // As of 3/2020, LLVM's asm goto (even with output constraints) only provides - // values for the fallthrough path. The values on the taken branches are - // undefined. - int cpu = VirtualRseqCpuId(virtual_cpu_id_offset); -#else - // With asm goto--without output constraints--the value of scratch is - // well-defined by the compiler and our implementation. As an optimization on - // this case, we can avoid looking up cpu_id again, by undoing the - // transformation of cpu_id to the value of scratch. 
- int cpu = cpu_id; -#endif - return f(cpu, cl); + return nullptr; } #endif // defined(__aarch64__) +#if !TCMALLOC_INTERNAL_PERCPU_USE_RSEQ template inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* TcmallocSlab::Pop( - size_t cl, UnderflowHandler f) { -#if defined(__x86_64__) || defined(__aarch64__) - return TcmallocSlab_Internal_Pop(slabs_, cl, f, shift_, - virtual_cpu_id_offset_); -#else - if (shift_ == TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT) { - return TcmallocSlab_Internal_Pop_FixedShift(slabs_, cl, f); - } else { - return TcmallocSlab_Internal_Pop(slabs_, cl, f, shift_); - } -#endif + size_t size_class) { + return nullptr; } +#endif -static inline void* NoopUnderflow(int cpu, size_t cl) { return nullptr; } - -static inline int NoopOverflow(int cpu, size_t cl, void* item) { return -1; } +template +inline size_t TcmallocSlab::Grow( + int cpu, size_t size_class, size_t len, + absl::FunctionRef max_capacity) { + const auto [slabs, shift] = GetSlabsAndShift(std::memory_order_relaxed); + const size_t max_cap = max_capacity(ToUint8(shift)); + auto* hdrp = GetHeader(slabs, shift, cpu, size_class); + Header hdr = LoadHeader(hdrp); + uint16_t begin = begins_[size_class].load(std::memory_order_relaxed); + ssize_t have = static_cast(max_cap - (hdr.end - begin)); + if (have <= 0) { + return 0; + } + uint16_t n = std::min(len, have); + hdr.end += n; + return StoreCurrentCpu(hdrp, hdr) ? n : 0; +} template -inline size_t TcmallocSlab::PushBatch(size_t cl, void** batch, - size_t len) { - ASSERT(len != 0); - if (shift_ == TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT) { -#if TCMALLOC_PERCPU_USE_RSEQ - // TODO(b/159923407): TcmallocSlab_Internal_PushBatch_FixedShift needs to be - // refactored to take a 5th parameter (virtual_cpu_id_offset) to avoid - // needing to dispatch on two separate versions of the same function with - // only minor differences between them. - switch (virtual_cpu_id_offset_) { - case offsetof(kernel_rseq, cpu_id): - return TcmallocSlab_Internal_PushBatch_FixedShift(slabs_, cl, batch, - len); -#ifdef __x86_64__ - case offsetof(kernel_rseq, vcpu_id): - return TcmallocSlab_Internal_PushBatch_FixedShift_VCPU(slabs_, cl, - batch, len); -#endif // __x86_64__ - default: - __builtin_unreachable(); - } -#else // !TCMALLOC_PERCPU_USE_RSEQ - __builtin_unreachable(); -#endif // !TCMALLOC_PERCPU_USE_RSEQ - } else { - size_t n = 0; - // Push items until either all done or a push fails - while (n < len && Push(cl, batch[len - 1 - n], NoopOverflow)) { - n++; - } - return n; +inline std::pair TcmallocSlab::CacheCpuSlab() { +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ + if (ABSL_PREDICT_FALSE((tcmalloc_slabs & TCMALLOC_CACHED_SLABS_MASK) == 0)) { + return CacheCpuSlabSlow(); } + // We already have slab offset cached, so the slab is indeed full/empty. +#endif + return {VirtualCpu::GetAfterSynchronize(), false}; } template -inline size_t TcmallocSlab::PopBatch(size_t cl, void** batch, - size_t len) { - ASSERT(len != 0); - size_t n = 0; - if (shift_ == TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT) { -#if TCMALLOC_PERCPU_USE_RSEQ - // TODO(b/159923407): TcmallocSlab_Internal_PopBatch_FixedShift needs to be - // refactored to take a 5th parameter (virtual_cpu_id_offset) to avoid - // needing to dispatch on two separate versions of the same function with - // only minor differences between them. 
- switch (virtual_cpu_id_offset_) { - case offsetof(kernel_rseq, cpu_id): - n = TcmallocSlab_Internal_PopBatch_FixedShift(slabs_, cl, batch, len); - break; -#ifdef __x86_64__ - case offsetof(kernel_rseq, vcpu_id): - n = TcmallocSlab_Internal_PopBatch_FixedShift_VCPU(slabs_, cl, batch, - len); - break; -#endif // __x86_64__ - default: - __builtin_unreachable(); - } +inline void TcmallocSlab::UncacheCpuSlab() { +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ + tcmalloc_slabs = 0; +#endif +} - // PopBatch is implemented in assembly, msan does not know that the returned - // batch is initialized. - ANNOTATE_MEMORY_IS_INITIALIZED(batch, n * sizeof(batch[0])); -#else // !TCMALLOC_PERCPU_USE_RSEQ - __builtin_unreachable(); -#endif // !TCMALLOC_PERCPU_USE_RSEQ - } else { - // Pop items until either all done or a pop fails - while (n < len && (batch[n] = Pop(cl, NoopUnderflow))) { - n++; - } - } +template +inline size_t TcmallocSlab::PushBatch(size_t size_class, + void** batch, size_t len) { + TC_ASSERT_NE(size_class, 0); + TC_ASSERT_NE(len, 0); + // We need to annotate batch[...] as released before running the restartable + // sequence, since those objects become visible to other threads the moment + // the restartable sequence is complete and before the annotation potentially + // runs. + // + // This oversynchronizes slightly, since PushBatch may succeed only partially. + TSANReleaseBatch(batch, len); + return TcmallocSlab_Internal_PushBatch(size_class, batch, len); +} + +template +inline size_t TcmallocSlab::PopBatch(size_t size_class, + void** batch, size_t len) { + TC_ASSERT_NE(size_class, 0); + TC_ASSERT_NE(len, 0); + const size_t n = TcmallocSlab_Internal_PopBatch(size_class, batch, len, + &begins_[size_class]); + TC_ASSERT_LE(n, len); + + // PopBatch is implemented in assembly, msan does not know that the returned + // batch is initialized. + ANNOTATE_MEMORY_IS_INITIALIZED(batch, n * sizeof(batch[0])); + TSANAcquireBatch(batch, n); return n; } template -inline typename TcmallocSlab::Slabs* -TcmallocSlab::CpuMemoryStart(int cpu) const { - char* const bytes = reinterpret_cast(slabs_); - return reinterpret_cast(&bytes[cpu << shift_]); +inline void* TcmallocSlab::CpuMemoryStart(void* slabs, Shift shift, + int cpu) { + return &static_cast(slabs)[cpu << ToUint8(shift)]; } template -inline std::atomic* TcmallocSlab::GetHeader( - int cpu, size_t cl) const { - return &CpuMemoryStart(cpu)->header[cl]; +inline auto TcmallocSlab::GetHeader(void* slabs, Shift shift, + int cpu, size_t size_class) + -> AtomicHeader* { + TC_ASSERT_NE(size_class, 0); + return &static_cast( + CpuMemoryStart(slabs, shift, cpu))[size_class]; } template -inline typename TcmallocSlab::Header -TcmallocSlab::LoadHeader(std::atomic* hdrp) { +inline auto TcmallocSlab::LoadHeader(AtomicHeader* hdrp) -> Header { return absl::bit_cast
(hdrp->load(std::memory_order_relaxed)); } template -inline void TcmallocSlab::StoreHeader(std::atomic* hdrp, +inline void TcmallocSlab::StoreHeader(AtomicHeader* hdrp, Header hdr) { - hdrp->store(absl::bit_cast(hdr), std::memory_order_relaxed); + hdrp->store(absl::bit_cast(hdr), std::memory_order_relaxed); } template -inline int TcmallocSlab::CompareAndSwapHeader( - int cpu, std::atomic* hdrp, Header old, Header hdr, - const size_t virtual_cpu_id_offset) { -#if __SIZEOF_POINTER__ == 8 - const int64_t old_raw = absl::bit_cast(old); - const int64_t new_raw = absl::bit_cast(hdr); - return CompareAndSwapUnsafe(cpu, hdrp, static_cast(old_raw), - static_cast(new_raw), - virtual_cpu_id_offset); -#else - Crash(kCrash, __FILE__, __LINE__, "This architecture is not supported."); +void TcmallocSlab::Init( + absl::FunctionRef alloc, void* slabs, + absl::FunctionRef capacity, Shift shift) { + stopped_ = new (alloc(sizeof(stopped_[0]) * NumCPUs(), + std::align_val_t{ABSL_CACHELINE_SIZE})) + std::atomic[NumCPUs()]; + for (int cpu = NumCPUs() - 1; cpu >= 0; cpu--) { + stopped_[cpu].store(false, std::memory_order_relaxed); + } + begins_ = static_cast*>(alloc( + sizeof(begins_[0]) * NumClasses, std::align_val_t{ABSL_CACHELINE_SIZE})); + InitSlabs(slabs, shift, capacity); + +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ + // This is needed only for tests that create/destroy slabs, + // w/o this cpu_id_start may contain wrong offset for a new slab. + __rseq_abi.cpu_id_start = 0; #endif } template -inline bool TcmallocSlab::Header::IsLocked() const { - return begin == 0xffffu; +void TcmallocSlab::InitSlabs( + void* slabs, Shift shift, absl::FunctionRef capacity) { + slabs_and_shift_.store({slabs, shift}, std::memory_order_relaxed); + size_t consumed_bytes = + (NumClasses * sizeof(Header) + sizeof(void*) - 1) & ~(sizeof(void*) - 1); + bool prev_empty = false; + for (size_t size_class = 1; size_class < NumClasses; ++size_class) { + size_t cap = capacity(size_class); + TC_CHECK_EQ(static_cast(cap), cap); + // One extra element for prefetch/begin marker. + if (!prev_empty) { + consumed_bytes += sizeof(void*); + } + prev_empty = cap == 0; + begins_[size_class].store(consumed_bytes / sizeof(void*), + std::memory_order_relaxed); + consumed_bytes += cap * sizeof(void*); + if (consumed_bytes > (1 << ToUint8(shift))) { + TC_BUG("per-CPU memory exceeded, have %v, need %v, size_class %v", + 1 << ToUint8(shift), consumed_bytes, size_class); + } + } } template -inline void TcmallocSlab::Header::Lock() { - // Write 0xffff to begin and 0 to end. This blocks new Push'es and Pop's. - // Note: we write only 4 bytes. The first 4 bytes are left intact. - // See Drain method for details. tl;dr: C++ does not allow us to legally - // express this without undefined behavior. 
- std::atomic* p = - reinterpret_cast*>(&lock_update); - Header hdr; - hdr.begin = 0xffffu; - hdr.end = 0; - p->store(absl::bit_cast(hdr.lock_update), std::memory_order_relaxed); +void TcmallocSlab::InitCpu( + int cpu, absl::FunctionRef capacity) { + ScopedSlabCpuStop cpu_stop(*this, cpu); + const auto [slabs, shift] = GetSlabsAndShift(std::memory_order_relaxed); + InitCpuImpl(slabs, shift, cpu, capacity); } template -void TcmallocSlab::Init(void*(alloc)(size_t size), - size_t (*capacity)(size_t cl), bool lazy, - size_t shift) { -#ifdef __x86_64__ - if (UsingFlatVirtualCpus()) { - virtual_cpu_id_offset_ = offsetof(kernel_rseq, vcpu_id); - } -#endif // __x86_64__ - - shift_ = shift; - size_t mem_size = absl::base_internal::NumCPUs() * (1ul << shift); - void* backing = alloc(mem_size); - // MSan does not see writes in assembly. - ANNOTATE_MEMORY_IS_INITIALIZED(backing, mem_size); - if (!lazy) { - memset(backing, 0, mem_size); - } - slabs_ = static_cast(backing); - size_t bytes_used = 0; - for (int cpu = 0; cpu < absl::base_internal::NumCPUs(); ++cpu) { - bytes_used += sizeof(std::atomic) * NumClasses; - void** elems = CpuMemoryStart(cpu)->mem; - - for (size_t cl = 0; cl < NumClasses; ++cl) { - size_t cap = capacity(cl); - CHECK_CONDITION(static_cast(cap) == cap); - - if (cap == 0) { - continue; - } - - if (cap) { - if (!lazy) { - // In Pop() we prefetch the item a subsequent Pop() would return; this - // is slow if it's not a valid pointer. To avoid this problem when - // popping the last item, keep one fake item before the actual ones - // (that points, safely, to itself.) - *elems = elems; - elems++; - } - - // One extra element for prefetch - bytes_used += (cap + 1) * sizeof(void*); - } - - if (!lazy) { - // TODO(ckennelly): Consolidate this initialization logic with that in - // InitCPU. - size_t offset = elems - reinterpret_cast(CpuMemoryStart(cpu)); - CHECK_CONDITION(static_cast(offset) == offset); - - Header hdr; - hdr.current = offset; - hdr.begin = offset; - hdr.end = offset; - hdr.end_copy = offset; - - StoreHeader(GetHeader(cpu, cl), hdr); - } - - elems += cap; - CHECK_CONDITION(reinterpret_cast(elems) - - reinterpret_cast(CpuMemoryStart(cpu)) <= - (1 << shift_)); +void TcmallocSlab::InitCpuImpl( + void* slabs, Shift shift, int cpu, + absl::FunctionRef capacity) { + TC_CHECK(stopped_[cpu].load(std::memory_order_relaxed)); + TC_CHECK_LE((1 << ToUint8(shift)), (1 << 16) * sizeof(void*)); + + // Initialize prefetch target and compute the offsets for the + // boundaries of each size class' cache. + void* curr_slab = CpuMemoryStart(slabs, shift, cpu); + void** elems = reinterpret_cast( + (reinterpret_cast(GetHeader(slabs, shift, cpu, NumClasses)) + + sizeof(void*) - 1) & + ~(sizeof(void*) - 1)); + bool prev_empty = false; + for (size_t size_class = 1; size_class < NumClasses; ++size_class) { + size_t cap = capacity(size_class); + TC_CHECK_EQ(static_cast(cap), cap); + + // This item serves both as the marker of slab begin (Pop checks for low bit + // set to understand that it reached begin), and as prefetching stub + // (Pop prefetches the previous element and prefetching an invalid pointer + // is slow, this is a valid pointer for prefetching). 
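The begin-marker comment above is the key to how the new Pop detects "region is empty" without a separate begin field: each size class's region is preceded by a marker slot (shared when the preceding class is empty) that holds an aligned, self-referencing pointer with its low bit set, so the same word is both a safe prefetch target and an end-of-region sentinel; the store that follows in this hunk writes exactly that marker. Below is a minimal standalone sketch of the layout arithmetic and the marker encoding, mirroring the InitSlabs/InitCpuImpl accounting above. The names and the header size (ComputeBegins, kSketchHeaderSize, kSketchBeginMark) are illustrative assumptions, not the real types.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Illustrative stand-ins; the real Header/kBeginMark differ in detail.
constexpr size_t kSketchHeaderSize = 8;
constexpr uintptr_t kSketchBeginMark = 1;

// Slot index (in void* units) where each size class begins, following the
// same accounting as InitSlabs above: header array first (rounded up to a
// pointer boundary), then a marker/prefetch slot before each class unless
// the previous class was empty, then `cap` slots for the class itself.
std::vector<size_t> ComputeBegins(const std::vector<size_t>& capacities) {
  const size_t num_classes = capacities.size();
  size_t consumed_bytes =
      (num_classes * kSketchHeaderSize + sizeof(void*) - 1) &
      ~(sizeof(void*) - 1);
  std::vector<size_t> begins(num_classes, 0);  // class 0 is unused
  bool prev_empty = false;
  for (size_t size_class = 1; size_class < num_classes; ++size_class) {
    if (!prev_empty) consumed_bytes += sizeof(void*);
    prev_empty = capacities[size_class] == 0;
    begins[size_class] = consumed_bytes / sizeof(void*);
    consumed_bytes += capacities[size_class] * sizeof(void*);
  }
  return begins;
}

int main() {
  // Example: classes 1 and 2 hold up to 4 objects each, class 3 is empty.
  const std::vector<size_t> begins = ComputeBegins({0, 4, 4, 0});
  assert(begins[1] + 4 + 1 == begins[2]);  // one marker slot between classes

  // The marker is an aligned, self-referencing pointer with the low bit set,
  // so Pop can prefetch it safely and still detect "reached slab begin".
  void* slot[1];
  slot[0] = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(slot) |
                                    kSketchBeginMark);
  assert(reinterpret_cast<uintptr_t>(slot[0]) & kSketchBeginMark);
  return 0;
}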
+ if (!prev_empty) { + *elems = reinterpret_cast(reinterpret_cast(elems) | + kBeginMark); + ++elems; + } + prev_empty = cap == 0; + + Header hdr = {}; + hdr.current = elems - reinterpret_cast(curr_slab); + hdr.end = hdr.current; + StoreHeader(GetHeader(slabs, shift, cpu, size_class), hdr); + + elems += cap; + const size_t bytes_used_on_curr_slab = + reinterpret_cast(elems) - reinterpret_cast(curr_slab); + if (bytes_used_on_curr_slab > (1 << ToUint8(shift))) { + TC_BUG("per-CPU memory exceeded, have %v, need %v", 1 << ToUint8(shift), + bytes_used_on_curr_slab); } - } - // Check for less than 90% usage of the reserved memory - if (bytes_used * 10 < 9 * mem_size) { - Log(kLog, __FILE__, __LINE__, "Bytes used per cpu of available", bytes_used, - mem_size); } } +#if TCMALLOC_INTERNAL_PERCPU_USE_RSEQ template -void TcmallocSlab::InitCPU(int cpu, size_t (*capacity)(size_t cl)) { - const size_t virtual_cpu_id_offset = virtual_cpu_id_offset_; - - // TODO(ckennelly): Consolidate this logic with Drain. - // Phase 1: verify no header is locked - for (size_t cl = 0; cl < NumClasses; ++cl) { - Header hdr = LoadHeader(GetHeader(cpu, cl)); - CHECK_CONDITION(!hdr.IsLocked()); - } - - // Phase 2: Stop concurrent mutations. Locking ensures that there exists no - // value of current such that begin < current. - for (bool done = false; !done;) { - for (size_t cl = 0; cl < NumClasses; ++cl) { - // Note: this reinterpret_cast and write in Lock lead to undefined - // behavior, because the actual object type is std::atomic. But - // C++ does not allow to legally express what we need here: atomic writes - // of different sizes. - reinterpret_cast(GetHeader(cpu, cl))->Lock(); +std::pair TcmallocSlab::CacheCpuSlabSlow() { + TC_ASSERT(!(tcmalloc_slabs & TCMALLOC_CACHED_SLABS_MASK)); + int vcpu = -1; + for (;;) { + tcmalloc_slabs = TCMALLOC_CACHED_SLABS_MASK; + CompilerBarrier(); + vcpu = VirtualCpu::Synchronize(); + auto slabs_and_shift = slabs_and_shift_.load(std::memory_order_relaxed); + const auto [slabs, shift] = slabs_and_shift.Get(); + void* start = CpuMemoryStart(slabs, shift, vcpu); + uintptr_t new_val = + reinterpret_cast(start) | TCMALLOC_CACHED_SLABS_MASK; + if (!StoreCurrentCpu(&tcmalloc_slabs, new_val)) { + continue; + } + // If ResizeSlabs is concurrently modifying slabs_and_shift_, we may + // cache the offset with the shift that won't match slabs pointer used + // by Push/Pop operations later. To avoid this, we check stopped_ after + // the calculation. Coupled with setting of stopped_ and a Fence + // in ResizeSlabs, this prevents possibility of mismatching shift/slabs. + CompilerBarrier(); + if (stopped_[vcpu].load(std::memory_order_acquire)) { + tcmalloc_slabs = 0; + return {-1, true}; } - FenceCpu(cpu, virtual_cpu_id_offset); - done = true; - for (size_t cl = 0; cl < NumClasses; ++cl) { - Header hdr = LoadHeader(GetHeader(cpu, cl)); - if (!hdr.IsLocked()) { - // Header was overwritten by Grow/Shrink. Retry. - done = false; - break; - } + // Ensure that we've cached the current slabs pointer. + // Without this check the following bad interleaving is possible. + // Thread 1 executes ResizeSlabs, stops all CPUs and executes Fence. + // Now thread 2 executes CacheCpuSlabSlow, reads old slabs and caches + // the pointer. Now thread 1 stores the new slabs pointer and resets + // stopped_[cpu]. Now thread 2 resumes, checks that stopped_[cpu] is not + // set and proceeds with using the old slabs pointer. 
Since we use + // acquire/release on stopped_[cpu], if this thread observes reset + // stopped_[cpu], it's also guaranteed to observe the new value of slabs + // and retry. In the very unlikely case that slabs are resized twice in + // between (to new slabs and then back to old slabs), the check below will + // not lead to a retry, but changing slabs back also implies another Fence, + // so this thread won't have old slabs cached already (Fence invalidates + // the cached pointer). + if (slabs_and_shift != slabs_and_shift_.load(std::memory_order_relaxed)) { + continue; } + return {vcpu, true}; } +} +#endif - // Phase 3: Initialize prefetch target and compute the offsets for the - // boundaries of each size class' cache. - void** elems = CpuMemoryStart(cpu)->mem; - uint16_t begin[NumClasses]; - for (size_t cl = 0; cl < NumClasses; ++cl) { - size_t cap = capacity(cl); - CHECK_CONDITION(static_cast(cap) == cap); - - if (cap) { - // In Pop() we prefetch the item a subsequent Pop() would return; this is - // slow if it's not a valid pointer. To avoid this problem when popping - // the last item, keep one fake item before the actual ones (that points, - // safely, to itself.) - *elems = elems; - elems++; +template +void TcmallocSlab::DrainCpu(void* slabs, Shift shift, int cpu, + DrainHandler drain_handler) { + TC_ASSERT(stopped_[cpu].load(std::memory_order_relaxed)); + for (size_t size_class = 1; size_class < NumClasses; ++size_class) { + uint16_t begin = begins_[size_class].load(std::memory_order_relaxed); + auto* hdrp = GetHeader(slabs, shift, cpu, size_class); + Header hdr = LoadHeader(hdrp); + if (hdr.current == 0) { + continue; } + const size_t size = hdr.current - begin; + const size_t cap = hdr.end - begin; - size_t offset = elems - reinterpret_cast(CpuMemoryStart(cpu)); - CHECK_CONDITION(static_cast(offset) == offset); - begin[cl] = offset; - - elems += cap; - CHECK_CONDITION(reinterpret_cast(elems) - - reinterpret_cast(CpuMemoryStart(cpu)) <= - (1 << shift_)); + void** batch = + reinterpret_cast(CpuMemoryStart(slabs, shift, cpu)) + begin; + TSANAcquireBatch(batch, size); + drain_handler(cpu, size_class, batch, size, cap); + hdr.current = begin; + hdr.end = begin; + StoreHeader(hdrp, hdr); } +} - // Phase 4: Store current. No restartable sequence will proceed - // (successfully) as !(begin < current) for all size classes. - for (size_t cl = 0; cl < NumClasses; ++cl) { - std::atomic* hdrp = GetHeader(cpu, cl); +template +void TcmallocSlab::DrainOldSlabs( + void* slabs, Shift shift, int cpu, + const std::array& old_begins, + DrainHandler drain_handler) { + for (size_t size_class = 1; size_class < NumClasses; ++size_class) { + uint16_t begin = old_begins[size_class]; + auto* hdrp = GetHeader(slabs, shift, cpu, size_class); Header hdr = LoadHeader(hdrp); - hdr.current = begin[cl]; + if (hdr.current == 0) { + continue; + } + const size_t size = hdr.current - begin; + const size_t cap = hdr.end - begin; + + void** batch = + reinterpret_cast(CpuMemoryStart(slabs, shift, cpu)) + begin; + TSANAcquireBatch(batch, size); + drain_handler(cpu, size_class, batch, size, cap); + hdr.current = begin; + hdr.end = begin; StoreHeader(hdrp, hdr); } - FenceCpu(cpu, virtual_cpu_id_offset); - - // Phase 5: Allow access to this cache. 
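DrainCpu and DrainOldSlabs above hand the drain handler a contiguous batch starting at the class's begin offset, reporting size (live objects) and cap (reserved slots) relative to that offset, and then reset both indices so the region is empty with no capacity. A small self-contained sketch of that accounting, assuming a toy header with 16-bit current/end indices (SketchHeader and DrainOne are illustrative names only):

#include <cassert>
#include <cstdint>

// Toy header mirroring just the two indices the drain loops manipulate.
struct SketchHeader {
  uint16_t current;  // one past the last used slot
  uint16_t end;      // one past the last reserved slot
};

struct DrainResult {
  uint16_t size;  // live objects handed to the drain handler
  uint16_t cap;   // capacity released back
};

// Same accounting as DrainCpu/DrainOldSlabs above: report size and capacity
// relative to the class's begin offset, then reset both indices to begin.
DrainResult DrainOne(SketchHeader& hdr, uint16_t begin) {
  DrainResult r{static_cast<uint16_t>(hdr.current - begin),
                static_cast<uint16_t>(hdr.end - begin)};
  hdr.current = begin;
  hdr.end = begin;
  return r;
}

int main() {
  SketchHeader hdr{/*current=*/7, /*end=*/9};
  DrainResult r = DrainOne(hdr, /*begin=*/5);
  assert(r.size == 2 && r.cap == 4);
  assert(hdr.current == 5 && hdr.end == 5);
  return 0;
}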
- for (size_t cl = 0; cl < NumClasses; ++cl) { - Header hdr; - hdr.current = begin[cl]; - hdr.begin = begin[cl]; - hdr.end = begin[cl]; - hdr.end_copy = begin[cl]; - StoreHeader(GetHeader(cpu, cl), hdr); - } } template -void TcmallocSlab::Destroy(void(free)(void*)) { - free(slabs_); - slabs_ = nullptr; -} +ResizeSlabsInfo TcmallocSlab::UpdateMaxCapacities( + void* new_slabs, absl::FunctionRef capacity, + absl::FunctionRef update_capacity, + absl::FunctionRef populated, DrainHandler drain_handler, + PerSizeClassMaxCapacity* new_max_capacity, int classes_to_resize) { + // Phase 1: Stop all CPUs and initialize any CPUs in the new slab that have + // already been populated in the old slab. + const auto [old_slabs, shift] = GetSlabsAndShift(std::memory_order_relaxed); + std::array old_begins; + for (int size_class = 1; size_class < NumClasses; ++size_class) { + old_begins[size_class] = + begins_[size_class].load(std::memory_order_relaxed); + } -template -size_t TcmallocSlab::ShrinkOtherCache(int cpu, size_t cl, - size_t len, void* ctx, - ShrinkHandler f) { - ASSERT(cpu >= 0); - ASSERT(cpu < absl::base_internal::NumCPUs()); - const size_t virtual_cpu_id_offset = virtual_cpu_id_offset_; - - // Phase 1: Collect begin as it will be overwritten by the lock. - std::atomic* hdrp = GetHeader(cpu, cl); - Header hdr = LoadHeader(hdrp); - CHECK_CONDITION(!hdr.IsLocked()); - const uint16_t begin = hdr.begin; - - // Phase 2: stop concurrent mutations. - for (bool done = false; !done;) { - reinterpret_cast(GetHeader(cpu, cl))->Lock(); - FenceCpu(cpu, virtual_cpu_id_offset); - done = true; - - hdr = LoadHeader(GetHeader(cpu, cl)); - if (!hdr.IsLocked()) { - // Header was overwritten by Grow/Shrink. Retry. - done = false; - } + const int num_cpus = NumCPUs(); + for (size_t cpu = 0; cpu < num_cpus; ++cpu) { + TC_CHECK(!stopped_[cpu].load(std::memory_order_relaxed)); + stopped_[cpu].store(true, std::memory_order_relaxed); } + FenceAllCpus(); - // Phase 3: If we do not have len number of items to shrink, we try - // to pop items from the list first to create enough capacity that can be - // shrunk. If we pop items, we also execute callbacks. - // - // We can't write all 4 fields at once with a single write, because Pop does - // several non-atomic loads of the fields. Consider that a concurrent Pop - // loads old current (still pointing somewhere in the middle of the region); - // then we update all fields with a single write; then Pop loads the updated - // begin which allows it to proceed; then it decrements current below begin. - // - // So we instead first just update current--our locked begin/end guarantee - // no Push/Pop will make progress. Once we Fence below, we know no Push/Pop - // is using the old current, and can safely update begin/end to be an empty - // slab. - - const uint16_t unused = hdr.end_copy - hdr.current; - if (unused < len) { - const uint16_t expected_pop = len - unused; - const uint16_t actual_pop = - std::min(expected_pop, hdr.current - begin); - void** batch = - reinterpret_cast(GetHeader(cpu, 0) + hdr.current - actual_pop); - f(ctx, cl, batch, actual_pop); - hdr.current -= actual_pop; - StoreHeader(hdrp, hdr); - FenceCpu(cpu, virtual_cpu_id_offset); + // Phase 2: Update max capacity of the size classes. + for (int i = 0; i < classes_to_resize; ++i) { + size_t size_class = new_max_capacity[i].size_class; + size_t cap = new_max_capacity[i].max_capacity; + update_capacity(size_class, cap); } - // Phase 4: Shrink the capacity. 
Use a copy of begin and end_copy to - // restore the header, shrink it, and return the length by which the - // region was shrunk. - hdr.begin = begin; - const uint16_t to_shrink = - std::min(len, hdr.end_copy - hdr.current); - hdr.end_copy -= to_shrink; - hdr.end = hdr.end_copy; - StoreHeader(hdrp, hdr); - return to_shrink; + // Phase 3: Initialize slabs. + for (size_t cpu = 0; cpu < num_cpus; ++cpu) { + if (!populated(cpu)) continue; + InitCpuImpl(new_slabs, shift, cpu, capacity); + } + InitSlabs(new_slabs, shift, capacity); + + // Phase 4: Re-start all CPUs. + for (size_t cpu = 0; cpu < num_cpus; ++cpu) { + stopped_[cpu].store(false, std::memory_order_release); + } + + // Phase 5: Return pointers from the old slab to the TransferCache. + for (size_t cpu = 0; cpu < num_cpus; ++cpu) { + if (!populated(cpu)) continue; + DrainOldSlabs(old_slabs, shift, cpu, old_begins, drain_handler); + } + return {old_slabs, GetSlabsAllocSize(shift, num_cpus)}; } template -void TcmallocSlab::Drain(int cpu, void* ctx, DrainHandler f) { - CHECK_CONDITION(cpu >= 0); - CHECK_CONDITION(cpu < absl::base_internal::NumCPUs()); - const size_t virtual_cpu_id_offset = virtual_cpu_id_offset_; - - // Push/Pop/Grow/Shrink can be executed concurrently with Drain. - // That's not an expected case, but it must be handled for correctness. - // Push/Pop/Grow/Shrink can only be executed on and use rseq primitives. - // Push only updates current. Pop only updates current and end_copy - // (it mutates only current but uses 4 byte write for performance). - // Grow/Shrink mutate end and end_copy using 64-bit stores. - - // We attempt to stop all concurrent operations by writing 0xffff to begin - // and 0 to end. However, Grow/Shrink can overwrite our write, so we do this - // in a loop until we know that the header is in quiescent state. - - // Phase 1: collect all begin's (these are not mutated by anybody else). - uint16_t begin[NumClasses]; - for (size_t cl = 0; cl < NumClasses; ++cl) { - Header hdr = LoadHeader(GetHeader(cpu, cl)); - CHECK_CONDITION(!hdr.IsLocked()); - begin[cl] = hdr.begin; +auto TcmallocSlab::ResizeSlabs( + Shift new_shift, void* new_slabs, + absl::FunctionRef capacity, + absl::FunctionRef populated, DrainHandler drain_handler) + -> ResizeSlabsInfo { + // Phase 1: Collect begins, stop all CPUs and initialize any CPUs in the new + // slab that have already been populated in the old slab. + const auto [old_slabs, old_shift] = + GetSlabsAndShift(std::memory_order_relaxed); + std::array old_begins; + for (int size_class = 1; size_class < NumClasses; ++size_class) { + old_begins[size_class] = + begins_[size_class].load(std::memory_order_relaxed); } - // Phase 2: stop concurrent mutations. - for (bool done = false; !done;) { - for (size_t cl = 0; cl < NumClasses; ++cl) { - // Note: this reinterpret_cast and write in Lock lead to undefined - // behavior, because the actual object type is std::atomic. But - // C++ does not allow to legally express what we need here: atomic writes - // of different sizes. - reinterpret_cast(GetHeader(cpu, cl))->Lock(); - } - FenceCpu(cpu, virtual_cpu_id_offset); - done = true; - for (size_t cl = 0; cl < NumClasses; ++cl) { - Header hdr = LoadHeader(GetHeader(cpu, cl)); - if (!hdr.IsLocked()) { - // Header was overwritten by Grow/Shrink. Retry. 
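UpdateMaxCapacities above and ResizeSlabs (continued below) follow the same ordering: mark every CPU stopped, fence, rebuild the metadata for the new slab, publish it, restart the CPUs, and only then drain the old slab back to the caller. The toy below is a much-simplified, single-threaded illustration of that ordering under hypothetical names (ToySlabOwner); it is not the real synchronization, which additionally relies on per-CPU rseq fences.

#include <atomic>
#include <utility>
#include <vector>

// Hypothetical, simplified toy of the resize ordering; not the real code.
struct ToySlabOwner {
  std::vector<std::atomic<bool>> stopped;  // one flag per CPU
  std::vector<int> slab;                   // stand-in for the slab metadata

  explicit ToySlabOwner(int num_cpus) : stopped(num_cpus) {}

  // Returns the old slab so the caller can drain and free it afterwards,
  // mirroring how ResizeSlabs returns {old_slabs, size} to its caller.
  std::vector<int> Resize(std::vector<int> new_slab) {
    // Phase 1: stop all CPUs (the real code also fences every CPU here).
    for (auto& s : stopped) s.store(true, std::memory_order_relaxed);
    // Phase 2: swap in the new slab while all fast paths are parked.
    std::vector<int> old_slab = std::exchange(slab, std::move(new_slab));
    // Phase 3: restart; the release store pairs with the acquire load of
    // stopped_ performed by the slab-caching path.
    for (auto& s : stopped) s.store(false, std::memory_order_release);
    // Phase 4: only after restarting is the old slab drained by the caller.
    return old_slab;
  }
};

The property the ordering preserves is that draining happens strictly after the new slab is published and the CPUs restarted, so restarted fast paths never observe the old metadata.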
- done = false; - break; - } + TC_ASSERT_NE(new_shift, old_shift); + const int num_cpus = NumCPUs(); + for (size_t cpu = 0; cpu < num_cpus; ++cpu) { + TC_CHECK(!stopped_[cpu].load(std::memory_order_relaxed)); + stopped_[cpu].store(true, std::memory_order_relaxed); + if (populated(cpu)) { + InitCpuImpl(new_slabs, new_shift, cpu, capacity); } } + FenceAllCpus(); - // Phase 3: execute callbacks. - for (size_t cl = 0; cl < NumClasses; ++cl) { - Header hdr = LoadHeader(GetHeader(cpu, cl)); - // We overwrote begin and end, instead we use our local copy of begin - // and end_copy. - size_t n = hdr.current - begin[cl]; - size_t cap = hdr.end_copy - begin[cl]; - void** batch = reinterpret_cast(GetHeader(cpu, 0) + begin[cl]); - f(ctx, cl, batch, n, cap); + // Phase 2: Atomically update slabs and shift. + InitSlabs(new_slabs, new_shift, capacity); + + // Phase 3: Re-start all CPUs. + for (size_t cpu = 0; cpu < num_cpus; ++cpu) { + stopped_[cpu].store(false, std::memory_order_release); } - // Phase 4: reset current to beginning of the region. - // We can't write all 4 fields at once with a single write, because Pop does - // several non-atomic loads of the fields. Consider that a concurrent Pop - // loads old current (still pointing somewhere in the middle of the region); - // then we update all fields with a single write; then Pop loads the updated - // begin which allows it to proceed; then it decrements current below begin. - // - // So we instead first just update current--our locked begin/end guarantee - // no Push/Pop will make progress. Once we Fence below, we know no Push/Pop - // is using the old current, and can safely update begin/end to be an empty - // slab. - for (size_t cl = 0; cl < NumClasses; ++cl) { - std::atomic* hdrp = GetHeader(cpu, cl); - Header hdr = LoadHeader(hdrp); - hdr.current = begin[cl]; - StoreHeader(hdrp, hdr); + // Phase 4: Return pointers from the old slab to the TransferCache. + for (size_t cpu = 0; cpu < num_cpus; ++cpu) { + if (!populated(cpu)) continue; + DrainOldSlabs(old_slabs, old_shift, cpu, old_begins, drain_handler); } - // Phase 5: fence and reset the remaining fields to beginning of the region. - // This allows concurrent mutations again. 
- FenceCpu(cpu, virtual_cpu_id_offset); - for (size_t cl = 0; cl < NumClasses; ++cl) { - std::atomic* hdrp = GetHeader(cpu, cl); - Header hdr; - hdr.current = begin[cl]; - hdr.begin = begin[cl]; - hdr.end = begin[cl]; - hdr.end_copy = begin[cl]; - StoreHeader(hdrp, hdr); + return {old_slabs, GetSlabsAllocSize(old_shift, num_cpus)}; +} + +template +void* TcmallocSlab::Destroy( + absl::FunctionRef free) { + free(stopped_, sizeof(stopped_[0]) * NumCPUs(), + std::align_val_t{ABSL_CACHELINE_SIZE}); + stopped_ = nullptr; + free(begins_, sizeof(begins_[0]) * NumClasses, + std::align_val_t{ABSL_CACHELINE_SIZE}); + begins_ = nullptr; + const auto [slabs, shift] = GetSlabsAndShift(std::memory_order_relaxed); + free(slabs, GetSlabsAllocSize(shift, NumCPUs()), kPhysicalPageAlign); + slabs_and_shift_.store({nullptr, shift}, std::memory_order_relaxed); + return slabs; +} + +template +size_t TcmallocSlab::GrowOtherCache( + int cpu, size_t size_class, size_t len, + absl::FunctionRef max_capacity) { + TC_ASSERT(stopped_[cpu].load(std::memory_order_relaxed)); + const auto [slabs, shift] = GetSlabsAndShift(std::memory_order_relaxed); + const size_t max_cap = max_capacity(ToUint8(shift)); + auto* hdrp = GetHeader(slabs, shift, cpu, size_class); + Header hdr = LoadHeader(hdrp); + uint16_t begin = begins_[size_class].load(std::memory_order_relaxed); + uint16_t to_grow = std::min(len, max_cap - (hdr.end - begin)); + hdr.end += to_grow; + StoreHeader(hdrp, hdr); + return to_grow; +} + +template +size_t TcmallocSlab::ShrinkOtherCache( + int cpu, size_t size_class, size_t len, ShrinkHandler shrink_handler) { + TC_ASSERT(stopped_[cpu].load(std::memory_order_relaxed)); + const auto [slabs, shift] = GetSlabsAndShift(std::memory_order_relaxed); + + auto* hdrp = GetHeader(slabs, shift, cpu, size_class); + Header hdr = LoadHeader(hdrp); + + // If we do not have len number of items to shrink, we try to pop items from + // the list first to create enough capacity that can be shrunk. + // If we pop items, we also execute callbacks. + const uint16_t unused = hdr.end - hdr.current; + uint16_t begin = begins_[size_class].load(std::memory_order_relaxed); + if (unused < len && hdr.current != begin) { + uint16_t pop = std::min(len - unused, hdr.current - begin); + void** batch = reinterpret_cast(CpuMemoryStart(slabs, shift, cpu)) + + hdr.current - pop; + TSANAcquireBatch(batch, pop); + shrink_handler(size_class, batch, pop); + hdr.current -= pop; } + + // Shrink the capacity. 
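GrowOtherCache and ShrinkOtherCache above, together with the capacity shrink that follows this comment, are plain index arithmetic on a stopped CPU: capacity is end - begin, used length is current - begin, growth is clamped by the class's maximum capacity, and a shrink first pops live objects when the unused tail (end - current) is smaller than the requested amount. A minimal self-contained sketch of that arithmetic with illustrative names only (SketchClass, GrowSketch, ShrinkSketch), assuming, as the real code does, that the current capacity never exceeds max_cap:

#include <algorithm>
#include <cassert>
#include <cstdint>

// Toy indices for one size class on one (stopped) CPU.
struct SketchClass {
  uint16_t begin;    // first slot of this class's region
  uint16_t current;  // one past the last used slot
  uint16_t end;      // one past the last reserved slot
};

// Mirrors GrowOtherCache: extend end, but never past the maximum capacity.
uint16_t GrowSketch(SketchClass& c, uint16_t len, uint16_t max_cap) {
  uint16_t to_grow = std::min<uint16_t>(len, max_cap - (c.end - c.begin));
  c.end += to_grow;
  return to_grow;
}

struct ShrinkSketchResult {
  uint16_t popped;  // objects handed to the shrink handler
  uint16_t shrunk;  // capacity actually given back
};

// Mirrors ShrinkOtherCache: pop objects first if the unused tail is too
// small, then shrink the capacity.
ShrinkSketchResult ShrinkSketch(SketchClass& c, uint16_t len) {
  uint16_t popped = 0;
  uint16_t unused = c.end - c.current;
  if (unused < len && c.current != c.begin) {
    popped = std::min<uint16_t>(len - unused, c.current - c.begin);
    c.current -= popped;
  }
  uint16_t shrunk = std::min<uint16_t>(len, c.end - c.current);
  c.end -= shrunk;
  return {popped, shrunk};
}

int main() {
  SketchClass c{5, 8, 9};  // begin=5, current=8, end=9: 3 objects, capacity 4
  assert(GrowSketch(c, 10, /*max_cap=*/6) == 2);  // clamped: capacity 4 -> 6
  ShrinkSketchResult r = ShrinkSketch(c, 5);
  assert(r.popped == 2 && r.shrunk == 5);  // 2 objects popped, 5 slots freed
  assert(c.current == 6 && c.end == 6);    // one object, no spare capacity
  return 0;
}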
+ const uint16_t to_shrink = std::min(len, hdr.end - hdr.current); + hdr.end -= to_shrink; + StoreHeader(hdrp, hdr); + return to_shrink; +} + +template +void TcmallocSlab::Drain(int cpu, DrainHandler drain_handler) { + ScopedSlabCpuStop cpu_stop(*this, cpu); + const auto [slabs, shift] = GetSlabsAndShift(std::memory_order_relaxed); + DrainCpu(slabs, shift, cpu, drain_handler); +} + +template +void TcmallocSlab::StopCpu(int cpu) { + TC_ASSERT(cpu >= 0 && cpu < NumCPUs(), "cpu=%d", cpu); + TC_CHECK(!stopped_[cpu].load(std::memory_order_relaxed)); + stopped_[cpu].store(true, std::memory_order_relaxed); + FenceCpu(cpu); +} + +template +void TcmallocSlab::StartCpu(int cpu) { + TC_ASSERT(cpu >= 0 && cpu < NumCPUs(), "cpu=%d", cpu); + TC_ASSERT(stopped_[cpu].load(std::memory_order_relaxed)); + stopped_[cpu].store(false, std::memory_order_release); } template PerCPUMetadataState TcmallocSlab::MetadataMemoryUsage() const { PerCPUMetadataState result; - result.virtual_size = absl::base_internal::NumCPUs() * (1ul << shift_); - result.resident_size = MInCore::residence(slabs_, result.virtual_size); + const auto [slabs, shift] = GetSlabsAndShift(std::memory_order_relaxed); + size_t slabs_size = GetSlabsAllocSize(shift, NumCPUs()); + size_t stopped_size = NumCPUs() * sizeof(stopped_[0]); + size_t begins_size = NumClasses * sizeof(begins_[0]); + result.virtual_size = stopped_size + slabs_size + begins_size; + result.resident_size = MInCore::residence(slabs, slabs_size); return result; } diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc_test.cc index 39f07fbe670b..1d3e42949e50 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc_test.cc @@ -14,31 +14,57 @@ #include "tcmalloc/internal/percpu_tcmalloc.h" +#include #include +#include #include #include #include +#include +#include + +#include "absl/functional/function_ref.h" +#include "tcmalloc/internal/cpu_utils.h" +#include "tcmalloc/internal/percpu.h" + +#if defined(__linux__) +#include +#else +#include +#endif + +#include +#include #include +#include +#include +#include +#include +#include #include // NOLINT(build/c++11) +#include +#include #include +#include "benchmark/benchmark.h" #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "absl/base/internal/sysinfo.h" +#include "absl/base/call_once.h" +#include "absl/base/thread_annotations.h" #include "absl/container/fixed_array.h" #include "absl/container/flat_hash_set.h" -#include "absl/debugging/symbolize.h" #include "absl/random/random.h" #include "absl/random/seed_sequences.h" -#include "absl/strings/string_view.h" #include "absl/synchronization/mutex.h" #include "absl/time/clock.h" #include "absl/time/time.h" #include "absl/types/span.h" -#include "benchmark/benchmark.h" +#include "tcmalloc/internal/affinity.h" #include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/page_size.h" +#include "tcmalloc/internal/sysinfo.h" #include "tcmalloc/internal/util.h" #include "tcmalloc/malloc_extension.h" #include "tcmalloc/testing/testutil.h" @@ -52,203 +78,112 @@ namespace { using testing::Each; using testing::UnorderedElementsAreArray; -// Choose an available CPU and executes the passed functor on it. The -// cpu that is chosen, as long as a valid disjoint remote CPU will be passed -// as arguments to it. 
-// -// If the functor believes that it has failed in a manner attributable to -// external modification, then it should return false and we will attempt to -// retry the operation (up to a constant limit). -void RunOnSingleCpuWithRemoteCpu(std::function test) { - constexpr int kMaxTries = 1000; - - for (int i = 0; i < kMaxTries; i++) { - auto allowed = AllowedCpus(); - - int target_cpu = allowed[0], remote_cpu; - - // We try to pass something actually within the mask, but, for most tests it - // only needs to exist. - if (allowed.size() > 1) - remote_cpu = allowed[1]; - else - remote_cpu = target_cpu ? 0 : 1; - - ScopedAffinityMask mask(target_cpu); - - // If the test function failed, assert that the mask was tampered with. - if (!test(target_cpu, remote_cpu)) - ASSERT_TRUE(mask.Tampered()); - else - return; - } +constexpr size_t kStressSlabs = 5; +constexpr size_t kStressCapacity = 4; +constexpr size_t kMaxStressCapacity = kStressCapacity * 2; - ASSERT_TRUE(false); -} +constexpr size_t kShift = 18; +typedef class TcmallocSlab TcmallocSlab; -// Equivalent to RunOnSingleCpuWithRemoteCpu, except that only the CPU the -// functor is executing on is passed. -void RunOnSingleCpu(std::function test) { - auto wrapper = [&test](int this_cpu, int unused) { return test(this_cpu); }; - RunOnSingleCpuWithRemoteCpu(wrapper); +void* AllocSlabs(absl::FunctionRef alloc, + size_t raw_shift) { + Shift shift = ToShiftType(raw_shift); + const size_t slabs_size = GetSlabsAllocSize(shift, NumCPUs()); + return alloc(slabs_size, kPhysicalPageAlign); } -constexpr size_t kStressSlabs = 4; -constexpr size_t kStressCapacity = 4; +void InitSlab(TcmallocSlab& slab, + absl::FunctionRef alloc, + absl::FunctionRef capacity, size_t raw_shift) { + void* slabs = AllocSlabs(alloc, raw_shift); + slab.Init(alloc, slabs, capacity, ToShiftType(raw_shift)); +} -constexpr size_t kShift = 18; -typedef class TcmallocSlab TcmallocSlab; +struct GetMaxCapacity { + size_t operator()(size_t size_class) const { + if (size_class >= kStressSlabs) return 0; + return max_capacities[size_class].load(std::memory_order_relaxed); + } -enum class SlabInit { - kEager, - kLazy, + const std::atomic* max_capacities; }; -class TcmallocSlabTest : public testing::TestWithParam { - protected: +class TcmallocSlabTest : public testing::Test { + public: TcmallocSlabTest() { - slab_test_ = &slab_; - metadata_bytes_ = 0; - // Ignore false-positive warning in GCC. 
For more information, see: // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96003 #pragma GCC diagnostic ignored "-Wnonnull" - slab_.Init( - &ByteCountingMalloc, [](size_t cl) { return kCapacity; }, - GetParam() == SlabInit::kLazy, kShift); - - for (int i = 0; i < kCapacity; ++i) { - object_ptrs_[i] = &objects_[i]; - } - } - - ~TcmallocSlabTest() override { slab_.Destroy(free); } - - template - static int ExpectOverflow(int cpu, size_t cl, void* item) { - EXPECT_EQ(cpu, current_cpu_); - EXPECT_EQ(cl, current_cl_); - EXPECT_FALSE(overflow_called_); - overflow_called_ = true; - return result; - } - - template - static void* ExpectUnderflow(int cpu, size_t cl) { - EXPECT_EQ(cpu, current_cpu_); - EXPECT_EQ(cl, current_cl_); - EXPECT_LT(result_object, kCapacity); - EXPECT_FALSE(underflow_called_); - underflow_called_ = true; - return &objects_[result_object]; - } - - template - bool PushExpectOverflow(TcmallocSlab* slab, size_t cl, void* item) { - bool res = slab->Push(cl, item, ExpectOverflow); - EXPECT_TRUE(overflow_called_); - overflow_called_ = false; - return res; + InitSlab( + slab_, + [&](size_t size, std::align_val_t align) { + return ByteCountingMalloc(size, align); + }, + [](size_t) { return kCapacity; }, kShift); } - template - void* PopExpectUnderflow(TcmallocSlab* slab, size_t cl) { - void* res = slab->Pop(cl, ExpectUnderflow); - EXPECT_TRUE(underflow_called_); - underflow_called_ = false; - return res; - } + ~TcmallocSlabTest() override { slab_.Destroy(sized_aligned_delete); } - static void* ByteCountingMalloc(size_t size) { - const size_t kPageSize = getpagesize(); - void* ptr; - CHECK_CONDITION(posix_memalign(&ptr, kPageSize, size) == 0); - if (ptr) { - // Emulate obtaining memory as if we got it from mmap (zero'd). - memset(ptr, 0, size); + void* ByteCountingMalloc(size_t size, std::align_val_t alignment) { + void* ptr = ::operator new(size, alignment); + // Emulate obtaining memory as if we got it from mmap (zero'd). + memset(ptr, 0, size); + if (static_cast(alignment) >= GetPageSize()) { madvise(ptr, size, MADV_DONTNEED); - metadata_bytes_ += size; } + metadata_bytes_ += size; return ptr; } TcmallocSlab slab_; - static constexpr size_t kCapacity = 10; - static char objects_[kCapacity]; - static void* object_ptrs_[kCapacity]; - static int current_cpu_; - static size_t current_cl_; - static bool overflow_called_; - static bool underflow_called_; - static TcmallocSlab* slab_test_; - static size_t metadata_bytes_; + size_t metadata_bytes_ = 0; }; -static int ExpectNoOverflow(int cpu, size_t cl, void* item) { - CHECK_CONDITION(false && "overflow is not expected"); - return 0; -} - -static void* ExpectNoUnderflow(int cpu, size_t cl) { - CHECK_CONDITION(false && "underflow is not expected"); - return nullptr; -} - -char TcmallocSlabTest::objects_[TcmallocSlabTest::kCapacity]; -void* TcmallocSlabTest::object_ptrs_[TcmallocSlabTest::kCapacity]; -int TcmallocSlabTest::current_cpu_; -size_t TcmallocSlabTest::current_cl_; -bool TcmallocSlabTest::overflow_called_; -bool TcmallocSlabTest::underflow_called_; -TcmallocSlab* TcmallocSlabTest::slab_test_; -size_t TcmallocSlabTest::metadata_bytes_; - -TEST_P(TcmallocSlabTest, Metadata) { +TEST_F(TcmallocSlabTest, Metadata) { PerCPUMetadataState r = slab_.MetadataMemoryUsage(); ASSERT_GT(metadata_bytes_, 0); EXPECT_EQ(r.virtual_size, metadata_bytes_); - if (GetParam() == SlabInit::kLazy) { - EXPECT_EQ(r.resident_size, 0); + EXPECT_EQ(r.resident_size, 0); - if (!IsFast()) { - GTEST_SKIP() << "Need fast percpu. 
Skipping."; - return; - } + if (!IsFast()) { + GTEST_SKIP() << "Need fast percpu. Skipping."; + return; + } - // Initialize a core. Verify that the increased RSS is proportional to a - // core. - slab_.InitCPU(0, [](size_t cl) { return kCapacity; }); - - r = slab_.MetadataMemoryUsage(); - // We may fault a whole hugepage, so round up the expected per-core share to - // a full hugepage. - size_t expected = r.virtual_size / absl::base_internal::NumCPUs(); - expected = (expected + kHugePageSize - 1) & ~(kHugePageSize - 1); - - // A single core may be less than the full slab for that core, since we do - // not touch every page within the slab. - EXPECT_GE(expected, r.resident_size); - - // Read stats from the slab. This will fault additional memory. - for (int cpu = 0, n = absl::base_internal::NumCPUs(); cpu < n; ++cpu) { - // To inhibit optimization, verify the values are sensible. - for (int cl = 0; cl < kStressSlabs; ++cl) { - EXPECT_EQ(0, slab_.Length(cpu, cl)); - EXPECT_EQ(0, slab_.Capacity(cpu, cl)); - } + // Initialize a core. Verify that the increased RSS is proportional to a + // core. + slab_.InitCpu(0, [](size_t size_class) { return kCapacity; }); + + r = slab_.MetadataMemoryUsage(); + // We may fault a whole hugepage, so round up the expected per-core share to + // a full hugepage. + size_t expected = r.virtual_size / NumCPUs(); + expected = (expected + kHugePageSize - 1) & ~(kHugePageSize - 1); + + // A single core may be less than the full slab for that core, since we do + // not touch every page within the slab. + EXPECT_GE(expected, r.resident_size); + // We expect to have touched at least one page, so resident size should be a + // non-zero number of bytes. + EXPECT_GT(r.resident_size, 0); + + // Read stats from the slab. This will fault additional memory. + for (int cpu = 0, n = NumCPUs(); cpu < n; ++cpu) { + // To inhibit optimization, verify the values are sensible. + for (int size_class = 1; size_class < kStressSlabs; ++size_class) { + EXPECT_EQ(0, slab_.Length(cpu, size_class)); + EXPECT_EQ(0, slab_.Capacity(cpu, size_class)); } - - PerCPUMetadataState post_stats = slab_.MetadataMemoryUsage(); - EXPECT_LE(post_stats.resident_size, metadata_bytes_); - EXPECT_GT(post_stats.resident_size, r.resident_size); - } else { - EXPECT_EQ(r.resident_size, metadata_bytes_); } + + PerCPUMetadataState post_stats = slab_.MetadataMemoryUsage(); + EXPECT_LE(post_stats.resident_size, metadata_bytes_); + EXPECT_GT(post_stats.resident_size, r.resident_size); } -TEST_P(TcmallocSlabTest, Unit) { +TEST_F(TcmallocSlabTest, Unit) { if (MallocExtension::PerCpuCachesActive()) { // This test unregisters rseq temporarily, as to decrease flakiness. GTEST_SKIP() << "per-CPU TCMalloc is incompatible with unregistering rseq"; @@ -261,8 +196,13 @@ TEST_P(TcmallocSlabTest, Unit) { // Decide if we should expect a push or pop to be the first action on the CPU // slab to trigger initialization. - absl::FixedArray initialized(absl::base_internal::NumCPUs(), - GetParam() != SlabInit::kLazy); + absl::FixedArray initialized(NumCPUs(), false); + + void* objects[kCapacity]; + void* object_ptrs[kCapacity]; + for (int i = 0; i < kCapacity; ++i) { + object_ptrs[i] = &objects[i]; + } for (auto cpu : AllowedCpus()) { SCOPED_TRACE(cpu); @@ -270,146 +210,135 @@ TEST_P(TcmallocSlabTest, Unit) { // Temporarily fake being on the given CPU. 
ScopedFakeCpuId fake_cpu_id(cpu); -#if !defined(__ppc__) - if (UsingFlatVirtualCpus()) { -#if TCMALLOC_PERCPU_USE_RSEQ - __rseq_abi.vcpu_id = cpu ^ 1; -#endif - cpu = cpu ^ 1; - } -#endif - current_cpu_ = cpu; - - for (size_t cl = 0; cl < kStressSlabs; ++cl) { - SCOPED_TRACE(cl); - current_cl_ = cl; - -#ifdef __ppc__ - // This is imperfect but the window between operations below is small. We - // can make this more precise around individual operations if we see - // measurable flakiness as a result. - if (fake_cpu_id.Tampered()) break; -#endif + for (size_t size_class = 1; size_class < kStressSlabs; ++size_class) { + SCOPED_TRACE(size_class); // Check new slab state. - ASSERT_EQ(slab_.Length(cpu, cl), 0); - ASSERT_EQ(slab_.Capacity(cpu, cl), 0); + ASSERT_EQ(slab_.Length(cpu, size_class), 0); + ASSERT_EQ(slab_.Capacity(cpu, size_class), 0); if (!initialized[cpu]) { -#pragma GCC diagnostic ignored "-Wnonnull" - void* ptr = slab_.Pop(cl, [](int cpu, size_t cl) { - slab_test_->InitCPU(cpu, [](size_t cl) { return kCapacity; }); - - return static_cast(slab_test_); - }); - - ASSERT_TRUE(ptr == slab_test_); + ASSERT_EQ(slab_.Pop(size_class), nullptr); + slab_.InitCpu(cpu, [](size_t size_class) { return kCapacity; }); initialized[cpu] = true; } - // Test overflow/underflow handlers. - ASSERT_EQ(PopExpectUnderflow<5>(&slab_, cl), &objects_[5]); - ASSERT_FALSE(PushExpectOverflow<-1>(&slab_, cl, &objects_[0])); - ASSERT_FALSE(PushExpectOverflow<-2>(&slab_, cl, &objects_[0])); - ASSERT_TRUE(PushExpectOverflow<0>(&slab_, cl, &objects_[0])); + // Test that operations on uncached slab fail. + ASSERT_EQ(slab_.Pop(size_class), nullptr); + EXPECT_FALSE(slab_.Push(size_class, &objects[0])); + EXPECT_FALSE(slab_.Push(size_class, &objects[0])); + EXPECT_FALSE(slab_.Push(size_class, &objects[0])); + const auto max_capacity = [](uint8_t shift) { return kCapacity; }; + ASSERT_EQ(slab_.Grow(cpu, size_class, 1, max_capacity), 0); + { + auto [got_cpu, cached] = slab_.CacheCpuSlab(); + ASSERT_TRUE(cached); + ASSERT_EQ(got_cpu, cpu); + } + { + auto [got_cpu, cached] = slab_.CacheCpuSlab(); + ASSERT_FALSE(cached); + ASSERT_EQ(got_cpu, cpu); + } // Grow capacity to kCapacity / 2. 
- ASSERT_EQ(slab_.Grow(cpu, cl, kCapacity / 2, kCapacity), kCapacity / 2); - ASSERT_EQ(slab_.Length(cpu, cl), 0); - ASSERT_EQ(slab_.Capacity(cpu, cl), kCapacity / 2); - ASSERT_EQ(PopExpectUnderflow<5>(&slab_, cl), &objects_[5]); - ASSERT_TRUE(slab_.Push(cl, &objects_[0], ExpectNoOverflow)); - ASSERT_EQ(slab_.Length(cpu, cl), 1); - ASSERT_EQ(slab_.Capacity(cpu, cl), kCapacity / 2); - ASSERT_EQ(slab_.Pop(cl, ExpectNoUnderflow), &objects_[0]); - ASSERT_EQ(slab_.Length(cpu, cl), 0); + ASSERT_EQ(slab_.Grow(cpu, size_class, kCapacity / 2, max_capacity), + kCapacity / 2); + ASSERT_EQ(slab_.Length(cpu, size_class), 0); + ASSERT_EQ(slab_.Capacity(cpu, size_class), kCapacity / 2); + ASSERT_EQ(slab_.Pop(size_class), nullptr); + ASSERT_TRUE(slab_.Push(size_class, &objects[0])); + + ASSERT_EQ(slab_.Length(cpu, size_class), 1); + ASSERT_EQ(slab_.Capacity(cpu, size_class), kCapacity / 2); + ASSERT_EQ(slab_.Pop(size_class), &objects[0]); + ASSERT_EQ(slab_.Length(cpu, size_class), 0); for (size_t i = 0; i < kCapacity / 2; ++i) { - ASSERT_TRUE(slab_.Push(cl, &objects_[i], ExpectNoOverflow)); - ASSERT_EQ(slab_.Length(cpu, cl), i + 1); + ASSERT_TRUE(slab_.Push(size_class, &objects[i])); + ASSERT_EQ(slab_.Length(cpu, size_class), i + 1); } - ASSERT_FALSE(PushExpectOverflow<-1>(&slab_, cl, &objects_[0])); + EXPECT_FALSE(slab_.Push(size_class, &objects[0])); for (size_t i = kCapacity / 2; i > 0; --i) { - ASSERT_EQ(slab_.Pop(cl, ExpectNoUnderflow), &objects_[i - 1]); - ASSERT_EQ(slab_.Length(cpu, cl), i - 1); + ASSERT_EQ(slab_.Pop(size_class), &objects[i - 1]); + ASSERT_EQ(slab_.Length(cpu, size_class), i - 1); } - // Ensure that Shink don't underflow capacity. - ASSERT_EQ(slab_.Shrink(cpu, cl, kCapacity), kCapacity / 2); - ASSERT_EQ(slab_.Capacity(cpu, cl), 0); - - // Grow capacity to kCapacity. - ASSERT_EQ(slab_.Grow(cpu, cl, kCapacity / 2, kCapacity), kCapacity / 2); - // Ensure that grow don't overflow max capacity. - ASSERT_EQ(slab_.Grow(cpu, cl, kCapacity, kCapacity), kCapacity / 2); - ASSERT_EQ(slab_.Capacity(cpu, cl), kCapacity); + + // Grow capacity to kCapacity and ensure that grow don't overflow max + // capacity. + ASSERT_EQ(slab_.Grow(cpu, size_class, kCapacity, max_capacity), + kCapacity / 2); + ASSERT_EQ(slab_.Capacity(cpu, size_class), kCapacity); for (size_t i = 0; i < kCapacity; ++i) { - ASSERT_TRUE(slab_.Push(cl, &objects_[i], ExpectNoOverflow)); - ASSERT_EQ(slab_.Length(cpu, cl), i + 1); + ASSERT_TRUE(slab_.Push(size_class, &objects[i])); + ASSERT_EQ(slab_.Length(cpu, size_class), i + 1); } - ASSERT_FALSE(PushExpectOverflow<-1>(&slab_, cl, &objects_[0])); + EXPECT_FALSE(slab_.Push(size_class, &objects[0])); for (size_t i = kCapacity; i > 0; --i) { - ASSERT_EQ(slab_.Pop(cl, ExpectNoUnderflow), &objects_[i - 1]); - ASSERT_EQ(slab_.Length(cpu, cl), i - 1); + ASSERT_EQ(slab_.Pop(size_class), &objects[i - 1]); + ASSERT_EQ(slab_.Length(cpu, size_class), i - 1); } - // Ensure that we can't shrink below length. - ASSERT_TRUE(slab_.Push(cl, &objects_[0], ExpectNoOverflow)); - ASSERT_TRUE(slab_.Push(cl, &objects_[1], ExpectNoOverflow)); - ASSERT_EQ(slab_.Shrink(cpu, cl, kCapacity), kCapacity - 2); - ASSERT_EQ(slab_.Capacity(cpu, cl), 2); - // Test Drain. 
- ASSERT_EQ(slab_.Grow(cpu, cl, 2, kCapacity), 2); - slab_.Drain(cpu, &cl, - [](void* ctx, size_t cl, void** batch, size_t n, size_t cap) { - size_t mycl = *static_cast(ctx); - if (cl == mycl) { - ASSERT_EQ(n, 2); - ASSERT_EQ(cap, 4); - ASSERT_EQ(batch[0], &objects_[0]); - ASSERT_EQ(batch[1], &objects_[1]); - } else { - ASSERT_EQ(n, 0); - ASSERT_EQ(cap, 0); - } - }); - ASSERT_EQ(slab_.Length(cpu, cl), 0); - ASSERT_EQ(slab_.Capacity(cpu, cl), 0); + ASSERT_TRUE(slab_.Push(size_class, &objects[0])); + ASSERT_TRUE(slab_.Push(size_class, &objects[1])); + + slab_.Drain(cpu, [size_class, cpu, &objects]( + int cpu_arg, size_t size_class_arg, void** batch, + size_t size, size_t cap) { + ASSERT_EQ(cpu, cpu_arg); + if (size_class == size_class_arg) { + ASSERT_EQ(size, 2); + ASSERT_EQ(cap, 10); + ASSERT_EQ(batch[0], &objects[0]); + ASSERT_EQ(batch[1], &objects[1]); + } else { + ASSERT_EQ(size, 0); + ASSERT_EQ(cap, 0); + } + }); + ASSERT_EQ(slab_.Length(cpu, size_class), 0); + ASSERT_EQ(slab_.Capacity(cpu, size_class), 0); // Test PushBatch/PopBatch. void* batch[kCapacity + 1]; for (size_t i = 0; i < kCapacity; ++i) { - batch[i] = &objects_[i]; + batch[i] = &objects[i]; } - ASSERT_EQ(slab_.PopBatch(cl, batch, kCapacity), 0); - ASSERT_EQ(slab_.PushBatch(cl, batch, kCapacity), 0); - ASSERT_EQ(slab_.Grow(cpu, cl, kCapacity / 2, kCapacity), kCapacity / 2); - ASSERT_EQ(slab_.PopBatch(cl, batch, kCapacity), 0); + void* slabs_result[kCapacity + 1]; + ASSERT_EQ(slab_.PopBatch(size_class, batch, kCapacity), 0); + ASSERT_EQ(slab_.PushBatch(size_class, batch, kCapacity), 0); + ASSERT_EQ(slab_.Grow(cpu, size_class, kCapacity / 2, max_capacity), + kCapacity / 2); + ASSERT_EQ(slab_.PopBatch(size_class, batch, kCapacity), 0); // Push a batch of size i into empty slab. for (size_t i = 1; i < kCapacity; ++i) { const size_t expect = std::min(i, kCapacity / 2); - ASSERT_EQ(slab_.PushBatch(cl, batch, i), expect); - ASSERT_EQ(slab_.Length(cpu, cl), expect); + ASSERT_EQ(slab_.PushBatch(size_class, batch, i), expect); + ASSERT_EQ(slab_.Length(cpu, size_class), expect); for (size_t j = 0; j < expect; ++j) { - ASSERT_EQ(slab_.Pop(cl, ExpectNoUnderflow), - &objects_[j + (i - expect)]); + slabs_result[j] = slab_.Pop(size_class); } - ASSERT_EQ(PopExpectUnderflow<5>(&slab_, cl), &objects_[5]); + ASSERT_THAT( + std::vector(&slabs_result[0], &slabs_result[expect]), + UnorderedElementsAreArray(&object_ptrs[i - expect], expect)); + ASSERT_EQ(slab_.Pop(size_class), nullptr); } // Push a batch of size i into non-empty slab. for (size_t i = 1; i < kCapacity / 2; ++i) { const size_t expect = std::min(i, kCapacity / 2 - i); - ASSERT_EQ(slab_.PushBatch(cl, batch, i), i); - ASSERT_EQ(slab_.PushBatch(cl, batch, i), expect); - ASSERT_EQ(slab_.Length(cpu, cl), i + expect); - for (size_t j = 0; j < expect; ++j) { - ASSERT_EQ(slab_.Pop(cl, ExpectNoUnderflow), - static_cast(&objects_[j + (i - expect)])); - } - for (size_t j = 0; j < i; ++j) { - ASSERT_EQ(slab_.Pop(cl, ExpectNoUnderflow), - static_cast(&objects_[j])); + ASSERT_EQ(slab_.PushBatch(size_class, batch, i), i); + ASSERT_EQ(slab_.PushBatch(size_class, batch, i), expect); + ASSERT_EQ(slab_.Length(cpu, size_class), i + expect); + // Because slabs are LIFO fill in this array from the end. 
+ for (int j = i + expect - 1; j >= 0; --j) { + slabs_result[j] = slab_.Pop(size_class); } - ASSERT_EQ(PopExpectUnderflow<5>(&slab_, cl), &objects_[5]); + ASSERT_THAT(std::vector(&slabs_result[0], &slabs_result[i]), + UnorderedElementsAreArray(&object_ptrs[0], i)); + ASSERT_THAT( + std::vector(&slabs_result[i], &slabs_result[i + expect]), + UnorderedElementsAreArray(&object_ptrs[i - expect], expect)); + ASSERT_EQ(slab_.Pop(size_class), nullptr); } for (size_t i = 0; i < kCapacity + 1; ++i) { batch[i] = nullptr; @@ -417,14 +346,14 @@ TEST_P(TcmallocSlabTest, Unit) { // Pop all elements in a single batch. for (size_t i = 1; i < kCapacity / 2; ++i) { for (size_t j = 0; j < i; ++j) { - ASSERT_TRUE(slab_.Push(cl, &objects_[j], ExpectNoOverflow)); + ASSERT_TRUE(slab_.Push(size_class, &objects[j])); } - ASSERT_EQ(slab_.PopBatch(cl, batch, i), i); - ASSERT_EQ(slab_.Length(cpu, cl), 0); - ASSERT_EQ(PopExpectUnderflow<5>(&slab_, cl), &objects_[5]); + ASSERT_EQ(slab_.PopBatch(size_class, batch, i), i); + ASSERT_EQ(slab_.Length(cpu, size_class), 0); + ASSERT_EQ(slab_.Pop(size_class), nullptr); ASSERT_THAT(absl::MakeSpan(&batch[0], i), - UnorderedElementsAreArray(&object_ptrs_[0], i)); + UnorderedElementsAreArray(&object_ptrs[0], i)); ASSERT_THAT(absl::MakeSpan(&batch[i], kCapacity - i), Each(nullptr)); for (size_t j = 0; j < kCapacity + 1; ++j) { batch[j] = nullptr; @@ -433,22 +362,21 @@ TEST_P(TcmallocSlabTest, Unit) { // Pop half of elements in a single batch. for (size_t i = 1; i < kCapacity / 2; ++i) { for (size_t j = 0; j < i; ++j) { - ASSERT_TRUE(slab_.Push(cl, &objects_[j], ExpectNoOverflow)); + ASSERT_TRUE(slab_.Push(size_class, &objects[j])); } size_t want = std::max(1, i / 2); - ASSERT_EQ(slab_.PopBatch(cl, batch, want), want); - ASSERT_EQ(slab_.Length(cpu, cl), i - want); + ASSERT_EQ(slab_.PopBatch(size_class, batch, want), want); + ASSERT_EQ(slab_.Length(cpu, size_class), i - want); for (size_t j = 0; j < i - want; ++j) { - ASSERT_EQ(slab_.Pop(cl, ExpectNoUnderflow), - static_cast(&objects_[i - want - j - 1])); + ASSERT_EQ(slab_.Pop(size_class), &objects[i - want - j - 1]); } - ASSERT_EQ(PopExpectUnderflow<5>(&slab_, cl), &objects_[5]); + ASSERT_EQ(slab_.Pop(size_class), nullptr); ASSERT_GE(i, want); ASSERT_THAT(absl::MakeSpan(&batch[0], want), - UnorderedElementsAreArray(&object_ptrs_[i - want], want)); + UnorderedElementsAreArray(&object_ptrs[i - want], want)); ASSERT_THAT(absl::MakeSpan(&batch[want], kCapacity - want), Each(nullptr)); for (size_t j = 0; j < kCapacity + 1; ++j) { @@ -458,242 +386,697 @@ TEST_P(TcmallocSlabTest, Unit) { // Pop 2x elements in a single batch. 
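The batch assertions above only rely on the slab behaving as a per-class LIFO stack: the most recently pushed pointer comes back first, which is why the result array is filled from its highest index downwards. The test deliberately uses unordered set comparisons, so no ordering within a single PushBatch is assumed. A trivial, self-contained model of just that last-in-first-out contract (ToySlab is a made-up name, not the real class):

#include <cassert>
#include <vector>

// Trivial LIFO model: the most recently pushed pointer is popped first.
struct ToySlab {
  std::vector<int*> slots;
  bool Push(int* item) {
    slots.push_back(item);
    return true;
  }
  int* Pop() {
    if (slots.empty()) return nullptr;
    int* item = slots.back();
    slots.pop_back();
    return item;
  }
};

int main() {
  int objects[3];
  ToySlab slab;
  for (int& o : objects) slab.Push(&o);
  // Pops yield the objects in reverse push order, hence filling the result
  // array from the end keeps the index ranges in the assertions aligned.
  assert(slab.Pop() == &objects[2]);
  assert(slab.Pop() == &objects[1]);
  assert(slab.Pop() == &objects[0]);
  assert(slab.Pop() == nullptr);
  return 0;
}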
for (size_t i = 1; i < kCapacity / 2; ++i) { for (size_t j = 0; j < i; ++j) { - ASSERT_TRUE(slab_.Push(cl, &objects_[j], ExpectNoOverflow)); + ASSERT_TRUE(slab_.Push(size_class, &objects[j])); } - ASSERT_EQ(slab_.PopBatch(cl, batch, i * 2), i); - ASSERT_EQ(slab_.Length(cpu, cl), 0); - ASSERT_EQ(PopExpectUnderflow<5>(&slab_, cl), &objects_[5]); + ASSERT_EQ(slab_.PopBatch(size_class, batch, i * 2), i); + ASSERT_EQ(slab_.Length(cpu, size_class), 0); + ASSERT_EQ(slab_.Pop(size_class), nullptr); ASSERT_THAT(absl::MakeSpan(&batch[0], i), - UnorderedElementsAreArray(&object_ptrs_[0], i)); + UnorderedElementsAreArray(&object_ptrs[0], i)); ASSERT_THAT(absl::MakeSpan(&batch[i], kCapacity - i), Each(nullptr)); for (size_t j = 0; j < kCapacity + 1; ++j) { batch[j] = nullptr; } } - ASSERT_EQ(slab_.Shrink(cpu, cl, kCapacity / 2), kCapacity / 2); + + slab_.Drain(cpu, + [size_class, cpu](int cpu_arg, size_t size_class_arg, + void** batch, size_t size, size_t cap) { + ASSERT_EQ(cpu, cpu_arg); + if (size_class == size_class_arg) { + ASSERT_EQ(size, 0); + ASSERT_EQ(cap, 5); + } else { + ASSERT_EQ(size, 0); + ASSERT_EQ(cap, 0); + } + }); + ASSERT_EQ(slab_.Length(cpu, size_class), 0); + ASSERT_EQ(slab_.Capacity(cpu, size_class), 0); + slab_.UncacheCpuSlab(); } } } -INSTANTIATE_TEST_SUITE_P(Instant, TcmallocSlabTest, - testing::Values(SlabInit::kEager, SlabInit::kLazy)); +void* allocator(size_t bytes, std::align_val_t alignment) { + void* ptr = ::operator new(bytes, alignment); + memset(ptr, 0, bytes); + return ptr; +} -static void StressThread(size_t thread_id, TcmallocSlab* slab, - std::vector* block, - std::vector* mutexes, - std::atomic* capacity, - std::atomic* stop) { - EXPECT_TRUE(IsFast()); +TEST_F(TcmallocSlabTest, ResizeMaxCapacities) { + if (MallocExtension::PerCpuCachesActive()) { + // This test unregisters rseq temporarily, as to decrease flakiness. + GTEST_SKIP() << "per-CPU TCMalloc is incompatible with unregistering rseq"; + } - struct Handler { - static int Overflow(int cpu, size_t cl, void* item) { - EXPECT_GE(cpu, 0); - EXPECT_LT(cpu, absl::base_internal::NumCPUs()); - EXPECT_LT(cl, kStressSlabs); - EXPECT_NE(item, nullptr); - return -1; - } + if (!IsFast()) { + GTEST_SKIP() << "Need fast percpu. Skipping."; + return; + } + constexpr int kCpu = 1; + constexpr int kSizeClassToGrow = 1; + constexpr int kSizeClassToShrink = 2; + ASSERT_LT(kSizeClassToShrink, kStressSlabs); + ASSERT_LT(kSizeClassToGrow, kStressSlabs); + + ScopedFakeCpuId fake_cpu_id(kCpu); + slab_.InitCpu(kCpu, [](size_t size_class) { return kCapacity; }); + { + auto [got_cpu, cached] = slab_.CacheCpuSlab(); + ASSERT_TRUE(cached); + ASSERT_EQ(got_cpu, kCpu); + } - static void* Underflow(int cpu, size_t cl) { - EXPECT_GE(cpu, 0); - EXPECT_LT(cpu, absl::base_internal::NumCPUs()); - EXPECT_LT(cl, kStressSlabs); - return nullptr; - } + size_t max_capacity[kStressSlabs] = {0}; + max_capacity[kSizeClassToShrink] = kCapacity; + max_capacity[kSizeClassToGrow] = kCapacity; + + // Make sure that the slab may grow the available maximum capacity. 
+ EXPECT_EQ(slab_.Grow(kCpu, kSizeClassToGrow, max_capacity[kSizeClassToGrow], + [&](uint8_t) { return max_capacity[kSizeClassToGrow]; }), + max_capacity[kSizeClassToGrow]); + EXPECT_EQ( + slab_.Grow(kCpu, kSizeClassToShrink, max_capacity[kSizeClassToShrink], + [&](uint8_t) { return max_capacity[kSizeClassToShrink]; }), + max_capacity[kSizeClassToShrink]); + + for (int i = 0; i < kCapacity; ++i) { + PerSizeClassMaxCapacity new_max_capacity[2]; + new_max_capacity[0] = PerSizeClassMaxCapacity{ + .size_class = kSizeClassToGrow, + .max_capacity = max_capacity[kSizeClassToGrow] + 1}; + new_max_capacity[1] = PerSizeClassMaxCapacity{ + .size_class = kSizeClassToShrink, + .max_capacity = max_capacity[kSizeClassToShrink] - 1}; + void* slabs = AllocSlabs(allocator, kShift); + const auto [old_slabs, old_slabs_size] = slab_.UpdateMaxCapacities( + slabs, [&](size_t size_class) { return max_capacity[size_class]; }, + [&](int size, uint16_t cap) { max_capacity[size] = cap; }, + [](int cpu) { return cpu == kCpu; }, + [&](int cpu, size_t size_class, void** batch, size_t size, size_t cap) { + EXPECT_EQ(size, 0); + }, + new_max_capacity, + /*classes_to_resize=*/2); + ASSERT_NE(old_slabs, nullptr); + mprotect(old_slabs, old_slabs_size, PROT_READ | PROT_WRITE); + sized_aligned_delete(old_slabs, old_slabs_size, + std::align_val_t{EXEC_PAGESIZE}); + + // Make sure that the capacity is zero as UpdateMaxCapacity should + // initialize slabs. + EXPECT_EQ(slab_.Capacity(kCpu, kSizeClassToGrow), 0); + EXPECT_EQ(slab_.Capacity(kCpu, kSizeClassToShrink), 0); + + // Make sure that the slab may grow the available maximum capacity. + EXPECT_EQ( + slab_.Grow(kCpu, kSizeClassToGrow, max_capacity[kSizeClassToGrow], + [&](uint8_t) { return max_capacity[kSizeClassToGrow]; }), + max_capacity[kSizeClassToGrow]); + EXPECT_EQ( + slab_.Grow(kCpu, kSizeClassToShrink, max_capacity[kSizeClassToShrink], + [&](uint8_t) { return max_capacity[kSizeClassToShrink]; }), + max_capacity[kSizeClassToShrink]); + } + + EXPECT_EQ(max_capacity[kSizeClassToShrink], 0); + EXPECT_EQ(max_capacity[kSizeClassToGrow], 2 * kCapacity); +} + +TEST_F(TcmallocSlabTest, ShrinkEmptyCache) { + if (MallocExtension::PerCpuCachesActive()) { + // This test unregisters rseq temporarily, as to decrease flakiness. + GTEST_SKIP() << "per-CPU TCMalloc is incompatible with unregistering rseq"; + } + + if (!IsFast()) { + GTEST_SKIP() << "Need fast percpu. Skipping."; + return; + } + constexpr int kCpu = 1; + constexpr int kSizeClass = 1; + slab_.InitCpu(kCpu, [](size_t size_class) { return kCapacity; }); + slab_.StopCpu(kCpu); + EXPECT_EQ( + slab_.ShrinkOtherCache(kCpu, kSizeClass, /*len=*/1, + [](size_t size_class, void** batch, size_t n) { + EXPECT_LT(size_class, kStressSlabs); + EXPECT_LE(n, kStressCapacity); + EXPECT_GT(n, 0); + for (size_t i = 0; i < n; ++i) { + EXPECT_NE(batch[i], nullptr); + } + }), + 0); + slab_.StartCpu(kCpu); +} + +TEST_F(TcmallocSlabTest, SimulatedMadviseFailure) { + if (!IsFast()) { + GTEST_SKIP() << "Need fast percpu. Skipping."; + return; + } + + // Initialize a core. + slab_.InitCpu(0, [](size_t size_class) { return kCapacity; }); + + auto trigger_resize = [&](size_t shift) { + // We are deliberately simulating madvise failing, so ignore the return + // value. 
+ auto alloc = [&](size_t size, std::align_val_t alignment) { + return ByteCountingMalloc(size, alignment); + }; + void* slabs = AllocSlabs(alloc, shift); + (void)slab_.ResizeSlabs( + subtle::percpu::ToShiftType(shift), slabs, + [](size_t) { return kCapacity / 2; }, [](int cpu) { return cpu == 0; }, + [&](int cpu, size_t size_class, void** batch, size_t size, size_t cap) { + EXPECT_EQ(size, 0); + EXPECT_EQ(cap, 0); + }); }; + // We need to switch from one size (kShift) to another (kShift - 1) and back. + trigger_resize(kShift - 1); + trigger_resize(kShift); +} + +struct Context { + TcmallocSlab* slab; + std::vector>* blocks; + absl::Span mutexes; + std::atomic* capacity; + std::atomic* stop; + absl::Span init; + absl::Span> has_init; + std::atomic* max_capacity; + + GetMaxCapacity GetMaxCapacityFunctor() const { return {max_capacity}; } +}; + +void InitCpuOnce(Context& ctx, int cpu) { + if (cpu < 0) { + cpu = ctx.slab->CacheCpuSlab().first; + if (cpu < 0) { + return; + } + } + absl::base_internal::LowLevelCallOnce(&ctx.init[cpu], [&]() { + absl::MutexLock lock(&ctx.mutexes[cpu]); + ctx.slab->InitCpu(cpu, ctx.GetMaxCapacityFunctor()); + ctx.has_init[cpu].store(true, std::memory_order_relaxed); + }); +} + +int GetResizedMaxCapacities(Context& ctx, + PerSizeClassMaxCapacity* new_max_capacity) { + std::atomic* max_capacity = ctx.max_capacity; + absl::BitGen rnd; + size_t to_shrink = absl::Uniform(rnd, 0, kStressSlabs); + size_t to_grow = absl::Uniform(rnd, 0, kStressSlabs); + if (to_shrink == to_grow || max_capacity[to_shrink] == 0 || + max_capacity[to_grow].load(std::memory_order_relaxed) == + kMaxStressCapacity - 1) + return 0; + new_max_capacity[0] = PerSizeClassMaxCapacity{ + .size_class = to_shrink, + .max_capacity = + max_capacity[to_shrink].load(std::memory_order_relaxed) - 1}; + new_max_capacity[1] = PerSizeClassMaxCapacity{ + .size_class = to_grow, + .max_capacity = + max_capacity[to_grow].load(std::memory_order_relaxed) + 1}; + return 2; +} + +// TODO(b/213923453): move to an environment style of test, as in +// FakeTransferCacheEnvironment. +void StressThread(size_t thread_id, + Context& ctx) ABSL_NO_THREAD_SAFETY_ANALYSIS { + EXPECT_TRUE(IsFast()); + + std::vector& block = (*ctx.blocks)[thread_id]; + + const int num_cpus = NumCPUs(); absl::BitGen rnd(absl::SeedSeq({thread_id})); - while (!*stop) { - size_t cl = absl::Uniform(rnd, 0, kStressSlabs); + while (!*ctx.stop) { + size_t size_class = absl::Uniform(rnd, 1, kStressSlabs); const int what = absl::Uniform(rnd, 0, 91); if (what < 10) { - if (!block->empty()) { - if (slab->Push(cl, block->back(), &Handler::Overflow)) { - block->pop_back(); + if (!block.empty()) { + if (ctx.slab->Push(size_class, block.back())) { + block.pop_back(); + } else { + InitCpuOnce(ctx, -1); } } } else if (what < 20) { - if (void* item = slab->Pop(cl, &Handler::Underflow)) { - block->push_back(item); + if (void* item = ctx.slab->Pop(size_class)) { + // Ensure that we never return a null item which could be indicative + // of a bug in lazy InitCpu initialization (b/148973091, b/147974701). 
+ EXPECT_NE(item, nullptr); + block.push_back(item); + } else { + InitCpuOnce(ctx, -1); } } else if (what < 30) { - if (!block->empty()) { + if (!block.empty()) { void* batch[kStressCapacity]; size_t n = absl::Uniform( - rnd, 0, std::min(block->size(), kStressCapacity)) + + rnd, 0, std::min(block.size(), kStressCapacity)) + 1; for (size_t i = 0; i < n; ++i) { - batch[i] = block->back(); - block->pop_back(); + batch[i] = block.back(); + block.pop_back(); } - size_t pushed = slab->PushBatch(cl, batch, n); + size_t pushed = ctx.slab->PushBatch(size_class, batch, n); EXPECT_LE(pushed, n); for (size_t i = 0; i < n - pushed; ++i) { - block->push_back(batch[i]); + block.push_back(batch[i]); } } } else if (what < 40) { void* batch[kStressCapacity]; size_t n = absl::Uniform(rnd, 0, kStressCapacity) + 1; - size_t popped = slab->PopBatch(cl, batch, n); + size_t popped = ctx.slab->PopBatch(size_class, batch, n); EXPECT_LE(popped, n); for (size_t i = 0; i < popped; ++i) { - block->push_back(batch[i]); + block.push_back(batch[i]); } } else if (what < 50) { size_t n = absl::Uniform(rnd, 0, kStressCapacity) + 1; for (;;) { - size_t c = capacity->load(); + size_t c = ctx.capacity->load(); n = std::min(n, c); if (n == 0) { break; } - if (capacity->compare_exchange_weak(c, c - n)) { + if (ctx.capacity->compare_exchange_weak(c, c - n)) { break; } } + size_t res = 0; if (n != 0) { - size_t res = slab->Grow(slab->GetCurrentVirtualCpuUnsafe(), cl, n, - kStressCapacity); - EXPECT_LE(res, n); - capacity->fetch_add(n - res); + const int cpu = ctx.slab->CacheCpuSlab().first; + if (cpu >= 0) { + // Grow mutates the header array and must be operating on + // an initialized core. + InitCpuOnce(ctx, cpu); + + res = ctx.slab->Grow(cpu, size_class, n, [&](uint8_t shift) { + return ctx.GetMaxCapacityFunctor()(size_class); + }); + EXPECT_LE(res, n); + } + ctx.capacity->fetch_add(n - res); } } else if (what < 60) { - size_t n = - slab->Shrink(slab->GetCurrentVirtualCpuUnsafe(), cl, - absl::Uniform(rnd, 0, kStressCapacity) + 1); - capacity->fetch_add(n); + int cpu = absl::Uniform(rnd, 0, num_cpus); + absl::MutexLock lock(&ctx.mutexes[cpu]); + size_t len = ctx.slab->Length(cpu, size_class); + EXPECT_LE(len, kMaxStressCapacity); + size_t cap = ctx.slab->Capacity(cpu, size_class); + EXPECT_LE(cap, kMaxStressCapacity); + EXPECT_LE(len, cap); } else if (what < 70) { - size_t len = slab->Length( - absl::Uniform(rnd, 0, absl::base_internal::NumCPUs()), cl); - EXPECT_LE(len, kStressCapacity); + int cpu = absl::Uniform(rnd, 0, num_cpus); + + // ShrinkOtherCache mutates the header array and must be operating on an + // initialized core. 
+ InitCpuOnce(ctx, cpu); + + absl::MutexLock lock(&ctx.mutexes[cpu]); + size_t to_shrink = absl::Uniform(rnd, 0, kStressCapacity) + 1; + ctx.slab->StopCpu(cpu); + size_t total_shrunk = ctx.slab->ShrinkOtherCache( + cpu, size_class, to_shrink, + [&block](size_t size_class, void** batch, size_t n) { + EXPECT_LT(size_class, kStressSlabs); + EXPECT_LE(n, kStressCapacity); + EXPECT_GT(n, 0); + for (size_t i = 0; i < n; ++i) { + EXPECT_NE(batch[i], nullptr); + block.push_back(batch[i]); + } + }); + ctx.slab->StartCpu(cpu); + EXPECT_LE(total_shrunk, to_shrink); + EXPECT_LE(0, total_shrunk); + ctx.capacity->fetch_add(total_shrunk); } else if (what < 80) { - size_t cap = slab->Capacity( - absl::Uniform(rnd, 0, absl::base_internal::NumCPUs()), cl); - EXPECT_LE(cap, kStressCapacity); - } else if (what < 90) { - struct Context { - std::vector* block; - std::atomic* capacity; - }; - Context ctx = {block, capacity}; - int cpu = absl::Uniform(rnd, 0, absl::base_internal::NumCPUs()); - if (mutexes->at(cpu).TryLock()) { - size_t to_shrink = absl::Uniform(rnd, 0, kStressCapacity) + 1; - size_t total_shrunk = slab->ShrinkOtherCache( - cpu, cl, to_shrink, &ctx, - [](void* arg, size_t cl, void** batch, size_t n) { - Context* ctx = static_cast(arg); - EXPECT_LT(cl, kStressSlabs); - EXPECT_LE(n, kStressCapacity); - for (size_t i = 0; i < n; ++i) { - EXPECT_NE(batch[i], nullptr); - ctx->block->push_back(batch[i]); - } - }); - EXPECT_LE(total_shrunk, to_shrink); - EXPECT_LE(0, total_shrunk); - capacity->fetch_add(total_shrunk); - mutexes->at(cpu).Unlock(); + size_t to_grow = absl::Uniform(rnd, 0, kStressCapacity) + 1; + for (;;) { + size_t c = ctx.capacity->load(); + to_grow = std::min(to_grow, c); + if (to_grow == 0) { + break; + } + if (ctx.capacity->compare_exchange_weak(c, c - to_grow)) { + break; + } + } + if (to_grow != 0) { + int cpu = absl::Uniform(rnd, 0, num_cpus); + + // GrowOtherCache mutates the header array and must be operating on an + // initialized core. + InitCpuOnce(ctx, cpu); + + absl::MutexLock lock(&ctx.mutexes[cpu]); + ctx.slab->StopCpu(cpu); + size_t grown = ctx.slab->GrowOtherCache( + cpu, size_class, to_grow, + [&](uint8_t) { return ctx.GetMaxCapacityFunctor()(size_class); }); + ctx.slab->StartCpu(cpu); + EXPECT_LE(grown, to_grow); + EXPECT_GE(grown, 0); + ctx.capacity->fetch_add(to_grow - grown); } } else { - struct Context { - std::vector* block; - std::atomic* capacity; - }; - Context ctx = {block, capacity}; - int cpu = absl::Uniform(rnd, 0, absl::base_internal::NumCPUs()); - if (mutexes->at(cpu).TryLock()) { - slab->Drain( - cpu, &ctx, - [](void* arg, size_t cl, void** batch, size_t n, size_t cap) { - Context* ctx = static_cast(arg); - EXPECT_LT(cl, kStressSlabs); - EXPECT_LE(n, kStressCapacity); - EXPECT_LE(cap, kStressCapacity); - for (size_t i = 0; i < n; ++i) { + int cpu = absl::Uniform(rnd, 0, num_cpus); + // Flip coin on whether to unregister rseq on this thread. + const bool unregister = absl::Bernoulli(rnd, 0.5); + + // Drain mutates the header array and must be operating on an initialized + // core. 
+ InitCpuOnce(ctx, cpu); + + { + absl::MutexLock lock(&ctx.mutexes[cpu]); + std::optional scoped_rseq; + if (unregister) { + scoped_rseq.emplace(); + TC_ASSERT(!IsFastNoInit()); + } + + ctx.slab->Drain( + cpu, [&block, &ctx, cpu](int cpu_arg, size_t size_class, + void** batch, size_t size, size_t cap) { + EXPECT_EQ(cpu, cpu_arg); + EXPECT_LT(size_class, kStressSlabs); + EXPECT_LE(size, kMaxStressCapacity); + EXPECT_LE(cap, kMaxStressCapacity); + for (size_t i = 0; i < size; ++i) { EXPECT_NE(batch[i], nullptr); - ctx->block->push_back(batch[i]); + block.push_back(batch[i]); } - ctx->capacity->fetch_add(cap); + ctx.capacity->fetch_add(cap); }); - mutexes->at(cpu).Unlock(); } + + // Verify we re-registered with rseq as required. + TC_ASSERT(IsFastNoInit()); } } } -static void* allocator(size_t bytes) { - void* ptr = malloc(bytes); - if (ptr) { - memset(ptr, 0, bytes); +void ResizeMaxCapacitiesThread( + Context& ctx, TcmallocSlab::DrainHandler drain_handler, + absl::Span> old_slabs_span) + ABSL_NO_THREAD_SAFETY_ANALYSIS { + absl::BitGen rnd; + const size_t num_cpus = NumCPUs(); + + while (!*ctx.stop) { + for (size_t cpu = 0; cpu < num_cpus; ++cpu) ctx.mutexes[cpu].Lock(); + PerSizeClassMaxCapacity new_max_capacity[2]; + int to_resize = GetResizedMaxCapacities(ctx, new_max_capacity); + size_t old_slabs_idx = 0; + + uint8_t shift = ctx.slab->GetShift(); + void* slabs = AllocSlabs(allocator, shift); + const auto [old_slabs, old_slabs_size] = ctx.slab->UpdateMaxCapacities( + slabs, ctx.GetMaxCapacityFunctor(), + [&](int size, uint16_t cap) { + ctx.max_capacity[size].store(cap, std::memory_order_relaxed); + }, + [&](size_t cpu) { + return ctx.has_init[cpu].load(std::memory_order_relaxed); + }, + drain_handler, new_max_capacity, to_resize); + for (size_t cpu = 0; cpu < num_cpus; ++cpu) ctx.mutexes[cpu].Unlock(); + ASSERT_NE(old_slabs, nullptr); + // We sometimes don't madvise away the old slabs in order to simulate + // madvise failing. + const bool simulate_madvise_failure = absl::Bernoulli(rnd, 0.1); + if (!simulate_madvise_failure) { + // Verify that we do not write to an old slab, as this may indicate a bug. + mprotect(old_slabs, old_slabs_size, PROT_READ); + // It's important that we do this here in order to uncover any potential + // correctness issues due to madvising away the old slabs. + // TODO(b/214241843): we should be able to just do one MADV_DONTNEED once + // the kernel enables huge zero pages. + madvise(old_slabs, old_slabs_size, MADV_NOHUGEPAGE); + madvise(old_slabs, old_slabs_size, MADV_DONTNEED); + + // Verify that old_slabs is now non-resident. + const int fd = signal_safe_open("/proc/self/pageflags", O_RDONLY); + if (fd < 0) continue; + + // /proc/self/pageflags is an array. Each entry is a bitvector of size 64. + // To index the array, divide the virtual address by the pagesize. The + // 64b word has bit fields set. + const uintptr_t start_addr = reinterpret_cast(old_slabs); + constexpr size_t kPhysicalPageSize = EXEC_PAGESIZE; + for (uintptr_t addr = start_addr; addr < start_addr + old_slabs_size; + addr += kPhysicalPageSize) { + ASSERT_EQ(addr % kPhysicalPageSize, 0); + // Offset in /proc/self/pageflags. + const off64_t offset = addr / kPhysicalPageSize * 8; + uint64_t entry = 0; + // Ignore false-positive warning in GCC. 
+#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wattribute-warning" +#endif + const int64_t bytes_read = pread(fd, &entry, sizeof(entry), offset); +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif + ASSERT_EQ(bytes_read, sizeof(entry)); + constexpr uint64_t kExpectedBits = + (uint64_t{1} << KPF_ZERO_PAGE) | (uint64_t{1} << KPF_NOPAGE); + ASSERT_NE(entry & kExpectedBits, 0) + << entry << " " << addr << " " << start_addr; + } + signal_safe_close(fd); + } + + // Delete the old slab from 100 iterations ago. + if (old_slabs_span[old_slabs_idx].first != nullptr) { + auto [old_slabs, old_slabs_size] = old_slabs_span[old_slabs_idx]; + + mprotect(old_slabs, old_slabs_size, PROT_READ | PROT_WRITE); + sized_aligned_delete(old_slabs, old_slabs_size, + std::align_val_t{EXEC_PAGESIZE}); + } + old_slabs_span[old_slabs_idx] = {old_slabs, old_slabs_size}; + if (++old_slabs_idx == old_slabs_span.size()) old_slabs_idx = 0; + } +} + +constexpr size_t kResizeInitialShift = 14; +constexpr size_t kResizeMaxShift = 18; + +void ResizeSlabsThread(Context& ctx, TcmallocSlab::DrainHandler drain_handler, + absl::Span> old_slabs_span) + ABSL_NO_THREAD_SAFETY_ANALYSIS { + absl::BitGen rnd; + const size_t num_cpus = NumCPUs(); + size_t shift = kResizeInitialShift; + size_t old_slabs_idx = 0; + for (int i = 0; i < 10; ++i) { + if (shift == kResizeInitialShift) { + ++shift; + } else if (shift == kResizeMaxShift) { + --shift; + } else { + const bool grow = absl::Bernoulli(rnd, 0.5); + if (grow) { + ++shift; + } else { + --shift; + } + } + for (size_t cpu = 0; cpu < num_cpus; ++cpu) ctx.mutexes[cpu].Lock(); + void* slabs = AllocSlabs(allocator, shift); + const auto [old_slabs, old_slabs_size] = ctx.slab->ResizeSlabs( + ToShiftType(shift), slabs, ctx.GetMaxCapacityFunctor(), + [&](size_t cpu) { + return ctx.has_init[cpu].load(std::memory_order_relaxed); + }, + drain_handler); + for (size_t cpu = 0; cpu < num_cpus; ++cpu) ctx.mutexes[cpu].Unlock(); + ASSERT_NE(old_slabs, nullptr); + // We sometimes don't madvise away the old slabs in order to simulate + // madvise failing. + const bool simulate_madvise_failure = absl::Bernoulli(rnd, 0.1); + if (!simulate_madvise_failure) { + // Verify that we do not write to an old slab, as this may indicate a bug. + mprotect(old_slabs, old_slabs_size, PROT_READ); + // It's important that we do this here in order to uncover any potential + // correctness issues due to madvising away the old slabs. + // TODO(b/214241843): we should be able to just do one MADV_DONTNEED once + // the kernel enables huge zero pages. + madvise(old_slabs, old_slabs_size, MADV_NOHUGEPAGE); + madvise(old_slabs, old_slabs_size, MADV_DONTNEED); + + // Verify that old_slabs is now non-resident. + const int fd = signal_safe_open("/proc/self/pageflags", O_RDONLY); + if (fd < 0) continue; + + // /proc/self/pageflags is an array. Each entry is a bitvector of size 64. + // To index the array, divide the virtual address by the pagesize. The + // 64b word has bit fields set. + const uintptr_t start_addr = reinterpret_cast(old_slabs); + constexpr size_t kPhysicalPageSize = EXEC_PAGESIZE; + for (uintptr_t addr = start_addr; addr < start_addr + old_slabs_size; + addr += kPhysicalPageSize) { + ASSERT_EQ(addr % kPhysicalPageSize, 0); + // Offset in /proc/self/pageflags. + const off64_t offset = addr / kPhysicalPageSize * 8; + uint64_t entry = 0; +// Ignore false-positive warning in GCC. 
+#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wattribute-warning" +#endif + const int64_t bytes_read = pread(fd, &entry, sizeof(entry), offset); +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif + ASSERT_EQ(bytes_read, sizeof(entry)); + constexpr uint64_t kExpectedBits = + (uint64_t{1} << KPF_ZERO_PAGE) | (uint64_t{1} << KPF_NOPAGE); + ASSERT_NE(entry & kExpectedBits, 0) + << entry << " " << addr << " " << start_addr; + } + signal_safe_close(fd); + } + + // Delete the old slab from 100 iterations ago. + if (old_slabs_span[old_slabs_idx].first != nullptr) { + auto [old_slabs, old_slabs_size] = old_slabs_span[old_slabs_idx]; + + mprotect(old_slabs, old_slabs_size, PROT_READ | PROT_WRITE); + sized_aligned_delete(old_slabs, old_slabs_size, + std::align_val_t{EXEC_PAGESIZE}); + } + old_slabs_span[old_slabs_idx] = {old_slabs, old_slabs_size}; + if (++old_slabs_idx == old_slabs_span.size()) old_slabs_idx = 0; } - return ptr; } -TEST(TcmallocSlab, Stress) { +class StressThreadTest : public testing::TestWithParam> { +}; + +TEST_P(StressThreadTest, Stress) { // The test creates 2 * NumCPUs() threads each executing all possible - // operations on TcmallocSlab. After that we verify that no objects - // lost/duplicated and that total capacity is preserved. + // operations on TcmallocSlab. Depending on the test param, we may grow the + // slabs a few times while stress threads are running. After that we verify + // that no objects lost/duplicated and that total capacity is preserved. if (!IsFast()) { GTEST_SKIP() << "Need fast percpu. Skipping."; return; } - EXPECT_LE(kStressSlabs, kStressSlabs); + const bool resize = std::get<0>(GetParam()); + const bool pin_cpu = std::get<1>(GetParam()); + TcmallocSlab slab; - slab.Init( - allocator, - [](size_t cl) { return cl < kStressSlabs ? kStressCapacity : 0; }, false, - kShift); + size_t shift = resize ? kResizeInitialShift : kShift; std::vector threads; - const int n_threads = 2 * absl::base_internal::NumCPUs(); + const size_t num_cpus = NumCPUs(); + const size_t n_stress_threads = 2 * num_cpus; + const size_t n_threads = n_stress_threads + resize; + std::atomic max_capacity[kStressSlabs]; + + for (size_t size_class = 0; size_class < kStressSlabs; ++size_class) { + max_capacity[size_class].store(kStressCapacity, std::memory_order_relaxed); + } + + // once_flag's protect InitCpu on a CPU. + std::vector init(num_cpus); + // Tracks whether init has occurred on a CPU for use in ResizeSlabs. + std::vector> has_init(num_cpus); // Mutexes protect Drain operation on a CPU. - std::vector mutexes(absl::base_internal::NumCPUs()); + std::vector mutexes(num_cpus); // Give each thread an initial set of local objects. - std::vector> blocks(n_threads); + std::vector> blocks(n_stress_threads); for (size_t i = 0; i < blocks.size(); ++i) { for (size_t j = 0; j < kStressCapacity; ++j) { - blocks[i].push_back(reinterpret_cast(i * kStressCapacity + j + 1)); + blocks[i].push_back(reinterpret_cast( + (i * kStressCapacity + j + 1) * sizeof(void*))); } } std::atomic stop(false); // Total capacity shared between all size classes and all CPUs. - const int kTotalCapacity = blocks.size() * kStressCapacity * 3 / 4; + const size_t kTotalCapacity = blocks.size() * kStressCapacity * 3 / 4; std::atomic capacity(kTotalCapacity); - // Create threads and let them work for 5 seconds. 
+ Context ctx = {&slab, + &blocks, + absl::MakeSpan(mutexes), + &capacity, + &stop, + absl::MakeSpan(init), + absl::MakeSpan(has_init), + &max_capacity[0]}; + InitSlab(slab, allocator, ctx.GetMaxCapacityFunctor(), shift); + // Create threads and let them work for 5 seconds while we may or not also be + // resizing the slab. threads.reserve(n_threads); - for (size_t t = 0; t < n_threads; ++t) { - threads.push_back(std::thread(StressThread, t, &slab, &blocks[t], &mutexes, - &capacity, &stop)); + for (size_t t = 0; t < n_stress_threads; ++t) { + threads.push_back(std::thread(StressThread, t, std::ref(ctx))); + } + // Collect objects and capacity from all slabs in Drain in ResizeSlabs. + absl::flat_hash_set objects; + const auto drain_handler = [&objects, &ctx](int cpu, size_t size_class, + void** batch, size_t size, + size_t cap) { + for (size_t i = 0; i < size; ++i) { + objects.insert(batch[i]); + } + ctx.capacity->fetch_add(cap); + }; + + std::array, 100> max_cap_slabs_array{}; + threads.push_back(std::thread(ResizeMaxCapacitiesThread, std::ref(ctx), + std::ref(drain_handler), + absl::MakeSpan(max_cap_slabs_array))); + + // Keep track of old slabs so we can free the memory. We technically could + // have a sleeping StressThread access any of the old slabs, but it's very + // inefficient to keep all the old slabs around so we just keep 100. + std::array, 100> old_slabs_arr{}; + if (resize) { + threads.push_back(std::thread(ResizeSlabsThread, std::ref(ctx), + std::ref(drain_handler), + absl::MakeSpan(old_slabs_arr))); + } + if (pin_cpu) { + // Regression test for a livelock when a thread keeps running on cpu 0. + absl::SleepFor(absl::Seconds(1)); + CpuSet cpus; + cpus.Zero(); + cpus.Set(0); + (void)cpus.SetAffinity(0); + absl::SleepFor(absl::Seconds(1)); + } else { + absl::SleepFor(absl::Seconds(5)); } - absl::SleepFor(absl::Seconds(5)); stop = true; for (auto& t : threads) { t.join(); } - // Collect objects and capacity from all slabs. 
- std::set objects; - struct Context { - std::set* objects; - std::atomic* capacity; - }; - Context ctx = {&objects, &capacity}; - for (int cpu = 0; cpu < absl::base_internal::NumCPUs(); ++cpu) { - slab.Drain(cpu, &ctx, - [](void* arg, size_t cl, void** batch, size_t n, size_t cap) { - Context* ctx = static_cast(arg); - for (size_t i = 0; i < n; ++i) { - ctx->objects->insert(batch[i]); - } - ctx->capacity->fetch_add(cap); - }); - for (size_t cl = 0; cl < kStressSlabs; ++cl) { - EXPECT_EQ(slab.Length(cpu, cl), 0); - EXPECT_EQ(slab.Capacity(cpu, cl), 0); + for (int cpu = 0; cpu < num_cpus; ++cpu) { + slab.Drain(cpu, drain_handler); + for (size_t size_class = 1; size_class < kStressSlabs; ++size_class) { + EXPECT_EQ(slab.Length(cpu, size_class), 0); + EXPECT_EQ(slab.Capacity(cpu, size_class), 0); } } for (const auto& b : blocks) { @@ -703,16 +1086,39 @@ TEST(TcmallocSlab, Stress) { } EXPECT_EQ(objects.size(), blocks.size() * kStressCapacity); EXPECT_EQ(capacity.load(), kTotalCapacity); - slab.Destroy(free); + void* deleted_slabs = slab.Destroy(sized_aligned_delete); + + for (const auto& [old_slabs, old_slabs_size] : max_cap_slabs_array) { + if (old_slabs == nullptr || old_slabs == deleted_slabs) continue; + + mprotect(old_slabs, old_slabs_size, PROT_READ | PROT_WRITE); + sized_aligned_delete(old_slabs, old_slabs_size, + std::align_val_t{EXEC_PAGESIZE}); + } + + for (const auto& [old_slabs, old_slabs_size] : old_slabs_arr) { + if (old_slabs == nullptr || old_slabs == deleted_slabs) continue; + + mprotect(old_slabs, old_slabs_size, PROT_READ | PROT_WRITE); + sized_aligned_delete(old_slabs, old_slabs_size, + std::align_val_t{EXEC_PAGESIZE}); + } } +INSTANTIATE_TEST_SUITE_P( + Group, StressThreadTest, testing::Combine(testing::Bool(), testing::Bool()), + [](const testing::TestParamInfo info) { + return std::string(std::get<0>(info.param) ? "" : "No") + "Resize_" + + (std::get<1>(info.param) ? "" : "No") + "Pin"; + }); + TEST(TcmallocSlab, SMP) { // For the other tests here to be meaningful, we need multiple cores. - ASSERT_GT(absl::base_internal::NumCPUs(), 1); + ASSERT_GT(NumCPUs(), 1); } #if ABSL_INTERNAL_HAVE_ELF_SYMBOLIZE -static int FilterElfHeader(struct dl_phdr_info* info, size_t size, void* data) { +int FilterElfHeader(struct dl_phdr_info* info, size_t size, void* data) { *reinterpret_cast(data) = reinterpret_cast(info->dlpi_addr); // No further iteration wanted. @@ -726,6 +1132,10 @@ TEST(TcmallocSlab, CriticalSectionMetadata) { GTEST_SKIP() << "--gc-sections cannot be inhibited on this compiler."; #endif +#if !TCMALLOC_INTERNAL_PERCPU_USE_RSEQ + GTEST_SKIP() << "rseq is not enabled in this build."; +#endif + // We expect that restartable sequence critical sections (rseq_cs) are in the // __rseq_cs section (by convention, not hard requirement). Additionally, for // each entry in that section, there should be a pointer to it in @@ -794,57 +1204,78 @@ TEST(TcmallocSlab, CriticalSectionMetadata) { #endif } -static void BM_PushPop(benchmark::State& state) { - CHECK_CONDITION(IsFast()); - RunOnSingleCpu([&](int this_cpu) { - const int kBatchSize = 32; - TcmallocSlab slab; +void BM_PushPop(benchmark::State& state) { + TC_CHECK(IsFast()); + constexpr int kCpu = 0; + constexpr size_t kSizeClass = 0; + // Fake being on the given CPU. This allows Grow to succeed for + // kCpu/kSizeClass, and then we Push/Pop repeatedly on kCpu/kSizeClass. + // Note that no other thread has access to `slab` so we don't need to worry + // about races. 
+ ScopedFakeCpuId fake_cpu_id(kCpu); + constexpr int kBatchSize = 32; + TcmallocSlab slab; #pragma GCC diagnostic ignored "-Wnonnull" - slab.Init( - allocator, [](size_t cl) -> size_t { return kBatchSize; }, false, - kShift); - - CHECK_CONDITION(slab.Grow(this_cpu, 0, kBatchSize, kBatchSize) == - kBatchSize); - void* batch[kBatchSize]; - for (int i = 0; i < kBatchSize; i++) { - batch[i] = &batch[i]; + const auto get_capacity = [](size_t size_class) -> size_t { + return kBatchSize; + }; + InitSlab(slab, allocator, get_capacity, kShift); + for (int cpu = 0, n = NumCPUs(); cpu < n; ++cpu) { + slab.InitCpu(cpu, get_capacity); + } + auto [cpu, _] = slab.CacheCpuSlab(); + TC_CHECK_EQ(cpu, kCpu); + + TC_CHECK_EQ(slab.Grow(kCpu, kSizeClass, kBatchSize, + [](uint8_t shift) { return kBatchSize; }), + kBatchSize); + void* batch[kBatchSize]; + for (int i = 0; i < kBatchSize; i++) { + batch[i] = &batch[i]; + } + for (auto _ : state) { + for (size_t x = 0; x < kBatchSize; x++) { + TC_CHECK(slab.Push(kSizeClass, batch[x])); } - for (auto _ : state) { - for (size_t x = 0; x < kBatchSize; x++) { - CHECK_CONDITION(slab.Push(0, batch[x], ExpectNoOverflow)); - } - for (size_t x = 0; x < kBatchSize; x++) { - CHECK_CONDITION(slab.Pop(0, ExpectNoUnderflow) == - batch[kBatchSize - x - 1]); - } + for (size_t x = 0; x < kBatchSize; x++) { + TC_CHECK(slab.Pop(kSizeClass) == batch[kBatchSize - x - 1]); } - return true; - }); + } } BENCHMARK(BM_PushPop); -static void BM_PushPopBatch(benchmark::State& state) { - CHECK_CONDITION(IsFast()); - RunOnSingleCpu([&](int this_cpu) { - const int kBatchSize = 32; - TcmallocSlab slab; - slab.Init( - allocator, [](size_t cl) -> size_t { return kBatchSize; }, false, - kShift); - CHECK_CONDITION(slab.Grow(this_cpu, 0, kBatchSize, kBatchSize) == - kBatchSize); - void* batch[kBatchSize]; - for (int i = 0; i < kBatchSize; i++) { - batch[i] = &batch[i]; - } - for (auto _ : state) { - CHECK_CONDITION(slab.PushBatch(0, batch, kBatchSize) == kBatchSize); - CHECK_CONDITION(slab.PopBatch(0, batch, kBatchSize) == kBatchSize); - } - return true; - }); +void BM_PushPopBatch(benchmark::State& state) { + TC_CHECK(IsFast()); + constexpr int kCpu = 0; + constexpr size_t kSizeClass = 0; + // Fake being on the given CPU. This allows Grow to succeed for + // kCpu/kSizeClass, and then we Push/PopBatch repeatedly on kCpu/kSizeClass. + // Note that no other thread has access to `slab` so we don't need to worry + // about races. 
+ ScopedFakeCpuId fake_cpu_id(kCpu); + constexpr int kBatchSize = 32; + TcmallocSlab slab; + const auto get_capacity = [](size_t size_class) -> size_t { + return kBatchSize; + }; + InitSlab(slab, allocator, get_capacity, kShift); + for (int cpu = 0, n = NumCPUs(); cpu < n; ++cpu) { + slab.InitCpu(cpu, get_capacity); + } + auto [cpu, _] = slab.CacheCpuSlab(); + TC_CHECK_EQ(cpu, kCpu); + TC_CHECK_EQ(slab.Grow(kCpu, kSizeClass, kBatchSize, + [](uint8_t shift) { return kBatchSize; }), + kBatchSize); + void* batch[kBatchSize]; + for (int i = 0; i < kBatchSize; i++) { + batch[i] = &batch[i]; + } + for (auto _ : state) { + TC_CHECK_EQ(slab.PushBatch(kSizeClass, batch, kBatchSize), kBatchSize); + TC_CHECK_EQ(slab.PopBatch(kSizeClass, batch, kBatchSize), kBatchSize); + } } BENCHMARK(BM_PushPopBatch); diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_test.cc new file mode 100644 index 000000000000..64488e7b43be --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_test.cc @@ -0,0 +1,74 @@ +// Copyright 2024 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/percpu.h" + +#include + +#include +#include + +#include "gtest/gtest.h" +#include "absl/base/attributes.h" +#include "absl/log/absl_check.h" +#include "absl/time/time.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/testing/testutil.h" + +namespace tcmalloc::tcmalloc_internal::subtle::percpu { +namespace { + +ABSL_CONST_INIT std::atomic alarms{0}; + +void sa_alrm(int sig) { + alarms.fetch_add(1, std::memory_order_relaxed); + TC_CHECK(IsFast()); +} + +TEST(PerCpu, SignalHandling) { + if (!IsFast()) { + GTEST_SKIP() << "per-CPU unavailable"; + } + + struct sigaction sig; + memset(&sig, 0, sizeof(sig)); // sa_flags == 0 => SA_RESTART not set + sig.sa_handler = sa_alrm; + ABSL_CHECK_EQ(sigaction(SIGALRM, &sig, nullptr), + 0); // install signal handler + + constexpr absl::Duration interval = absl::Microseconds(1); + struct timeval timeval = absl::ToTimeval(interval); + + struct itimerval signal_interval; + signal_interval.it_value = timeval; + signal_interval.it_interval = timeval; + + setitimer(ITIMER_REAL, &signal_interval, nullptr); + + for (int i = 0; i < 100000; ++i) { + UnregisterRseq(); + TC_CHECK(IsFast()); + } + + timeval = absl::ToTimeval(absl::ZeroDuration()); + signal_interval.it_value = timeval; + signal_interval.it_interval = timeval; + + setitimer(ITIMER_REAL, &signal_interval, nullptr); + + EXPECT_GT(alarms.load(std::memory_order_relaxed), 0); +} + +} // namespace +} // namespace tcmalloc::tcmalloc_internal::subtle::percpu diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/prefetch.h b/contrib/libs/tcmalloc/tcmalloc/internal/prefetch.h new file mode 100644 index 000000000000..47aaa6dd0799 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/prefetch.h @@ -0,0 +1,116 @@ +#pragma clang system_header +// Copyright 2022 The TCMalloc Authors +// +// Licensed under the Apache 
License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_PREFETCH_H_ +#define TCMALLOC_INTERNAL_PREFETCH_H_ + +#include + +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Move data into the cache before it is read, or "prefetch" it. +// +// The value of `addr` is the address of the memory to prefetch. If +// the target and compiler support it, data prefetch instructions are +// generated. If the prefetch is done some time before the memory is +// read, it may be in the cache by the time the read occurs. +// +// The function names specify the temporal locality heuristic applied, +// using the names of Intel prefetch instructions: +// +// T0 - high degree of temporal locality; data should be left in as +// many levels of the cache possible +// T1 - moderate degree of temporal locality +// T2 - low degree of temporal locality +// Nta - no temporal locality, data need not be left in the cache +// after the read +// W - prefetch data in preparation for a write; may prefetch data +// to the local CPU and invalidate other cached copies +// +// Incorrect or gratuitous use of these functions can degrade +// performance, so use them only when representative benchmarks show +// an improvement. +// +// Example usage: +// +// tcmalloc_internal::PrefetchT0(addr); +// +#if defined(__GNUC__) +// See __builtin_prefetch: +// https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html. +inline void PrefetchT0(const void* addr) { __builtin_prefetch(addr, 0, 3); } +inline void PrefetchT1(const void* addr) { __builtin_prefetch(addr, 0, 2); } +inline void PrefetchT2(const void* addr) { __builtin_prefetch(addr, 0, 1); } +inline void PrefetchNta(const void* addr) { __builtin_prefetch(addr, 0, 0); } +// Wrappers for prefetch with intent to modify. +// [x86] gcc/clang don't generate PREFETCHW for __builtin_prefetch(.., /*rw=*/1) +// unless -march=broadwell or newer; this is not generally the default, so +// manually emit prefetchw. PREFETCHW is recognized as a no-op on older Intel +// processors and has been present on AMD processors since the K6-2. 
+inline void PrefetchW(const void* addr) { +#if defined(__x86_64__) && !defined(__PRFCHW__) + asm("prefetchw %0" : : "m"(*static_cast(addr))); +#else + __builtin_prefetch(addr, /*rw=*/1, 0); +#endif +} +inline void PrefetchWT0(const void* addr) { +#if defined(__x86_64__) && !defined(__PRFCHW__) + asm("prefetchw %0" : : "m"(*static_cast(addr))); +#else + __builtin_prefetch(addr, 1, 3); +#endif +} +inline void PrefetchWT1(const void* addr) { +#if defined(__x86_64__) && !defined(__PRFCHW__) + asm("prefetchw %0" : : "m"(*static_cast(addr))); +#else + __builtin_prefetch(addr, 1, 2); +#endif +} +inline void PrefetchWT2(const void* addr) { +#if defined(__x86_64__) && !defined(__PRFCHW__) + asm("prefetchw %0" : : "m"(*static_cast(addr))); +#else + __builtin_prefetch(addr, 1, 1); +#endif +} +inline void PrefetchWNta(const void* addr) { +#if defined(__x86_64__) && !defined(__PRFCHW__) + asm("prefetchw %0" : : "m"(*static_cast(addr))); +#else + __builtin_prefetch(addr, 1, 0); +#endif +} +#else +inline void PrefetchT0(const void*) {} +inline void PrefetchT1(const void*) {} +inline void PrefetchT2(const void*) {} +inline void PrefetchNta(const void*) {} +inline void PrefetchWT0(const void*) {} +inline void PrefetchWT1(const void*) {} +inline void PrefetchWT2(const void*) {} +inline void PrefetchWT3(const void*) {} +#endif + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_PREFETCH_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/prefetch_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/prefetch_test.cc new file mode 100644 index 000000000000..2ab36a460d9f --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/prefetch_test.cc @@ -0,0 +1,72 @@ +// Copyright 2022 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "tcmalloc/internal/prefetch.h" + +#include "gtest/gtest.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +int number = 42; + +TEST(Prefetch, TemporalLocalityNone) { + PrefetchNta(&number); + EXPECT_EQ(number, 42); +} + +TEST(Prefetch, TemporalLocalityLow) { + PrefetchT2(&number); + EXPECT_EQ(number, 42); +} + +TEST(Prefetch, TemporalLocalityMedium) { + PrefetchT1(&number); + EXPECT_EQ(number, 42); +} + +TEST(Prefetch, TemporalLocalityHigh) { + PrefetchT0(&number); + EXPECT_EQ(number, 42); +} + +TEST(Prefetch, PrefetchForWrite) { + PrefetchW(&number); + EXPECT_EQ(number, 42); +} + +TEST(Prefetch, WriteTemporalLocalityNone) { + PrefetchWNta(&number); + EXPECT_EQ(number, 42); +} + +TEST(Prefetch, WriteTemporalLocalityLow) { + PrefetchWT2(&number); + EXPECT_EQ(number, 42); +} + +TEST(Prefetch, WriteTemporalLocalityMedium) { + PrefetchWT1(&number); + EXPECT_EQ(number, 42); +} + +TEST(Prefetch, WriteTemporalLocalityHigh) { + PrefetchWT0(&number); + EXPECT_EQ(number, 42); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.cc b/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.cc index 5a5586cfff83..b2af93cec788 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.cc @@ -18,10 +18,12 @@ #include #include +#include #include #include #include "absl/strings/str_format.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" #include "tcmalloc/internal/util.h" @@ -29,26 +31,12 @@ GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { -ProcMapsIterator::ProcMapsIterator(pid_t pid) { Init(pid, nullptr); } - ProcMapsIterator::ProcMapsIterator(pid_t pid, Buffer* buffer) { - Init(pid, buffer); -} - -void ProcMapsIterator::Init(pid_t pid, Buffer* buffer) { if (pid == 0) { pid = getpid(); } pid_ = pid; - if (!buffer) { - // If the user didn't pass in any buffer storage, allocate it - // now. This is the normal case; the signal handler passes in a - // static buffer. - buffer = dynamic_buffer_ = new Buffer; - } else { - dynamic_buffer_ = nullptr; - } ibuf_ = buffer->buf; @@ -64,7 +52,7 @@ void ProcMapsIterator::Init(pid_t pid, Buffer* buffer) { // Use the main thread's "local" view to ensure adequate performance. int path_length = absl::SNPrintF(ibuf_, Buffer::kBufSize, "/proc/%d/task/%d/maps", pid, pid); - CHECK_CONDITION(path_length < Buffer::kBufSize); + TC_CHECK_LT(path_length, Buffer::kBufSize); // No error logging since this can be called from the crash dump // handler at awkward moments. Users should call Valid() before @@ -81,7 +69,6 @@ ProcMapsIterator::~ProcMapsIterator() { // the manpage for close(2), this is widespread yet not fully portable, which // is unfortunate. POSIX explicitly leaves this behavior as unspecified. 
if (fd_ >= 0) close(fd_); - delete dynamic_buffer_; } bool ProcMapsIterator::Valid() const { return fd_ != -1; } diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.h b/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.h index c5c763a1e864..ce68f217fe7b 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,10 +16,11 @@ #ifndef TCMALLOC_INTERNAL_PROC_MAPS_H_ #define TCMALLOC_INTERNAL_PROC_MAPS_H_ -#include #include #include +#include + #include "tcmalloc/internal/config.h" GOOGLE_MALLOC_SECTION_BEGIN @@ -33,12 +35,9 @@ class ProcMapsIterator { char buf[kBufSize]; }; - // Create a new iterator for the specified pid. pid can be 0 for "self". - explicit ProcMapsIterator(pid_t pid); - // Create an iterator with specified storage (for use in signal handler). - // "buffer" should point to a ProcMapsIterator::Buffer buffer can be null in - // which case a buffer will be allocated. + // + // pid can be 0 for "self". ProcMapsIterator(pid_t pid, Buffer* buffer); // Returns true if the iterator successfully initialized; @@ -50,8 +49,6 @@ class ProcMapsIterator { ~ProcMapsIterator(); private: - void Init(pid_t pid, Buffer* buffer); - char* ibuf_; // input buffer char* stext_; // start of text char* etext_; // end of text @@ -60,7 +57,6 @@ class ProcMapsIterator { int fd_; // filehandle on /proc/*/maps pid_t pid_; char flags_[10]; - Buffer* dynamic_buffer_; // dynamically-allocated Buffer }; } // namespace tcmalloc_internal diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/profile.proto b/contrib/libs/tcmalloc/tcmalloc/internal/profile.proto new file mode 100644 index 000000000000..d557a741f327 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/profile.proto @@ -0,0 +1,233 @@ +// Copyright 2016 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Profile is a common stacktrace profile format. +// +// Measurements represented with this format should follow the +// following conventions: +// +// - Consumers should treat unset optional fields as if they had been +// set with their default value. +// +// - When possible, measurements should be stored in "unsampled" form +// that is most useful to humans. There should be enough +// information present to determine the original sampled values. +// +// - On-disk, the serialized proto must be gzip-compressed. +// +// - The profile is represented as a set of samples, where each sample +// references a sequence of locations, and where each location belongs +// to a mapping. +// - There is a N->1 relationship from sample.location_id entries to +// locations. For every sample.location_id entry there must be a +// unique Location with that id. +// - There is an optional N->1 relationship from locations to +// mappings. 
For every nonzero Location.mapping_id there must be a +// unique Mapping with that id. + +syntax = "proto3"; + +package tcmalloc.tcmalloc_internal.perftools.profiles; + +option java_package = "com.google.perftools.profiles"; +option java_outer_classname = "ProfileProto"; + +message Profile { + // A description of the samples associated with each Sample.value. + // For a cpu profile this might be: + // [["cpu","nanoseconds"]] or [["wall","seconds"]] or [["syscall","count"]] + // For a heap profile, this might be: + // [["allocations","count"], ["space","bytes"]], + // If one of the values represents the number of events represented + // by the sample, by convention it should be at index 0 and use + // sample_type.unit == "count". + repeated ValueType sample_type = 1; + // The set of samples recorded in this profile. + repeated Sample sample = 2; + // Mapping from address ranges to the image/binary/library mapped + // into that address range. mapping[0] will be the main binary. + repeated Mapping mapping = 3; + // Locations referenced by samples. + repeated Location location = 4; + // Functions referenced by locations. + repeated Function function = 5; + // A common table for strings referenced by various messages. + // string_table[0] must always be "". + repeated string string_table = 6; + // frames with Function.function_name fully matching the following + // regexp will be dropped from the samples, along with their successors. + int64 drop_frames = 7; // Index into string table. + // frames with Function.function_name fully matching the following + // regexp will be kept, even if it matches drop_frames. + int64 keep_frames = 8; // Index into string table. + + // The following fields are informational, do not affect + // interpretation of results. + + // Time of collection (UTC) represented as nanoseconds past the epoch. + int64 time_nanos = 9; + // Duration of the profile, if a duration makes sense. + int64 duration_nanos = 10; + // The kind of events between sampled occurrences. + // e.g [ "cpu","cycles" ] or [ "heap","bytes" ] + ValueType period_type = 11; + // The number of events between sampled occurrences. + int64 period = 12; + // Free-form text associated with the profile. The text is displayed as is + // to the user by the tools that read profiles (e.g. by pprof). This field + // should not be used to store any machine-readable information, it is only + // for human-friendly content. The profile must stay functional if this field + // is cleaned. + repeated int64 comment = 13; // Indices into string table. + // Index into the string table of the type of the preferred sample + // value. If unset, clients should default to the last sample value. + int64 default_sample_type = 14; + // Documentation link for this profile. The URL must be absolute, + // e.g., http://pprof.example.com/cpu-profile.html + // + // The URL may be missing if the profile was generated by older code or code + // that did not bother to supply a link. + int64 doc_url = 15; // Index into string table. +} + +// ValueType describes the semantics and measurement units of a value. +message ValueType { + int64 type = 1; // Index into string table. + int64 unit = 2; // Index into string table. +} + +// Each Sample records values encountered in some program +// context. The program context is typically a stack trace, perhaps +// augmented with auxiliary information like the thread-id, some +// indicator of a higher level request being handled etc. 
+message Sample { + // The ids recorded here correspond to a Profile.location.id. + // The leaf is at location_id[0]. + repeated uint64 location_id = 1; + // The type and unit of each value is defined by the corresponding + // entry in Profile.sample_type. All samples must have the same + // number of values, the same as the length of Profile.sample_type. + // When aggregating multiple samples into a single sample, the + // result has a list of values that is the element-wise sum of the + // lists of the originals. + repeated int64 value = 2; + // label includes additional context for this sample. It can include + // things like a thread id, allocation size, etc. + // + // NOTE: While possible, having multiple values for the same label key is + // strongly discouraged and should never be used. Most tools (e.g. pprof) do + // not have good (or any) support for multi-value labels. And an even more + // discouraged case is having a string label and a numeric label of the same + // name on a sample. Again, possible to express, but should not be used. + repeated Label label = 3; +} + +message Label { + // Index into string table. An annotation for a sample (e.g. + // "allocation_size") with an associated value. + // Keys with "pprof::" prefix are reserved for internal use by pprof. + int64 key = 1; + + // At most one of the following must be present + int64 str = 2; // Index into string table + int64 num = 3; + + // Should only be present when num is present. + // Specifies the units of num. + // Use arbitrary string (for example, "requests") as a custom count unit. + // If no unit is specified, consumer may apply heuristic to deduce the unit. + // Consumers may also interpret units like "bytes" and "kilobytes" as memory + // units and units like "seconds" and "nanoseconds" as time units, + // and apply appropriate unit conversions to these. + int64 num_unit = 4; // Index into string table +} + +message Mapping { + // Unique nonzero id for the mapping. + uint64 id = 1; + // Address at which the binary (or DLL) is loaded into memory. + uint64 memory_start = 2; + // The limit of the address range occupied by this mapping. + uint64 memory_limit = 3; + // Offset in the binary that corresponds to the first mapped address. + uint64 file_offset = 4; + // The object this entry is loaded from. This can be a filename on + // disk for the main binary and shared libraries, or virtual + // abstractions like "[vdso]". + int64 filename = 5; // Index into string table + // A string that uniquely identifies a particular program version + // with high probability. E.g., for binaries generated by GNU tools, + // it could be the contents of the .note.gnu.build-id field. + int64 build_id = 6; // Index into string table + + // The following fields indicate the resolution of symbolic info. + bool has_functions = 7; + bool has_filenames = 8; + bool has_line_numbers = 9; + bool has_inline_frames = 10; +} + +// Describes function and line table debug information. +message Location { + // Unique nonzero id for the location. A profile could use + // instruction addresses or any integer sequence as ids. + uint64 id = 1; + // The id of the corresponding profile.Mapping for this location. + // It can be unset if the mapping is unknown or not applicable for + // this profile type. + uint64 mapping_id = 2; + // The instruction address for this location, if available. It + // should be within [Mapping.memory_start...Mapping.memory_limit] + // for the corresponding mapping. 
A non-leaf address may be in the + // middle of a call instruction. It is up to display tools to find + // the beginning of the instruction if necessary. + uint64 address = 3; + // Multiple line indicates this location has inlined functions, + // where the last entry represents the caller into which the + // preceding entries were inlined. + // + // E.g., if memcpy() is inlined into printf: + // line[0].function_name == "memcpy" + // line[1].function_name == "printf" + repeated Line line = 4; + // Provides an indication that multiple symbols map to this location's + // address, for example due to identical code folding by the linker. In that + // case the line information above represents one of the multiple + // symbols. This field must be recomputed when the symbolization state of the + // profile changes. + bool is_folded = 5; +} + +message Line { + // The id of the corresponding profile.Function for this line. + uint64 function_id = 1; + // Line number in source code. + int64 line = 2; + // Column number in source code. + int64 column = 3; +} + +message Function { + // Unique nonzero id for the function. + uint64 id = 1; + // Name of the function, in human-readable form if available. + int64 name = 2; // Index into string table + // Name of the function, as identified by the system. + // For instance, it can be a C++ mangled name. + int64 system_name = 3; // Index into string table + // Source file containing the function. + int64 filename = 4; // Index into string table + // Line number in source file. + int64 start_line = 5; +} diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/profile_builder.cc b/contrib/libs/tcmalloc/tcmalloc/internal/profile_builder.cc new file mode 100644 index 000000000000..a74e3c7ff711 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/profile_builder.cc @@ -0,0 +1,927 @@ +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/profile_builder.h" + +#include +#include +#include + +#include "absl/base/casts.h" +#include "absl/container/flat_hash_map.h" +#include "absl/hash/hash.h" +#include "absl/status/statusor.h" +#include "absl/strings/match.h" +#include "absl/strings/str_cat.h" +#include "absl/time/time.h" +#include "absl/types/span.h" +#include "tcmalloc/internal/pageflags.h" +#include "tcmalloc/malloc_extension.h" + +#if defined(__linux__) +#include +#include +#endif // defined(__linux__) +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "tcmalloc/internal/profile.pb.h" +#include "absl/base/attributes.h" +#include "absl/base/macros.h" +#include "absl/status/status.h" +#include "absl/strings/escaping.h" +#include "absl/strings/string_view.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/residency.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +#if defined(__linux__) +// Returns the Phdr of the first segment of the given type. 
+const ElfW(Phdr) * + GetFirstSegment(const dl_phdr_info* const info, const int segment_type) { + for (int i = 0; i < info->dlpi_phnum; ++i) { + if (info->dlpi_phdr[i].p_type == segment_type) { + return &info->dlpi_phdr[i]; + } + } + return nullptr; +} + +// Return DT_SONAME for the given image. If there is no PT_DYNAMIC or if +// PT_DYNAMIC does not contain DT_SONAME, return nullptr. +static const char* GetSoName(const dl_phdr_info* const info) { + const ElfW(Phdr)* const pt_dynamic = GetFirstSegment(info, PT_DYNAMIC); + if (pt_dynamic == nullptr) { + return nullptr; + } + const ElfW(Dyn)* dyn = + reinterpret_cast(info->dlpi_addr + pt_dynamic->p_vaddr); + const ElfW(Dyn)* dt_strtab = nullptr; + const ElfW(Dyn)* dt_strsz = nullptr; + const ElfW(Dyn)* dt_soname = nullptr; + for (; dyn->d_tag != DT_NULL; ++dyn) { + if (dyn->d_tag == DT_SONAME) { + dt_soname = dyn; + } else if (dyn->d_tag == DT_STRTAB) { + dt_strtab = dyn; + } else if (dyn->d_tag == DT_STRSZ) { + dt_strsz = dyn; + } + } + if (dt_soname == nullptr) { + return nullptr; + } + TC_CHECK_NE(dt_strtab, nullptr); + TC_CHECK_NE(dt_strsz, nullptr); + const char* const strtab = + reinterpret_cast(info->dlpi_addr + dt_strtab->d_un.d_val); + TC_CHECK_LT(dt_soname->d_un.d_val, dt_strsz->d_un.d_val); + return strtab + dt_soname->d_un.d_val; +} +#endif // defined(__linux__) + +struct SampleMergedData { + int64_t count = 0; + int64_t sum = 0; + std::optional resident_size; + std::optional swapped_size; + std::optional stale_size; + std::optional locked_size; + std::optional stale_scan_period; +}; + +// The equality and hash methods of Profile::Sample only use a subset of its +// member fields. +struct SampleEqWithSubFields { + bool operator()(const Profile::Sample& a, const Profile::Sample& b) const { + auto fields = [](const Profile::Sample& s) { + return std::tie(s.depth, s.requested_size, s.requested_alignment, + s.requested_size_returning, s.allocated_size, + s.access_hint, s.access_allocated, s.guarded_status, + s.type); + }; + return fields(a) == fields(b) && + std::equal(a.stack, a.stack + a.depth, b.stack, b.stack + b.depth); + } +}; + +struct SampleHashWithSubFields { + size_t operator()(const Profile::Sample& s) const { + return absl::HashOf( + absl::MakeConstSpan(s.stack, s.depth), s.depth, s.requested_size, + s.requested_alignment, s.requested_size_returning, s.allocated_size, + s.access_hint, s.access_allocated, s.guarded_status, s.type); + } +}; + +using SampleMergedMap = + absl::flat_hash_map; + +SampleMergedMap MergeProfileSamplesAndMaybeGetResidencyInfo( + const tcmalloc::Profile& profile, PageFlagsBase* pageflags, + Residency* residency) { + SampleMergedMap map; + + profile.Iterate([&](const tcmalloc::Profile::Sample& entry) { + SampleMergedData& data = map[entry]; + data.count += entry.count; + data.sum += entry.sum; + if (residency) { + auto residency_info = + residency->Get(entry.span_start_address, entry.allocated_size); + // As long as `residency_info` provides data in some samples, the merged + // data will have their sums. + // NOTE: The data here is comparable to `tcmalloc::Profile::Sample::sum`, + // not to `tcmalloc::Profile::Sample::requested_size` (it's pre-multiplied + // by count and represents all of the resident memory). 
+ if (residency_info.has_value()) { + size_t resident_size = entry.count * residency_info->bytes_resident; + size_t swapped_size = entry.count * residency_info->bytes_swapped; + if (!data.resident_size.has_value()) { + data.resident_size = resident_size; + data.swapped_size = swapped_size; + } else { + data.resident_size.value() += resident_size; + data.swapped_size.value() += swapped_size; + } + } + } + + if (pageflags) { + auto page_stats = + pageflags->Get(entry.span_start_address, entry.allocated_size); + if (page_stats.has_value()) { + if (!data.stale_size.has_value()) { + data.stale_size.emplace(); + } + data.stale_size.value() += entry.count * page_stats->bytes_stale; + + if (!data.locked_size.has_value()) { + data.locked_size.emplace(); + } + data.locked_size.value() += entry.count * page_stats->bytes_locked; + + if (!data.stale_scan_period.has_value()) { + data.stale_scan_period = page_stats->stale_scan_seconds; + } else if (*data.stale_scan_period != page_stats->stale_scan_seconds) { + // multiple values for stale_scan_seconds, so we don't know what it + // is; explicitly set to the default of zero. + data.stale_scan_period = 0; + } + } + } + }); + return map; +} + +} // namespace + +#if defined(__linux__) +// Extracts the linker provided build ID from the PT_NOTE segment found in info. +// +// On failure, returns an empty string. +std::string GetBuildId(const dl_phdr_info* const info) { + std::string result; + + // pt_note contains entries (of type ElfW(Nhdr)) starting at + // info->dlpi_addr + pt_note->p_vaddr + // with length + // pt_note->p_memsz + // + // The length of each entry is given by + // Align(sizeof(ElfW(Nhdr)) + nhdr->n_namesz) + Align(nhdr->n_descsz) + for (int i = 0; i < info->dlpi_phnum; ++i) { + const ElfW(Phdr)* pt_note = &info->dlpi_phdr[i]; + if (pt_note->p_type != PT_NOTE) continue; + + const char* note = + reinterpret_cast(info->dlpi_addr + pt_note->p_vaddr); + const char* const last = note + pt_note->p_filesz; + const ElfW(Word) align = pt_note->p_align; + while (note < last) { + const ElfW(Nhdr)* const nhdr = reinterpret_cast(note); + if (note + sizeof(*nhdr) > last) { + // Corrupt PT_NOTE + break; + } + + // Both the start and end of the descriptor are aligned by sh_addralign + // (= p_align). + const ElfW(Word) desc_start = + (sizeof(*nhdr) + nhdr->n_namesz + align - 1) & -align; + const ElfW(Word) size = + desc_start + ((nhdr->n_descsz + align - 1) & -align); + + // Beware of wrap-around. + if (nhdr->n_namesz >= static_cast(-align) || + nhdr->n_descsz >= static_cast(-align) || + desc_start < sizeof(*nhdr) || size < desc_start || + size > last - note) { + // Corrupt PT_NOTE + break; + } + + if (nhdr->n_type == NT_GNU_BUILD_ID) { + const char* const note_name = note + sizeof(*nhdr); + // n_namesz is the length of note_name. + if (nhdr->n_namesz == 4 && memcmp(note_name, "GNU\0", 4) == 0) { + if (!result.empty()) { + // Repeated build-ids. Ignore them. + return ""; + } + result = absl::BytesToHexString( + absl::string_view(note + desc_start, nhdr->n_descsz)); + } + } + note += size; + } + } + + return result; +} +#endif // defined(__linux__) + +ABSL_CONST_INIT const absl::string_view kProfileDropFrames = + // POSIX entry points. + "calloc|" + "cfree|" + "malloc|" + "free|" + "memalign|" + "do_memalign|" + "(__)?posix_memalign|" + "pvalloc|" + "valloc|" + "realloc|" + "aligned_alloc|" + "sdallocx|" + + // TCMalloc. 
+ "tcmalloc::.*|" + "TCMallocInternalCalloc|" + "TCMallocInternalCfree|" + "TCMallocInternalMalloc|" + "TCMallocInternalFree|" + "TCMallocInternalMemalign|" + "TCMallocInternalAlignedAlloc|" + "TCMallocInternalPosixMemalign|" + "TCMallocInternalPvalloc|" + "TCMallocInternalValloc|" + "TCMallocInternalRealloc|" + "TCMallocInternalNew(Array)?(Aligned)?(Nothrow)?|" + "TCMallocInternalDelete(Array)?(Sized)?(Aligned)?(Nothrow)?|" + "TCMallocInternalSdallocx|" + "(tcmalloc_)?size_returning_operator_new(_hot_cold)?(_nothrow)?|" + + // Lifetime (deallocation) profiler routines. + ".*deallocationz::DeallocationProfiler.*|" + + // libstdc++ memory allocation routines + "__gnu_cxx::new_allocator::allocate|" + "__gnu_cxx::new_allocator::deallocate|" + "__malloc_alloc_template::allocate|" + "std::__new_allocator::allocate|" + "std::__new_allocator::deallocate|" + "_M_allocate|" + + // libc++ memory allocation routines + "std::__(u|1)::__libcpp_allocate|" + "std::__(u|1)::__libcpp_deallocate|" + "std::__(u|1)::__libcpp_operator_new|" + "std::__(u|1)::__libcpp_operator_delete|" + "std::__(u|1)::allocator::allocate|" + "std::__(u|1)::allocator::deallocate|" + "std::__(u|1)::allocator_traits::allocate|" + "std::__(u|1)::allocator_traits::deallocate|" + "std::__(u|1)::__builtin_new_allocator::__allocate_bytes|" + "std::__(u|1)::__do_deallocate_handle_size|" + "std::__(u|1)::__allocate_at_least|" + "std::__(u|1)::__allocation_guard::(~)?__allocation_guard|" + "std::__(u|1)::__split_buffer::(~)?__split_buffer|" + + // Other misc. memory allocation routines + "(::)?do_malloc_pages|" + "(::)?do_realloc|" + "__builtin_(vec_)?delete|" + "__builtin_(vec_)?new|" + "__libc_calloc|" + "__libc_malloc|" + "__libc_memalign|" + "__libc_realloc|" + "__size_returning_new|" + "__size_returning_new_hot_cold|" + "__size_returning_new_aligned|" + "__size_returning_new_aligned_hot_cold|" + "slow_alloc|" + "fast_alloc|" + "AllocSmall|" + "operator new|" + "operator delete"; + +ProfileBuilder::ProfileBuilder() + : profile_(std::make_unique()) { + // string_table[0] must be "" + profile_->add_string_table(""); +} + +int ProfileBuilder::InternString(absl::string_view sv) { + if (sv.empty()) { + return 0; + } + + const int index = profile_->string_table_size(); + const auto inserted = strings_.emplace(sv, index); + if (!inserted.second) { + // Failed to insert -- use existing id. + return inserted.first->second; + } + profile_->add_string_table(inserted.first->first); + return index; +} + +int ProfileBuilder::InternLocation(const void* ptr) { + uintptr_t address = absl::bit_cast(ptr); + + // Avoid assigning location ID 0 by incrementing by 1. + const int index = profile_->location_size() + 1; + const auto inserted = locations_.emplace(address, index); + if (!inserted.second) { + // Failed to insert -- use existing id. + return inserted.first->second; + } + perftools::profiles::Location& location = *profile_->add_location(); + TC_ASSERT_EQ(inserted.first->second, index); + location.set_id(index); + location.set_address(address); + + if (mappings_.empty()) { + return index; + } + + // Find the mapping ID. + auto it = mappings_.upper_bound(address); + if (it != mappings_.begin()) { + --it; + } + + // If *it contains address, add mapping to location. 
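// The lines above use the standard upper_bound-then-step-back pattern to find
// the mapping with the greatest start address that is <= address; the check
// just below then confirms the address also falls before the mapping's limit.
// A minimal standalone sketch of the lookup (illustration only; std::map
// stands in for the builder's absl::btree_map):

#include <cstdint>
#include <map>

int FindEnclosingMappingIndex(const std::map<uintptr_t, int>& mappings,
                              uintptr_t address, int not_found) {
  auto it = mappings.upper_bound(address);  // first start strictly > address
  if (it == mappings.begin()) {
    return not_found;  // address precedes every known mapping
  }
  --it;  // greatest start <= address; the caller still checks the end address
  return it->second;
}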
+ const int mapping_index = it->second; + const perftools::profiles::Mapping& mapping = + profile_->mapping(mapping_index); + const int mapping_id = mapping.id(); + TC_ASSERT(it->first == mapping.memory_start()); + + if (it->first <= address && address < mapping.memory_limit()) { + location.set_mapping_id(mapping_id); + } + + return index; +} + +void ProfileBuilder::InternCallstack(absl::Span stack, + perftools::profiles::Sample& sample) { + // Profile addresses are raw stack unwind addresses, so they should be + // adjusted by -1 to land inside the call instruction (although potentially + // misaligned). + for (const void* frame : stack) { + int id = InternLocation( + absl::bit_cast(absl::bit_cast(frame) - 1)); + sample.add_location_id(id); + } + TC_ASSERT_EQ(sample.location_id().size(), stack.size()); +} + +void ProfileBuilder::AddCurrentMappings() { +#if defined(__linux__) + auto dl_iterate_callback = +[](dl_phdr_info* info, size_t size, void* data) { + // Skip dummy entry introduced since glibc 2.18. + if (info->dlpi_phdr == nullptr && info->dlpi_phnum == 0) { + return 0; + } + + ProfileBuilder& builder = *static_cast(data); + const bool is_main_executable = builder.profile_->mapping_size() == 0; + + // Evaluate all the loadable segments. + for (int i = 0; i < info->dlpi_phnum; ++i) { + if (info->dlpi_phdr[i].p_type != PT_LOAD) { + continue; + } + const ElfW(Phdr)* pt_load = &info->dlpi_phdr[i]; + + TC_CHECK_NE(pt_load, nullptr); + + // Extract data. + const size_t memory_start = info->dlpi_addr + pt_load->p_vaddr; + const size_t memory_limit = memory_start + pt_load->p_memsz; + const size_t file_offset = pt_load->p_offset; + + // Storage for path to executable as dlpi_name isn't populated for the + // main executable. +1 to allow for the null terminator that readlink + // does not add. + char self_filename[PATH_MAX + 1]; + const char* filename = info->dlpi_name; + if (filename == nullptr || filename[0] == '\0') { + // This is either the main executable or the VDSO. The main executable + // is always the first entry processed by callbacks. + if (is_main_executable) { + // This is the main executable. + ssize_t ret = readlink("/proc/self/exe", self_filename, + sizeof(self_filename) - 1); + if (ret >= 0 && ret < sizeof(self_filename)) { + self_filename[ret] = '\0'; + filename = self_filename; + } + } else { + // This is the VDSO. + filename = GetSoName(info); + } + } + + char resolved_path[PATH_MAX]; + absl::string_view resolved_filename; + if (realpath(filename, resolved_path)) { + resolved_filename = resolved_path; + } else { + resolved_filename = filename; + } + + const std::string build_id = GetBuildId(info); + + // Add to profile. + builder.AddMapping(memory_start, memory_limit, file_offset, + resolved_filename, build_id); + } + // Keep going. 
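// "Keep going" relies on the dl_iterate_phdr() contract: iteration continues
// while the callback returns 0 and stops at the first non-zero return. A
// minimal standalone sketch of that contract (illustration only; Linux/glibc
// is assumed, matching the surrounding #if defined(__linux__) block):

#include <link.h>

int CountLoadedObjects() {
  int count = 0;
  dl_iterate_phdr(
      +[](dl_phdr_info* /*info*/, size_t /*size*/, void* data) {
        ++*static_cast<int*>(data);
        return 0;  // a non-zero return would end the walk early
      },
      &count);
  return count;
}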
+ return 0; + }; + + dl_iterate_phdr(dl_iterate_callback, this); +#endif // defined(__linux__) +} + +int ProfileBuilder::AddMapping(uintptr_t memory_start, uintptr_t memory_limit, + uintptr_t file_offset, + absl::string_view filename, + absl::string_view build_id) { + perftools::profiles::Mapping& mapping = *profile_->add_mapping(); + const int mapping_id = profile_->mapping_size(); + mapping.set_id(mapping_id); + mapping.set_memory_start(memory_start); + mapping.set_memory_limit(memory_limit); + mapping.set_file_offset(file_offset); + mapping.set_filename(InternString(filename)); + mapping.set_build_id(InternString(build_id)); + + mappings_.emplace(memory_start, mapping_id - 1); + return mapping_id; +} + +static absl::Status MakeLifetimeProfileProto(const tcmalloc::Profile& profile, + ProfileBuilder* builder) { + TC_CHECK_NE(builder, nullptr); + perftools::profiles::Profile& converted = builder->profile(); + perftools::profiles::ValueType* period_type = converted.mutable_period_type(); + + period_type->set_type(builder->InternString("space")); + period_type->set_unit(builder->InternString("bytes")); + + for (const auto& [type, unit] : {std::pair{"allocated_objects", "count"}, + {"allocated_space", "bytes"}, + {"deallocated_objects", "count"}, + {"deallocated_space", "bytes"}, + {"censored_allocated_objects", "count"}, + {"censored_allocated_space", "bytes"}}) { + perftools::profiles::ValueType* sample_type = converted.add_sample_type(); + sample_type->set_type(builder->InternString(type)); + sample_type->set_unit(builder->InternString(unit)); + } + + converted.set_default_sample_type(builder->InternString("deallocated_space")); + converted.set_duration_nanos(absl::ToInt64Nanoseconds(profile.Duration())); + + if (auto start = profile.StartTime(); start.has_value()) { + converted.set_time_nanos(absl::ToUnixNanos(*start)); + } + + converted.set_drop_frames(builder->InternString(kProfileDropFrames)); + + // Common intern string ids which are going to be used for each sample. 
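// Hoisting these InternString() calls out of the per-sample loop is purely an
// optimization: interning is idempotent, so repeated calls with the same
// string return the same index and add no duplicate table entries. A minimal
// standalone sketch of that property (illustration only; the absl CHECK
// macros are assumed to be available here):

#include "absl/log/check.h"

void CheckInterningIsIdempotent(ProfileBuilder& builder) {
  const int first = builder.InternString("bytes");
  const int second = builder.InternString("bytes");
  CHECK_EQ(first, second);                // same id both times
  CHECK_EQ(builder.InternString(""), 0);  // "" is always string_table[0]
}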
+ const int count_id = builder->InternString("count"); + const int bytes_id = builder->InternString("bytes"); + const int request_id = builder->InternString("request"); + const int alignment_id = builder->InternString("alignment"); + const int nanoseconds_id = builder->InternString("nanoseconds"); + const int avg_lifetime_id = builder->InternString("avg_lifetime"); + const int stddev_lifetime_id = builder->InternString("stddev_lifetime"); + const int min_lifetime_id = builder->InternString("min_lifetime"); + const int max_lifetime_id = builder->InternString("max_lifetime"); + const int active_cpu_id = builder->InternString("active CPU"); + const int active_vcpu_id = builder->InternString("active vCPU"); + const int active_l3_id = builder->InternString("active L3"); + const int active_numa_id = builder->InternString("active NUMA"); + const int same_id = builder->InternString("same"); + const int different_id = builder->InternString("different"); + const int active_thread_id = builder->InternString("active thread"); + const int callstack_pair_id = builder->InternString("callstack-pair-id"); + const int none_id = builder->InternString("none"); + + profile.Iterate([&](const tcmalloc::Profile::Sample& entry) { + perftools::profiles::Sample& sample = *converted.add_sample(); + + TC_CHECK_LE(entry.depth, ABSL_ARRAYSIZE(entry.stack)); + builder->InternCallstack(absl::MakeSpan(entry.stack, entry.depth), sample); + + auto add_label = [&](int key, int unit, size_t value) { + perftools::profiles::Label& label = *sample.add_label(); + label.set_key(key); + label.set_num(value); + label.set_num_unit(unit); + }; + + auto add_positive_label = [&](int key, int unit, size_t value) { + if (value <= 0) return; + add_label(key, unit, value); + }; + + auto add_optional_string_label = + [&](int key, const std::optional& optional_result, int result1, + int result2) { + perftools::profiles::Label& label = *sample.add_label(); + label.set_key(key); + + if (!optional_result.has_value()) { + label.set_str(none_id); + } else if (optional_result.value()) { + label.set_str(result1); + } else { + label.set_str(result2); + } + }; + + // The following three fields are common across profiles. + add_positive_label(bytes_id, bytes_id, entry.allocated_size); + add_positive_label(request_id, bytes_id, entry.requested_size); + add_positive_label(alignment_id, bytes_id, entry.requested_alignment); + + // The following fields are specific to lifetime (deallocation) profiler. 
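// The lifetime-specific labels below use the numeric form emitted by
// add_label()/add_positive_label() (key + num + num_unit), while the
// *_matched fields use the string form via add_optional_string_label(). For
// reference, a minimal standalone sketch of the string-valued variant on its
// own (illustration only; assumes the generated profile.pb.h is included, as
// elsewhere in this file). Every field here is an index into the string table:

void AddStringLabel(perftools::profiles::Sample& sample, int key_id,
                    int value_id) {
  perftools::profiles::Label& label = *sample.add_label();
  label.set_key(key_id);    // e.g. "active CPU"
  label.set_str(value_id);  // e.g. "same", "different" or "none"
}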
+ add_positive_label(callstack_pair_id, count_id, entry.profile_id); + add_positive_label(avg_lifetime_id, nanoseconds_id, + absl::ToInt64Nanoseconds(entry.avg_lifetime)); + add_positive_label(stddev_lifetime_id, nanoseconds_id, + absl::ToInt64Nanoseconds(entry.stddev_lifetime)); + add_positive_label(min_lifetime_id, nanoseconds_id, + absl::ToInt64Nanoseconds(entry.min_lifetime)); + add_positive_label(max_lifetime_id, nanoseconds_id, + absl::ToInt64Nanoseconds(entry.max_lifetime)); + + add_optional_string_label(active_cpu_id, + entry.allocator_deallocator_physical_cpu_matched, + same_id, different_id); + add_optional_string_label(active_vcpu_id, + entry.allocator_deallocator_virtual_cpu_matched, + same_id, different_id); + add_optional_string_label(active_l3_id, + entry.allocator_deallocator_l3_matched, same_id, + different_id); + add_optional_string_label(active_numa_id, + entry.allocator_deallocator_numa_matched, same_id, + different_id); + add_optional_string_label(active_thread_id, + entry.allocator_deallocator_thread_matched, + same_id, different_id); + + int64_t count = abs(entry.count); + int64_t weight = entry.sum; + + // Handle censored allocations first since we distinguish + // the samples based on the is_censored flag. + if (entry.is_censored) { + sample.add_value(0); + sample.add_value(0); + sample.add_value(0); + sample.add_value(0); + sample.add_value(count); + sample.add_value(weight); + } else if (entry.count > 0) { // for allocation, e.count is positive + sample.add_value(count); + sample.add_value(weight); + sample.add_value(0); + sample.add_value(0); + sample.add_value(0); + sample.add_value(0); + } else { // for deallocation, e.count is negative + sample.add_value(0); + sample.add_value(0); + sample.add_value(count); + sample.add_value(weight); + sample.add_value(0); + sample.add_value(0); + } + }); + return absl::OkStatus(); +} + +std::unique_ptr ProfileBuilder::Finalize() && { + return std::move(profile_); +} + +absl::StatusOr> MakeProfileProto( + const ::tcmalloc::Profile& profile, PageFlagsBase* pageflags, + Residency* residency) { + if (profile.Type() == ProfileType::kDoNotUse) { +#if defined(ABSL_HAVE_ADDRESS_SANITIZER) || \ + defined(ABSL_HAVE_LEAK_SANITIZER) || \ + defined(ABSL_HAVE_MEMORY_SANITIZER) || defined(ABSL_HAVE_THREAD_SANITIZER) + return absl::UnimplementedError( + "Program was built with sanitizers enabled, which do not support heap " + "profiling"); +#else + return absl::InvalidArgumentError( + "Empty heap profile: TCMalloc appears disabled or unavailable (e.g. 
a " + "custom allocator may be linked)"); +#endif + } + + ProfileBuilder builder; + builder.AddCurrentMappings(); + + if (profile.Type() == ProfileType::kLifetimes) { + absl::Status error = MakeLifetimeProfileProto(profile, &builder); + if (!error.ok()) { + return error; + } + return std::move(builder).Finalize(); + } + + const int alignment_id = builder.InternString("alignment"); + const int bytes_id = builder.InternString("bytes"); + const int count_id = builder.InternString("count"); + const int objects_id = builder.InternString("objects"); + const int request_id = builder.InternString("request"); + const int size_returning_id = builder.InternString("size_returning"); + const int stale_scan_period_id = builder.InternString("stale_scan_period"); + const int seconds_id = builder.InternString("seconds"); + const int space_id = builder.InternString("space"); + const int resident_space_id = builder.InternString("resident_space"); + const int swapped_space_id = builder.InternString("swapped_space"); + const int stale_space_id = builder.InternString("stale_space"); + const int locked_space_id = builder.InternString("locked_space"); + const int access_hint_id = builder.InternString("access_hint"); + const int access_allocated_id = builder.InternString("access_allocated"); + const int cold_id = builder.InternString("cold"); + const int hot_id = builder.InternString("hot"); + const int allocation_type_id = builder.InternString("allocation type"); + const int new_id = builder.InternString("new"); + const int malloc_id = builder.InternString("malloc"); + const int aligned_malloc_id = builder.InternString("aligned malloc"); + + perftools::profiles::Profile& converted = builder.profile(); + + perftools::profiles::ValueType& period_type = + *converted.mutable_period_type(); + period_type.set_type(space_id); + period_type.set_unit(bytes_id); + converted.set_drop_frames(builder.InternString(kProfileDropFrames)); + converted.set_duration_nanos(absl::ToInt64Nanoseconds(profile.Duration())); + + if (auto start = profile.StartTime(); start.has_value()) { + converted.set_time_nanos(absl::ToUnixNanos(*start)); + } + + { + perftools::profiles::ValueType& sample_type = *converted.add_sample_type(); + sample_type.set_type(objects_id); + sample_type.set_unit(count_id); + } + + { + perftools::profiles::ValueType& sample_type = *converted.add_sample_type(); + sample_type.set_type(space_id); + sample_type.set_unit(bytes_id); + } + + const bool exporting_residency = + (profile.Type() == tcmalloc::ProfileType::kHeap); + if (exporting_residency) { + perftools::profiles::ValueType* sample_type = converted.add_sample_type(); + sample_type->set_type(resident_space_id); + sample_type->set_unit(bytes_id); + + sample_type = converted.add_sample_type(); + sample_type->set_type(swapped_space_id); + sample_type->set_unit(bytes_id); + + sample_type = converted.add_sample_type(); + sample_type->set_type(stale_space_id); + sample_type->set_unit(bytes_id); + + sample_type = converted.add_sample_type(); + sample_type->set_type(locked_space_id); + sample_type->set_unit(bytes_id); + } + + int default_sample_type_id; + switch (profile.Type()) { + case tcmalloc::ProfileType::kFragmentation: + case tcmalloc::ProfileType::kHeap: + case tcmalloc::ProfileType::kPeakHeap: + default_sample_type_id = space_id; + break; + case tcmalloc::ProfileType::kAllocations: + default_sample_type_id = objects_id; + break; + default: + return absl::InvalidArgumentError("Unexpected profile format"); + } + + 
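// The per-sample values appended in the loop below are positional: value i is
// interpreted in the unit of sample_type(i) registered above. Sketch of the
// resulting column layout (illustration only, derived from the code above):
//
//   column 0: objects         (count)
//   column 1: space           (bytes)
//   column 2: resident_space  (bytes)   columns 2..5 are present only when
//   column 3: swapped_space   (bytes)   exporting_residency is true, i.e.
//   column 4: stale_space     (bytes)   for ProfileType::kHeap
//   column 5: locked_space    (bytes)
//
// so every sample carries either exactly 2 or exactly 6 values, never a mix.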
converted.set_default_sample_type(default_sample_type_id); + + SampleMergedMap samples = MergeProfileSamplesAndMaybeGetResidencyInfo( + profile, pageflags, residency); + for (const auto& [entry, data] : samples) { + perftools::profiles::Profile& profile = builder.profile(); + perftools::profiles::Sample& sample = *profile.add_sample(); + + TC_CHECK_LE(entry.depth, ABSL_ARRAYSIZE(entry.stack)); + builder.InternCallstack(absl::MakeSpan(entry.stack, entry.depth), sample); + + sample.add_value(data.count); + sample.add_value(data.sum); + if (exporting_residency) { + sample.add_value(data.resident_size.value_or(0)); + sample.add_value(data.swapped_size.value_or(0)); + sample.add_value(data.stale_size.value_or(0)); + sample.add_value(data.locked_size.value_or(0)); + } + + // add fields that are common to all memory profiles + auto add_label = [&](int key, int unit, size_t value) { + perftools::profiles::Label& label = *sample.add_label(); + label.set_key(key); + label.set_num(value); + label.set_num_unit(unit); + }; + + auto add_positive_label = [&](int key, int unit, size_t value) { + if (value <= 0) return; + add_label(key, unit, value); + }; + + add_positive_label(bytes_id, bytes_id, entry.allocated_size); + add_positive_label(request_id, bytes_id, entry.requested_size); + add_positive_label(alignment_id, bytes_id, entry.requested_alignment); + add_positive_label(size_returning_id, 0, entry.requested_size_returning); + add_positive_label(stale_scan_period_id, seconds_id, + data.stale_scan_period.value_or(0)); + + auto add_access_label = [&](int key, + tcmalloc::Profile::Sample::Access access) { + switch (access) { + case tcmalloc::Profile::Sample::Access::Hot: { + perftools::profiles::Label& access_label = *sample.add_label(); + access_label.set_key(key); + access_label.set_str(hot_id); + break; + } + case tcmalloc::Profile::Sample::Access::Cold: { + perftools::profiles::Label& access_label = *sample.add_label(); + access_label.set_key(key); + access_label.set_str(cold_id); + break; + } + default: + break; + } + }; + + add_label(access_hint_id, access_hint_id, + static_cast(entry.access_hint)); + add_access_label(access_allocated_id, entry.access_allocated); + + perftools::profiles::Label& type_label = *sample.add_label(); + type_label.set_key(allocation_type_id); + + switch (entry.type) { + case Profile::Sample::AllocationType::New: + type_label.set_str(new_id); + break; + case Profile::Sample::AllocationType::Malloc: + type_label.set_str(malloc_id); + break; + case Profile::Sample::AllocationType::AlignedMalloc: + type_label.set_str(aligned_malloc_id); + break; + } + + const int guarded_status_id = builder.InternString("guarded_status"); + const int larger_than_one_page_id = + builder.InternString("LargerThanOnePage"); + const int disabled_id = builder.InternString("Disabled"); + const int rate_limited_id = builder.InternString("RateLimited"); + const int too_small_id = builder.InternString("TooSmall"); + const int no_available_slots_id = builder.InternString("NoAvailableSlots"); + const int m_protect_failed_id = builder.InternString("MProtectFailed"); + const int filtered_id = builder.InternString("Filtered"); + const int unknown_id = builder.InternString("Unknown"); + const int not_attempted_id = builder.InternString("NotAttempted"); + const int requested_id = builder.InternString("Requested"); + const int required_id = builder.InternString("Required"); + const int guarded_id = builder.InternString("Guarded"); + + perftools::profiles::Label& guarded_status_label = *sample.add_label(); + 
guarded_status_label.set_key(guarded_status_id); + switch (entry.guarded_status) { + case Profile::Sample::GuardedStatus::LargerThanOnePage: + guarded_status_label.set_str(larger_than_one_page_id); + break; + case Profile::Sample::GuardedStatus::Disabled: + guarded_status_label.set_str(disabled_id); + break; + case Profile::Sample::GuardedStatus::RateLimited: + guarded_status_label.set_str(rate_limited_id); + break; + case Profile::Sample::GuardedStatus::TooSmall: + guarded_status_label.set_str(too_small_id); + break; + case Profile::Sample::GuardedStatus::NoAvailableSlots: + guarded_status_label.set_str(no_available_slots_id); + break; + case Profile::Sample::GuardedStatus::MProtectFailed: + guarded_status_label.set_str(m_protect_failed_id); + break; + case Profile::Sample::GuardedStatus::Filtered: + guarded_status_label.set_str(filtered_id); + break; + case Profile::Sample::GuardedStatus::Unknown: + guarded_status_label.set_str(unknown_id); + break; + case Profile::Sample::GuardedStatus::NotAttempted: + guarded_status_label.set_str(not_attempted_id); + break; + case Profile::Sample::GuardedStatus::Requested: + guarded_status_label.set_str(requested_id); + break; + case Profile::Sample::GuardedStatus::Required: + guarded_status_label.set_str(required_id); + break; + case Profile::Sample::GuardedStatus::Guarded: + guarded_status_label.set_str(guarded_id); + break; + } + } + + return std::move(builder).Finalize(); +} + +absl::Status ProfileBuilder::SetDocURL(absl::string_view url) { + if (!url.empty() && !absl::StartsWith(url, "http://") && + !absl::StartsWith(url, "https://")) { + return absl::InternalError( + absl::StrCat("setting invalid profile doc URL '", url, "'")); + } + profile_->set_doc_url(InternString(url)); + return absl::OkStatus(); +} + +absl::StatusOr> MakeProfileProto( + const ::tcmalloc::Profile& profile) { + // Used to populate residency info in heap profile. + std::optional pageflags; + std::optional residency; + + PageFlags* p = nullptr; + Residency* r = nullptr; + + if (profile.Type() == ProfileType::kHeap) { + p = &pageflags.emplace(); + r = &residency.emplace(); + } + + return MakeProfileProto(profile, p, r); +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/profile_builder.h b/contrib/libs/tcmalloc/tcmalloc/internal/profile_builder.h new file mode 100644 index 000000000000..c52843361d1c --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/profile_builder.h @@ -0,0 +1,102 @@ +#pragma clang system_header +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
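// Typical usage of the ProfileBuilder class declared below, as a minimal
// sketch (illustration only; the names follow the declarations in this
// header, and error handling is omitted):
//
//   ProfileBuilder builder;
//   builder.AddCurrentMappings();                  // mappings and build ids
//   const int unit_id = builder.InternString("bytes");
//   perftools::profiles::Sample& sample = *builder.profile().add_sample();
//   builder.InternCallstack(stack, sample);        // stack: span of PCs
//   std::unique_ptr<perftools::profiles::Profile> proto =
//       std::move(builder).Finalize();             // consumes the builder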
+ +#ifndef TCMALLOC_INTERNAL_PROFILE_BUILDER_H_ +#define TCMALLOC_INTERNAL_PROFILE_BUILDER_H_ + +#if defined(__linux__) +#include +#include +#endif // defined(__linux__) + +#include +#include +#include + +// #include "tcmalloc/internal/profile.pb.h" +#include "absl/container/btree_map.h" +#include "absl/container/flat_hash_map.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/strings/string_view.h" +#include "absl/types/span.h" +#include "tcmalloc/malloc_extension.h" + +namespace tcmalloc { +namespace tcmalloc_internal { + +#if defined(__linux__) +std::string GetBuildId(const dl_phdr_info* info); +#endif // defined(__linux__) + +// ProfileBuilder manages building up a profile.proto instance and populating +// common parts using the string/pointer table conventions expected by pprof. +class ProfileBuilder { + public: + ProfileBuilder(); + + perftools::profiles::Profile& profile() { return *profile_; } + + // Adds the current process mappings to the profile. + void AddCurrentMappings(); + + // Adds a single mapping to the profile and to lookup cache and returns the + // resulting ID. + int AddMapping(uintptr_t memory_start, uintptr_t memory_limit, + uintptr_t file_offset, absl::string_view filename, + absl::string_view build_id); + + // Add documentation URL. + absl::Status SetDocURL(absl::string_view url); + + // Interns sv in the profile's string table and returns the resulting ID. + int InternString(absl::string_view sv); + // Interns a location in the profile's location table and returns the + // resulting ID. + int InternLocation(const void* ptr); + + // Interns a callstack and adds the IDs to the provided sample. + void InternCallstack(absl::Span stack, + perftools::profiles::Sample& sample); + + std::unique_ptr Finalize() &&; + + private: + std::unique_ptr profile_; + // mappings_ stores the start address of each mapping in profile_->mapping() + // to its index. + absl::btree_map mappings_; + absl::flat_hash_map strings_; + absl::flat_hash_map locations_; +}; + +extern const absl::string_view kProfileDropFrames; + +absl::StatusOr> MakeProfileProto( + const ::tcmalloc::Profile& profile); + +class PageFlagsBase; +class PageFlags; +class Residency; + +// Exposed to facilitate testing. +absl::StatusOr> MakeProfileProto( + const ::tcmalloc::Profile& profile, PageFlagsBase* pageflags, + Residency* residency); + +} // namespace tcmalloc_internal +} // namespace tcmalloc + +#endif // TCMALLOC_INTERNAL_PROFILE_BUILDER_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/profile_builder_fuzz.cc b/contrib/libs/tcmalloc/tcmalloc/internal/profile_builder_fuzz.cc new file mode 100644 index 000000000000..a33200cbc158 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/profile_builder_fuzz.cc @@ -0,0 +1,54 @@ +// Copyright 2022 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
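// Companion sketch to the fuzz harness below (illustration only, not part of
// the harness; Linux is assumed): what a well-formed NT_GNU_BUILD_ID note
// looks like when handed to GetBuildId() through a synthetic dl_phdr_info.
// The fuzzer feeds arbitrary bytes through the same path; this is the happy
// case it mutates.

#include <elf.h>
#include <link.h>

#include <cstring>
#include <string>
#include <vector>

#include "tcmalloc/internal/profile_builder.h"

std::string RoundTripBuildId(const std::vector<char>& id) {
  // Note layout: Nhdr, then "GNU\0" (n_namesz = 4), then the raw id bytes.
  std::vector<char> note(sizeof(ElfW(Nhdr)) + 4 + id.size());
  auto* nhdr = reinterpret_cast<ElfW(Nhdr)*>(note.data());
  nhdr->n_namesz = 4;
  nhdr->n_descsz = static_cast<ElfW(Word)>(id.size());
  nhdr->n_type = NT_GNU_BUILD_ID;
  std::memcpy(note.data() + sizeof(ElfW(Nhdr)), "GNU\0", 4);
  std::memcpy(note.data() + sizeof(ElfW(Nhdr)) + 4, id.data(), id.size());

  ElfW(Phdr) phdr = {};
  phdr.p_type = PT_NOTE;
  phdr.p_vaddr = 0;  // the note starts right at dlpi_addr
  phdr.p_filesz = note.size();
  phdr.p_memsz = note.size();
  phdr.p_align = 4;

  dl_phdr_info info = {};
  info.dlpi_name = "synthetic";
  info.dlpi_addr = reinterpret_cast<ElfW(Addr)>(note.data());
  info.dlpi_phdr = &phdr;
  info.dlpi_phnum = 1;

  // Returns the id hex-encoded, e.g. 20 bytes of 0xab become "abab...ab".
  return tcmalloc::tcmalloc_internal::GetBuildId(&info);
}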
+ +#include "tcmalloc/internal/profile_builder.h" + +#if defined(__linux__) +#include +#include +#endif // defined(__linux__) + +#include + +#include "fuzztest/fuzztest.h" + +namespace tcmalloc::tcmalloc_internal { +namespace { + +void ParseBuildID(const std::string& s) { + const char* data = s.data(); + size_t size = s.size(); +#if defined(__linux__) + ElfW(Phdr) note; + note.p_type = PT_NOTE; + note.p_vaddr = reinterpret_cast(nullptr); + note.p_filesz = size; + note.p_memsz = size; + note.p_align = 4; + + dl_phdr_info info = {}; + info.dlpi_name = "test"; + info.dlpi_addr = reinterpret_cast(data); + info.dlpi_phdr = ¬e; + info.dlpi_phnum = 1; + + GetBuildId(&info); +#endif // defined(__linux__) +} + +FUZZ_TEST(ProfileBuilderTest, ParseBuildID) + ; + +} // namespace +} // namespace tcmalloc::tcmalloc_internal diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/profile_builder_no_tcmalloc_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/profile_builder_no_tcmalloc_test.cc new file mode 100644 index 000000000000..a8ef13b04ae2 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/profile_builder_no_tcmalloc_test.cc @@ -0,0 +1,46 @@ +// Copyright 2024 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/status/status.h" +#include "absl/status/status_matchers.h" +#include "tcmalloc/internal/profile_builder.h" +#include "tcmalloc/malloc_extension.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +using ::absl_testing::StatusIs; +using ::testing::HasSubstr; + +TEST(ProfileBuilderNoTCMallocTest, StatusErrorTest) { + auto profile_st = + MakeProfileProto(MallocExtension::SnapshotCurrent(ProfileType::kHeap)); +#if defined(ABSL_HAVE_ADDRESS_SANITIZER) || \ + defined(ABSL_HAVE_LEAK_SANITIZER) || \ + defined(ABSL_HAVE_MEMORY_SANITIZER) || defined(ABSL_HAVE_THREAD_SANITIZER) + EXPECT_THAT(profile_st, + StatusIs(absl::StatusCode::kUnimplemented, + HasSubstr("Program was built with sanitizers enabled"))); +#else + EXPECT_THAT(profile_st, StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("Empty heap profile"))); +#endif +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/profile_builder_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/profile_builder_test.cc new file mode 100644 index 000000000000..a69b2151aa62 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/profile_builder_test.cc @@ -0,0 +1,1031 @@ +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/profile_builder.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tcmalloc/internal/profile.pb.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/casts.h" +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" +#include "absl/log/check.h" +#include "absl/meta/type_traits.h" +#include "absl/status/statusor.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "tcmalloc/internal/environment.h" +#include "tcmalloc/internal/fake_profile.h" +#include "tcmalloc/internal/page_size.h" +#include "tcmalloc/internal/pageflags.h" +#include "tcmalloc/internal/residency.h" +#include "tcmalloc/internal_malloc_extension.h" +#include "tcmalloc/malloc_extension.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +using ::testing::AnyOf; +using ::testing::Each; +using ::testing::IsSupersetOf; +using ::testing::Key; +using ::testing::Not; +using ::testing::Pair; +using ::testing::SizeIs; +using ::testing::UnorderedElementsAre; + +class StubPageFlags final : public PageFlagsBase { + public: + StubPageFlags() = default; + ~StubPageFlags() override = default; + std::optional Get(const void* addr, size_t size) override { + PageStats ret; + uintptr_t uaddr = reinterpret_cast(addr); + if (stale_bytes_.find(uaddr) != stale_bytes_.end()) { + ret.bytes_stale = stale_bytes_[uaddr]; + } + if (locked_bytes_.find(uaddr) != locked_bytes_.end()) { + ret.bytes_locked = locked_bytes_[uaddr]; + } + ret.stale_scan_seconds = stale_scan_period_; + return ret; + } + + void set_bytes_stale(const void* addr, int stale) { + uintptr_t uaddr = reinterpret_cast(addr); + stale_bytes_[uaddr] = stale; + } + + void set_bytes_locked(const void* addr, int locked) { + uintptr_t uaddr = reinterpret_cast(addr); + locked_bytes_[uaddr] = locked; + } + + void set_stale_scan_period(const void* addr, uint64_t seconds_stale) { + stale_scan_period_ = seconds_stale; + } + + private: + absl::flat_hash_map stale_bytes_; + absl::flat_hash_map locked_bytes_; + uint64_t stale_scan_period_; +}; + +// Returns the fully resolved path of this program. +std::string RealPath() { + char path[PATH_MAX]; + if (realpath("/proc/self/exe", path)) { + return path; + } + return ""; +} + +TEST(ProfileBuilderTest, Mappings) { + ProfileBuilder builder; + builder.AddCurrentMappings(); + auto profile = std::move(builder).Finalize(); + + absl::flat_hash_set filenames; + absl::flat_hash_set mapping_ids; + for (const auto& mapping : profile->mapping()) { + const int filename_id = mapping.filename(); + ASSERT_GE(filename_id, 0); + ASSERT_LT(filename_id, profile->string_table_size()); + + const absl::string_view filename = profile->string_table(filename_id); + filenames.emplace(filename); + mapping_ids.insert(mapping.id()); + } + + // Check for duplicates in mapping IDs. 
+ EXPECT_EQ(mapping_ids.size(), profile->mapping_size()); + EXPECT_THAT(filenames, testing::Contains(RealPath())); + + // Ensure that no mapping ID is ID "0". + EXPECT_THAT(mapping_ids, Not(testing::Contains(0))); +} + +TEST(ProfileBuilderTest, LocationTableNoMappings) { + const uintptr_t kAddress = uintptr_t{0x150}; + + ProfileBuilder builder; + const int loc1 = + builder.InternLocation(absl::bit_cast(kAddress)); + auto profile = std::move(builder).Finalize(); + + // There should be no mappings. + EXPECT_TRUE(profile->mapping().empty()); + + // There should be 1 location. + ASSERT_EQ(profile->location().size(), 1); + const auto& location = profile->location(0); + EXPECT_EQ(location.id(), loc1); + EXPECT_EQ(location.mapping_id(), 0); + EXPECT_EQ(location.address(), kAddress); +} + +TEST(ProfileBuilderTest, DocURL) { + // Try good and bad URLs. + constexpr absl::string_view good1 = "http://example.com/foo"; + constexpr absl::string_view good2 = "https://example.com/foo"; + constexpr absl::string_view good3 = ""; + constexpr absl::string_view bad = "example.com/foo"; + for (absl::string_view url : {good1, good2, good3, bad}) { + ProfileBuilder b; + absl::Status error = b.SetDocURL(url); + if (url == bad) { + EXPECT_FALSE(error.ok()) << url; + } else { + EXPECT_TRUE(error.ok()) << url; + } + } +} + +TEST(ProfileBuilderTest, LocationTable) { + ProfileBuilder builder; + + // Verify we add mapping information to locations correctly. + builder.AddMapping(uintptr_t{0x200}, uintptr_t{0x300}, uintptr_t{0x123}, + "foo.so", "abababab"); + + // loc1/loc3 should lack mappings, loc2 should have a mapping. + const int loc1 = + builder.InternLocation(absl::bit_cast(uintptr_t{0x150})); + const int loc2 = + builder.InternLocation(absl::bit_cast(uintptr_t{0x250})); + const int loc3 = + builder.InternLocation(absl::bit_cast(uintptr_t{0x350})); + + auto profile = std::move(builder).Finalize(); + + // There should be one mapping. + ASSERT_EQ(profile->mapping().size(), 1); + const auto mapping = profile->mapping(0); + EXPECT_EQ(mapping.memory_start(), 0x200); + EXPECT_EQ(mapping.memory_limit(), 0x300); + EXPECT_EQ(mapping.file_offset(), 0x123); + EXPECT_EQ(profile->string_table(mapping.filename()), "foo.so"); + EXPECT_EQ(profile->string_table(mapping.build_id()), "abababab"); + + struct SimpleLocation { + uint64_t id; + uint64_t mapping_id; + uint64_t address; + + bool operator==(const SimpleLocation& rhs) const { + return std::tie(id, mapping_id, address) == + std::tie(rhs.id, rhs.mapping_id, rhs.address); + } + }; + std::vector actual; + for (auto location : profile->location()) { + SimpleLocation& l = actual.emplace_back(); + l.id = location.id(); + l.mapping_id = location.mapping_id(); + l.address = location.address(); + } + std::vector expected = { + {static_cast(loc1), 0, 0x150}, + {static_cast(loc2), mapping.id(), 0x250}, + {static_cast(loc3), 0, 0x350}, + }; + + EXPECT_THAT(actual, testing::UnorderedElementsAreArray(expected)); +} + +TEST(ProfileBuilderTest, StringTable) { + auto profile = ProfileBuilder().Finalize(); + + ASSERT_FALSE(profile->string_table().empty()); + // The first entry should be the empty string. + EXPECT_EQ(profile->string_table(0), ""); + + // There should be no duplicates. + absl::flat_hash_set strings; + strings.reserve(profile->string_table_size()); + strings.insert(profile->string_table().begin(), + profile->string_table().end()); + EXPECT_EQ(strings.size(), profile->string_table_size()); +} + +// A helper type alias for a list of samples and their labels. 
+using SampleLabels = std::vector< + std::vector>>>; + +void CheckAndExtractSampleLabels(const perftools::profiles::Profile& converted, + SampleLabels& extracted) { + // Strings + ASSERT_FALSE(converted.string_table().empty()); + + // Mappings: Build a lookup table from mapping ID to index in mapping array. + ASSERT_FALSE(converted.mapping().empty()); + absl::flat_hash_map mappings; + for (int i = 0, n = converted.mapping().size(); i < n; i++) { + mappings.emplace(converted.mapping(i).id(), i); + } + + // Locations + ASSERT_FALSE(converted.location().empty()); + absl::flat_hash_map addresses; + absl::flat_hash_set interned_addresses; + int location_with_mapping_found = 0; + for (const auto& location : converted.location()) { + uintptr_t address = location.address(); + if (location.mapping_id() > 0) { + ASSERT_THAT( + mappings, + testing::Contains(testing::Key(testing::Eq(location.mapping_id())))); + const int mapping_index = mappings.at(location.mapping_id()); + ASSERT_LT(mapping_index, converted.mapping_size()); + const auto& mapping = converted.mapping(mapping_index); + + location_with_mapping_found++; + + // Confirm address actually falls within [mapping.memory_start(), + // mapping.memory_limit()). + EXPECT_LE(mapping.memory_start(), address); + EXPECT_LT(address, mapping.memory_limit()); + } + + EXPECT_TRUE(interned_addresses.insert(location.id()).second) + << "Duplicate interned location ID found"; + } + // Expect that we find at least 2 locations with a mapping. + EXPECT_GE(location_with_mapping_found, 2); + // Expect that no location has ID "0." + EXPECT_THAT(interned_addresses, Not(testing::Contains(0))); + + // Samples + for (const auto& s : converted.sample()) { + EXPECT_FALSE(s.location_id().empty()); + // No duplicates + EXPECT_THAT( + absl::flat_hash_set(s.location_id().begin(), s.location_id().end()) + .size(), + s.location_id().size()); + // Interned locations should appear in the location list. + EXPECT_THAT(s.location_id(), testing::IsSubsetOf(interned_addresses)); + + EXPECT_EQ(converted.sample_type().size(), s.value().size()); + extracted.emplace_back(); + auto& labels = extracted.back(); + for (const auto& l : s.label()) { + if (l.str() != 0) { + labels.emplace_back(converted.string_table(l.key()), + converted.string_table(l.str())); + } else { + labels.emplace_back(converted.string_table(l.key()), + static_cast(l.num())); + } + } + } +} + +perftools::profiles::Profile MakeTestProfile( + std::optional start_time, const absl::Duration duration, + const ProfileType profile_type) { + std::vector samples; + StubPageFlags* p = nullptr; + Residency* r = nullptr; + std::optional pageflags; + std::optional residency; + if (profile_type == ProfileType::kHeap) { + p = &pageflags.emplace(); + r = &residency.emplace(); + } + + { // We have three samples here that will be merged. The second sample has + // `span_start_address` as nullptr, so `resident_space` in the + // profile is contributed by the other two samples. + Profile::Sample sample; + + sample.sum = 1234; + sample.count = 2; + sample.requested_size = 2; + sample.requested_alignment = 4; + sample.requested_size_returning = true; + sample.allocated_size = 16; + + std::vector bytes(sample.allocated_size); + sample.span_start_address = bytes.data(); + + // This stack is mostly artificial, but we include a real symbol from the + // binary to confirm that at least one location was indexed into its + // mapping. 
+ sample.depth = 5; + sample.stack[0] = absl::bit_cast(uintptr_t{0x12345}); + sample.stack[1] = absl::bit_cast(uintptr_t{0x23451}); + sample.stack[2] = absl::bit_cast(uintptr_t{0x34512}); + sample.stack[3] = absl::bit_cast(uintptr_t{0x45123}); + sample.stack[4] = reinterpret_cast(&ProfileAccessor::MakeProfile); + sample.access_hint = hot_cold_t{254}; + sample.access_allocated = Profile::Sample::Access::Cold; + sample.type = Profile::Sample::AllocationType::New; + samples.push_back(sample); + + Profile::Sample sample2 = sample; + sample2.span_start_address = nullptr; + samples.push_back(sample2); + + Profile::Sample sample3 = sample; + sample3.span_start_address = bytes.data(); + samples.push_back(sample3); + } + + { + // We have two samples here. For the second sample, we remove the mappings + // for the page starting at the pointer, so no resident info is available + // for the sample. + Profile::Sample sample; + + size_t kSize = GetPageSize(); + void* ptr1 = mmap(nullptr, kSize, PROT_WRITE | PROT_READ, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_LOCKED, -1, 0); + CHECK_NE(ptr1, MAP_FAILED) << errno; + void* ptr2 = mmap(nullptr, kSize, PROT_WRITE | PROT_READ, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_LOCKED, -1, 0); + CHECK_NE(ptr2, MAP_FAILED) << errno; + CHECK_EQ(munmap(ptr2, kSize), 0) << errno; + + sample.sum = 2345; + sample.count = 5; + sample.requested_size = 4; + sample.requested_alignment = 0; + sample.requested_size_returning = false; + sample.allocated_size = 8; + sample.span_start_address = ptr1; + + // This stack is mostly artificial, but we include a real symbol from the + // binary to confirm that at least one location was indexed into its + // mapping. + sample.depth = 4; + sample.stack[0] = absl::bit_cast(uintptr_t{0x12345}); + sample.stack[1] = absl::bit_cast(uintptr_t{0x23451}); + sample.stack[2] = absl::bit_cast(uintptr_t{0x45123}); + sample.stack[3] = reinterpret_cast(&RealPath); + sample.access_hint = hot_cold_t{1}; + sample.access_allocated = Profile::Sample::Access::Hot; + sample.type = Profile::Sample::AllocationType::Malloc; + + if (profile_type == ProfileType::kHeap) { + // Total stale bytes for this sample = 5 * 17 = 85. + // Total locked bytes for this sample = 5 * 11 = 55. + p->set_bytes_stale(sample.span_start_address, /*stale=*/17); + p->set_bytes_locked(sample.span_start_address, /*locked=*/11); + p->set_stale_scan_period(sample.span_start_address, + /*seconds_stale=*/10); + } + + samples.push_back(sample); + + Profile::Sample sample2 = sample; + sample2.span_start_address = ptr2; + samples.push_back(sample2); + } + + { + // This sample does not set `span_start_address`, so `resident_space` + // is 0. + auto& sample = samples.emplace_back(); + + sample.sum = 2345; + sample.count = 8; + sample.requested_size = 16; + sample.requested_alignment = 0; + sample.requested_size_returning = true; + sample.allocated_size = 16; + // This stack is mostly artificial, but we include a real symbol from the + // binary to confirm that at least one location was indexed into its + // mapping. + sample.depth = 3; + sample.stack[0] = absl::bit_cast(uintptr_t{0x12345}); + sample.stack[1] = absl::bit_cast(uintptr_t{0x23451}); + sample.stack[2] = reinterpret_cast(&RealPath); + sample.access_hint = hot_cold_t{0}; + sample.access_allocated = Profile::Sample::Access::Hot; + sample.type = Profile::Sample::AllocationType::AlignedMalloc; + } + + { // We have three samples here that will be merged (if guarded_status is not + // considered). 
The second and third samples have different + // guarded_status-es. + Profile::Sample sample; + + sample.sum = 1235; + sample.count = 2; + sample.requested_size = 2; + sample.requested_alignment = 4; + sample.requested_size_returning = true; + sample.allocated_size = 16; + + std::vector bytes(sample.allocated_size); + sample.span_start_address = bytes.data(); + + // This stack is mostly artificial, but we include a real symbol from the + // binary to confirm that at least one location was indexed into its + // mapping. + sample.depth = 5; + sample.stack[0] = absl::bit_cast(uintptr_t{0x12345}); + sample.stack[1] = absl::bit_cast(uintptr_t{0x23451}); + sample.stack[2] = absl::bit_cast(uintptr_t{0x34512}); + sample.stack[3] = absl::bit_cast(uintptr_t{0x45123}); + sample.stack[4] = reinterpret_cast(&ProfileAccessor::MakeProfile); + sample.access_hint = hot_cold_t{253}; + sample.access_allocated = Profile::Sample::Access::Cold; + sample.guarded_status = Profile::Sample::GuardedStatus::RateLimited; + sample.type = Profile::Sample::AllocationType::New; + samples.push_back(sample); + + Profile::Sample sample2 = sample; + sample2.guarded_status = Profile::Sample::GuardedStatus::Filtered; + samples.push_back(sample2); + + Profile::Sample sample3 = sample; + sample3.guarded_status = Profile::Sample::GuardedStatus::Guarded; + samples.push_back(sample3); + } + auto fake_profile = std::make_unique(); + fake_profile->SetType(profile_type); + fake_profile->SetDuration(duration); + fake_profile->SetStartTime(start_time); + fake_profile->SetSamples(std::move(samples)); + Profile profile = ProfileAccessor::MakeProfile(std::move(fake_profile)); + auto converted_or = MakeProfileProto(profile, p, r); + CHECK_OK(converted_or.status()); + return **converted_or; +} + +TEST(ProfileConverterTest, NonHeapProfileDoesntHaveResidency) { + const absl::Time start_time = absl::Now(); + constexpr absl::Duration kDuration = absl::Milliseconds(1500); + const auto& converted = + MakeTestProfile(start_time, kDuration, ProfileType::kPeakHeap); + + // Two sample types: [objects, count] and [space, bytes] + std::vector> extracted_sample_type; + absl::flat_hash_set sample_types; + for (const auto& s : converted.sample_type()) { + auto& labels = extracted_sample_type.emplace_back(); + labels.first = converted.string_table(s.type()); + labels.second = converted.string_table(s.unit()); + + ASSERT_TRUE(sample_types.insert(s.type()).second) + << "Duplicate sample type #" << s.type() << ": " + << converted.string_table(s.type()); + } + // Require that the default_sample_type appeared in sample_type. + EXPECT_THAT(sample_types, testing::Contains(converted.default_sample_type())); + EXPECT_THAT( + extracted_sample_type, + UnorderedElementsAre(Pair("objects", "count"), Pair("space", "bytes"))); + EXPECT_EQ(converted.time_nanos(), absl::ToUnixNanos(start_time)); + + absl::flat_hash_map label_to_units; + for (const auto& s : converted.sample()) { + for (const auto& l : s.label()) { + if (l.num_unit() != 0) { + const std::string unit = converted.string_table(l.num_unit()); + auto it = label_to_units.insert({converted.string_table(l.key()), unit}) + .first; + // We expect units to be consistent for the same key, across samples. 
+ EXPECT_EQ(it->second, unit); + } + } + } + + EXPECT_THAT( + label_to_units, + Each(Key(Not(AnyOf("sampled_resident_bytes", + "swapped_bytes"))))); +} + +TEST(ProfileConverterTest, HeapProfile) { + constexpr absl::Duration kDuration = absl::Milliseconds(1500); + const auto& converted = + MakeTestProfile(std::nullopt, kDuration, ProfileType::kHeap); + + // Two sample types: [objects, count] and [space, bytes] + std::vector> extracted_sample_type; + absl::flat_hash_set sample_types; + for (const auto& s : converted.sample_type()) { + auto& labels = extracted_sample_type.emplace_back(); + labels.first = converted.string_table(s.type()); + labels.second = converted.string_table(s.unit()); + + ASSERT_TRUE(sample_types.insert(s.type()).second) + << "Duplicate sample type #" << s.type() << ": " + << converted.string_table(s.type()); + } + // Require that the default_sample_type appeared in sample_type. + EXPECT_THAT(sample_types, testing::Contains(converted.default_sample_type())); + + constexpr int kNumSamples = 6; + // This is slightly redundant with the next line, but we need to loop over + // each of the samples later. + EXPECT_THAT(extracted_sample_type, SizeIs(kNumSamples)); + EXPECT_THAT( + extracted_sample_type, + UnorderedElementsAre( + Pair("objects", "count"), Pair("space", "bytes"), + Pair("resident_space", "bytes"), Pair("stale_space", "bytes"), + Pair("locked_space", "bytes"), Pair("swapped_space", "bytes"))); + + SampleLabels extracted_labels; + { + SCOPED_TRACE("Profile"); + ASSERT_NO_FATAL_FAILURE( + CheckAndExtractSampleLabels(converted, extracted_labels)); + } + + absl::flat_hash_map label_to_units; + std::vector> extracted_samples; + for (const auto& s : converted.sample()) { + extracted_samples.emplace_back(); + for (const auto& l : s.label()) { + if (l.num_unit() != 0) { + const std::string unit = converted.string_table(l.num_unit()); + auto it = label_to_units.insert({converted.string_table(l.key()), unit}) + .first; + // We expect units to be consistent for the same key, across samples. 
+ EXPECT_EQ(it->second, unit); + } + } + ASSERT_EQ(s.value_size(), converted.sample_type_size()); + for (int i = 0; i < s.value_size(); ++i) { + extracted_samples.back().emplace( + converted.string_table(converted.sample_type(i).type()), s.value(i)); + } + } + + EXPECT_THAT(label_to_units, + IsSupersetOf({Pair("bytes", "bytes"), Pair("request", "bytes"), + Pair("alignment", "bytes"), + Pair("access_hint", "access_hint")})); + + EXPECT_THAT( + label_to_units, + Each(Key(Not(AnyOf("sampled_resident_bytes", + "swapped_bytes"))))); + + EXPECT_THAT( + extracted_labels, + UnorderedElementsAre( + UnorderedElementsAre( + Pair("bytes", 16), Pair("request", 2), Pair("alignment", 4), + Pair("stale_scan_period", 10), + Pair("access_hint", 254), Pair("access_allocated", "cold"), + Pair("size_returning", 1), Pair("guarded_status", "Unknown"), + Pair("allocation type", "new")), + UnorderedElementsAre(Pair("bytes", 8), Pair("request", 4), + Pair("stale_scan_period", 10), + Pair("access_hint", 1), + Pair("access_allocated", "hot"), + Pair("guarded_status", "Unknown"), + Pair("allocation type", "malloc")), + UnorderedElementsAre( + Pair("bytes", 16), Pair("request", 16), + Pair("stale_scan_period", 10), + Pair("access_hint", 0), Pair("access_allocated", "hot"), + Pair("size_returning", 1), Pair("guarded_status", "Unknown"), + Pair("allocation type", "aligned malloc")), + UnorderedElementsAre( + Pair("bytes", 16), Pair("request", 2), Pair("alignment", 4), + Pair("stale_scan_period", 10), + Pair("access_hint", 253), Pair("access_allocated", "cold"), + Pair("size_returning", 1), Pair("guarded_status", "RateLimited"), + Pair("allocation type", "new")), + UnorderedElementsAre( + Pair("bytes", 16), Pair("request", 2), Pair("alignment", 4), + Pair("stale_scan_period", 10), + Pair("access_hint", 253), Pair("access_allocated", "cold"), + Pair("size_returning", 1), Pair("guarded_status", "Filtered"), + Pair("allocation type", "new")), + UnorderedElementsAre( + Pair("bytes", 16), Pair("request", 2), Pair("alignment", 4), + Pair("stale_scan_period", 10), + Pair("access_hint", 253), Pair("access_allocated", "cold"), + Pair("size_returning", 1), Pair("guarded_status", "Guarded"), + Pair("allocation type", "new")))); + + EXPECT_THAT(extracted_samples, + UnorderedElementsAre(IsSupersetOf({ + Pair("resident_space", 64), + Pair("swapped_space", 0), + Pair("space", 3702), + Pair("objects", 6), + }), + IsSupersetOf({ + Pair("resident_space", 40), + Pair("swapped_space", 0), + Pair("space", 4690), + Pair("objects", 10), + }), + IsSupersetOf({ + Pair("resident_space", 0), + Pair("swapped_space", 0), + Pair("space", 2345), + Pair("objects", 8), + }), + IsSupersetOf({ + Pair("resident_space", 32), + Pair("swapped_space", 0), + Pair("space", 1235), + Pair("objects", 2), + }), + IsSupersetOf({ + Pair("resident_space", 32), + Pair("swapped_space", 0), + Pair("space", 1235), + Pair("objects", 2), + }), + IsSupersetOf({ + Pair("resident_space", 32), + Pair("swapped_space", 0), + Pair("space", 1235), + Pair("objects", 2), + }))); + + ASSERT_EQ(converted.sample().size(), kNumSamples); + // The addresses for the samples at stack[0], stack[1] should match. 
+ for (int i = 0; i < kNumSamples; ++i) { + ASSERT_GE(converted.sample(i).location_id().size(), 2) << i; + if (i > 0) { + EXPECT_EQ(converted.sample(i - 1).location_id(0), + converted.sample(i).location_id(0)) + << i; + EXPECT_EQ(converted.sample(i - 1).location_id(1), + converted.sample(i).location_id(1)) + << i; + } + } + + EXPECT_THAT(converted.string_table(converted.drop_frames()), + testing::HasSubstr("TCMallocInternalNew")); + // No keep frames. + EXPECT_EQ(converted.string_table(converted.keep_frames()), ""); + + EXPECT_EQ(converted.duration_nanos(), absl::ToInt64Nanoseconds(kDuration)); + EXPECT_EQ(converted.time_nanos(), 0); + + // Period type [space, bytes] + EXPECT_EQ(converted.string_table(converted.period_type().type()), "space"); + EXPECT_EQ(converted.string_table(converted.period_type().unit()), "bytes"); + + // Period not set + EXPECT_EQ(converted.period(), 0); +} + +// This test is to check that profile of type other than `kHeap` should not have +// residency info available, even if samples' `span_start_address` is not null. +TEST(ProfileBuilderTest, PeakHeapProfile) { + const absl::Time start_time = absl::Now(); + constexpr absl::Duration kDuration = absl::Milliseconds(1500); + auto fake_profile = std::make_unique(); + fake_profile->SetType(ProfileType::kPeakHeap); + fake_profile->SetDuration(kDuration); + fake_profile->SetStartTime(start_time); + + std::vector samples; + + { + auto& sample = samples.emplace_back(); + sample.sum = 1234; + sample.count = 2; + sample.requested_size = 2; + sample.requested_alignment = 4; + sample.requested_size_returning = true; + sample.allocated_size = 16; + + std::vector bytes(sample.allocated_size); + sample.span_start_address = bytes.data(); + + sample.depth = 3; + sample.stack[0] = absl::bit_cast(uintptr_t{0x12345}); + sample.stack[1] = absl::bit_cast(uintptr_t{0x45123}); + sample.stack[2] = reinterpret_cast(&ProfileAccessor::MakeProfile); + sample.access_hint = hot_cold_t{254}; + sample.access_allocated = Profile::Sample::Access::Cold; + } + + { + auto& sample = samples.emplace_back(); + sample.sum = 2345; + sample.count = 5; + sample.requested_size = 4; + sample.requested_alignment = 0; + sample.requested_size_returning = false; + sample.allocated_size = 8; + + std::vector bytes(sample.allocated_size); + sample.span_start_address = bytes.data(); + + sample.depth = 2; + sample.stack[0] = absl::bit_cast(uintptr_t{0x12345}); + sample.stack[1] = reinterpret_cast(&RealPath); + sample.access_hint = hot_cold_t{1}; + sample.access_allocated = Profile::Sample::Access::Hot; + } + + fake_profile->SetSamples(std::move(samples)); + Profile profile = ProfileAccessor::MakeProfile(std::move(fake_profile)); + auto converted_or = MakeProfileProto(profile); + ASSERT_TRUE(converted_or.ok()); + const auto& converted = **converted_or; + + SampleLabels extracted; + { + SCOPED_TRACE("Profile"); + ASSERT_NO_FATAL_FAILURE(CheckAndExtractSampleLabels(converted, extracted)); + } + + EXPECT_THAT( + extracted, + UnorderedElementsAre( + UnorderedElementsAre( + Pair("bytes", 16), Pair("request", 2), Pair("alignment", 4), + Pair("access_hint", 254), Pair("access_allocated", "cold"), + Pair("size_returning", 1), Pair("guarded_status", "Unknown"), + Pair("allocation type", "new")), + UnorderedElementsAre(Pair("bytes", 8), Pair("request", 4), + Pair("access_hint", 1), + Pair("access_allocated", "hot"), + Pair("guarded_status", "Unknown"), + Pair("allocation type", "new")))); + + ASSERT_GE(converted.sample().size(), 2); + 
ASSERT_GE(converted.sample(0).location_id().size(), 2); + ASSERT_GE(converted.sample(1).location_id().size(), 2); + EXPECT_EQ(converted.sample(0).location_id(0), + converted.sample(1).location_id(0)); + + EXPECT_EQ(converted.time_nanos(), absl::ToUnixNanos(start_time)); +} + +TEST(ProfileBuilderTest, LifetimeProfile) { + const absl::Time start_time = absl::Now(); + constexpr absl::Duration kDuration = absl::Milliseconds(1500); + auto fake_profile = std::make_unique(); + fake_profile->SetType(ProfileType::kLifetimes); + fake_profile->SetDuration(kDuration); + fake_profile->SetStartTime(start_time); + + std::vector samples; + { + // The allocation sample. + Profile::Sample alloc1{ + .sum = 123, + .count = 2, + // Common information we retain in the lifetime profile. + .requested_size = 2, + .requested_alignment = 4, + .allocated_size = 16, + // Lifetime specific information in each sample. + .profile_id = 33, + .avg_lifetime = absl::Nanoseconds(77), + .stddev_lifetime = absl::Nanoseconds(22), + .min_lifetime = absl::Nanoseconds(55), + .max_lifetime = absl::Nanoseconds(99), + .allocator_deallocator_physical_cpu_matched = true, + .allocator_deallocator_virtual_cpu_matched = true, + .allocator_deallocator_l3_matched = true, + .allocator_deallocator_numa_matched = true, + .allocator_deallocator_thread_matched = false, + }; + // This stack is mostly artificial, but we include a couple of real symbols + // from the binary to confirm that the locations are indexed into the + // mappings. + alloc1.depth = 6; + alloc1.stack[0] = absl::bit_cast(uintptr_t{0x12345}); + alloc1.stack[1] = absl::bit_cast(uintptr_t{0x23451}); + alloc1.stack[2] = absl::bit_cast(uintptr_t{0x34512}); + alloc1.stack[3] = absl::bit_cast(uintptr_t{0x45123}); + alloc1.stack[4] = reinterpret_cast(&ProfileAccessor::MakeProfile); + alloc1.stack[5] = reinterpret_cast(&RealPath); + + samples.push_back(alloc1); + + // The deallocation sample contains the same information with a negative + // count to denote deallocaiton. The stack can be different, or empty if the + // deallocation has not been observed (once b/236755869 is implemented). + Profile::Sample dealloc1 = alloc1; + dealloc1.count = -dealloc1.count; + samples.push_back(dealloc1); + + // Also add a censored sample with a different profile id. + Profile::Sample censored_alloc1 = alloc1; + censored_alloc1.is_censored = true; + // The *_matched fields are unset for censored allocations since we did not + // observe the deallocation. + censored_alloc1.allocator_deallocator_physical_cpu_matched = std::nullopt; + censored_alloc1.allocator_deallocator_virtual_cpu_matched = std::nullopt; + censored_alloc1.allocator_deallocator_l3_matched = std::nullopt; + censored_alloc1.allocator_deallocator_numa_matched = std::nullopt; + censored_alloc1.allocator_deallocator_thread_matched = std::nullopt; + censored_alloc1.profile_id++; + samples.push_back(censored_alloc1); + } + + fake_profile->SetSamples(std::move(samples)); + Profile profile = ProfileAccessor::MakeProfile(std::move(fake_profile)); + auto converted_or = MakeProfileProto(profile); + ASSERT_TRUE(converted_or.ok()); + const perftools::profiles::Profile& converted = **converted_or; + const auto& string_table = converted.string_table(); + + // Checks for lifetime (deallocation) profile specific fields. 
+ ASSERT_EQ(converted.sample_type_size(), 6); + EXPECT_EQ(string_table.at(converted.sample_type(0).type()), + "allocated_objects"); + EXPECT_EQ(string_table.at(converted.sample_type(1).type()), + "allocated_space"); + EXPECT_EQ(string_table.at(converted.sample_type(2).type()), + "deallocated_objects"); + EXPECT_EQ(string_table.at(converted.sample_type(3).type()), + "deallocated_space"); + EXPECT_EQ(string_table.at(converted.sample_type(4).type()), + "censored_allocated_objects"); + EXPECT_EQ(string_table.at(converted.sample_type(5).type()), + "censored_allocated_space"); + + ASSERT_EQ(converted.sample_size(), 3); + // For the alloc sample, the values are in indices 0, 1. + EXPECT_EQ(converted.sample(0).value(0), 2); + EXPECT_EQ(converted.sample(0).value(1), 123); + EXPECT_EQ(converted.sample(0).value(2), 0); + EXPECT_EQ(converted.sample(0).value(3), 0); + EXPECT_EQ(converted.sample(0).value(4), 0); + EXPECT_EQ(converted.sample(0).value(5), 0); + // For the dealloc sample, the values are in indices 2, 3. + EXPECT_EQ(converted.sample(1).value(0), 0); + EXPECT_EQ(converted.sample(1).value(1), 0); + EXPECT_EQ(converted.sample(1).value(2), 2); + EXPECT_EQ(converted.sample(1).value(3), 123); + EXPECT_EQ(converted.sample(1).value(4), 0); + EXPECT_EQ(converted.sample(1).value(5), 0); + // For the censored alloc sample, the values are in indices 4, 5. + EXPECT_EQ(converted.sample(2).value(0), 0); + EXPECT_EQ(converted.sample(2).value(1), 0); + EXPECT_EQ(converted.sample(2).value(2), 0); + EXPECT_EQ(converted.sample(2).value(3), 0); + EXPECT_EQ(converted.sample(2).value(4), 2); + EXPECT_EQ(converted.sample(2).value(5), 123); + + // Check the location and mapping fields and extract sample, label pairs. + SampleLabels extracted; + { + SCOPED_TRACE("LifetimeProfile"); + ASSERT_NO_FATAL_FAILURE(CheckAndExtractSampleLabels(converted, extracted)); + } + + // TODO(b/248332543): Reduce implementation duplication so that properties + // (allocation type) of the original allocation are captured without this + // duplication. + EXPECT_THAT( + extracted, + UnorderedElementsAre( + UnorderedElementsAre( + Pair("bytes", 16), Pair("request", 2), Pair("alignment", 4), + Pair("callstack-pair-id", 33), Pair("avg_lifetime", 77), + Pair("stddev_lifetime", 22), Pair("min_lifetime", 55), + Pair("max_lifetime", 99), + Pair("active CPU", "same"), Pair("active vCPU", "same"), + Pair("active L3", "same"), Pair("active NUMA", "same"), + Pair("active thread", "different")), + UnorderedElementsAre( + Pair("bytes", 16), Pair("request", 2), Pair("alignment", 4), + Pair("callstack-pair-id", 33), Pair("avg_lifetime", 77), + Pair("stddev_lifetime", 22), Pair("min_lifetime", 55), + Pair("max_lifetime", 99), + Pair("active CPU", "same"), Pair("active vCPU", "same"), + Pair("active L3", "same"), Pair("active NUMA", "same"), + Pair("active thread", "different")), + // Check the contents of the censored sample. + UnorderedElementsAre( + Pair("bytes", 16), Pair("request", 2), Pair("alignment", 4), + Pair("callstack-pair-id", 34), Pair("avg_lifetime", 77), + Pair("stddev_lifetime", 22), Pair("min_lifetime", 55), + Pair("max_lifetime", 99), + Pair("active CPU", "none"), Pair("active vCPU", "none"), + Pair("active L3", "none"), Pair("active NUMA", "none"), + Pair("active thread", "none")))); + + // Checks for common fields. + EXPECT_THAT(converted.string_table(converted.drop_frames()), + testing::HasSubstr("TCMallocInternalNew")); + // No keep frames. 
+ EXPECT_EQ(converted.string_table(converted.keep_frames()), ""); + + EXPECT_EQ(converted.duration_nanos(), absl::ToInt64Nanoseconds(kDuration)); + EXPECT_EQ(converted.time_nanos(), absl::ToUnixNanos(start_time)); + + // Period type [space, bytes] + EXPECT_EQ(converted.string_table(converted.period_type().type()), "space"); + EXPECT_EQ(converted.string_table(converted.period_type().unit()), "bytes"); + + // Period not set + EXPECT_EQ(converted.period(), 0); +} + +TEST(BuildId, CorruptImage_b180635896) { + std::string image_path; + const char* srcdir = thread_safe_getenv("TEST_SRCDIR"); + if (srcdir) { + absl::StrAppend(&image_path, srcdir, "/"); + } + const char* workspace = thread_safe_getenv("TEST_WORKSPACE"); + if (workspace) { + absl::StrAppend(&image_path, workspace, "/"); + } + absl::StrAppend(&image_path, + "tcmalloc/internal/testdata/b180635896.so"); + + int fd = open(image_path.c_str(), O_RDONLY); + ASSERT_TRUE(fd != -1) << "open: " << errno << " " << image_path; + void* p = mmap(nullptr, /*size*/ 4096, PROT_READ, MAP_PRIVATE, fd, /*off*/ 0); + ASSERT_TRUE(p != MAP_FAILED) << "mmap: " << errno; + close(fd); + + const ElfW(Ehdr)* const ehdr = reinterpret_cast(p); + dl_phdr_info info = {}; + info.dlpi_name = image_path.c_str(); + info.dlpi_addr = reinterpret_cast(p); + info.dlpi_phdr = + reinterpret_cast(info.dlpi_addr + ehdr->e_phoff); + info.dlpi_phnum = ehdr->e_phnum; + + EXPECT_EQ(GetBuildId(&info), ""); + munmap(p, 4096); +} + +// There are two PT_NOTE segments, one with .note.gnu.property and the other +// with .note.gnu.build-id. Test that we correctly skip .note.gnu.property. +// +// .note.gnu.property intentionally contains two NT_GNU_PROPERTY_TYPE_0 notes +// to simulate outputs from old linkers (no NT_GNU_PROPERTY_TYPE_0 merging). +// Test that we correctly parse and skip the notes. 
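+//
+// The expected value below is the build id of the test image, i.e. the
+// hex-encoded descriptor of its NT_GNU_BUILD_ID note.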
+TEST(BuildId, GnuProperty) { + std::string image_path; + const char* srcdir = thread_safe_getenv("TEST_SRCDIR"); + if (srcdir) { + absl::StrAppend(&image_path, srcdir, "/"); + } + const char* workspace = thread_safe_getenv("TEST_WORKSPACE"); + if (workspace) { + absl::StrAppend(&image_path, workspace, "/"); + } + absl::StrAppend(&image_path, + "tcmalloc/internal/testdata/gnu-property.so"); + + int fd = open(image_path.c_str(), O_RDONLY); + ASSERT_TRUE(fd != -1) << "open: " << errno << " " << image_path; + void* p = mmap(nullptr, /*size*/ 4096, PROT_READ, MAP_PRIVATE, fd, /*off*/ 0); + ASSERT_TRUE(p != MAP_FAILED) << "mmap: " << errno; + close(fd); + + const ElfW(Ehdr)* const ehdr = reinterpret_cast(p); + dl_phdr_info info = {}; + info.dlpi_name = image_path.c_str(); + info.dlpi_addr = reinterpret_cast(p); + info.dlpi_phdr = + reinterpret_cast(info.dlpi_addr + ehdr->e_phoff); + info.dlpi_phnum = ehdr->e_phnum; + + EXPECT_EQ(GetBuildId(&info), "1f2a67344247b1cb91260e53c03817f9"); + munmap(p, 4096); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker.h b/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker.h index 25b863934f7d..f1bb01153050 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -19,11 +20,12 @@ #include #include -#include +#include #include #include #include "absl/numeric/bits.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" #include "tcmalloc/internal/optimization.h" @@ -45,6 +47,8 @@ class Bitmap { // Returns the number of set bits [index, ..., index + n - 1]. size_t CountBits(size_t index, size_t n) const; + // Returns the total number of set bits. + size_t CountBits() const; // Returns whether the bitmap is entirely zero or not. bool IsZero() const; @@ -58,7 +62,7 @@ class Bitmap { // If there is at least one free range at or after , // put it in *index, *length and return true; else return false. - bool NextFreeRange(size_t start, size_t *index, size_t *length) const; + bool NextFreeRange(size_t start, size_t* index, size_t* length) const; // Returns index of the first {true, false} bit >= index, or N if none. size_t FindSet(size_t index) const; @@ -102,26 +106,28 @@ class RangeTracker { size_t size() const; // Number of bits marked size_t used() const; - // Number of bits clear - size_t total_free() const; // Longest contiguous range of clear bits. size_t longest_free() const; // Count of live allocations. size_t allocs() const; // REQUIRES: there is a free range of at least n bits - // (i.e. n <= longest_free()) - // finds and marks n free bits, returning index of the first bit. - // Chooses by best fit. + // (i.e. n <= longest_free()). + // + // Finds and marks n free bits, returning index of the first bit. Chooses by + // best fit. size_t FindAndMark(size_t n); + // REQUIRES: the range [index, index + n) is fully unmarked. + void Mark(size_t index, size_t n); + // REQUIRES: the range [index, index + n) is fully marked, and // was the returned value from a call to FindAndMark. // Unmarks it. void Unmark(size_t index, size_t n); // If there is at least one free range at or after , // put it in *index, *length and return true; else return false. 
- bool NextFreeRange(size_t start, size_t *index, size_t *length) const; + bool NextFreeRange(size_t start, size_t* index, size_t* length) const; void Clear(); @@ -174,11 +180,6 @@ inline size_t RangeTracker::used() const { return nused_; } -template -inline size_t RangeTracker::total_free() const { - return N - used(); -} - template inline size_t RangeTracker::longest_free() const { return longest_free_; @@ -191,7 +192,7 @@ inline size_t RangeTracker::allocs() const { template inline size_t RangeTracker::FindAndMark(size_t n) { - ASSERT(n > 0); + TC_ASSERT_GT(n, 0); // We keep the two longest ranges in the bitmap since we might allocate // from one. @@ -221,7 +222,7 @@ inline size_t RangeTracker::FindAndMark(size_t n) { index += len; } - CHECK_CONDITION(best_index < N); + TC_CHECK_LT(best_index, N); bits_.SetRange(best_index, n); if (best_len == longest_len) { @@ -235,11 +236,36 @@ inline size_t RangeTracker::FindAndMark(size_t n) { return best_index; } +// REQUIRES: the range [index, index + n) is fully unmarked. +// Marks it. +template +inline void RangeTracker::Mark(size_t index, size_t n) { + TC_ASSERT_GE(bits_.FindSet(index), index + n); + bits_.SetRange(index, n); + nused_ += n; + nallocs_++; + + size_t longest_len = 0; + size_t scan_index = 0, scan_len; + + // We just marked a range as used. This might change the longest free range + // recorded in longest_free_. Recompute. + while (bits_.NextFreeRange(scan_index, &scan_index, &scan_len)) { + if (scan_len > longest_len) { + longest_len = scan_len; + } + + scan_index += scan_len; + } + + longest_free_ = longest_len; +} + // REQUIRES: the range [index, index + n) is fully marked. // Unmarks it. template inline void RangeTracker::Unmark(size_t index, size_t n) { - ASSERT(bits_.FindClear(index) >= index + n); + TC_ASSERT(bits_.FindClear(index) >= index + n); bits_.ClearRange(index, n); nused_ -= n; nallocs_--; @@ -256,8 +282,8 @@ inline void RangeTracker::Unmark(size_t index, size_t n) { // If there is at least one free range at or after , // put it in *index, *length and return true; else return false. template -inline bool RangeTracker::NextFreeRange(size_t start, size_t *index, - size_t *length) const { +inline bool RangeTracker::NextFreeRange(size_t start, size_t* index, + size_t* length) const { return bits_.NextFreeRange(start, index, length); } @@ -272,12 +298,12 @@ inline void RangeTracker::Clear() { // Count the set bits [from, to) in the i-th word to Value. template inline size_t Bitmap::CountWordBits(size_t i, size_t from, size_t to) const { - ASSERT(from < kWordSize); - ASSERT(to <= kWordSize); + TC_ASSERT_LT(from, kWordSize); + TC_ASSERT_LE(to, kWordSize); const size_t all_ones = ~static_cast(0); // how many bits are we setting? const size_t n = to - from; - ASSERT(0 < n && n <= kWordSize); + TC_ASSERT(0 < n && n <= kWordSize); const size_t mask = (all_ones >> (kWordSize - n)) << from; ASSUME(i < kWords); @@ -288,12 +314,12 @@ inline size_t Bitmap::CountWordBits(size_t i, size_t from, size_t to) const { template template inline void Bitmap::SetWordBits(size_t i, size_t from, size_t to) { - ASSERT(from < kWordSize); - ASSERT(to <= kWordSize); + TC_ASSERT_LT(from, kWordSize); + TC_ASSERT_LE(to, kWordSize); const size_t all_ones = ~static_cast(0); // how many bits are we setting? 
const size_t n = to - from; - ASSERT(n > 0 && n <= kWordSize); + TC_ASSERT(n > 0 && n <= kWordSize); const size_t mask = (all_ones >> (kWordSize - n)) << from; ASSUME(i < kWords); if (Value) { @@ -305,7 +331,7 @@ inline void Bitmap::SetWordBits(size_t i, size_t from, size_t to) { template inline bool Bitmap::GetBit(size_t i) const { - ASSERT(i < N); + TC_ASSERT_LT(i, N); size_t word = i / kWordSize; size_t offset = i % kWordSize; ASSUME(word < kWords); @@ -314,7 +340,7 @@ inline bool Bitmap::GetBit(size_t i) const { template inline void Bitmap::SetBit(size_t i) { - ASSERT(i < N); + TC_ASSERT_LT(i, N); size_t word = i / kWordSize; size_t offset = i % kWordSize; ASSUME(word < kWords); @@ -323,13 +349,18 @@ inline void Bitmap::SetBit(size_t i) { template inline void Bitmap::ClearBit(size_t i) { - ASSERT(i < N); + TC_ASSERT_LT(i, N); size_t word = i / kWordSize; size_t offset = i % kWordSize; ASSUME(word < kWords); bits_[word] &= ~(size_t{1} << offset); } +template +inline size_t Bitmap::CountBits() const { + return CountBits(0, N); +} + template inline size_t Bitmap::CountBits(size_t index, size_t n) const { ASSUME(index + n <= N); @@ -386,7 +417,7 @@ inline void Bitmap::ClearLowestBit() { template template inline void Bitmap::SetRangeValue(size_t index, size_t n) { - ASSERT(index + n <= N); + TC_ASSERT_LE(index + n, N); size_t word = index / kWordSize; size_t offset = index % kWordSize; size_t k = offset + n; @@ -403,8 +434,8 @@ inline void Bitmap::SetRangeValue(size_t index, size_t n) { } template -inline bool Bitmap::NextFreeRange(size_t start, size_t *index, - size_t *length) const { +inline bool Bitmap::NextFreeRange(size_t start, size_t* index, + size_t* length) const { if (start >= N) return false; size_t i = FindClear(start); if (i == N) return false; @@ -444,7 +475,7 @@ inline void Bitmap::Clear() { template template inline size_t Bitmap::FindValue(size_t index) const { - ASSERT(index < N); + TC_ASSERT_LT(index, N); size_t offset = index % kWordSize; size_t word = index / kWordSize; ASSUME(word < kWords); @@ -473,7 +504,7 @@ inline size_t Bitmap::FindValue(size_t index) const { template template inline ssize_t Bitmap::FindValueBackwards(size_t index) const { - ASSERT(index < N); + TC_ASSERT_LT(index, N); size_t offset = index % kWordSize; ssize_t word = index / kWordSize; ASSUME(word < kWords); diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_benchmark.cc b/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_benchmark.cc index 278fc9ef1ec7..b83e0429c297 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_benchmark.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_benchmark.cc @@ -13,13 +13,15 @@ // limitations under the License. 
#include -#include +#include +#include #include #include "absl/base/attributes.h" #include "absl/random/distributions.h" #include "absl/random/random.h" #include "benchmark/benchmark.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/range_tracker.h" GOOGLE_MALLOC_SECTION_BEGIN @@ -27,11 +29,16 @@ namespace tcmalloc { namespace tcmalloc_internal { namespace { +struct RangeInfo { + size_t index; + size_t len; +}; + template static void BM_MarkUnmark(benchmark::State& state) { RangeTracker range; absl::BitGen rng; - std::vector> things; + std::vector things; while (range.used() < N / 2) { size_t len = absl::LogUniform(rng, 0, range.longest_free() - 1) + 1; @@ -43,7 +50,7 @@ static void BM_MarkUnmark(benchmark::State& state) { for (auto s : state) { size_t index = absl::Uniform(rng, 0, things.size()); auto p = things[index]; - range.Unmark(p.first, p.second); + range.Unmark(p.index, p.len); size_t len = absl::LogUniform(rng, 0, range.longest_free() - 1) + 1; things[index] = {range.FindAndMark(len), len}; @@ -58,6 +65,7 @@ BENCHMARK_TEMPLATE(BM_MarkUnmark, 256 * 32); template static void BM_MarkUnmarkEmpty(benchmark::State& state) { RangeTracker range; + absl::BitGen rng; for (auto s : state) { size_t index = range.FindAndMark(K); benchmark::DoNotOptimize(index); diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_test.cc index 4f9202e22196..9cd9df1e9710 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_test.cc @@ -14,15 +14,15 @@ #include "tcmalloc/internal/range_tracker.h" -#include +#include +#include + #include #include #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "absl/base/attributes.h" #include "absl/container/fixed_array.h" -#include "absl/random/distributions.h" #include "absl/random/random.h" namespace tcmalloc { @@ -35,17 +35,17 @@ using testing::Pair; class BitmapTest : public testing::Test { protected: template - std::vector FindSetResults(const Bitmap &map) { + std::vector FindSetResults(const Bitmap& map) { return FindResults(map); } template - std::vector FindClearResults(const Bitmap &map) { + std::vector FindClearResults(const Bitmap& map) { return FindResults(map); } template - std::vector FindResults(const Bitmap &map) { + std::vector FindResults(const Bitmap& map) { std::vector results; ssize_t last = -1; for (size_t i = 0; i < N; ++i) { @@ -63,17 +63,17 @@ class BitmapTest : public testing::Test { } template - std::vector FindSetResultsBackwards(const Bitmap &map) { + std::vector FindSetResultsBackwards(const Bitmap& map) { return FindResultsBackwards(map); } template - std::vector FindClearResultsBackwards(const Bitmap &map) { + std::vector FindClearResultsBackwards(const Bitmap& map) { return FindResultsBackwards(map); } template - std::vector FindResultsBackwards(const Bitmap &map) { + std::vector FindResultsBackwards(const Bitmap& map) { std::vector results; ssize_t last = N; for (ssize_t i = N - 1; i >= 0; --i) { @@ -187,6 +187,7 @@ TEST_F(BitmapTest, FindClear) { TEST_F(BitmapTest, CountBits) { Bitmap<253> map; map.SetRange(0, 253); + EXPECT_EQ(map.CountBits(), 253); EXPECT_EQ(map.CountBits(0, 253), 253); EXPECT_EQ(map.CountBits(8, 245), 245); EXPECT_EQ(map.CountBits(0, 250), 250); @@ -197,6 +198,7 @@ TEST_F(BitmapTest, CountBits) { map.ClearBit(63); map.ClearBit(128); + EXPECT_EQ(map.CountBits(), 248); EXPECT_EQ(map.CountBits(0, 253), 248); EXPECT_EQ(map.CountBits(8, 
245), 241); EXPECT_EQ(map.CountBits(0, 250), 245); @@ -206,12 +208,14 @@ TEST_F(BitmapTest, CountBits) { map.ClearBit(251); map.ClearBit(252); + EXPECT_EQ(map.CountBits(), 244); EXPECT_EQ(map.CountBits(0, 253), 244); EXPECT_EQ(map.CountBits(8, 245), 237); EXPECT_EQ(map.CountBits(0, 250), 243); map.ClearBit(0); + EXPECT_EQ(map.CountBits(), 243); EXPECT_EQ(map.CountBits(0, 253), 243); EXPECT_EQ(map.CountBits(8, 245), 237); EXPECT_EQ(map.CountBits(0, 250), 242); @@ -289,6 +293,17 @@ TEST_F(RangeTrackerTest, Trivial) { EXPECT_THAT(FreeRanges(), ElementsAre(Pair(0, 300))); } +TEST_F(RangeTrackerTest, Mark) { + EXPECT_EQ(range_.longest_free(), kBits); + range_.Mark(100, 100); + EXPECT_EQ(range_.used(), 100); + EXPECT_EQ(range_.longest_free(), kBits - 200); + EXPECT_THAT(FreeRanges(), ElementsAre(Pair(0, 100), Pair(200, kBits - 200))); + range_.Unmark(100, 100); + EXPECT_EQ(range_.used(), 0); + EXPECT_EQ(range_.longest_free(), kBits); +} + } // namespace } // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/residency.cc b/contrib/libs/tcmalloc/tcmalloc/internal/residency.cc new file mode 100644 index 000000000000..13f804f5a28f --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/residency.cc @@ -0,0 +1,230 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/residency.h" + +#include +#include +#include + +#include +#include +#include +#include + +#include "absl/status/status.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/range_tracker.h" +#include "tcmalloc/internal/util.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +// From fs/proc/task_mmu.c: +#define PM_SWAP 1ULL << 62 +#define PM_PRESENT 1ULL << 63 + +constexpr bool PageSwapped(uint64_t flags) { + constexpr uint64_t kSwap = PM_SWAP; + return (flags & kSwap) == kSwap; +} + +constexpr bool PagePresent(uint64_t flags) { + constexpr uint64_t kPresentPage = PM_PRESENT; + return (flags & kPresentPage) == kPresentPage; +} + +// Small helper to interpret /proc/pid/pagemap. Bit 62 represents if the page is +// swapped, and bit 63 represents if the page is present. 
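+// For example, an entry with bit 63 set adds `size` to bytes_resident, an
+// entry with bit 62 set adds `size` to bytes_swapped, and a zero entry
+// contributes to neither.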
+void Update(const uint64_t input, const size_t size, Residency::Info& info) { + // From fs/proc/task_mmu.c: + if (PagePresent(input)) { + info.bytes_resident += size; + } + if (PageSwapped(input)) { + info.bytes_swapped += size; + } +} + +} // namespace + +Residency::Residency() : fd_(signal_safe_open("/proc/self/pagemap", O_RDONLY)) { + TC_CHECK_GE(sizeof(buf_), kSizeOfHugepageInPagemap, + "Buffer size is not large enough to hold the pagemap entries"); + TC_CHECK_LE(kNativePagesInHugePage, kMaxResidencyBits, + "Actual number of native pages in a hugepage is larger than the " + "total capacity of residency bitmaps"); +} + +Residency::Residency(const char* const alternate_filename) + : fd_(signal_safe_open(alternate_filename, O_RDONLY)) { + TC_CHECK_GE(sizeof(buf_), kSizeOfHugepageInPagemap, + "Buffer size is not large enough to hold the pagemap entries"); + TC_CHECK_LE(kNativePagesInHugePage, kMaxResidencyBits, + "Actual number of native pages in a hugepage is larger than the " + "total capacity of residency bitmaps"); +} + +Residency::~Residency() { + if (fd_ >= 0) { + signal_safe_close(fd_); + } +} + +absl::StatusCode Residency::Seek(const uintptr_t vaddr) { + size_t offset = vaddr / kPageSize * kPagemapEntrySize; + // Note: lseek can't be interrupted. + off_t status = ::lseek(fd_, offset, SEEK_SET); + if (status != offset) { + return absl::StatusCode::kUnavailable; + } + return absl::StatusCode::kOk; +} + +std::optional Residency::ReadOne() { + static_assert(sizeof(buf_) >= kPagemapEntrySize); + // /proc/pid/pagemap is a sequence of 64-bit values in machine endianness, one + // per page. The style guide really does not want me to do this "unsafe + // conversion", but the conversion is done in reverse by the kernel and we + // never persist it anywhere, so we actually do want this. + auto status = signal_safe_read(fd_, reinterpret_cast(buf_), + kPagemapEntrySize, nullptr); + if (status != kPagemapEntrySize) { + return std::nullopt; + } + return buf_[0]; +} + +absl::StatusCode Residency::ReadMany(int64_t num_pages, Residency::Info& info) { + while (num_pages > 0) { + const size_t batch_size = std::min(kEntriesInBuf, num_pages); + const size_t to_read = kPagemapEntrySize * batch_size; + + // We read continuously. For the first read, this starts at wherever the + // first ReadOne ended. See above note for the reinterpret_cast. + auto status = + signal_safe_read(fd_, reinterpret_cast(buf_), to_read, nullptr); + if (status != to_read) { + return absl::StatusCode::kUnavailable; + } + for (int i = 0; i < batch_size; ++i) { + Update(buf_[i], kPageSize, info); + } + num_pages -= batch_size; + } + return absl::StatusCode::kOk; +} + +std::optional Residency::Get(const void* const addr, + const size_t size) { + if (fd_ < 0) { + return std::nullopt; + } + + Residency::Info info; + if (size == 0) return info; + + uintptr_t uaddr = reinterpret_cast(addr); + // Round address down to get the start of the page containing the data. + uintptr_t basePage = uaddr & ~(kPageSize - 1); + // Round end address up to get the end of the page containing the data. + // The data is in [basePage, endPage). 
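+  // For example, with 4 KiB pages, addr = 0x1234 and size = 0x2000 give
+  // basePage = 0x1000 and endPage = 0x4000, i.e. three pages to inspect.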
+ uintptr_t endPage = (uaddr + size + kPageSize - 1) & ~(kPageSize - 1); + + int64_t remainingPages = (endPage - basePage) / kPageSize; + + if (auto res = Seek(basePage); res != absl::StatusCode::kOk) { + return std::nullopt; + } + + if (remainingPages == 1) { + auto res = ReadOne(); + if (!res.has_value()) return std::nullopt; + Update(res.value(), size, info); + return info; + } + + // Since the input address might not be page-aligned (it can possibly point to + // an arbitrary object), we read the information about the first page + // separately with ReadOne, then read the complete pages with ReadMany, and + // then read the last page with ReadOne again if needed. + auto res = ReadOne(); + if (!res.has_value()) return std::nullopt; + + // Handle the first page. + size_t firstPageSize = kPageSize - (uaddr - basePage); + Update(res.value(), firstPageSize, info); + remainingPages--; + + // Handle all pages but the last page. + if (auto res = ReadMany(remainingPages - 1, info); + res != absl::StatusCode::kOk) { + return std::nullopt; + } + + // Check final page + size_t lastPageSize = kPageSize - (endPage - uaddr - size); + res = ReadOne(); + if (!res.has_value()) return std::nullopt; + Update(res.value(), lastPageSize, info); + return info; +} + +Residency::SinglePageBitmaps Residency::GetHolesAndSwappedBitmaps( + const void* const addr) { + Bitmap page_holes; + Bitmap page_swapped; + uintptr_t currPage = reinterpret_cast(addr); + if ((currPage & kHugePageMask) != currPage) { + TC_LOG("Address is not hugepage aligned"); + return SinglePageBitmaps{page_holes, page_swapped, + absl::StatusCode::kFailedPrecondition}; + } + auto res = Seek(currPage); + if (res != absl::StatusCode::kOk) { + return SinglePageBitmaps{page_holes, page_swapped, + absl::StatusCode::kUnavailable}; + } + auto status = signal_safe_read(fd_, reinterpret_cast(buf_), + kSizeOfHugepageInPagemap, nullptr); + if (status != kSizeOfHugepageInPagemap) { + TC_LOG( + "Could not read from pagemap file due to unexpected number of bytes " + "read. Expected %d bytes, got %d bytes", + kSizeOfHugepageInPagemap, status); + return SinglePageBitmaps{page_holes, page_swapped, + absl::StatusCode::kUnavailable}; + } + + for (int native_page_idx = 0; native_page_idx < kNativePagesInHugePage; + ++native_page_idx) { + uint64_t page_map = buf_[native_page_idx]; + // Case for page hole + if (!PagePresent(page_map)) { + page_holes.SetBit(native_page_idx); + // Case for zswap + if (PageSwapped(page_map)) { + page_swapped.SetBit(native_page_idx); + } + } + } + return SinglePageBitmaps{page_holes, page_swapped, absl::StatusCode::kOk}; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/residency.h b/contrib/libs/tcmalloc/tcmalloc/internal/residency.h new file mode 100644 index 000000000000..309edb393c69 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/residency.h @@ -0,0 +1,120 @@ +#pragma clang system_header +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_RESIDENCY_H_ +#define TCMALLOC_INTERNAL_RESIDENCY_H_ + +#include +#include +#include + +#include +#include + +#include "absl/status/status.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/page_size.h" +#include "tcmalloc/internal/range_tracker.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Residency offers information about memory residency: whether or not specific +// spans of memory are resident in core ("m in core"), swapped, or not present. +// Originally, this was implemented via the mincore syscall, but has since been +// abstracted to provide more information. +class Residency { + public: + // This class keeps an open file handle to procfs. Destroy the object to + // reclaim it. + Residency(); + ~Residency(); + + // Query a span of memory starting from `addr` for `size` bytes. + // + // We use std::optional for return value as std::optional guarantees that no + // dynamic memory allocation would happen. In contrast, absl::StatusOr may + // dynamically allocate memory when needed. Using std::optional allows us to + // use the function in places where memory allocation is prohibited. + // + // This is NOT thread-safe. Do not use multiple copies of this class across + // threads. + struct Info { + size_t bytes_resident = 0; + size_t bytes_swapped = 0; + }; + std::optional Get(const void* addr, size_t size); + + static constexpr int kMaxResidencyBits = 512; + + // Struct is ordered with bitmaps first to optimize cacheline usage. + struct SinglePageBitmaps { + Bitmap holes; + Bitmap swapped; + absl::StatusCode status; + }; + + // Using a hugepage-aligned address, parse through /proc/self/pagemap + // to output two bitmaps - one for pages that are holes and one for pages that + // are swapped. Hugepage-sized regions are assumed to be 2MiB in size. A + // SinglePageBitmaps struct is returned with the status , the page_holes + // bitmap, and the page_swapped bitmap. + SinglePageBitmaps GetHolesAndSwappedBitmaps(const void* addr); + + private: + // This helper seeks the internal file to the correct location for the given + // virtual address. + absl::StatusCode Seek(uintptr_t vaddr); + // This helper reads information for a single page. This is useful for the + // boundaries. It continues the read from the last Seek() or last Read + // operation. + std::optional ReadOne(); + // This helper reads information for `num_pages` worth of _full_ pages and + // puts the results into `info`. It continues the read from the last Seek() or + // last Read operation. + absl::StatusCode ReadMany(int64_t num_pages, Info& info); + + // For testing. + friend class ResidencySpouse; + explicit Residency(const char* alternate_filename); + + // Size of the buffer used to gather results. 
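+  // With 8-byte pagemap entries this buffer holds 512 entries per read,
+  // which (for 4 KiB native pages) is exactly one 2 MiB hugepage's worth;
+  // the constructor checks it against kSizeOfHugepageInPagemap.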
+ static constexpr int kBufferLength = 4096; + static constexpr int kPagemapEntrySize = 8; + static constexpr int kEntriesInBuf = kBufferLength / kPagemapEntrySize; + + const size_t kPageSize = GetPageSize(); + + static constexpr uintptr_t kHugePageMask = ~(kHugePageSize - 1); + const size_t kNativePagesInHugePage = kHugePageSize / kPageSize; + + uint64_t buf_[kEntriesInBuf]; + const int fd_; + const size_t kSizeOfHugepageInPagemap = + kPagemapEntrySize * kNativePagesInHugePage; +}; + +inline std::ostream& operator<<(std::ostream& stream, + const Residency::Info& rhs) { + return stream << "{.resident = " << rhs.bytes_resident + << ", .swapped = " << rhs.bytes_swapped << "}"; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_RESIDENCY_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/residency_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/residency_test.cc new file mode 100644 index 000000000000..c0a8cb1fc0a5 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/residency_test.cc @@ -0,0 +1,373 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/residency.h" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "benchmark/benchmark.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/log/check.h" +#include "absl/status/status.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" +#include "tcmalloc/internal/allocation_guard.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/page_size.h" +#include "tcmalloc/internal/range_tracker.h" +#include "tcmalloc/internal/util.h" + +namespace tcmalloc { +namespace tcmalloc_internal { + +class ResidencySpouse { + public: + explicit ResidencySpouse() = default; + explicit ResidencySpouse(const char* const filename) : r_(filename) {} + explicit ResidencySpouse(absl::string_view filename) : r_(filename.data()) {} + + template + decltype(auto) Get(Args&&... args) { + return r_.Get(std::forward(args)...); + } + + decltype(auto) GetHolesAndSwappedBitmaps(const void* const addr) { + return r_.GetHolesAndSwappedBitmaps(addr); + } + + private: + Residency r_; +}; + +namespace { + +using ::testing::FieldsAre; +using ::testing::Optional; + +constexpr uint64_t kPageSwapped = (1ULL << 62); +constexpr uint64_t kPagePresent = (1ULL << 63); + +TEST(ResidenceTest, ThisProcess) { + const size_t kPageSize = GetPageSize(); + const int kNumPages = 16; + + // Try both private and shared mappings to make sure we have the bit order of + // /proc/pid/pageflags correct. + for (const int flags : + {MAP_ANONYMOUS | MAP_SHARED, MAP_ANONYMOUS | MAP_PRIVATE}) { + const size_t kHead = kPageSize * 10; + const size_t kTail = kPageSize * 10; + + Residency r; + // Overallocate kNumPages of memory, so we can munmap the page before and + // after it. 
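+    // The mapping is laid out as [head | kNumPages test pages | tail]; the
+    // first page of the mapping and the first page past the test region are
+    // unmapped below, so Get() is exercised right next to unmapped holes.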
+ void* p = mmap(nullptr, kNumPages * kPageSize + kHead + kTail, + PROT_READ | PROT_WRITE, flags, -1, 0); + ASSERT_NE(p, MAP_FAILED) << errno; + + EXPECT_THAT(r.Get(p, (kNumPages + 2) * kPageSize), + Optional(FieldsAre(0, 0))); + ASSERT_EQ(munmap(p, kPageSize), 0); + void* q = reinterpret_cast(p) + kHead; + void* last = reinterpret_cast(p) + kNumPages * kPageSize + kHead; + ASSERT_EQ(munmap(last, kPageSize), 0); + + EXPECT_THAT(r.Get(p, kHead), Optional(FieldsAre(0, 0))); + EXPECT_THAT(r.Get(last, kTail), Optional(FieldsAre(0, 0))); + + memset(q, 0, kNumPages * kPageSize); + (void)mlock(q, kNumPages * kPageSize); + ::benchmark::DoNotOptimize(q); + + EXPECT_THAT(r.Get(p, kHead), Optional(FieldsAre(0, 0))); + EXPECT_THAT(r.Get(last, kTail), Optional(FieldsAre(0, 0))); + + EXPECT_THAT(r.Get(q, kPageSize), Optional(FieldsAre(kPageSize, 0))); + + EXPECT_THAT(r.Get(q, (kNumPages + 2) * kPageSize), + Optional(FieldsAre(kPageSize * kNumPages, 0))); + + EXPECT_THAT(r.Get(reinterpret_cast(q) + 7, kPageSize - 7), + Optional(FieldsAre(kPageSize - 7, 0))); + + EXPECT_THAT(r.Get(reinterpret_cast(q) + 7, kPageSize), + Optional(FieldsAre(kPageSize, 0))); + + EXPECT_THAT(r.Get(reinterpret_cast(q) + 7, 3 * kPageSize), + Optional(FieldsAre(kPageSize * 3, 0))); + + EXPECT_THAT(r.Get(reinterpret_cast(q) + 7, kNumPages * kPageSize), + Optional(FieldsAre(kPageSize * kNumPages - 7, 0))); + + EXPECT_THAT( + r.Get(reinterpret_cast(q) + 7, kNumPages * kPageSize - 7), + Optional(FieldsAre(kPageSize * kNumPages - 7, 0))); + + EXPECT_THAT( + r.Get(reinterpret_cast(q) + 7, (kNumPages + 1) * kPageSize), + Optional(FieldsAre(kPageSize * kNumPages - 7, 0))); + + EXPECT_THAT( + r.Get(reinterpret_cast(q) + 7, (kNumPages + 1) * kPageSize - 7), + Optional(FieldsAre(kPageSize * kNumPages - 7, 0))); + + ASSERT_EQ(munmap(q, kNumPages * kPageSize), 0); + } +} + +TEST(ResidenceTest, CannotOpen) { + ResidencySpouse r("/tmp/a667ba48-18ba-4523-a8a7-b49ece3a6c2b"); + EXPECT_FALSE(r.Get(nullptr, 1).has_value()); +} + +TEST(ResidenceTest, CannotRead) { + ResidencySpouse r("/dev/null"); + EXPECT_FALSE(r.Get(nullptr, 1).has_value()); +} + +TEST(ResidenceTest, CannotSeek) { + ResidencySpouse r("/dev/null"); + EXPECT_FALSE(r.Get(&r, 1).has_value()); +} + +// Method that can write a region with a single hugepage +// a region with a single missing page, a region with every other page missing, +// a region with all missing pages, or a region with a hugepage in the middle. 
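+// Cases 0-5 emit synthetic pagemap entries: all present, all swapped, all
+// holes, alternating hole/present, alternating swapped/present, and
+// alternating swapped/hole.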
+void GenerateHolesInSinglePage(absl::string_view filename, int case_num, + int num_pages) { + int write_fd = signal_safe_open(filename.data(), O_CREAT | O_WRONLY, S_IRUSR); + CHECK_NE(write_fd, -1) << errno; + std::vector buf(num_pages, 0); + for (int i = 0; i < num_pages; i++) { + switch (case_num) { + case 0: + // All pages are present + buf[i] = kPagePresent; + break; + case 1: + // All Pages are swapped + buf[i] = kPageSwapped; + break; + case 2: + // All pages are holes + buf[i] = 0; + break; + case 3: + // Every other page is a hole, rest are present + if (i % 2 == 0) { + buf[i] = 0; + } else if (i % 2 == 1) { + buf[i] = kPagePresent; + } + break; + case 4: + // Every other page is swapped, rest are present + if (i % 2 == 0) { + buf[i] = kPageSwapped; + } else if (i % 2 == 1) { + buf[i] = kPagePresent; + } + break; + case 5: + // Every other page is swapped, rest are holes + if (i % 2 == 0) { + buf[i] = kPageSwapped; + } else if (i % 2 == 1) { + buf[i] = 0; + } + break; + } + } + int size_of_write = num_pages * sizeof(uint64_t); + CHECK_EQ(write(write_fd, buf.data(), size_of_write), size_of_write); + CHECK_EQ(close(write_fd), 0) << errno; +} + +Residency::SinglePageBitmaps GenerateExpectedSinglePageBitmaps(int case_num) { + Bitmap<512> expected_holes; + Bitmap<512> expected_swapped; + switch (case_num) { + case 0: + // All Pages are present. Both bitmaps are 0 + break; + case 1: + // All Pages are swapped. Both bitmaps are all 1 + expected_holes.SetRange(0, 512); + expected_swapped.SetRange(0, 512); + break; + case 2: + // All pages are holes. Holes bitmap is all 1 + expected_holes.SetRange(0, 512); + break; + case 3: + // Every other page is a hole, rest are present. + // Both bitmaps are 0 and 1 alternating + for (int idx = 0; idx < 512; idx += 2) { + if (idx % 2 == 0) { + expected_holes.SetBit(idx); + } + } + break; + case 4: + // Every other page is swapped, rest are present, + // Bitmaps are 0 and 1 alternating + for (int idx = 0; idx < 512; idx += 2) { + if (idx % 2 == 0) { + expected_holes.SetBit(idx); + expected_swapped.SetBit(idx); + } + } + break; + case 5: + // Every other page is swapped, rest are holes, + // swapped bitmaps are 0 and 1 alternating, holes bitmaps are all 1 + for (int idx = 0; idx < 512; idx++) { + expected_holes.SetRange(0, 512); + if (idx % 2 == 0) { + expected_swapped.SetBit(idx); + } + } + break; + } + return Residency::SinglePageBitmaps{expected_holes, expected_swapped, + absl::StatusCode::kOk}; +} + +// Method that compares two bitmaps to see if they are equivalent +bool BitmapsAreEqual(const Bitmap<512>& bitmap1, const Bitmap<512>& bitmap2) { + for (int i = 0; i < 512; ++i) { + if (bitmap1.GetBit(i) != bitmap2.GetBit(i)) { + return false; + } + } + return true; +} + +TEST(PageMapTest, GetHolesAndSwappedBitmaps) { + constexpr int kNumCases = 6; + std::array expected; + for (int i = 0; i < kNumCases; ++i) { + expected[i] = GenerateExpectedSinglePageBitmaps(i); + } + + std::optional g; + for (int i = 0; i < kNumCases; ++i) { + std::string file_path = + absl::StrCat(testing::TempDir(), "/holes_in_single_page_", i); + GenerateHolesInSinglePage(file_path, /*case_num=*/i, + /*num_pages=*/512); + + g.emplace(); + ResidencySpouse s(file_path); + Residency::SinglePageBitmaps res = + s.GetHolesAndSwappedBitmaps(reinterpret_cast(0)); + g.reset(); + EXPECT_THAT(res.status, expected[i].status); + EXPECT_TRUE(BitmapsAreEqual(res.holes, expected[i].holes)); + EXPECT_TRUE(BitmapsAreEqual(res.swapped, expected[i].swapped)); + } +} + +TEST(PageMapTest, 
CountHolesWithAddressBeyondFirstPage) { + std::optional g; + std::string file_path = + absl::StrCat(testing::TempDir(), "/holes_in_single_page"); + GenerateHolesInSinglePage(file_path, /*case_num=*/5, + /*num_pages=*/2048); + Residency::SinglePageBitmaps expected = GenerateExpectedSinglePageBitmaps(5); + g.emplace(); + ResidencySpouse s(file_path); + Residency::SinglePageBitmaps res = + s.GetHolesAndSwappedBitmaps(reinterpret_cast(2 << 21)); + g.reset(); + EXPECT_THAT(res.status, expected.status); + EXPECT_TRUE(BitmapsAreEqual(res.holes, expected.holes)); + EXPECT_TRUE(BitmapsAreEqual(res.swapped, expected.swapped)); +} + +TEST(PageMapTest, VerifyAddressAlignmentCheckPasses) { + std::optional g; + std::string file_path = absl::StrCat(testing::TempDir(), "/alignment_check"); + GenerateHolesInSinglePage(file_path, /*case_num=*/0, + /*num_pages=*/512); + g.emplace(); + ResidencySpouse s(file_path); + Residency::SinglePageBitmaps non_align_addr_res = + s.GetHolesAndSwappedBitmaps(reinterpret_cast(0x00001)); + g.reset(); + EXPECT_EQ(non_align_addr_res.status, absl::StatusCode::kFailedPrecondition); +} + +TEST(PageMapTest, VerifyAddressAlignmentBeyondFirstPageFails) { + std::optional g; + g.emplace(); + ResidencySpouse s; + Residency::SinglePageBitmaps res = + s.GetHolesAndSwappedBitmaps(reinterpret_cast((2 << 21) + 1)); + g.reset(); + EXPECT_EQ(res.status, absl::StatusCode::kFailedPrecondition); +} + +TEST(PageMapIntegrationTest, WorksOnActualData) { + std::optional g; + void* addr = mmap(nullptr, 4 << 20, PROT_WRITE, + MAP_ANONYMOUS | MAP_POPULATE | MAP_PRIVATE, -1, 0); + ASSERT_NE(addr, MAP_FAILED) << errno; + auto position = reinterpret_cast(addr); + if ((position & (kHugePageSize - 1)) != 0) { + position |= kHugePageSize - 1; + position++; + addr = reinterpret_cast(position); + } + g.emplace(); + Residency r; + auto res = r.GetHolesAndSwappedBitmaps(addr); + g.reset(); + ASSERT_EQ(res.status, absl::StatusCode::kOk); + EXPECT_TRUE(res.holes.IsZero()); + EXPECT_TRUE(res.swapped.IsZero()); + ASSERT_EQ(munmap(reinterpret_cast(addr) + 1 * 4096, 4096), 0) + << errno; + ASSERT_EQ(munmap(reinterpret_cast(addr) + 17 * 4096, 4096), 0) + << errno; + + g.emplace(); + res = r.GetHolesAndSwappedBitmaps(addr); + g.reset(); + ASSERT_EQ(res.status, absl::StatusCode::kOk); + EXPECT_FALSE(res.holes.IsZero()); + ASSERT_TRUE(res.holes.GetBit(1)); + ASSERT_TRUE(res.holes.GetBit(17)); + res.holes.ClearLowestBit(); + res.holes.ClearLowestBit(); + EXPECT_TRUE(res.holes.IsZero()); + EXPECT_TRUE(res.swapped.IsZero()); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/sampled_allocation.h b/contrib/libs/tcmalloc/tcmalloc/internal/sampled_allocation.h new file mode 100644 index 000000000000..ef6f7d551bc1 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/sampled_allocation.h @@ -0,0 +1,64 @@ +#pragma clang system_header +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef TCMALLOC_INTERNAL_SAMPLED_ALLOCATION_H_ +#define TCMALLOC_INTERNAL_SAMPLED_ALLOCATION_H_ + +#include + +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/sampled_allocation_recorder.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Stores information about the sampled allocation. +struct SampledAllocation : public tcmalloc_internal::Sample { + // We use this constructor to initialize `graveyard_`, which is used to + // maintain the freelist of SampledAllocations. When we revive objects from + // the freelist, we use `PrepareForSampling()` to update the state of the + // object. + constexpr SampledAllocation() = default; + + // When no object is available on the freelist, we allocate for a new + // SampledAllocation object and invoke this constructor with + // `PrepareForSampling()`. + explicit SampledAllocation(StackTrace&& stack_trace) { + PrepareForSampling(std::move(stack_trace)); + } + + SampledAllocation(const SampledAllocation&) = delete; + SampledAllocation& operator=(const SampledAllocation&) = delete; + + SampledAllocation(SampledAllocation&&) = delete; + SampledAllocation& operator=(SampledAllocation&&) = delete; + + // Prepares the state of the object. It is invoked when either a new sampled + // allocation is constructed or when an object is revived from the freelist. + void PrepareForSampling(StackTrace&& stack_trace) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock) { + sampled_stack = std::move(stack_trace); + } + + // The stack trace of the sampled allocation. + StackTrace sampled_stack = {}; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_SAMPLED_ALLOCATION_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/sampled_allocation_recorder.h b/contrib/libs/tcmalloc/tcmalloc/internal/sampled_allocation_recorder.h new file mode 100644 index 000000000000..cf4a1164d658 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/sampled_allocation_recorder.h @@ -0,0 +1,262 @@ +#pragma clang system_header +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: sampled_allocation_recorder.h +// ----------------------------------------------------------------------------- +// +// This header file defines a lock-free linked list for recording TCMalloc +// sampled allocations collected from a random/stochastic process. 
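+//
+// Typical usage (illustrative sketch only; MySample and MyAllocator stand in
+// for user-provided types meeting the Sample<T> and allocator requirements
+// defined below):
+//
+//   MyAllocator allocator;
+//   SampleRecorder<MySample, MyAllocator> recorder(allocator);
+//   MySample* s = recorder.Register(/* forwarded to PrepareForSampling */);
+//   recorder.Iterate([](const MySample& sample) { /* visit live samples */ });
+//   recorder.Unregister(s);  // returns the sample to the internal freelist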
+ +#ifndef TCMALLOC_INTERNAL_SAMPLED_ALLOCATION_RECORDER_H_ +#define TCMALLOC_INTERNAL_SAMPLED_ALLOCATION_RECORDER_H_ + +#include + +#include "absl/base/attributes.h" +#include "absl/base/const_init.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/thread_annotations.h" +#include "absl/functional/function_ref.h" +#include "tcmalloc/internal/allocation_guard.h" +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Sample that has members required for linking samples in the linked list of +// samples maintained by the SampleRecorder. Type T defines the sampled data. +template +struct Sample { + // Guards the ability to restore the sample to a pristine state. This + // prevents races with sampling and resurrecting an object. + absl::base_internal::SpinLock lock{absl::kConstInit, + absl::base_internal::SCHEDULE_KERNEL_ONLY}; + T* next = nullptr; + T* dead ABSL_GUARDED_BY(lock) = nullptr; +}; + +// Holds samples and their associated stack traces. +// +// Thread safe. +template +class SampleRecorder { + public: + using Allocator = AllocatorT; + + constexpr explicit SampleRecorder( + Allocator& allocator ABSL_ATTRIBUTE_LIFETIME_BOUND); + ~SampleRecorder(); + + SampleRecorder(const SampleRecorder&) = delete; + SampleRecorder& operator=(const SampleRecorder&) = delete; + + SampleRecorder(SampleRecorder&&) = delete; + SampleRecorder& operator=(SampleRecorder&&) = delete; + + // Registers for sampling. Returns an opaque registration info. + template + T* Register(Targs&&... args); + + // Unregisters the sample. + void Unregister(T* sample); + + // The dispose callback will be called on all samples the moment they are + // being unregistered. Only affects samples that are unregistered after the + // callback has been set. + // Returns the previous callback. + using DisposeCallback = void (*)(const T&); + DisposeCallback SetDisposeCallback(DisposeCallback f); + + // Unregisters any live samples starting from `all_`. Note that if there are + // any samples added in front of `all_` in other threads after this function + // reads `all_`, they won't be cleaned up. External synchronization is + // required if the intended outcome is to have no live sample after this call. + // Extra care must be taken when `Unregister()` is invoked concurrently with + // this function to avoid a dead sample (updated by this function) being + // passed to `Unregister()` which assumes the sample is live. + void UnregisterAll(); + + // Iterates over all the registered samples. + void Iterate(const absl::FunctionRef& f); + + void AcquireInternalLocks(); + void ReleaseInternalLocks(); + + private: + void PushNew(T* sample); + void PushDead(T* sample); + template + T* PopDead(Targs&&... args); + + // Intrusive lock free linked lists for tracking samples. + // + // `all_` records all samples (they are never removed from this list) and is + // terminated with a `nullptr`. + // + // `graveyard_.dead` is a circular linked list. When it is empty, + // `graveyard_.dead == &graveyard`. The list is circular so that + // every item on it (even the last) has a non-null dead pointer. This allows + // `Iterate` to determine if a given sample is live or dead using only + // information on the sample itself. 
+ // + // For example, nodes [A, B, C, D, E] with [A, C, E] alive and [B, D] dead + // looks like this (G is the Graveyard): + // + // +---+ +---+ +---+ +---+ +---+ + // all -->| A |--->| B |--->| C |--->| D |--->| E | + // | | | | | | | | | | + // +---+ | | +->| |-+ | | +->| |-+ | | + // | G | +---+ | +---+ | +---+ | +---+ | +---+ + // | | | | | | + // | | --------+ +--------+ | + // +---+ | + // ^ | + // +--------------------------------------+ + // + std::atomic all_; + T graveyard_; + + std::atomic dispose_; + Allocator* const allocator_; +}; + +template +typename SampleRecorder::DisposeCallback +SampleRecorder::SetDisposeCallback(DisposeCallback f) { + return dispose_.exchange(f, std::memory_order_relaxed); +} + +template +constexpr SampleRecorder::SampleRecorder(Allocator& allocator) + : all_(nullptr), dispose_(nullptr), allocator_(&allocator) { + graveyard_.dead = &graveyard_; +} + +template +SampleRecorder::~SampleRecorder() { + T* s = all_.load(std::memory_order_acquire); + while (s != nullptr) { + T* next = s->next; + allocator_->Delete(s); + s = next; + } +} + +template +void SampleRecorder::PushNew(T* sample) { + sample->next = all_.load(std::memory_order_relaxed); + while (!all_.compare_exchange_weak(sample->next, sample, + std::memory_order_release, + std::memory_order_relaxed)) { + } +} + +template +void SampleRecorder::PushDead(T* sample) { + if (auto* dispose = dispose_.load(std::memory_order_relaxed)) { + dispose(*sample); + } + sample->sampled_stack.user_data.Reset(); + + AllocationGuardSpinLockHolder graveyard_lock(&graveyard_.lock); + AllocationGuardSpinLockHolder sample_lock(&sample->lock); + sample->dead = graveyard_.dead; + graveyard_.dead = sample; +} + +template +template +T* SampleRecorder::PopDead(Targs&&... args) { + AllocationGuardSpinLockHolder graveyard_lock(&graveyard_.lock); + + // The list is circular, so eventually it collapses down to + // graveyard_.dead == &graveyard_ + // when it is empty. + T* sample = graveyard_.dead; + if (sample == &graveyard_) return nullptr; + + AllocationGuardSpinLockHolder sample_lock(&sample->lock); + graveyard_.dead = sample->dead; + sample->dead = nullptr; + sample->PrepareForSampling(std::forward(args)...); + return sample; +} + +template +template +T* SampleRecorder::Register(Targs&&... args) { + T* sample = PopDead(std::forward(args)...); + if (sample == nullptr) { + // Resurrection failed. Hire a new warlock. 
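+    // No dead sample was available to revive, so allocate a fresh one and
+    // link it onto the `all_` list.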
+ sample = allocator_->New(std::forward(args)...); + PushNew(sample); + } + + return sample; +} + +template +void SampleRecorder::Unregister(T* sample) { + PushDead(sample); +} + +template +void SampleRecorder::UnregisterAll() { + AllocationGuardSpinLockHolder graveyard_lock(&graveyard_.lock); + T* sample = all_.load(std::memory_order_acquire); + auto* dispose = dispose_.load(std::memory_order_relaxed); + while (sample != nullptr) { + { + AllocationGuardSpinLockHolder sample_lock(&sample->lock); + if (sample->dead == nullptr) { + if (dispose) dispose(*sample); + sample->dead = graveyard_.dead; + graveyard_.dead = sample; + } + } + sample = sample->next; + } +} + +template +void SampleRecorder::Iterate( + const absl::FunctionRef& f) { + T* s = all_.load(std::memory_order_acquire); + while (s != nullptr) { + AllocationGuardSpinLockHolder l(&s->lock); + if (s->dead == nullptr) { + f(*s); + } + s = s->next; + } +} + +template +void SampleRecorder::AcquireInternalLocks() { + graveyard_.lock.Lock(); +} + +template +void SampleRecorder::ReleaseInternalLocks() { + graveyard_.lock.Unlock(); +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_SAMPLED_ALLOCATION_RECORDER_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/sampled_allocation_recorder_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/sampled_allocation_recorder_test.cc new file mode 100644 index 000000000000..6e42bc972415 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/sampled_allocation_recorder_test.cc @@ -0,0 +1,278 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "tcmalloc/internal/sampled_allocation_recorder.h" + +#include + +#include +#include +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/thread_annotations.h" +#include "absl/synchronization/notification.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "tcmalloc/internal/explicitly_constructed.h" +#include "tcmalloc/testing/thread_manager.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { +using ::testing::IsEmpty; +using ::testing::UnorderedElementsAre; + +struct Info : public Sample { + public: + Info() { PrepareForSampling(); } + void PrepareForSampling() ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock) { + initialized = true; + } + std::atomic size; + absl::Time create_time; + bool initialized; +}; + +class TestAllocator { + public: + static Info* New() { + alloc_count_.fetch_add(1, std::memory_order_relaxed); + return new Info; + } + static void Delete(Info* info) { + alloc_count_.fetch_sub(1, std::memory_order_relaxed); + delete info; + } + static uint64_t alloc_count() { + return alloc_count_.load(std::memory_order_relaxed); + } + inline static std::atomic alloc_count_{0}; +}; + +class SampleRecorderTest : public ::testing::Test { + public: + SampleRecorderTest() : sample_recorder_(allocator_) {} + + std::vector GetSizes() { + std::vector res; + // Reserve to avoid reentrant allocations while iterating. + res.reserve(5); + sample_recorder_.Iterate([&](const Info& info) { + res.push_back(info.size.load(std::memory_order_acquire)); + }); + return res; + } + + Info* Register(size_t size) { + auto* info = sample_recorder_.Register(); + assert(info != nullptr); + info->size.store(size); + return info; + } + + TestAllocator allocator_; + SampleRecorder sample_recorder_; +}; + +// Check that the state modified by PrepareForSampling() is properly set. +TEST_F(SampleRecorderTest, PrepareForSampling) { + Info* info1 = Register(1); + // PrepareForSampling() is invoked in the constructor. + EXPECT_TRUE(info1->initialized); + info1->initialized = false; + sample_recorder_.Unregister(info1); + + Info* info2 = Register(2); + // We are reusing the sample, PrepareForSampling() is invoked in PopDead(); + EXPECT_TRUE(info2->initialized); +} + +TEST_F(SampleRecorderTest, Registration) { + auto* info1 = Register(1); + EXPECT_THAT(GetSizes(), UnorderedElementsAre(1)); + + auto* info2 = Register(2); + EXPECT_THAT(GetSizes(), UnorderedElementsAre(1, 2)); + info1->size.store(3); + EXPECT_THAT(GetSizes(), UnorderedElementsAre(3, 2)); + + sample_recorder_.Unregister(info1); + sample_recorder_.Unregister(info2); +} + +TEST_F(SampleRecorderTest, Unregistration) { + std::vector infos; + for (size_t i = 0; i < 3; ++i) { + infos.push_back(Register(i)); + } + EXPECT_THAT(GetSizes(), UnorderedElementsAre(0, 1, 2)); + + sample_recorder_.Unregister(infos[1]); + EXPECT_THAT(GetSizes(), UnorderedElementsAre(0, 2)); + + infos.push_back(Register(3)); + infos.push_back(Register(4)); + EXPECT_THAT(GetSizes(), UnorderedElementsAre(0, 2, 3, 4)); + sample_recorder_.Unregister(infos[3]); + EXPECT_THAT(GetSizes(), UnorderedElementsAre(0, 2, 4)); + + sample_recorder_.Unregister(infos[0]); + sample_recorder_.Unregister(infos[2]); + sample_recorder_.Unregister(infos[4]); + EXPECT_THAT(GetSizes(), IsEmpty()); + + for (size_t i = 0; i < 10; ++i) { + Register(i); + } + sample_recorder_.UnregisterAll(); + // In a single thread, we expect all samples to be cleaned up. 
+ EXPECT_THAT(GetSizes(), IsEmpty()); + // UnregisterAll() marks all the live samples as dead. If we register + // another set of samples, we expect the dead samples are reused and + // no actual allocation is needed for the new set of samples. + const uint64_t alloc_count1 = allocator_.alloc_count(); + for (size_t i = 0; i < 10; ++i) { + Register(i); + } + const uint64_t alloc_count2 = allocator_.alloc_count(); + EXPECT_EQ(alloc_count1, alloc_count2); +} + +TEST_F(SampleRecorderTest, MultiThreaded) { + absl::Notification stop; + ThreadManager threads; + const int kThreads = 10; + threads.Start(kThreads, [&](int) { + std::random_device rd; + std::mt19937 gen(rd()); + + std::vector infoz; + while (!stop.HasBeenNotified()) { + if (infoz.empty()) { + infoz.push_back(sample_recorder_.Register()); + } + switch (std::uniform_int_distribution<>(0, 2)(gen)) { + case 0: { + infoz.push_back(sample_recorder_.Register()); + break; + } + case 1: { + size_t p = std::uniform_int_distribution<>(0, infoz.size() - 1)(gen); + Info* info = infoz[p]; + infoz[p] = infoz.back(); + infoz.pop_back(); + sample_recorder_.Unregister(info); + break; + } + case 2: { + absl::Duration oldest = absl::ZeroDuration(); + sample_recorder_.Iterate([&](const Info& info) { + oldest = std::max(oldest, absl::Now() - info.create_time); + ASSERT_TRUE(info.initialized); + }); + ASSERT_GE(oldest, absl::ZeroDuration()); + break; + } + } + } + }); + + // Another `SampleRecorder` instance to test `UnregisterAll()`, which does not + // work well with the setup above since `infoz` might find itself storing dead + // objects as `UnregisterAll()` is running concurrently. And `Unregister()` + // assumes the object it is going to mark dead is still alive. + SampleRecorder sample_recorder{allocator_}; + threads.Start(kThreads, [&](int) { sample_recorder.Register(); }); + threads.Start(kThreads, [&](int) { sample_recorder.UnregisterAll(); }); + threads.Start(kThreads, [&](int) { + sample_recorder.Iterate( + [&](const Info& info) { ASSERT_TRUE(info.initialized); }); + }); + + // The threads will hammer away. Give it a little bit of time for tsan to + // spot errors. + absl::SleepFor(absl::Seconds(3)); + stop.Notify(); + threads.Stop(); +} + +TEST_F(SampleRecorderTest, Callback) { + auto* info1 = Register(1); + auto* info2 = Register(2); + + static const Info* expected; + + auto callback = [](const Info& info) { + // We can't use `info` outside of this callback because the object will be + // disposed as soon as we return from here. + EXPECT_EQ(&info, expected); + }; + + // Set the callback. + EXPECT_EQ(sample_recorder_.SetDisposeCallback(callback), nullptr); + expected = info1; + sample_recorder_.Unregister(info1); + + // Unset the callback. + EXPECT_EQ(callback, sample_recorder_.SetDisposeCallback(nullptr)); + expected = nullptr; // no more calls. + sample_recorder_.Unregister(info2); +} + +// Similar to Sample above but requires parameter(s) at initialization. +struct InfoWithParam : public Sample { + public: + // Default constructor to initialize |graveyard_|. 
+ InfoWithParam() = default; + explicit InfoWithParam(size_t size) { PrepareForSampling(size); } + void PrepareForSampling(size_t size) ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock) { + info_size = size; + initialized = true; + } + size_t info_size; + bool initialized; +}; + +class InfoAllocator { + public: + static InfoWithParam* New(size_t size) { return new InfoWithParam(size); } + static void Delete(InfoWithParam* infoWithParam) { delete infoWithParam; } +}; + +TEST(SampleRecorderWithParamTest, RegisterWithParam) { + InfoAllocator allocator; + SampleRecorder sample_recorder{allocator}; + // Register() goes though New(). + InfoWithParam* info = sample_recorder.Register(1); + EXPECT_THAT(info->info_size, 1); + EXPECT_TRUE(info->initialized); + // Set these values to something else. + info->info_size = 0; + info->initialized = false; + sample_recorder.Unregister(info); + // |info| is not deleted, just marked as dead. Here, Register() would invoke + // PopDead(), revive the same object, with its fields populated by PopDead(). + sample_recorder.Register(2); + EXPECT_THAT(info->info_size, 2); + EXPECT_TRUE(info->initialized); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/sampled_allocation_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/sampled_allocation_test.cc new file mode 100644 index 000000000000..e398d6d2320b --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/sampled_allocation_test.cc @@ -0,0 +1,70 @@ +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/sampled_allocation.h" + +#include "gtest/gtest.h" +#include "absl/base/internal/spinlock.h" +#include "absl/debugging/stacktrace.h" +#include "tcmalloc/internal/logging.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +StackTrace PrepareStackTrace() { + StackTrace st; + st.depth = absl::GetStackTrace(st.stack, kMaxStackDepth, /* skip_count= */ 0); + st.requested_size = 8; + st.requested_alignment = 4; + st.allocated_size = 8; + st.access_hint = 1; + st.weight = 4; + return st; +} + +TEST(SampledAllocationTest, PrepareForSampling) { + // PrepareForSampling() invoked in the constructor. + SampledAllocation sampled_allocation(PrepareStackTrace()); + absl::base_internal::SpinLockHolder sample_lock(&sampled_allocation.lock); + + // Now verify some fields. + EXPECT_GT(sampled_allocation.sampled_stack.depth, 0); + EXPECT_EQ(sampled_allocation.sampled_stack.requested_size, 8); + EXPECT_EQ(sampled_allocation.sampled_stack.requested_alignment, 4); + EXPECT_EQ(sampled_allocation.sampled_stack.allocated_size, 8); + EXPECT_EQ(sampled_allocation.sampled_stack.access_hint, 1); + EXPECT_EQ(sampled_allocation.sampled_stack.weight, 4); + + // Set them to different values. 
+ sampled_allocation.sampled_stack.depth = 0; + sampled_allocation.sampled_stack.requested_size = 0; + sampled_allocation.sampled_stack.requested_alignment = 0; + sampled_allocation.sampled_stack.allocated_size = 0; + sampled_allocation.sampled_stack.access_hint = 0; + sampled_allocation.sampled_stack.weight = 0; + + // Call PrepareForSampling() again and check the fields. + sampled_allocation.PrepareForSampling(PrepareStackTrace()); + EXPECT_GT(sampled_allocation.sampled_stack.depth, 0); + EXPECT_EQ(sampled_allocation.sampled_stack.requested_size, 8); + EXPECT_EQ(sampled_allocation.sampled_stack.requested_alignment, 4); + EXPECT_EQ(sampled_allocation.sampled_stack.allocated_size, 8); + EXPECT_EQ(sampled_allocation.sampled_stack.access_hint, 1); + EXPECT_EQ(sampled_allocation.sampled_stack.weight, 4); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/stacktrace_filter.h b/contrib/libs/tcmalloc/tcmalloc/internal/stacktrace_filter.h new file mode 100644 index 000000000000..b05b587d9bd2 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/stacktrace_filter.h @@ -0,0 +1,161 @@ +#pragma clang system_header +// Copyright 2022 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_STACKTRACE_FILTER_H_ +#define TCMALLOC_INTERNAL_STACKTRACE_FILTER_H_ + +#include +#include +#include +#include + +#include "absl/hash/hash.h" +#include "absl/types/span.h" +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Counting Bloom Filter (CBF) implementation for stack traces. The template +// parameter kSize denotes the size of the Bloom Filter, and kHashNum the number +// of hash functions used. The probability of false positives is calculated as: +// +// P(num_stacks) = (1 - e^(-kHashNum * (num_stacks / kSize))) ^ kHashNum +// +// Where `num_stacks` are unique stack traces currently present in the filter. +// +// The main benefit of a CBF (vs. other data structure such as a regular Bloom +// Filter, or a Cache with an eviction policy) is that if the sum of all Add() +// operations becomes zero again, the CBF will no longer contain the item. False +// positives can be mitigated by configuring the CBF according to above formula. +// +// Thread-safety: thread-safe. +template +class StackTraceFilter { + public: + static constexpr size_t kSize = kSize_; + static constexpr size_t kHashNum = kHashNum_; + + static_assert(kSize > 0, "size must be non-zero"); + static_assert(kHashNum > 0, "number of hashes must be non-zero"); + + constexpr StackTraceFilter() = default; + + // Returns true if the filter contains the provided stack trace. See above + // formula to calculate the probability of false positives. 
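  // Worked example of the formula (numbers are illustrative, not from the
  // original source): with kSize = 256, kHashNum = 10 and 32 distinct stack
  // traces present,
  //   P(32) = (1 - e^(-10 * 32 / 256))^10 ≈ (1 - e^-1.25)^10 ≈ 3.4%,
  // whereas a single hash function (kHashNum = 1) gives roughly
  //   P(32) = 1 - e^(-32 / 256) ≈ 11.8%.
  // A tiny standalone helper (hypothetical, not part of this class; needs
  // <cmath>) that evaluates the formula:
  //
  //   double CbfFalsePositiveRate(double hashes, double size, double stacks) {
  //     return std::pow(1.0 - std::exp(-hashes * stacks / size), hashes);
  //   }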
+ bool Contains(absl::Span stack_trace) const { + size_t stack_hash = GetFirstHash(stack_trace); + + for (size_t i = 0; i < kHashNum; ++i) { + if (!counts_[stack_hash % kSize].load(std::memory_order_relaxed)) + return false; + stack_hash = GetNextHash(stack_hash); + } + + return true; + } + + // Add (or remove if `val` < 0) a stack trace from the filter. The sum of + // values `val` added to the filter since construction or the last Clear() + // determines if a stack trace is contained or not: for any non-zero sum, + // Contains() returns true; false if the sum is zero. + void Add(absl::Span stack_trace, int val) { + Add(GetFirstHash(stack_trace), val); + } + + protected: + static size_t GetFirstHash(absl::Span s) { + return absl::HashOf(s); + } + + static size_t GetNextHash(size_t prev_hash) { + return absl::HashOf(prev_hash); + } + + void Add(size_t stack_hash, int val) { + for (size_t i = 0; i < kHashNum; ++i) { + counts_[stack_hash % kSize].fetch_add(val, std::memory_order_relaxed); + stack_hash = GetNextHash(stack_hash); + } + } + + private: + // Use uint to allow for integer wrap-around; false positives are possible if + // all kHashNum counts wrap to 0 (which is unlikely with kHashNum > 1). + std::array, kSize> counts_ = {}; + + friend class StackTraceFilterTest; +}; + +// Similar to StackTraceFilter, except that entries have to be decayed to be +// fully removed from the filter: the sum value of a stack trace is the added +// value + number of non-negative Add() calls. Each call to Add() or Decay() +// decays a stack trace: the stack trace to be decayed is the one added in the +// kDecaySteps'th previous call to Add(). +// +// Thread-safety: thread-safe. +template +class DecayingStackTraceFilter : public StackTraceFilter { + using Base = StackTraceFilter; + + public: + // Add (or remove if `val` < 0) a stack trace from the filter. On addition + // (non-negative value), a previously added stack trace is decayed. + void Add(absl::Span stack_trace, int val) { + const size_t stack_hash = this->GetFirstHash(stack_trace); + if (val >= 0) { + Decay(stack_hash); + // Because 0-valued entries denote unused entries, add 1 to be decayed if + // this is a non-zero hash (very likely). + Base::Add(stack_hash, val + !!stack_hash); + } else { + // Removal. + Base::Add(stack_hash, val); + } + } + + // Decays a previously added stack trace. + void Decay() { Decay(0); } + + // Force decay all previously added stack traces. + void DecayAll() { + for (int i = 0; i < kDecaySteps; ++i) { + Decay(); + } + } + + private: + // Replace the entry in the current ring buffer position with `replace_hash` + // and decay the previous stack trace. Advances to the next position. + void Decay(size_t replace_hash) { + const size_t pos = pos_.fetch_add(1, std::memory_order_relaxed); + const size_t decay_hash = + ring(pos).exchange(replace_hash, std::memory_order_relaxed); + // 0-valued entries denote unused entries. 
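    // Observable effect at the public interface (assuming kDecaySteps == 3,
    // as in the unit tests): Add(trace, 0) makes Contains(trace) true; after
    // three further non-negative Add()/Decay() calls that implicit +1 has
    // been decayed away and Contains(trace) is false again, while a trace
    // added with a positive value also needs an explicit Add(trace, -value).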
+ if (decay_hash) Base::Add(decay_hash, -1); + } + + auto& ring(size_t pos) { return ring_[pos % ring_.size()]; } + + std::array, kDecaySteps> ring_ = {}; + std::atomic pos_ = 0; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_STACKTRACE_FILTER_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/stacktrace_filter_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/stacktrace_filter_test.cc new file mode 100644 index 000000000000..9347a62b1dce --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/stacktrace_filter_test.cc @@ -0,0 +1,262 @@ +// Copyright 2022 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/stacktrace_filter.h" + +#include +#include + +#include "gtest/gtest.h" +#include "absl/container/flat_hash_set.h" +#include "absl/types/span.h" + +namespace tcmalloc { +namespace tcmalloc_internal { + +class StackTraceFilterTest : public testing::Test { + protected: + using DefaultFilter = StackTraceFilter<256, 1>; + + void SetUp() override { + absl::flat_hash_set hashes; + absl::flat_hash_set hash_bases; + + auto initialize_unique_stack_trace = [&](void*& val) { + uint64_t pc = 0; + while (true) { + ++pc; + // Checking for wrap around (unique stack trace never found) + ASSERT_NE(pc, 0); + val = reinterpret_cast(pc); + size_t hash = DefaultFilter::GetFirstHash({&val, 1}); + size_t hash_base = GetFirstIndex({&val, 1}); + if (!hash_bases.contains(hash_base) && !hashes.contains(hash)) { + hashes.insert(hash); + hash_bases.insert(hash_base); + break; + } + } + }; + + initialize_unique_stack_trace(stack_trace1_val_); + initialize_unique_stack_trace(stack_trace2_val_); + initialize_unique_stack_trace(stack_trace3_val_); + + // Ensure no collisions among test set (the initializer above should ensure + // this already). + ASSERT_NE(DefaultFilter::GetFirstHash(stack_trace1_), + DefaultFilter::GetFirstHash(stack_trace2_)); + ASSERT_NE(GetFirstIndex(stack_trace1_), GetFirstIndex(stack_trace2_)); + ASSERT_NE(DefaultFilter::GetFirstHash(stack_trace1_), + DefaultFilter::GetFirstHash(stack_trace3_)); + ASSERT_NE(GetFirstIndex(stack_trace1_), GetFirstIndex(stack_trace3_)); + ASSERT_NE(DefaultFilter::GetFirstHash(stack_trace2_), + DefaultFilter::GetFirstHash(stack_trace3_)); + ASSERT_NE(GetFirstIndex(stack_trace2_), GetFirstIndex(stack_trace3_)); + } + + void InitializeColliderStackTrace() { + absl::flat_hash_set hashes; + absl::flat_hash_set hash_bases; + + // Do not add base of stack_trace1_, because that is the match that is being + // created. 
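    // (The goal is a "collider" trace whose first table index matches that of
    // stack_trace1_ while its full hash differs, so a single-hash filter
    // reports a false positive for it but a multi-hash filter can still tell
    // the two traces apart; see the Collision* tests below.)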
+ hashes.insert(DefaultFilter::GetFirstHash(stack_trace1_)); + hashes.insert(DefaultFilter::GetFirstHash(stack_trace2_)); + hash_bases.insert(GetFirstIndex(stack_trace2_)); + hashes.insert(DefaultFilter::GetFirstHash(stack_trace3_)); + hash_bases.insert(GetFirstIndex(stack_trace3_)); + + size_t hash1_base = GetFirstIndex(stack_trace1_); + uint64_t pc = reinterpret_cast(stack_trace1_[0]); + size_t collider_hash; + size_t collider_hash_base; + while (true) { + ++pc; + // Checking for wrap around + ASSERT_NE(pc, 0); + collider_stack_trace_val_ = reinterpret_cast(pc); + collider_hash = DefaultFilter::GetFirstHash(collider_stack_trace_); + collider_hash_base = GetFirstIndex(collider_stack_trace_); + // if a possible match, check to avoid collisions with others + if (hash1_base == collider_hash_base && !hashes.contains(collider_hash) && + !hash_bases.contains(collider_hash_base)) { + break; + } + } + + // Double check the work above + ASSERT_NE(DefaultFilter::GetFirstHash(stack_trace1_), + DefaultFilter::GetFirstHash(collider_stack_trace_)); + ASSERT_EQ(GetFirstIndex(stack_trace1_), + GetFirstIndex(collider_stack_trace_)); + ASSERT_NE(DefaultFilter::GetFirstHash(stack_trace2_), + DefaultFilter::GetFirstHash(collider_stack_trace_)); + ASSERT_NE(GetFirstIndex(stack_trace2_), + GetFirstIndex(collider_stack_trace_)); + ASSERT_NE(DefaultFilter::GetFirstHash(stack_trace3_), + DefaultFilter::GetFirstHash(collider_stack_trace_)); + ASSERT_NE(GetFirstIndex(stack_trace3_), + GetFirstIndex(collider_stack_trace_)); + } + + static size_t GetFirstIndex(absl::Span stack_trace) { + return DefaultFilter::GetFirstHash(stack_trace) % DefaultFilter::kSize; + } + + void* stack_trace1_val_ = nullptr; + absl::Span stack_trace1_{&stack_trace1_val_, 1}; + void* stack_trace2_val_ = nullptr; + absl::Span stack_trace2_{&stack_trace2_val_, 1}; + void* stack_trace3_val_ = nullptr; + absl::Span stack_trace3_{&stack_trace3_val_, 1}; + void* collider_stack_trace_val_ = nullptr; + absl::Span collider_stack_trace_{&collider_stack_trace_val_, 1}; +}; + +namespace { + +// This test proves that class can be owned by a constexpr constructor class. +// This is required as the class will be instantiated within +// tcmalloc::tcmalloc_internal::Static. +TEST_F(StackTraceFilterTest, ConstexprConstructor) { + class Wrapper { + public: + constexpr Wrapper() = default; + DefaultFilter filter_; + }; + + // Instantiate + [[maybe_unused]] Wrapper wrapper; +} + +TEST_F(StackTraceFilterTest, InitialState) { + DefaultFilter filter; + EXPECT_FALSE(filter.Contains(stack_trace1_)); +} + +TEST_F(StackTraceFilterTest, AddRemove) { + DefaultFilter filter; + for (int i = 0; i < 100; ++i) { + filter.Add(stack_trace1_, i); + EXPECT_EQ(i > 0, filter.Contains(stack_trace1_)); + } + for (int i = 0; i < 100; ++i) { + filter.Add(stack_trace1_, -i); + } + EXPECT_FALSE(filter.Contains(stack_trace1_)); +} + +TEST_F(StackTraceFilterTest, CollisionFalsePositive) { + InitializeColliderStackTrace(); + // False positive because of collision ... 
+ DefaultFilter filter; + filter.Add(stack_trace1_, 1); + EXPECT_TRUE(filter.Contains(stack_trace1_)); + EXPECT_TRUE(filter.Contains(collider_stack_trace_)); + + filter.Add(stack_trace1_, -1); + EXPECT_FALSE(filter.Contains(stack_trace1_)); + EXPECT_FALSE(filter.Contains(collider_stack_trace_)); + + filter.Add(collider_stack_trace_, 1); + EXPECT_TRUE(filter.Contains(stack_trace1_)); + EXPECT_TRUE(filter.Contains(collider_stack_trace_)); +} + +TEST_F(StackTraceFilterTest, CollisionMultiHash) { + InitializeColliderStackTrace(); + // ... but with additional hash functions the probability of collision + // decreases. + StackTraceFilter<256, 10> filter; + filter.Add(stack_trace1_, 1); + EXPECT_TRUE(filter.Contains(stack_trace1_)); + EXPECT_FALSE(filter.Contains(collider_stack_trace_)); + + filter.Add(collider_stack_trace_, 1); + EXPECT_TRUE(filter.Contains(stack_trace1_)); + EXPECT_TRUE(filter.Contains(collider_stack_trace_)); + + filter.Add(stack_trace1_, -1); + EXPECT_FALSE(filter.Contains(stack_trace1_)); + EXPECT_TRUE(filter.Contains(collider_stack_trace_)); + + filter.Add(collider_stack_trace_, -1); + EXPECT_FALSE(filter.Contains(stack_trace1_)); + EXPECT_FALSE(filter.Contains(collider_stack_trace_)); +} + +// Just test that integer wrap around does not trigger any hardening checks, +// because for our CBF wrap around is benign. +TEST_F(StackTraceFilterTest, IntegerWrapAround) { + DefaultFilter filter; + filter.Add(stack_trace1_, 1); + EXPECT_TRUE(filter.Contains(stack_trace1_)); + filter.Add(stack_trace1_, ~0u); + EXPECT_FALSE(filter.Contains(stack_trace1_)); +} + +class DecayingStackTraceFilterTest : public StackTraceFilterTest { + protected: + static constexpr size_t kDecaySteps = 3; + using DefaultFilter = DecayingStackTraceFilter<256, 10, kDecaySteps>; +}; + +TEST_F(DecayingStackTraceFilterTest, AddAndDecay) { + DefaultFilter filter; + for (int i = 0; i < 10; ++i) { + filter.Add(stack_trace1_, i); + for (int j = 0; j < kDecaySteps; ++j) { + EXPECT_TRUE(filter.Contains(stack_trace1_)); + filter.Decay(); + } + if (i) { + // If non-zero addition, we need to negate it as well. + EXPECT_TRUE(filter.Contains(stack_trace1_)); + filter.Add(stack_trace1_, -i); + } + EXPECT_FALSE(filter.Contains(stack_trace1_)); + } +} + +TEST_F(DecayingStackTraceFilterTest, AddAndAdd) { + DefaultFilter filter; + for (int i = 0; i < 10; ++i) { + filter.Add(stack_trace1_, i); + for (int j = 0; j < kDecaySteps; ++j) { + EXPECT_TRUE(filter.Contains(stack_trace1_)); + filter.Add(stack_trace2_, 0); // implies decay + } + if (i) { + // If non-zero addition, we need to negate it as well. 
+ EXPECT_TRUE(filter.Contains(stack_trace1_)); + filter.Add(stack_trace1_, -i); + } + EXPECT_FALSE(filter.Contains(stack_trace1_)); + EXPECT_TRUE(filter.Contains(stack_trace2_)); + } +} + +TEST_F(DecayingStackTraceFilterTest, DecayAll) { + DefaultFilter filter; + filter.Add(stack_trace1_, 1); + filter.Add(stack_trace1_, -1); + EXPECT_TRUE(filter.Contains(stack_trace1_)); + filter.DecayAll(); + EXPECT_FALSE(filter.Contains(stack_trace1_)); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/sysinfo.cc b/contrib/libs/tcmalloc/tcmalloc/internal/sysinfo.cc new file mode 100644 index 000000000000..50252e821dcd --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/sysinfo.cc @@ -0,0 +1,147 @@ +// Copyright 2023 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/sysinfo.h" + +#include +#include + +#include +#include +#include +#include +#include + +#include "absl/base/optimization.h" +#include "absl/functional/function_ref.h" +#include "absl/strings/numbers.h" +#include "absl/strings/string_view.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/cpu_utils.h" +#include "tcmalloc/internal/util.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +#if __linux__ +namespace { +bool IsInBounds(int cpu) { return 0 <= cpu && cpu < kMaxCpus; } +} // namespace + +std::optional ParseCpulist( + absl::FunctionRef read) { + CpuSet set; + set.Zero(); + + std::array buf; + size_t carry_over = 0; + int cpu_from = -1; + + while (true) { + const ssize_t rc = read(buf.data() + carry_over, buf.size() - carry_over); + if (ABSL_PREDICT_FALSE(rc < 0)) { + return std::nullopt; + } + + const absl::string_view current(buf.data(), carry_over + rc); + + // If we have no more data to parse & couldn't read any then we've reached + // the end of the input & are done. 
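    // (A token may also be split across reads: if the buffer currently ends
    // in "1", we cannot yet tell whether the CPU number is 1, 12, 127, ...;
    // the unconsumed tail is kept via `carry_over` and the memmove() below
    // and re-parsed together with the next chunk.)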
+ if (current.empty() && rc == 0) { + break; + } + if (current == "\n" && rc == 0) { + break; + } + + size_t consumed; + const size_t dash = current.find('-'); + const size_t comma = current.find(','); + if (dash != absl::string_view::npos && dash < comma) { + if (!absl::SimpleAtoi(current.substr(0, dash), &cpu_from) || + !IsInBounds(cpu_from)) { + return std::nullopt; + } + consumed = dash + 1; + } else if (comma != absl::string_view::npos || rc == 0) { + int cpu; + if (!absl::SimpleAtoi(current.substr(0, comma), &cpu) || + !IsInBounds(cpu)) { + return std::nullopt; + } + if (comma == absl::string_view::npos) { + consumed = current.size(); + } else { + consumed = comma + 1; + } + if (cpu_from != -1) { + for (int c = cpu_from; c <= cpu; c++) { + set.Set(c); + } + cpu_from = -1; + } else { + set.Set(cpu); + } + } else { + consumed = 0; + } + + carry_over = current.size() - consumed; + memmove(buf.data(), buf.data() + consumed, carry_over); + } + + return set; +} + +namespace sysinfo_internal { + +std::optional NumPossibleCPUsNoCache() { + int fd = signal_safe_open("/sys/devices/system/cpu/possible", + O_RDONLY | O_CLOEXEC); + + // This is slightly more state than we actually need, but it lets us reuse + // an already fuzz tested implementation detail. + std::optional cpus = + ParseCpulist([&](char* const buf, const size_t count) { + return signal_safe_read(fd, buf, count, /*bytes_read=*/nullptr); + }); + + signal_safe_close(fd); + + if (!cpus.has_value()) { + return std::nullopt; + } + + std::optional max_so_far; + for (int i = 0; i < kMaxCpus; ++i) { + if (cpus->IsSet(i)) { + max_so_far = std::max(i, max_so_far.value_or(-1)); + } + } + + if (!max_so_far.has_value()) { + return std::nullopt; + } + + return *max_so_far + 1; +} + +} // namespace sysinfo_internal + +#endif // __linux__ + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/sysinfo.h b/contrib/libs/tcmalloc/tcmalloc/internal/sysinfo.h new file mode 100644 index 000000000000..3cc218fbe95b --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/sysinfo.h @@ -0,0 +1,80 @@ +#pragma clang system_header +// Copyright 2023 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_SYSINFO_H_ +#define TCMALLOC_INTERNAL_SYSINFO_H_ + +#include +#include + +#include +#include + +#include "absl/base/attributes.h" +#include "absl/base/call_once.h" +#include "absl/functional/function_ref.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/cpu_utils.h" +#include "tcmalloc/internal/logging.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +#if __linux__ +// Parse a CPU list in the format used by +// /sys/devices/system/node/nodeX/cpulist files - that is, individual CPU +// numbers or ranges in the format - inclusive all joined by comma +// characters. +// +// Returns std::nullopt on error. 
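//
// For example, the cpulist "0-2,4" denotes CPUs {0, 1, 2, 4}. A caller can
// feed an in-memory string through the read callback, along the lines of the
// unit tests (sketch; assumes the return type is std::optional<CpuSet> and
// `remaining` is an absl::string_view over the input):
//
//   std::optional<CpuSet> cpus = ParseCpulist(
//       [&](char* buf, size_t count) -> ssize_t {
//         const size_t n = std::min(count, remaining.size());
//         memcpy(buf, remaining.data(), n);
//         remaining.remove_prefix(n);
//         return n;
//       });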
+// +// The read function is expected to operate much like the read syscall. It +// should read up to `count` bytes into `buf` and return the number of bytes +// actually read. If an error occurs during reading it should return -1 with +// errno set to an appropriate error code. read should handle EINTR and retry. +std::optional ParseCpulist( + absl::FunctionRef read); + +namespace sysinfo_internal { + +// Returns the number of possible CPUs on the machine, including currently +// offline CPUs. If this cannot be retrieved, std::nullopt is returned. +// +// The result of this function is not cached internally. +std::optional NumPossibleCPUsNoCache(); + +} // namespace sysinfo_internal +#endif // __linux__ + +inline std::optional NumCPUsMaybe() { + ABSL_CONST_INIT static absl::once_flag flag; + ABSL_CONST_INIT static std::optional result; + absl::base_internal::LowLevelCallOnce( + &flag, [&]() { result = sysinfo_internal::NumPossibleCPUsNoCache(); }); + return result; +} + +inline int NumCPUs() { + std::optional maybe_cpus = NumCPUsMaybe(); + TC_CHECK(maybe_cpus.has_value()); + return *maybe_cpus; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_SYSINFO_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/sysinfo_fuzz.cc b/contrib/libs/tcmalloc/tcmalloc/internal/sysinfo_fuzz.cc new file mode 100644 index 000000000000..d5ae38d250bf --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/sysinfo_fuzz.cc @@ -0,0 +1,48 @@ +// Copyright 2022 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +#include "fuzztest/fuzztest.h" +#include "tcmalloc/internal/cpu_utils.h" +#include "tcmalloc/internal/sysinfo.h" + +namespace tcmalloc::tcmalloc_internal { +namespace { + +void ParseInput(const std::string& s) { + const char* data = s.data(); + size_t size = s.size(); + + std::optional r = + ParseCpulist([&](char* buf, size_t count) -> ssize_t { + size_t to_read = std::min(size, count); + if (to_read > 0) { + memcpy(buf, data, to_read); + data += to_read; + size -= to_read; + } + return to_read; + }); + (void)r; +} + +FUZZ_TEST(SysinfoTest, ParseInput) + ; + +} // namespace +} // namespace tcmalloc::tcmalloc_internal diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/sysinfo_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/sysinfo_test.cc new file mode 100644 index 000000000000..950f6f344ea8 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/sysinfo_test.cc @@ -0,0 +1,181 @@ +// Copyright 2023 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/sysinfo.h" + +#include + +#include +#include +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/internal/sysinfo.h" +#include "absl/random/random.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "tcmalloc/internal/allocation_guard.h" +#include "tcmalloc/internal/cpu_utils.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +TEST(ParseCpulistTest, Empty) { + absl::string_view empty("\n"); + + const absl::optional parsed = + ParseCpulist([&](char* const buf, const size_t count) -> ssize_t { + // Calculate how much data we have left to provide. + const size_t to_copy = std::min(count, empty.size()); + + // If none, we have no choice but to provide nothing. + if (to_copy == 0) return 0; + + memcpy(buf, empty.data(), to_copy); + empty.remove_prefix(to_copy); + return to_copy; + }); + + // No CPUs should be active on this NUMA node. + ASSERT_THAT(parsed, testing::Ne(std::nullopt)); + EXPECT_EQ(parsed->Count(), 0); +} + +TEST(ParseCpulistTest, AtBounds) { + std::string cpulist = absl::StrCat("0-", kMaxCpus - 1); + + const absl::optional parsed = + ParseCpulist([&](char* const buf, const size_t count) -> ssize_t { + // Calculate how much data we have left to provide. + const size_t to_copy = std::min(count, cpulist.size()); + + // If none, we have no choice but to provide nothing. + if (to_copy == 0) return 0; + + memcpy(buf, cpulist.data(), to_copy); + cpulist.erase(0, to_copy); + return to_copy; + }); + + // All CPUs should be active on this NUMA node. + ASSERT_THAT(parsed, testing::Ne(std::nullopt)); + EXPECT_EQ(parsed->Count(), kMaxCpus); +} + +TEST(ParseCpulistTest, NotInBounds) { + std::string cpulist = absl::StrCat("0-", kMaxCpus); + + const absl::optional parsed = + ParseCpulist([&](char* const buf, const size_t count) -> ssize_t { + // Calculate how much data we have left to provide. + const size_t to_copy = std::min(count, cpulist.size()); + + // If none, we have no choice but to provide nothing. + if (to_copy == 0) return 0; + + memcpy(buf, cpulist.data(), to_copy); + cpulist.erase(0, to_copy); + return to_copy; + }); + + ASSERT_THAT(parsed, testing::Eq(std::nullopt)); +} + +// Ensure that we can parse randomized cpulists correctly. +TEST(ParseCpulistTest, Random) { + absl::BitGen gen; + + static constexpr int kIterations = 100; + for (int i = 0; i < kIterations; i++) { + CpuSet reference; + reference.Zero(); + + // Set a random number of CPUs within the reference set. + const double density = absl::Uniform(gen, 0.0, 1.0); + for (int cpu = 0; cpu < kMaxCpus; cpu++) { + if (absl::Bernoulli(gen, density)) { + reference.Set(cpu); + } + } + + // Serialize the reference set into a cpulist-style string. 
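    // (For example, the set {0, 1, 2, 4} serializes to "0-2,4", which is the
    // same format ParseCpulist() accepts as input.)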
+ std::vector components; + for (int cpu = 0; cpu < kMaxCpus; cpu++) { + if (!reference.IsSet(cpu)) continue; + + const int start = cpu; + int next = cpu + 1; + while (next < kMaxCpus && reference.IsSet(next)) { + cpu = next; + next = cpu + 1; + } + + if (cpu == start) { + components.push_back(absl::StrCat(cpu)); + } else { + components.push_back(absl::StrCat(start, "-", cpu)); + } + } + const std::string serialized = absl::StrJoin(components, ","); + + // Now parse that string using our ParseCpulist function, randomizing the + // amount of data we provide to it from each read. + absl::string_view remaining(serialized); + const absl::optional parsed = + ParseCpulist([&](char* const buf, const size_t count) -> ssize_t { + // Calculate how much data we have left to provide. + const size_t max = std::min(count, remaining.size()); + + // If none, we have no choice but to provide nothing. + if (max == 0) return 0; + + // If we do have data, return a randomly sized subset of it to stress + // the logic around reading partial values. + const size_t copy = absl::Uniform(gen, static_cast(1), max); + memcpy(buf, remaining.data(), copy); + remaining.remove_prefix(copy); + return copy; + }); + + // We ought to have parsed the same set of CPUs that we serialized. + ASSERT_THAT(parsed, testing::Ne(std::nullopt)); + EXPECT_TRUE(CPU_EQUAL_S(kCpuSetBytes, parsed->data(), reference.data())); + } +} + +TEST(NumCPUs, NoCache) { + const int result = []() { + AllocationGuard guard; + return *sysinfo_internal::NumPossibleCPUsNoCache(); + }(); + + // TODO(b/67389555): This test may fail if there are offlined CPUs. + EXPECT_EQ(result, absl::base_internal::NumCPUs()); +} + +TEST(NumCPUs, Cached) { + // TODO(b/67389555): This test may fail if there are offlined CPUs. + EXPECT_EQ(NumCPUs(), absl::base_internal::NumCPUs()); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/testdata/profile_builder_fuzz/clusterfuzz-testcase-minimized-profile_builder_fuzz-5534221534363648 b/contrib/libs/tcmalloc/tcmalloc/internal/testdata/profile_builder_fuzz/clusterfuzz-testcase-minimized-profile_builder_fuzz-5534221534363648 new file mode 100644 index 0000000000000000000000000000000000000000..4f148062426d06ccd0526192f85c9637de3b1a02 GIT binary patch literal 16 UcmZQ!U|{$U1k6C%-7k~@04(kV?f?J) literal 0 HcmV?d00001 diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/testdata/profile_builder_fuzz/clusterfuzz-testcase-minimized-profile_builder_fuzz-5647243657216000 b/contrib/libs/tcmalloc/tcmalloc/internal/testdata/profile_builder_fuzz/clusterfuzz-testcase-minimized-profile_builder_fuzz-5647243657216000 new file mode 100644 index 0000000000000000000000000000000000000000..4f148062426d06ccd0526192f85c9637de3b1a02 GIT binary patch literal 16 UcmZQ!U|{$U1k6C%-7k~@04(kV?f?J) literal 0 HcmV?d00001 diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/testdata/profile_builder_fuzz/clusterfuzz-testcase-minimized-profile_builder_fuzz-5915530833559552 b/contrib/libs/tcmalloc/tcmalloc/internal/testdata/profile_builder_fuzz/clusterfuzz-testcase-minimized-profile_builder_fuzz-5915530833559552 new file mode 100644 index 0000000000000000000000000000000000000000..8ef030e030e9893df4c0cbc223946b0499953085 GIT binary patch literal 12 ScmZQ!U|{(D|NnnxAPoQ-4g>%I literal 0 HcmV?d00001 diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/testdata/profile_builder_fuzz/clusterfuzz-testcase-minimized-profile_builder_fuzz-6685031907328000.fuzz 
b/contrib/libs/tcmalloc/tcmalloc/internal/testdata/profile_builder_fuzz/clusterfuzz-testcase-minimized-profile_builder_fuzz-6685031907328000.fuzz new file mode 100644 index 0000000000000000000000000000000000000000..9c88f4b010401328921f6a0ceeca9321eb7b2a72 GIT binary patch literal 84 zcmZQ!U|{$U1k6C%-7l2ke-Q`(IRXp}AQlUR$$%lx!0->G4+ua~U f, + // receives the offset of the entry and the entry itself. Offsets are relative + // to the beginning of the buffer. + void Iter(absl::FunctionRef f, SkipEntriesSetting skip_entries) const; // Iterates over the last num_epochs data points (if -1, iterate to the // oldest entry). Offsets are relative to the end of the buffer. - void IterBackwards(absl::FunctionRef f, + void IterBackwards(absl::FunctionRef f, int64_t num_epochs = -1) const; // This retrieves a particular data point (if offset is outside the valid @@ -139,16 +140,13 @@ bool TimeSeriesTracker::UpdateClock() { template void TimeSeriesTracker::Iter( - absl::FunctionRef f, + absl::FunctionRef f, SkipEntriesSetting skip_entries) const { size_t j = current_epoch_ + 1; if (j == kEpochs) j = 0; - int64_t timestamp = - (last_epoch_ - kEpochs) * absl::ToInt64Nanoseconds(epoch_length_); for (int offset = 0; offset < kEpochs; offset++) { - timestamp += absl::ToInt64Nanoseconds(epoch_length_); if (skip_entries == kDoNotSkipEmptyEntries || !entries_[j].empty()) { - f(offset, timestamp, entries_[j]); + f(offset, entries_[j]); } j++; if (j == kEpochs) j = 0; @@ -157,18 +155,13 @@ void TimeSeriesTracker::Iter( template void TimeSeriesTracker::IterBackwards( - absl::FunctionRef f, - int64_t num_epochs) const { + absl::FunctionRef f, int64_t num_epochs) const { // -1 means that we are outputting all epochs. num_epochs = (num_epochs == -1) ? kEpochs : num_epochs; size_t j = current_epoch_; - ASSERT(num_epochs <= kEpochs); - int64_t timestamp = last_epoch_ * absl::ToInt64Nanoseconds(epoch_length_); + TC_ASSERT_LE(num_epochs, kEpochs); for (size_t offset = 0; offset < num_epochs; ++offset) { - // This is deliberately int64_t and not a time unit, since clock_ is not - // guaranteed to be a real time base. 
- f(offset, timestamp, entries_[j]); - timestamp -= absl::ToInt64Nanoseconds(epoch_length_); + f(offset, entries_[j]); if (j == 0) j = kEpochs; --j; } @@ -182,7 +175,7 @@ const T TimeSeriesTracker::GetEpochAtOffset(size_t offset) { } template -bool TimeSeriesTracker::Report(S val) { +bool TimeSeriesTracker::Report(const S& val) { bool updated_clock = UpdateClock(); entries_[current_epoch_].Report(val); return updated_clock; diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/timeseries_tracker_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/timeseries_tracker_test.cc index 1f753061616c..aee5d440778f 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/timeseries_tracker_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/timeseries_tracker_test.cc @@ -14,8 +14,16 @@ #include "tcmalloc/internal/timeseries_tracker.h" +#include +#include +#include + #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "absl/base/internal/cycleclock.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "tcmalloc/internal/clock.h" using ::testing::ElementsAre; @@ -72,8 +80,7 @@ TEST(TimeSeriesTest, CycleClock) { int num_timestamps = 0; int offset_1, offset_2; tracker.Iter( - [&](size_t offset, int64_t ts, - const TimeSeriesTrackerTest::TestEntry& e) { + [&](size_t offset, const TimeSeriesTrackerTest::TestEntry& e) { ASSERT_LT(num_timestamps, 2); if (num_timestamps == 0) { offset_1 = offset; @@ -92,7 +99,6 @@ TEST(TimeSeriesTest, CycleClock) { } TEST_F(TimeSeriesTrackerTest, Works) { - const int64_t kEpochLength = absl::ToInt64Nanoseconds(kDuration) / 8; Advance(kDuration); tracker_.Report(1); @@ -105,17 +111,13 @@ TEST_F(TimeSeriesTrackerTest, Works) { int num_timestamps = 0; int offset_1, offset_2; tracker_.Iter( - [&](size_t offset, int64_t ts, const TestEntry& e) { + [&](size_t offset, const TestEntry& e) { ASSERT_LT(num_timestamps, 2); if (num_timestamps == 0) { offset_1 = offset; - EXPECT_EQ(absl::ToInt64Nanoseconds(kDuration), ts); EXPECT_THAT(e.values_, ElementsAre(1, 2)); } else { offset_2 = offset; - EXPECT_EQ(absl::ToInt64Nanoseconds(kDuration) + - absl::ToInt64Nanoseconds(kDuration) / 4, - ts); EXPECT_THAT(e.values_, ElementsAre(4)); } num_timestamps++; @@ -128,14 +130,11 @@ TEST_F(TimeSeriesTrackerTest, Works) { Advance(kDuration / 4); // Iterate through entries not skipping empty entries. - int64_t expected_timestamp = absl::ToInt64Nanoseconds(kDuration) / 4; num_timestamps = 0; tracker_.Iter( - [&](size_t offset, int64_t ts, const TestEntry& e) { - expected_timestamp += kEpochLength; + [&](size_t offset, const TestEntry& e) { ASSERT_LT(num_timestamps, 8); - EXPECT_EQ(expected_timestamp, ts); num_timestamps++; }, tracker_.kDoNotSkipEmptyEntries); @@ -148,13 +147,10 @@ TEST_F(TimeSeriesTrackerTest, Works) { // Iterate backwards. 
num_timestamps = 0; - expected_timestamp = - 7 * absl::ToInt64Nanoseconds(kDuration) / 4; // Current time tracker_.IterBackwards( - [&](size_t offset, int64_t ts, const TestEntry& e) { + [&](size_t offset, const TestEntry& e) { ASSERT_LT(num_timestamps, 3); EXPECT_EQ(num_timestamps, offset); - EXPECT_EQ(expected_timestamp, ts); if (num_timestamps == 0) { EXPECT_THAT(e.values_, ElementsAre(16)); } else if (num_timestamps == 1) { @@ -162,7 +158,6 @@ TEST_F(TimeSeriesTrackerTest, Works) { } else { EXPECT_THAT(e.values_, ElementsAre(8)); } - expected_timestamp -= kEpochLength; num_timestamps++; }, 3); @@ -174,12 +169,12 @@ TEST_F(TimeSeriesTrackerTest, Works) { EXPECT_TRUE(tracker_.GetEpochAtOffset(3).empty()); EXPECT_TRUE(tracker_.GetEpochAtOffset(1000).empty()); - // This should annilate everything. + // This should annihilate everything. Advance(kDuration * 2); tracker_.UpdateTimeBase(); tracker_.Iter( - [&](size_t offset, int64_t ts, const TestEntry& e) { - ASSERT_TRUE(false) << "Time series should be empty"; + [&](size_t offset, const TestEntry& e) { + FAIL() << "Time series should be empty"; }, tracker_.kSkipEmptyEntries); diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/util.cc b/contrib/libs/tcmalloc/tcmalloc/internal/util.cc index ef705b02e38e..5f26e286ea30 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/util.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/util.cc @@ -21,11 +21,11 @@ #include #include -#include +#include #include "absl/time/clock.h" #include "absl/time/time.h" -#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/config.h" GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { @@ -34,9 +34,11 @@ namespace tcmalloc_internal { int signal_safe_open(const char* path, int flags, ...) { int fd; va_list ap; + using mode_t_va_arg_type = + std::conditional::type; va_start(ap, flags); - mode_t mode = va_arg(ap, mode_t); + mode_t mode = va_arg(ap, mode_t_va_arg_type); va_end(ap); do { @@ -121,75 +123,6 @@ ssize_t signal_safe_read(int fd, char* buf, size_t count, size_t* bytes_read) { return rc; } -std::vector AllowedCpus() { - // We have no need for dynamically sized sets (currently >1024 CPUs for glibc) - // at the present time. We could change this in the future. - cpu_set_t allowed_cpus; - CHECK_CONDITION(sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus) == - 0); - int n = CPU_COUNT(&allowed_cpus), c = 0; - - std::vector result(n); - for (int i = 0; i < CPU_SETSIZE && n; i++) { - if (CPU_ISSET(i, &allowed_cpus)) { - result[c++] = i; - n--; - } - } - CHECK_CONDITION(0 == n); - - return result; -} - -static cpu_set_t SpanToCpuSetT(absl::Span mask) { - cpu_set_t result; - CPU_ZERO(&result); - for (int cpu : mask) { - CPU_SET(cpu, &result); - } - return result; -} - -ScopedAffinityMask::ScopedAffinityMask(absl::Span allowed_cpus) { - specified_cpus_ = SpanToCpuSetT(allowed_cpus); - // getaffinity should never fail. - CHECK_CONDITION( - sched_getaffinity(0, sizeof(original_cpus_), &original_cpus_) == 0); - // See destructor comments on setaffinity interactions. Tampered() will - // necessarily be true in this case. - sched_setaffinity(0, sizeof(specified_cpus_), &specified_cpus_); -} - -ScopedAffinityMask::ScopedAffinityMask(int allowed_cpu) { - CPU_ZERO(&specified_cpus_); - CPU_SET(allowed_cpu, &specified_cpus_); - - // getaffinity should never fail. - CHECK_CONDITION( - sched_getaffinity(0, sizeof(original_cpus_), &original_cpus_) == 0); - // See destructor comments on setaffinity interactions. Tampered() will - // necessarily be true in this case. 
- sched_setaffinity(0, sizeof(specified_cpus_), &specified_cpus_); -} - -ScopedAffinityMask::~ScopedAffinityMask() { - // If something else has already reset our affinity, do not attempt to - // restrict towards our original mask. This is best-effort as the tampering - // may obviously occur during the destruction of *this. - if (!Tampered()) { - // Note: We do not assert success here, conflicts may restrict us from all - // 'original_cpus_'. - sched_setaffinity(0, sizeof(original_cpus_), &original_cpus_); - } -} - -bool ScopedAffinityMask::Tampered() { - cpu_set_t current_cpus; - CHECK_CONDITION(sched_getaffinity(0, sizeof(current_cpus), ¤t_cpus) == - 0); - return !CPU_EQUAL(¤t_cpus, &specified_cpus_); // Mismatch => modified. -} - } // namespace tcmalloc_internal } // namespace tcmalloc GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/util.h b/contrib/libs/tcmalloc/tcmalloc/internal/util.h index b43e32225727..4469685d6c15 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/util.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/util.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,21 +16,18 @@ #ifndef TCMALLOC_INTERNAL_UTIL_H_ #define TCMALLOC_INTERNAL_UTIL_H_ +#include #include // IWYU pragma: keep #include #include #include -#include #include #include #include -#include - -#include "absl/base/internal/sysinfo.h" #include "absl/time/time.h" -#include "absl/types/span.h" #include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/logging.h" #define TCMALLOC_RETRY_ON_TEMP_FAILURE(expression) \ (__extension__({ \ @@ -49,7 +47,7 @@ namespace tcmalloc_internal { // signal_safe_open() - a wrapper for open(2) which ignores signals // Semantics equivalent to open(2): // returns a file-descriptor (>=0) on success, -1 on failure, error in errno -int signal_safe_open(const char *path, int flags, ...); +int signal_safe_open(const char* path, int flags, ...); // signal_safe_close() - a wrapper for close(2) which ignores signals // Semantics equivalent to close(2): @@ -68,8 +66,8 @@ int signal_safe_close(int fd); // flushed from the buffer in the first write. To handle this case the optional // bytes_written parameter is provided, when not-NULL, it will always return the // total bytes written before any error. -ssize_t signal_safe_write(int fd, const char *buf, size_t count, - size_t *bytes_written); +ssize_t signal_safe_write(int fd, const char* buf, size_t count, + size_t* bytes_written); // signal_safe_read() - a wrapper for read(2) which ignores signals // Semantics equivalent to read(2): @@ -83,7 +81,7 @@ ssize_t signal_safe_write(int fd, const char *buf, size_t count, // read by a previous read. To handle this case the optional bytes_written // parameter is provided, when not-NULL, it will always return the total bytes // read before any error. -ssize_t signal_safe_read(int fd, char *buf, size_t count, size_t *bytes_read); +ssize_t signal_safe_read(int fd, char* buf, size_t count, size_t* bytes_read); // signal_safe_poll() - a wrapper for poll(2) which ignores signals // Semantics equivalent to poll(2): @@ -93,42 +91,52 @@ ssize_t signal_safe_read(int fd, char *buf, size_t count, size_t *bytes_read); // poll for data. Unlike ppoll/pselect, signal_safe_poll is *ignoring* signals // not attempting to re-enable them. Protecting us from the traditional races // involved with the latter. 
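//
// Typical use of this family (sketch, mirroring how sysinfo.cc reads sysfs
// files; error handling omitted): open with signal_safe_open(), drain with
// signal_safe_read() until it returns 0, then signal_safe_close(). Each call
// retries on EINTR instead of surfacing it to the caller.
//
//   int fd = signal_safe_open("/sys/devices/system/cpu/possible",
//                             O_RDONLY | O_CLOEXEC);
//   char buf[64];
//   ssize_t n;
//   while ((n = signal_safe_read(fd, buf, sizeof(buf), nullptr)) > 0) {
//     // consume buf[0..n)
//   }
//   signal_safe_close(fd);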
-int signal_safe_poll(struct ::pollfd *fds, int nfds, absl::Duration timeout); +int signal_safe_poll(struct ::pollfd* fds, int nfds, absl::Duration timeout); -// Affinity helpers. +class ScopedSigmask { + public: + // Masks all signal handlers. (SIG_SETMASK, All) + ScopedSigmask() noexcept; -// Returns a vector of the which cpus the currently allowed thread is allowed to -// run on. There are no guarantees that this will not change before, after, or -// even during, the call to AllowedCpus(). -std::vector AllowedCpus(); + // No copy, move or assign + ScopedSigmask(const ScopedSigmask&) = delete; + ScopedSigmask& operator=(const ScopedSigmask&) = delete; -// Enacts a scoped affinity mask on the constructing thread. Attempts to -// restore the original affinity mask on destruction. -// -// REQUIRES: For test-use only. Do not use this in production code. -class ScopedAffinityMask { + // Restores the masked signal handlers to its former state. + ~ScopedSigmask() noexcept; + + private: + void Setmask(int how, sigset_t* set, sigset_t* old); + + sigset_t old_set_; +}; + +inline ScopedSigmask::ScopedSigmask() noexcept { + sigset_t set; + sigfillset(&set); + Setmask(SIG_SETMASK, &set, &old_set_); +} + +inline ScopedSigmask::~ScopedSigmask() noexcept { + Setmask(SIG_SETMASK, &old_set_, nullptr); +} + +inline void ScopedSigmask::Setmask(int how, sigset_t* set, sigset_t* old) { + const int result = pthread_sigmask(how, set, old); + TC_CHECK_EQ(result, 0); +} + +// RAII class that will restore errno to the value it has when created. +class ErrnoRestorer { public: - // When racing with an external restriction that has a zero-intersection with - // "allowed_cpus" we will construct, but immediately register as "Tampered()", - // without actual changes to affinity. - explicit ScopedAffinityMask(absl::Span allowed_cpus); - explicit ScopedAffinityMask(int allowed_cpu); - - // Restores original affinity iff our scoped affinity has not been externally - // modified (i.e. Tampered()). Otherwise, the updated affinity is preserved. - ~ScopedAffinityMask(); - - // Returns true if the affinity mask no longer matches what was set at point - // of construction. - // - // Note: This is instantaneous and not fool-proof. It's possible for an - // external affinity modification to subsequently align with our originally - // specified "allowed_cpus". In this case Tampered() will return false when - // time may have been spent executing previously on non-specified cpus. 
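  // Illustrative combined use of ScopedSigmask and ErrnoRestorer (both are
  // plain scope guards):
  //
  //   {
  //     ScopedSigmask block_signals;  // all signals masked in this scope
  //     ErrnoRestorer keep_errno;     // errno restored on scope exit
  //     // ... work that must not be interrupted or clobber errno ...
  //   }  // previous signal mask and errno value restored here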
- bool Tampered(); + ErrnoRestorer() : saved_errno_(errno) {} + ~ErrnoRestorer() { errno = saved_errno_; } + + ErrnoRestorer(const ErrnoRestorer&) = delete; + ErrnoRestorer& operator=(const ErrnoRestorer&) = delete; private: - cpu_set_t original_cpus_, specified_cpus_; + int saved_errno_; }; } // namespace tcmalloc_internal diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/ya.make b/contrib/libs/tcmalloc/tcmalloc/internal/ya.make new file mode 100644 index 000000000000..36fd07fe3482 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/ya.make @@ -0,0 +1,21 @@ +PROTO_LIBRARY() + +VERSION(dummy) + +LICENSE(Apache-2.0) + +WITHOUT_LICENSE_TEXTS() + +EXCLUDE_TAGS( + GO_PROTO + PY_PROTO + JAVA_PROTO +) + +SRC(profile.proto) + +ADDINCL( + GLOBAL contrib/libs/tcmalloc +) + +END() diff --git a/contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h b/contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h index 66027418ed20..fc29fad7730f 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -20,10 +21,15 @@ #ifndef TCMALLOC_INTERNAL_MALLOC_EXTENSION_H_ #define TCMALLOC_INTERNAL_MALLOC_EXTENSION_H_ +#include +#include +#include +#include #include +#include #include "absl/base/attributes.h" -#include "absl/functional/function_ref.h" +#include "absl/time/time.h" #include "tcmalloc/malloc_extension.h" namespace tcmalloc { @@ -66,39 +72,71 @@ MallocExtension_Internal_SnapshotCurrent(tcmalloc::ProfileType type); ABSL_ATTRIBUTE_WEAK tcmalloc::tcmalloc_internal::AllocationProfilingTokenBase* MallocExtension_Internal_StartAllocationProfiling(); +ABSL_ATTRIBUTE_WEAK tcmalloc::tcmalloc_internal::AllocationProfilingTokenBase* +MallocExtension_Internal_StartLifetimeProfiling(); ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_ActivateGuardedSampling(); ABSL_ATTRIBUTE_WEAK tcmalloc::MallocExtension::Ownership MallocExtension_Internal_GetOwnership(const void* ptr); -ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetMemoryLimit( - tcmalloc::MallocExtension::MemoryLimit* limit); +ABSL_ATTRIBUTE_WEAK size_t MallocExtension_Internal_GetMemoryLimit( + tcmalloc::MallocExtension::LimitKind limit_kind); ABSL_ATTRIBUTE_WEAK bool MallocExtension_Internal_GetNumericProperty( const char* name_data, size_t name_size, size_t* value); ABSL_ATTRIBUTE_WEAK bool MallocExtension_Internal_GetPerCpuCachesActive(); -ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_DeactivatePerCpuCaches(); ABSL_ATTRIBUTE_WEAK int32_t MallocExtension_Internal_GetMaxPerCpuCacheSize(); +ABSL_ATTRIBUTE_WEAK bool +MallocExtension_Internal_GetBackgroundProcessActionsEnabled(); +ABSL_ATTRIBUTE_WEAK void +MallocExtension_Internal_GetBackgroundProcessSleepInterval(absl::Duration* ret); ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetSkipSubreleaseInterval( absl::Duration* ret); +ABSL_ATTRIBUTE_WEAK void +MallocExtension_Internal_GetSkipSubreleaseShortInterval(absl::Duration* ret); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetSkipSubreleaseLongInterval( + absl::Duration* ret); +ABSL_ATTRIBUTE_WEAK void +MallocExtension_Internal_GetCacheDemandReleaseShortInterval( + absl::Duration* ret); +ABSL_ATTRIBUTE_WEAK void +MallocExtension_Internal_GetCacheDemandReleaseLongInterval(absl::Duration* ret); ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetProperties( std::map* ret); 
+ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetExperiments( + std::map* ret); ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetStats(std::string* ret); ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetMaxPerCpuCacheSize( int32_t value); +ABSL_ATTRIBUTE_WEAK void +MallocExtension_Internal_SetBackgroundProcessActionsEnabled(bool value); +ABSL_ATTRIBUTE_WEAK void +MallocExtension_Internal_SetBackgroundProcessSleepInterval( + absl::Duration value); ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetSkipSubreleaseInterval( absl::Duration value); +ABSL_ATTRIBUTE_WEAK void +MallocExtension_Internal_SetSkipSubreleaseShortInterval(absl::Duration value); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetSkipSubreleaseLongInterval( + absl::Duration value); +ABSL_ATTRIBUTE_WEAK void +MallocExtension_Internal_SetCacheDemandReleaseShortInterval( + absl::Duration value); +ABSL_ATTRIBUTE_WEAK void +MallocExtension_Internal_SetCacheDemandReleaseLongInterval( + absl::Duration value); ABSL_ATTRIBUTE_WEAK size_t MallocExtension_Internal_ReleaseCpuMemory(int cpu); -ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_ReleaseMemoryToSystem( - size_t bytes); +ABSL_ATTRIBUTE_WEAK size_t +MallocExtension_Internal_ReleaseMemoryToSystem(size_t bytes); ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetMemoryLimit( - const tcmalloc::MallocExtension::MemoryLimit* limit); + size_t limit, tcmalloc::MallocExtension::LimitKind limit_kind); ABSL_ATTRIBUTE_WEAK size_t MallocExtension_Internal_GetAllocatedSize(const void* ptr); ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_MarkThreadBusy(); ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_MarkThreadIdle(); -ABSL_ATTRIBUTE_WEAK int64_t MallocExtension_Internal_GetProfileSamplingRate(); -ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetProfileSamplingRate( +ABSL_ATTRIBUTE_WEAK int64_t +MallocExtension_Internal_GetProfileSamplingInterval(); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetProfileSamplingInterval( int64_t); ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_ProcessBackgroundActions(); @@ -108,8 +146,9 @@ MallocExtension_Internal_GetBackgroundReleaseRate(); ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetBackgroundReleaseRate( tcmalloc::MallocExtension::BytesPerSecond); -ABSL_ATTRIBUTE_WEAK int64_t MallocExtension_Internal_GetGuardedSamplingRate(); -ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetGuardedSamplingRate( +ABSL_ATTRIBUTE_WEAK int64_t +MallocExtension_Internal_GetGuardedSamplingInterval(); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetGuardedSamplingInterval( int64_t); ABSL_ATTRIBUTE_WEAK int64_t @@ -125,7 +164,6 @@ MallocExtension_SetSampleUserDataCallbacks( tcmalloc::MallocExtension::CreateSampleUserDataCallback create, tcmalloc::MallocExtension::CopySampleUserDataCallback copy, tcmalloc::MallocExtension::DestroySampleUserDataCallback destroy); - } #endif diff --git a/contrib/libs/tcmalloc/tcmalloc/internal_malloc_tracing_extension.h b/contrib/libs/tcmalloc/tcmalloc/internal_malloc_tracing_extension.h new file mode 100644 index 000000000000..f747c9bb9362 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal_malloc_tracing_extension.h @@ -0,0 +1,36 @@ +#pragma clang system_header +// Copyright 2022 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Extra extensions exported by some malloc implementations. These +// extensions are accessed through a virtual base class so an +// application can link against a malloc that does not implement these +// extensions, and it will get default versions that do nothing. + +#ifndef TCMALLOC_INTERNAL_MALLOC_TRACING_EXTENSION_H_ +#define TCMALLOC_INTERNAL_MALLOC_TRACING_EXTENSION_H_ + +#include "absl/base/attributes.h" +#include "absl/status/statusor.h" +#include "tcmalloc/malloc_tracing_extension.h" + +#if ABSL_HAVE_ATTRIBUTE_WEAK && !defined(__APPLE__) && !defined(__EMSCRIPTEN__) + +ABSL_ATTRIBUTE_WEAK +absl::StatusOr +MallocTracingExtension_Internal_GetAllocatedAddressRanges(); + +#endif + +#endif // TCMALLOC_INTERNAL_MALLOC_TRACING_EXTENSION_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/legacy_size_classes.cc b/contrib/libs/tcmalloc/tcmalloc/legacy_size_classes.cc index 539525271946..dc6593423031 100644 --- a/contrib/libs/tcmalloc/tcmalloc/legacy_size_classes.cc +++ b/contrib/libs/tcmalloc/tcmalloc/legacy_size_classes.cc @@ -12,358 +12,389 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "absl/types/span.h" #include "tcmalloc/common.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/size_class_info.h" GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { - namespace tcmalloc_internal { -// is fixed per-size-class overhead due to end-of-span fragmentation -// and other factors. For instance, if we have a 96 byte size class, and use a -// single 8KiB page, then we will hold 85 objects per span, and have 32 bytes -// left over. There is also a fixed component of 48 bytes of TCMalloc metadata -// per span. Together, the fixed overhead would be wasted/allocated = -// (32 + 48) / (8192 - 32) ~= 0.98%. -// There is also a dynamic component to overhead based on mismatches between the -// number of bytes requested and the number of bytes provided by the size class. -// Together they sum to the total overhead; for instance if you asked for a -// 50-byte allocation that rounds up to a 64-byte size class, the dynamic -// overhead would be 28%, and if were 22% it would mean (on average) -// 25 bytes of overhead for allocations of that size. +// Columns in the following tables: +// - bytes: size of the size class +// - pages: number of pages per span +// - batch: preferred number of objects for transfers between caches +// - class: size class number +// - objs: number of objects per span +// - waste/fixed: fixed per-size-class overhead due to end-of-span fragmentation +// and other factors. For instance, if we have a 96 byte size class, and use +// a single 8KiB page, then we will hold 85 objects per span, and have 32 +// bytes left over. There is also a fixed component of 48 bytes of TCMalloc +// metadata per span. Together, the fixed overhead would be wasted/allocated +// = (32 + 48) / (8192 - 32) ~= 0.98%. +// - waste/sampling: overhead due to heap sampling +// (rounding to page size, proxy object, metadata). +// - inc: increment from the previous size class. 
This caps the dynamic +// overhead component based on mismatches between the number of bytes +// requested and the number of bytes provided by the size class. Together +// they sum to the total overhead; for instance if you asked for a 50-byte +// allocation that rounds up to a 64-byte size class, the dynamic overhead +// would be 28%, and if waste were 22% it would mean (on average) 25 bytes +// of overhead for allocations of that size. // clang-format off #if defined(__cpp_aligned_new) && __STDCPP_DEFAULT_NEW_ALIGNMENT__ <= 8 #if TCMALLOC_PAGE_SHIFT == 13 static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 86; -static_assert(kCount <= kNumClasses); -const int SizeMap::kLegacySizeClassesCount = kCount; -const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.59% - { 16, 1, 32}, // 0.59% - { 24, 1, 32}, // 0.68% - { 32, 1, 32}, // 0.59% - { 40, 1, 32}, // 0.98% - { 48, 1, 32}, // 0.98% - { 56, 1, 32}, // 0.78% - { 64, 1, 32}, // 0.59% - { 72, 1, 32}, // 1.28% - { 80, 1, 32}, // 0.98% - { 88, 1, 32}, // 0.68% - { 96, 1, 32}, // 0.98% - { 104, 1, 32}, // 1.58% - { 112, 1, 32}, // 0.78% - { 120, 1, 32}, // 0.98% - { 128, 1, 32}, // 0.59% - { 136, 1, 32}, // 0.98% - { 144, 1, 32}, // 2.18% - { 160, 1, 32}, // 0.98% - { 176, 1, 32}, // 1.78% - { 192, 1, 32}, // 2.18% - { 208, 1, 32}, // 1.58% - { 224, 1, 32}, // 2.18% - { 240, 1, 32}, // 0.98% - { 256, 1, 32}, // 0.59% - { 272, 1, 32}, // 0.98% - { 296, 1, 32}, // 3.10% - { 312, 1, 32}, // 1.58% - { 336, 1, 32}, // 2.18% - { 352, 1, 32}, // 1.78% - { 368, 1, 32}, // 1.78% - { 408, 1, 32}, // 0.98% - { 448, 1, 32}, // 2.18% - { 480, 1, 32}, // 0.98% - { 512, 1, 32}, // 0.59% - { 576, 1, 32}, // 2.18% - { 640, 1, 32}, // 7.29% - { 704, 1, 32}, // 6.40% - { 768, 1, 32}, // 7.29% - { 896, 1, 32}, // 2.18% - { 1024, 1, 32}, // 0.59% - { 1152, 2, 32}, // 1.88% - { 1280, 2, 32}, // 6.98% - { 1408, 2, 32}, // 6.10% - { 1536, 2, 32}, // 6.98% - { 1792, 2, 32}, // 1.88% - { 2048, 2, 32}, // 0.29% - { 2304, 2, 28}, // 1.88% - { 2688, 2, 24}, // 1.88% - { 2816, 3, 23}, // 9.30% - { 3200, 2, 20}, // 2.70% - { 3456, 3, 18}, // 1.79% - { 3584, 4, 18}, // 1.74% - { 4096, 2, 16}, // 0.29% - { 4736, 3, 13}, // 3.99% - { 5376, 2, 12}, // 1.88% - { 6144, 3, 10}, // 0.20% - { 6528, 4, 10}, // 0.54% - { 7168, 7, 9}, // 0.08% - { 8192, 2, 8}, // 0.29% - { 9472, 5, 6}, // 8.23% - { 10240, 4, 6}, // 6.82% - { 12288, 3, 5}, // 0.20% - { 13568, 5, 4}, // 0.75% - { 14336, 7, 4}, // 0.08% - { 16384, 2, 4}, // 0.29% - { 20480, 5, 3}, // 0.12% - { 24576, 3, 2}, // 0.20% - { 28672, 7, 2}, // 0.08% - { 32768, 4, 2}, // 0.15% - { 40960, 5, 2}, // 0.12% - { 49152, 6, 2}, // 0.10% - { 57344, 7, 2}, // 0.08% - { 65536, 8, 2}, // 0.07% - { 73728, 9, 2}, // 0.07% - { 81920, 10, 2}, // 0.06% - { 98304, 12, 2}, // 0.05% - { 114688, 14, 2}, // 0.04% - { 131072, 16, 2}, // 0.04% - { 147456, 18, 2}, // 0.03% - { 163840, 20, 2}, // 0.03% - { 180224, 22, 2}, // 0.03% - { 204800, 25, 2}, // 0.02% - { 237568, 29, 2}, // 0.02% - { 262144, 32, 2}, // 0.02% +static constexpr SizeClassAssumptions Assumptions{ + .has_expanded_classes = true, + .span_size = 48, + .sampling_interval = 2097152, + .large_size = 1024, + .large_size_alignment = 128, +}; +static constexpr SizeClassInfo List[] = { +// | waste | +// bytes pages batch class objs |fixed sampling| inc + { 0, 0, 0}, // 0 0 0.00% 0.00% 0.00% + { 8, 1, 32}, // 0 1024 0.58% 0.42% 0.00% + { 16, 1, 32}, // 1 512 0.58% 0.42% 100.00% + { 
24, 1, 32}, // 2 341 0.68% 0.42% 50.00% + { 32, 1, 32}, // 3 256 0.58% 0.42% 33.33% + { 40, 1, 32}, // 4 204 0.97% 0.42% 25.00% + { 48, 1, 32}, // 5 170 0.97% 0.42% 20.00% + { 56, 1, 32}, // 6 146 0.78% 0.42% 16.67% + { 64, 1, 32}, // 7 128 0.58% 0.42% 14.29% + { 72, 1, 32}, // 8 113 1.26% 0.42% 12.50% + { 80, 1, 32}, // 9 102 0.97% 0.42% 11.11% + { 88, 1, 32}, // 10 93 0.68% 0.42% 10.00% + { 96, 1, 32}, // 11 85 0.97% 0.42% 9.09% + { 104, 1, 32}, // 12 78 1.55% 0.42% 8.33% + { 112, 1, 32}, // 13 73 0.78% 0.42% 7.69% + { 120, 1, 32}, // 14 68 0.97% 0.42% 7.14% + { 128, 1, 32}, // 15 64 0.58% 0.42% 6.67% + { 136, 1, 32}, // 16 60 0.97% 0.42% 6.25% + { 144, 1, 32}, // 17 56 2.14% 0.42% 5.88% + { 152, 1, 32}, // 18 53 2.23% 0.42% 5.56% + { 160, 1, 32}, // 19 51 0.97% 0.42% 5.26% + { 168, 1, 32}, // 20 48 2.14% 0.42% 5.00% + { 184, 1, 32}, // 21 44 1.75% 0.42% 9.52% + { 192, 1, 32}, // 22 42 2.14% 0.42% 4.35% + { 208, 1, 32}, // 23 39 1.55% 0.42% 8.33% + { 224, 1, 32}, // 24 36 2.14% 0.42% 7.69% + { 240, 1, 32}, // 25 34 0.97% 0.42% 7.14% + { 256, 1, 32}, // 26 32 0.58% 0.42% 6.67% + { 264, 1, 32}, // 27 31 0.68% 0.42% 3.12% + { 280, 1, 32}, // 28 29 1.46% 0.42% 6.06% + { 312, 1, 32}, // 29 26 1.55% 0.42% 11.43% + { 336, 1, 32}, // 30 24 2.14% 0.42% 7.69% + { 352, 1, 32}, // 31 23 1.75% 0.42% 4.76% + { 384, 1, 32}, // 32 21 2.14% 0.42% 9.09% + { 408, 1, 32}, // 33 20 0.97% 0.42% 6.25% + { 448, 1, 32}, // 34 18 2.14% 0.42% 9.80% + { 480, 1, 32}, // 35 17 0.97% 0.42% 7.14% + { 512, 1, 32}, // 36 16 0.58% 0.42% 6.67% + { 576, 1, 32}, // 37 14 2.14% 0.42% 12.50% + { 640, 1, 32}, // 38 12 6.80% 0.43% 11.11% + { 704, 1, 32}, // 39 11 6.02% 0.43% 10.00% + { 768, 1, 32}, // 40 10 6.80% 0.43% 9.09% + { 896, 1, 32}, // 41 9 2.14% 0.43% 16.67% + { 1024, 1, 32}, // 42 8 0.58% 0.42% 14.29% + { 1152, 2, 32}, // 43 14 1.85% 0.43% 12.50% + { 1280, 2, 32}, // 44 12 6.52% 0.43% 11.11% + { 1408, 2, 32}, // 45 11 5.74% 0.43% 10.00% + { 1536, 2, 32}, // 46 10 6.52% 0.43% 9.09% + { 1792, 2, 32}, // 47 9 1.85% 0.43% 16.67% + { 2048, 2, 32}, // 48 8 0.29% 0.42% 14.29% + { 2304, 2, 28}, // 49 7 1.85% 0.43% 12.50% + { 2688, 2, 24}, // 50 6 1.85% 0.43% 16.67% + { 3200, 2, 20}, // 51 5 2.63% 0.43% 19.05% + { 3584, 4, 18}, // 52 9 1.71% 0.43% 12.00% + { 4096, 1, 16}, // 53 2 0.58% 0.43% 14.29% + { 4736, 3, 13}, // 54 5 3.83% 0.43% 15.62% + { 5376, 2, 12}, // 55 3 1.85% 0.43% 13.51% + { 6144, 3, 10}, // 56 4 0.19% 0.42% 14.29% + { 6528, 4, 10}, // 57 5 0.54% 0.43% 6.25% + { 7168, 7, 9}, // 58 8 0.08% 0.42% 9.80% + { 8192, 1, 8}, // 59 1 0.58% 0.03% 14.29% + { 9472, 5, 6}, // 60 4 7.61% 0.85% 15.62% + { 10240, 4, 6}, // 61 3 6.39% 0.85% 8.11% + { 12288, 3, 5}, // 62 2 0.19% 0.82% 20.00% + { 13568, 5, 4}, // 63 3 0.74% 0.82% 10.42% + { 14336, 7, 4}, // 64 4 0.08% 0.82% 5.66% + { 16384, 2, 4}, // 65 1 0.29% 0.03% 14.29% + { 20480, 5, 3}, // 66 2 0.12% 1.21% 25.00% + { 24576, 3, 2}, // 67 1 0.19% 0.03% 20.00% + { 28672, 7, 2}, // 68 2 0.08% 1.60% 16.67% + { 32768, 4, 2}, // 69 1 0.15% 0.03% 14.29% + { 40960, 5, 2}, // 70 1 0.12% 0.03% 25.00% + { 49152, 6, 2}, // 71 1 0.10% 0.03% 20.00% + { 57344, 7, 2}, // 72 1 0.08% 0.03% 16.67% + { 65536, 8, 2}, // 73 1 0.07% 0.03% 14.29% + { 73728, 9, 2}, // 74 1 0.07% 0.03% 12.50% + { 81920, 10, 2}, // 75 1 0.06% 0.03% 11.11% + { 98304, 12, 2}, // 76 1 0.05% 0.03% 20.00% + {114688, 14, 2}, // 77 1 0.04% 0.03% 16.67% + {131072, 16, 2}, // 78 1 0.04% 0.03% 14.29% + {139264, 17, 2}, // 79 1 0.03% 0.03% 6.25% + {155648, 19, 2}, // 80 1 0.03% 0.03% 11.76% + {180224, 22, 2}, // 81 1 0.03% 0.03% 
15.79% + {204800, 25, 2}, // 82 1 0.02% 0.03% 13.64% + {229376, 28, 2}, // 83 1 0.02% 0.03% 12.00% + {262144, 32, 2}, // 84 1 0.02% 0.03% 14.29% }; #elif TCMALLOC_PAGE_SHIFT == 15 static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 78; -static_assert(kCount <= kNumClasses); -const int SizeMap::kLegacySizeClassesCount = kCount; -const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.15% - { 16, 1, 32}, // 0.15% - { 24, 1, 32}, // 0.17% - { 32, 1, 32}, // 0.15% - { 40, 1, 32}, // 0.17% - { 48, 1, 32}, // 0.24% - { 56, 1, 32}, // 0.17% - { 64, 1, 32}, // 0.15% - { 72, 1, 32}, // 0.17% - { 80, 1, 32}, // 0.29% - { 88, 1, 32}, // 0.24% - { 96, 1, 32}, // 0.24% - { 104, 1, 32}, // 0.17% - { 112, 1, 32}, // 0.34% - { 128, 1, 32}, // 0.15% - { 144, 1, 32}, // 0.39% - { 160, 1, 32}, // 0.54% - { 176, 1, 32}, // 0.24% - { 192, 1, 32}, // 0.54% - { 208, 1, 32}, // 0.49% - { 224, 1, 32}, // 0.34% - { 240, 1, 32}, // 0.54% - { 256, 1, 32}, // 0.15% - { 280, 1, 32}, // 0.17% - { 304, 1, 32}, // 0.89% - { 328, 1, 32}, // 1.06% - { 352, 1, 32}, // 0.24% - { 384, 1, 32}, // 0.54% - { 416, 1, 32}, // 1.13% - { 448, 1, 32}, // 0.34% - { 488, 1, 32}, // 0.37% - { 512, 1, 32}, // 0.15% - { 576, 1, 32}, // 1.74% - { 640, 1, 32}, // 0.54% - { 704, 1, 32}, // 1.33% - { 832, 1, 32}, // 1.13% - { 896, 1, 32}, // 1.74% - { 1024, 1, 32}, // 0.15% - { 1152, 1, 32}, // 1.74% - { 1280, 1, 32}, // 2.55% - { 1536, 1, 32}, // 1.74% - { 1792, 1, 32}, // 1.74% - { 2048, 1, 32}, // 0.15% - { 2176, 1, 30}, // 0.54% - { 2304, 1, 28}, // 1.74% - { 2688, 1, 24}, // 1.74% - { 2944, 1, 22}, // 1.33% - { 3200, 1, 20}, // 2.55% - { 3584, 1, 18}, // 1.74% - { 4096, 1, 16}, // 0.15% - { 4608, 1, 14}, // 1.74% - { 5376, 1, 12}, // 1.74% - { 6528, 1, 10}, // 0.54% - { 7168, 2, 9}, // 1.66% - { 8192, 1, 8}, // 0.15% - { 9344, 2, 7}, // 0.27% - { 10880, 1, 6}, // 0.54% - { 13952, 3, 4}, // 0.70% - { 16384, 1, 4}, // 0.15% - { 19072, 3, 3}, // 3.14% - { 21760, 2, 3}, // 0.47% - { 24576, 3, 2}, // 0.05% - { 28032, 6, 2}, // 0.22% - { 32768, 1, 2}, // 0.15% - { 38144, 5, 2}, // 7.41% - { 40960, 4, 2}, // 6.71% - { 49152, 3, 2}, // 0.05% - { 57344, 7, 2}, // 0.02% - { 65536, 2, 2}, // 0.07% - { 81920, 5, 2}, // 0.03% - { 98304, 3, 2}, // 0.05% - { 114688, 7, 2}, // 0.02% - { 131072, 4, 2}, // 0.04% - { 163840, 5, 2}, // 0.03% - { 196608, 6, 2}, // 0.02% - { 229376, 7, 2}, // 0.02% - { 262144, 8, 2}, // 0.02% +static constexpr SizeClassAssumptions Assumptions{ + .has_expanded_classes = true, + .span_size = 48, + .sampling_interval = 2097152, + .large_size = 1024, + .large_size_alignment = 128, +}; +static constexpr SizeClassInfo List[] = { +// | waste | +// bytes pages batch class objs |fixed sampling| inc + { 0, 0, 0}, // 0 0 0.00% 0.00% 0.00% + { 8, 1, 32}, // 0 4096 0.15% 1.60% 0.00% + { 16, 1, 32}, // 1 2048 0.15% 1.60% 100.00% + { 24, 1, 32}, // 2 1365 0.17% 1.60% 50.00% + { 32, 1, 32}, // 3 1024 0.15% 1.60% 33.33% + { 40, 1, 32}, // 4 819 0.17% 1.60% 25.00% + { 48, 1, 32}, // 5 682 0.24% 1.60% 20.00% + { 56, 1, 32}, // 6 585 0.17% 1.60% 16.67% + { 64, 1, 32}, // 7 512 0.15% 1.60% 14.29% + { 72, 1, 32}, // 8 455 0.17% 1.60% 12.50% + { 80, 1, 32}, // 9 409 0.29% 1.60% 11.11% + { 88, 1, 32}, // 10 372 0.24% 1.60% 10.00% + { 96, 1, 32}, // 11 341 0.24% 1.60% 9.09% + { 104, 1, 32}, // 12 315 0.17% 1.60% 8.33% + { 112, 1, 32}, // 13 292 0.34% 1.60% 7.69% + { 120, 1, 32}, // 14 273 0.17% 1.60% 7.14% + { 128, 1, 32}, // 15 256 0.15% 
1.60% 6.67% + { 144, 1, 32}, // 16 227 0.39% 1.60% 12.50% + { 160, 1, 32}, // 17 204 0.54% 1.60% 11.11% + { 176, 1, 32}, // 18 186 0.24% 1.60% 10.00% + { 192, 1, 32}, // 19 170 0.54% 1.60% 9.09% + { 208, 1, 32}, // 20 157 0.49% 1.60% 8.33% + { 224, 1, 32}, // 21 146 0.34% 1.60% 7.69% + { 240, 1, 32}, // 22 136 0.54% 1.60% 7.14% + { 256, 1, 32}, // 23 128 0.15% 1.60% 6.67% + { 280, 1, 32}, // 24 117 0.17% 1.60% 9.38% + { 312, 1, 32}, // 25 105 0.17% 1.60% 11.43% + { 344, 1, 32}, // 26 95 0.41% 1.60% 10.26% + { 376, 1, 32}, // 27 87 0.32% 1.60% 9.30% + { 400, 1, 32}, // 28 81 1.27% 1.60% 6.38% + { 448, 1, 32}, // 29 73 0.34% 1.60% 12.00% + { 512, 1, 32}, // 30 64 0.15% 1.60% 14.29% + { 576, 1, 32}, // 31 56 1.71% 1.60% 12.50% + { 640, 1, 32}, // 32 51 0.54% 1.60% 11.11% + { 704, 1, 32}, // 33 46 1.32% 1.60% 10.00% + { 768, 1, 32}, // 34 42 1.71% 1.60% 9.09% + { 896, 1, 32}, // 35 36 1.71% 1.60% 16.67% + { 1024, 1, 32}, // 36 32 0.15% 1.60% 14.29% + { 1152, 1, 32}, // 37 28 1.71% 1.60% 12.50% + { 1280, 1, 32}, // 38 25 2.49% 1.60% 11.11% + { 1536, 1, 32}, // 39 21 1.71% 1.60% 20.00% + { 1664, 1, 32}, // 40 19 3.66% 1.60% 8.33% + { 1920, 1, 32}, // 41 17 0.54% 1.60% 15.38% + { 2048, 1, 32}, // 42 16 0.15% 1.60% 6.67% + { 2176, 1, 30}, // 43 15 0.54% 1.60% 6.25% + { 2432, 1, 26}, // 44 13 3.66% 1.60% 11.76% + { 2688, 1, 24}, // 45 12 1.71% 1.60% 10.53% + { 2944, 1, 22}, // 46 11 1.32% 1.60% 9.52% + { 3200, 1, 20}, // 47 10 2.49% 1.60% 8.70% + { 3584, 1, 18}, // 48 9 1.71% 1.60% 12.00% + { 4096, 1, 16}, // 49 8 0.15% 1.60% 14.29% + { 4608, 1, 14}, // 50 7 1.71% 1.60% 12.50% + { 5376, 1, 12}, // 51 6 1.71% 1.60% 16.67% + { 6528, 1, 10}, // 52 5 0.54% 1.60% 21.43% + { 8192, 1, 8}, // 53 4 0.15% 1.60% 25.49% + { 9344, 2, 7}, // 54 7 0.27% 1.60% 14.06% + { 10880, 1, 6}, // 55 3 0.54% 1.60% 16.44% + { 13056, 2, 5}, // 56 5 0.46% 1.60% 20.00% + { 13952, 3, 4}, // 57 7 0.70% 1.60% 6.86% + { 16384, 1, 4}, // 58 2 0.15% 1.60% 17.43% + { 19072, 3, 3}, // 59 5 3.04% 1.62% 16.41% + { 21760, 2, 3}, // 60 3 0.46% 1.60% 14.09% + { 24576, 3, 2}, // 61 4 0.05% 1.60% 12.94% + { 28672, 7, 2}, // 62 8 0.02% 1.60% 16.67% + { 32768, 1, 2}, // 63 1 0.15% 0.03% 14.29% + { 38144, 5, 2}, // 64 4 6.90% 3.28% 16.41% + { 40960, 4, 2}, // 65 3 6.28% 3.28% 7.38% + { 49152, 3, 2}, // 66 2 0.05% 3.16% 20.00% + { 57344, 7, 2}, // 67 4 0.02% 3.16% 16.67% + { 65536, 2, 2}, // 68 1 0.07% 0.03% 14.29% + { 81920, 5, 2}, // 69 2 0.03% 4.72% 25.00% + { 98304, 3, 2}, // 70 1 0.05% 0.03% 20.00% + {114688, 7, 2}, // 71 2 0.02% 6.28% 16.67% + {131072, 4, 2}, // 72 1 0.04% 0.03% 14.29% + {163840, 5, 2}, // 73 1 0.03% 0.03% 25.00% + {196608, 6, 2}, // 74 1 0.02% 0.03% 20.00% + {229376, 7, 2}, // 75 1 0.02% 0.03% 16.67% + {262144, 8, 2}, // 76 1 0.02% 0.03% 14.29% }; #elif TCMALLOC_PAGE_SHIFT == 18 static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 89; -static_assert(kCount <= kNumClasses); -const int SizeMap::kLegacySizeClassesCount = kCount; -const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.02% - { 16, 1, 32}, // 0.02% - { 24, 1, 32}, // 0.02% - { 32, 1, 32}, // 0.02% - { 40, 1, 32}, // 0.03% - { 48, 1, 32}, // 0.02% - { 56, 1, 32}, // 0.02% - { 64, 1, 32}, // 0.02% - { 72, 1, 32}, // 0.04% - { 80, 1, 32}, // 0.04% - { 88, 1, 32}, // 0.05% - { 96, 1, 32}, // 0.04% - { 104, 1, 32}, // 0.04% - { 112, 1, 32}, // 0.04% - { 128, 1, 32}, // 0.02% - { 144, 1, 32}, // 0.04% - { 160, 1, 32}, // 0.04% - { 176, 1, 32}, // 0.05% - { 
192, 1, 32}, // 0.04% - { 208, 1, 32}, // 0.04% - { 240, 1, 32}, // 0.04% - { 256, 1, 32}, // 0.02% - { 304, 1, 32}, // 0.05% - { 336, 1, 32}, // 0.04% - { 360, 1, 32}, // 0.04% - { 408, 1, 32}, // 0.10% - { 456, 1, 32}, // 0.17% - { 512, 1, 32}, // 0.02% - { 576, 1, 32}, // 0.04% - { 640, 1, 32}, // 0.17% - { 704, 1, 32}, // 0.12% - { 768, 1, 32}, // 0.12% - { 832, 1, 32}, // 0.04% - { 896, 1, 32}, // 0.21% - { 1024, 1, 32}, // 0.02% - { 1152, 1, 32}, // 0.26% - { 1280, 1, 32}, // 0.41% - { 1536, 1, 32}, // 0.41% - { 1664, 1, 32}, // 0.36% - { 1792, 1, 32}, // 0.21% - { 1920, 1, 32}, // 0.41% - { 2048, 1, 32}, // 0.02% - { 2176, 1, 30}, // 0.41% - { 2304, 1, 28}, // 0.71% - { 2432, 1, 26}, // 0.76% - { 2560, 1, 25}, // 0.41% - { 2688, 1, 24}, // 0.56% - { 2816, 1, 23}, // 0.12% - { 2944, 1, 22}, // 0.07% - { 3072, 1, 21}, // 0.41% - { 3328, 1, 19}, // 1.00% - { 3584, 1, 18}, // 0.21% - { 3840, 1, 17}, // 0.41% - { 4096, 1, 16}, // 0.02% - { 4736, 1, 13}, // 0.66% - { 5504, 1, 11}, // 1.35% - { 6144, 1, 10}, // 1.61% - { 6528, 1, 10}, // 0.41% - { 6784, 1, 9}, // 1.71% - { 7168, 1, 9}, // 1.61% - { 7680, 1, 8}, // 0.41% - { 8192, 1, 8}, // 0.02% - { 8704, 1, 7}, // 0.41% - { 9344, 1, 7}, // 0.21% - { 10880, 1, 6}, // 0.41% - { 11904, 1, 5}, // 0.12% - { 13056, 1, 5}, // 0.41% - { 14464, 1, 4}, // 0.71% - { 16384, 1, 4}, // 0.02% - { 18688, 1, 3}, // 0.21% - { 21760, 1, 3}, // 0.41% - { 26112, 1, 2}, // 0.41% - { 29056, 1, 2}, // 0.26% - { 32768, 1, 2}, // 0.02% - { 37376, 1, 2}, // 0.21% - { 43648, 1, 2}, // 0.12% - { 52352, 1, 2}, // 0.17% - { 56064, 2, 2}, // 3.92% - { 65536, 1, 2}, // 0.02% - { 74880, 2, 2}, // 0.03% - { 87296, 1, 2}, // 0.12% - { 104832, 2, 2}, // 0.03% - { 112256, 3, 2}, // 0.09% - { 131072, 1, 2}, // 0.02% - { 149760, 3, 2}, // 5.03% - { 174720, 2, 2}, // 0.03% - { 209664, 4, 2}, // 0.03% - { 262144, 1, 2}, // 0.02% +static constexpr SizeClassAssumptions Assumptions{ + .has_expanded_classes = true, + .span_size = 48, + .sampling_interval = 2097152, + .large_size = 1024, + .large_size_alignment = 128, +}; +static constexpr SizeClassInfo List[] = { +// | waste | +// bytes pages batch class objs |fixed sampling| inc + { 0, 0, 0}, // 0 0 0.00% 0.00% 0.00% + { 8, 1, 32}, // 0 32768 0.02% 12.53% 0.00% + { 16, 1, 32}, // 1 16384 0.02% 12.53% 100.00% + { 24, 1, 32}, // 2 10922 0.02% 12.53% 50.00% + { 32, 1, 32}, // 3 8192 0.02% 12.53% 33.33% + { 40, 1, 32}, // 4 6553 0.03% 12.53% 25.00% + { 48, 1, 32}, // 5 5461 0.02% 12.53% 20.00% + { 56, 1, 32}, // 6 4681 0.02% 12.53% 16.67% + { 64, 1, 32}, // 7 4096 0.02% 12.53% 14.29% + { 72, 1, 32}, // 8 3640 0.04% 12.53% 12.50% + { 80, 1, 32}, // 9 3276 0.04% 12.53% 11.11% + { 88, 1, 32}, // 10 2978 0.05% 12.53% 10.00% + { 96, 1, 32}, // 11 2730 0.04% 12.53% 9.09% + { 104, 1, 32}, // 12 2520 0.04% 12.53% 8.33% + { 112, 1, 32}, // 13 2340 0.04% 12.53% 7.69% + { 128, 1, 32}, // 14 2048 0.02% 12.53% 14.29% + { 144, 1, 32}, // 15 1820 0.04% 12.53% 12.50% + { 160, 1, 32}, // 16 1638 0.04% 12.53% 11.11% + { 176, 1, 32}, // 17 1489 0.05% 12.53% 10.00% + { 192, 1, 32}, // 18 1365 0.04% 12.53% 9.09% + { 208, 1, 32}, // 19 1260 0.04% 12.53% 8.33% + { 232, 1, 32}, // 20 1129 0.10% 12.53% 11.54% + { 256, 1, 32}, // 21 1024 0.02% 12.53% 10.34% + { 280, 1, 32}, // 22 936 0.04% 12.53% 9.38% + { 304, 1, 32}, // 23 862 0.05% 12.53% 8.57% + { 336, 1, 32}, // 24 780 0.04% 12.53% 10.53% + { 384, 1, 32}, // 25 682 0.12% 12.53% 14.29% + { 448, 1, 32}, // 26 585 0.04% 12.53% 16.67% + { 480, 1, 32}, // 27 546 0.04% 12.53% 7.14% + { 512, 1, 32}, // 28 512 
0.02% 12.53% 6.67% + { 576, 1, 32}, // 29 455 0.04% 12.53% 12.50% + { 640, 1, 32}, // 30 409 0.16% 12.53% 11.11% + { 704, 1, 32}, // 31 372 0.12% 12.53% 10.00% + { 768, 1, 32}, // 32 341 0.12% 12.53% 9.09% + { 896, 1, 32}, // 33 292 0.21% 12.53% 16.67% + { 1024, 1, 32}, // 34 256 0.02% 12.53% 14.29% + { 1152, 1, 32}, // 35 227 0.26% 12.53% 12.50% + { 1280, 1, 32}, // 36 204 0.41% 12.53% 11.11% + { 1408, 1, 32}, // 37 186 0.12% 12.53% 10.00% + { 1664, 1, 32}, // 38 157 0.36% 12.53% 18.18% + { 1920, 1, 32}, // 39 136 0.41% 12.53% 15.38% + { 2048, 1, 32}, // 40 128 0.02% 12.53% 6.67% + { 2304, 1, 28}, // 41 113 0.70% 12.53% 12.50% + { 2560, 1, 25}, // 42 102 0.41% 12.53% 11.11% + { 2688, 1, 24}, // 43 97 0.56% 12.53% 5.00% + { 3072, 1, 21}, // 44 85 0.41% 12.53% 14.29% + { 3328, 1, 19}, // 45 78 0.99% 12.54% 8.33% + { 3584, 1, 18}, // 46 73 0.21% 12.53% 7.69% + { 3840, 1, 17}, // 47 68 0.41% 12.53% 7.14% + { 4096, 1, 16}, // 48 64 0.02% 12.53% 6.67% + { 4224, 1, 15}, // 49 62 0.12% 12.53% 3.12% + { 4480, 1, 14}, // 50 58 0.90% 12.54% 6.06% + { 4736, 1, 13}, // 51 55 0.65% 12.54% 5.71% + { 5120, 1, 12}, // 52 51 0.41% 12.53% 8.11% + { 5504, 1, 11}, // 53 47 1.34% 12.54% 7.50% + { 6144, 1, 10}, // 54 42 1.58% 12.54% 11.63% + { 6528, 1, 10}, // 55 40 0.41% 12.53% 6.25% + { 7168, 1, 9}, // 56 36 1.58% 12.54% 9.80% + { 8192, 1, 8}, // 57 32 0.02% 12.53% 14.29% + { 9344, 1, 7}, // 58 28 0.21% 12.53% 14.06% + { 10368, 1, 6}, // 59 25 1.14% 12.54% 10.96% + { 11392, 1, 5}, // 60 23 0.07% 12.53% 9.88% + { 12416, 1, 5}, // 61 21 0.56% 12.54% 8.99% + { 13696, 1, 4}, // 62 19 0.75% 12.54% 10.31% + { 15360, 1, 4}, // 63 17 0.41% 12.54% 12.15% + { 16384, 1, 4}, // 64 16 0.02% 12.53% 6.67% + { 17408, 1, 3}, // 65 15 0.41% 12.54% 6.25% + { 18688, 1, 3}, // 66 14 0.21% 12.54% 7.35% + { 20096, 1, 3}, // 67 13 0.36% 12.54% 7.53% + { 21760, 1, 3}, // 68 12 0.41% 12.54% 8.28% + { 23808, 1, 2}, // 69 11 0.12% 12.53% 9.41% + { 26112, 1, 2}, // 70 10 0.41% 12.54% 9.68% + { 29056, 1, 2}, // 71 9 0.26% 12.54% 11.27% + { 32768, 1, 2}, // 72 8 0.02% 12.53% 12.78% + { 37376, 1, 2}, // 73 7 0.21% 12.54% 14.06% + { 43648, 1, 2}, // 74 6 0.12% 12.54% 16.78% + { 45568, 2, 2}, // 75 11 4.40% 12.63% 4.40% + { 52352, 1, 2}, // 76 5 0.16% 12.54% 14.89% + { 56064, 2, 2}, // 77 9 3.77% 12.63% 7.09% + { 65536, 1, 2}, // 78 4 0.02% 12.53% 16.89% + { 74880, 2, 2}, // 79 7 0.03% 12.53% 14.26% + { 87296, 1, 2}, // 80 3 0.12% 12.54% 16.58% + {104832, 2, 2}, // 81 5 0.03% 12.54% 20.09% + {112256, 3, 2}, // 82 7 0.09% 12.54% 7.08% + {131072, 1, 2}, // 83 2 0.02% 12.53% 16.76% + {149760, 3, 2}, // 84 5 4.79% 12.88% 14.26% + {174720, 2, 2}, // 85 3 0.03% 12.54% 16.67% + {209664, 4, 2}, // 86 5 0.03% 12.54% 20.00% + {262144, 1, 2}, // 87 1 0.02% 0.03% 25.03% }; #elif TCMALLOC_PAGE_SHIFT == 12 static_assert(kMaxSize == 8192, "kMaxSize mismatch"); -static const int kCount = 46; -static_assert(kCount <= kNumClasses); -const int SizeMap::kLegacySizeClassesCount = kCount; -const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 1.17% - { 16, 1, 32}, // 1.17% - { 24, 1, 32}, // 1.57% - { 32, 1, 32}, // 1.17% - { 40, 1, 32}, // 1.57% - { 48, 1, 32}, // 1.57% - { 56, 1, 32}, // 1.37% - { 64, 1, 32}, // 1.17% - { 72, 1, 32}, // 2.78% - { 80, 1, 32}, // 1.57% - { 88, 1, 32}, // 2.37% - { 96, 1, 32}, // 2.78% - { 104, 1, 32}, // 2.17% - { 120, 1, 32}, // 1.57% - { 128, 1, 32}, // 1.17% - { 144, 1, 32}, // 2.78% - { 160, 1, 32}, // 3.60% - { 184, 1, 32}, // 2.37% - { 208, 1, 
32}, // 4.86% - { 240, 1, 32}, // 1.57% - { 256, 1, 32}, // 1.17% - { 272, 1, 32}, // 1.57% - { 312, 1, 32}, // 2.17% - { 336, 1, 32}, // 2.78% - { 368, 1, 32}, // 2.37% - { 408, 1, 32}, // 1.57% - { 512, 1, 32}, // 1.17% - { 576, 2, 32}, // 2.18% - { 704, 2, 32}, // 6.40% - { 768, 2, 32}, // 7.29% - { 896, 2, 32}, // 2.18% - { 1024, 2, 32}, // 0.59% - { 1152, 3, 32}, // 7.08% - { 1280, 3, 32}, // 7.08% - { 1536, 3, 32}, // 0.39% - { 1792, 4, 32}, // 1.88% - { 2048, 4, 32}, // 0.29% - { 2304, 4, 28}, // 1.88% - { 2688, 4, 24}, // 1.88% - { 3456, 6, 18}, // 1.79% - { 4096, 4, 16}, // 0.29% - { 5376, 4, 12}, // 1.88% - { 6144, 3, 10}, // 0.39% - { 7168, 7, 9}, // 0.17% - { 8192, 4, 8}, // 0.29% +static constexpr SizeClassAssumptions Assumptions{ + .has_expanded_classes = false, + .span_size = 48, + .sampling_interval = 524288, + .large_size = 1024, + .large_size_alignment = 128, +}; +static constexpr SizeClassInfo List[] = { +// | waste | +// bytes pages batch class objs |fixed sampling| inc + { 0, 0, 0}, // 0 0 0.00% 0.00% 0.00% + { 8, 1, 32}, // 0 512 1.16% 0.92% 0.00% + { 16, 1, 32}, // 1 256 1.16% 0.92% 100.00% + { 24, 1, 32}, // 2 170 1.54% 0.92% 50.00% + { 32, 1, 32}, // 3 128 1.16% 0.92% 33.33% + { 40, 1, 32}, // 4 102 1.54% 0.92% 25.00% + { 48, 1, 32}, // 5 85 1.54% 0.92% 20.00% + { 56, 1, 32}, // 6 73 1.35% 0.92% 16.67% + { 64, 1, 32}, // 7 64 1.16% 0.92% 14.29% + { 72, 1, 32}, // 8 56 2.70% 0.92% 12.50% + { 80, 1, 32}, // 9 51 1.54% 0.92% 11.11% + { 88, 1, 32}, // 10 46 2.32% 0.92% 10.00% + { 96, 1, 32}, // 11 42 2.70% 0.92% 9.09% + { 104, 1, 32}, // 12 39 2.12% 0.92% 8.33% + { 112, 1, 32}, // 13 36 2.70% 0.92% 7.69% + { 128, 1, 32}, // 14 32 1.16% 0.92% 14.29% + { 144, 1, 32}, // 15 28 2.70% 0.92% 12.50% + { 160, 1, 32}, // 16 25 3.47% 0.92% 11.11% + { 176, 1, 32}, // 17 23 2.32% 0.92% 10.00% + { 192, 1, 32}, // 18 21 2.70% 0.92% 9.09% + { 208, 1, 32}, // 19 19 4.63% 0.92% 8.33% + { 224, 1, 32}, // 20 18 2.70% 0.92% 7.69% + { 240, 1, 32}, // 21 17 1.54% 0.92% 7.14% + { 256, 1, 32}, // 22 16 1.16% 0.92% 6.67% + { 272, 1, 32}, // 23 15 1.54% 0.92% 6.25% + { 312, 1, 32}, // 24 13 2.12% 0.92% 14.71% + { 336, 1, 32}, // 25 12 2.70% 0.92% 7.69% + { 368, 1, 32}, // 26 11 2.32% 0.92% 9.52% + { 448, 1, 32}, // 27 9 2.70% 0.92% 21.74% + { 512, 1, 32}, // 28 8 1.16% 0.92% 14.29% + { 576, 2, 32}, // 29 14 2.14% 0.92% 12.50% + { 704, 2, 32}, // 30 11 6.02% 0.92% 22.22% + { 768, 2, 32}, // 31 10 6.80% 0.93% 9.09% + { 896, 2, 32}, // 32 9 2.14% 0.92% 16.67% + { 1024, 2, 32}, // 33 8 0.58% 0.92% 14.29% + { 1152, 3, 32}, // 34 10 6.61% 0.93% 12.50% + { 1536, 3, 32}, // 35 8 0.39% 0.92% 33.33% + { 1792, 4, 32}, // 36 9 1.85% 0.92% 16.67% + { 2048, 4, 32}, // 37 8 0.29% 0.92% 14.29% + { 2688, 4, 24}, // 38 6 1.85% 0.93% 31.25% + { 3200, 4, 20}, // 39 5 2.63% 0.93% 19.05% + { 4096, 4, 16}, // 40 4 0.29% 0.92% 28.00% + { 4736, 5, 13}, // 41 4 7.72% 1.77% 15.62% + { 6144, 3, 10}, // 42 2 0.39% 1.70% 29.73% + { 7168, 7, 9}, // 43 4 0.17% 1.70% 16.67% + { 8192, 4, 8}, // 44 2 0.29% 1.70% 14.29% }; #else #error "Unsupported TCMALLOC_PAGE_SHIFT value!" 
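
The waste columns in the tables above follow the arithmetic spelled out in the header comment of this hunk. As a standalone sketch (not part of the patch; it only reuses the example values from that comment — a 96-byte class on a single 8 KiB page with 48 bytes of per-span metadata, and a 50-byte request rounded up to 64 bytes), the fixed and dynamic overhead figures can be reproduced like this:

    // Sketch of the overhead arithmetic from the comment above. All constants
    // come from that comment; nothing here is a tcmalloc API.
    #include <cstdio>

    int main() {
      const double kPageSize = 8192.0;
      const double kSpanMetadata = 48.0;  // fixed TCMalloc metadata per span

      // Fixed overhead for a 96-byte size class on one 8 KiB page:
      // 85 objects fit, leaving 32 bytes of end-of-span fragmentation.
      const double size_class = 96.0;
      const int objects_per_span = static_cast<int>(kPageSize / size_class);  // 85
      const double leftover = kPageSize - objects_per_span * size_class;      // 32
      const double fixed_overhead =
          (leftover + kSpanMetadata) / (kPageSize - leftover);                // ~0.98%

      // Dynamic overhead for a 50-byte request served from the 64-byte class.
      const double requested = 50.0;
      const double provided = 64.0;
      const double dynamic_overhead = (provided - requested) / requested;     // 28%

      std::printf("fixed: %.2f%%  dynamic: %.0f%%\n",
                  fixed_overhead * 100.0, dynamic_overhead * 100.0);
    }

Running it prints "fixed: 0.98%  dynamic: 28%", matching the figures quoted in the comment.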
@@ -371,334 +402,354 @@ const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount #else #if TCMALLOC_PAGE_SHIFT == 13 static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 86; -static_assert(kCount <= kNumClasses); -const int SizeMap::kLegacySizeClassesCount = kCount; -const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.59% - { 16, 1, 32}, // 0.59% - { 32, 1, 32}, // 0.59% - { 48, 1, 32}, // 0.98% - { 64, 1, 32}, // 0.59% - { 80, 1, 32}, // 0.98% - { 96, 1, 32}, // 0.98% - { 112, 1, 32}, // 0.78% - { 128, 1, 32}, // 0.59% - { 144, 1, 32}, // 2.18% - { 160, 1, 32}, // 0.98% - { 176, 1, 32}, // 1.78% - { 192, 1, 32}, // 2.18% - { 208, 1, 32}, // 1.58% - { 224, 1, 32}, // 2.18% - { 240, 1, 32}, // 0.98% - { 256, 1, 32}, // 0.59% - { 272, 1, 32}, // 0.98% - { 288, 1, 32}, // 2.18% - { 304, 1, 32}, // 4.25% - { 320, 1, 32}, // 3.00% - { 336, 1, 32}, // 2.18% - { 352, 1, 32}, // 1.78% - { 368, 1, 32}, // 1.78% - { 384, 1, 32}, // 2.18% - { 400, 1, 32}, // 3.00% - { 416, 1, 32}, // 4.25% - { 448, 1, 32}, // 2.18% - { 480, 1, 32}, // 0.98% - { 512, 1, 32}, // 0.59% - { 576, 1, 32}, // 2.18% - { 640, 1, 32}, // 7.29% - { 704, 1, 32}, // 6.40% - { 768, 1, 32}, // 7.29% - { 896, 1, 32}, // 2.18% - { 1024, 1, 32}, // 0.59% - { 1152, 2, 32}, // 1.88% - { 1280, 2, 32}, // 6.98% - { 1408, 2, 32}, // 6.10% - { 1536, 2, 32}, // 6.98% - { 1792, 2, 32}, // 1.88% - { 2048, 2, 32}, // 0.29% - { 2304, 2, 28}, // 1.88% - { 2688, 2, 24}, // 1.88% - { 2816, 3, 23}, // 9.30% - { 3200, 2, 20}, // 2.70% - { 3456, 3, 18}, // 1.79% - { 3584, 4, 18}, // 1.74% - { 4096, 2, 16}, // 0.29% - { 4736, 3, 13}, // 3.99% - { 5376, 2, 12}, // 1.88% - { 6144, 3, 10}, // 0.20% - { 6528, 4, 10}, // 0.54% - { 6784, 5, 9}, // 0.75% - { 7168, 7, 9}, // 0.08% - { 8192, 2, 8}, // 0.29% - { 9472, 5, 6}, // 8.23% - { 10240, 4, 6}, // 6.82% - { 12288, 3, 5}, // 0.20% - { 13568, 5, 4}, // 0.75% - { 14336, 7, 4}, // 0.08% - { 16384, 2, 4}, // 0.29% - { 20480, 5, 3}, // 0.12% - { 24576, 3, 2}, // 0.20% - { 28672, 7, 2}, // 0.08% - { 32768, 4, 2}, // 0.15% - { 40960, 5, 2}, // 0.12% - { 49152, 6, 2}, // 0.10% - { 57344, 7, 2}, // 0.08% - { 65536, 8, 2}, // 0.07% - { 73728, 9, 2}, // 0.07% - { 81920, 10, 2}, // 0.06% - { 90112, 11, 2}, // 0.05% - { 98304, 12, 2}, // 0.05% - { 106496, 13, 2}, // 0.05% - { 114688, 14, 2}, // 0.04% - { 131072, 16, 2}, // 0.04% - { 139264, 17, 2}, // 0.03% - { 155648, 19, 2}, // 0.03% - { 172032, 21, 2}, // 0.03% - { 188416, 23, 2}, // 0.03% - { 204800, 25, 2}, // 0.02% - { 221184, 27, 2}, // 0.02% - { 237568, 29, 2}, // 0.02% - { 262144, 32, 2}, // 0.02% +static constexpr SizeClassAssumptions Assumptions{ + .has_expanded_classes = true, + .span_size = 48, + .sampling_interval = 2097152, + .large_size = 1024, + .large_size_alignment = 128, +}; +static constexpr SizeClassInfo List[] = { +// | waste | +// bytes pages batch class objs |fixed sampling| inc + { 0, 0, 0}, // 0 0 0.00% 0.00% 0.00% + { 8, 1, 32}, // 0 1024 0.58% 0.42% 0.00% + { 16, 1, 32}, // 1 512 0.58% 0.42% 100.00% + { 32, 1, 32}, // 2 256 0.58% 0.42% 100.00% + { 48, 1, 32}, // 3 170 0.97% 0.42% 50.00% + { 64, 1, 32}, // 4 128 0.58% 0.42% 33.33% + { 80, 1, 32}, // 5 102 0.97% 0.42% 25.00% + { 96, 1, 32}, // 6 85 0.97% 0.42% 20.00% + { 112, 1, 32}, // 7 73 0.78% 0.42% 16.67% + { 128, 1, 32}, // 8 64 0.58% 0.42% 14.29% + { 144, 1, 32}, // 9 56 2.14% 0.42% 12.50% + { 160, 1, 32}, // 10 51 0.97% 0.42% 11.11% + { 176, 1, 
32}, // 11 46 1.75% 0.42% 10.00% + { 192, 1, 32}, // 12 42 2.14% 0.42% 9.09% + { 208, 1, 32}, // 13 39 1.55% 0.42% 8.33% + { 224, 1, 32}, // 14 36 2.14% 0.42% 7.69% + { 240, 1, 32}, // 15 34 0.97% 0.42% 7.14% + { 256, 1, 32}, // 16 32 0.58% 0.42% 6.67% + { 272, 1, 32}, // 17 30 0.97% 0.42% 6.25% + { 288, 1, 32}, // 18 28 2.14% 0.42% 5.88% + { 304, 1, 32}, // 19 26 4.08% 0.42% 5.56% + { 320, 1, 32}, // 20 25 2.91% 0.42% 5.26% + { 336, 1, 32}, // 21 24 2.14% 0.42% 5.00% + { 352, 1, 32}, // 22 23 1.75% 0.42% 4.76% + { 368, 1, 32}, // 23 22 1.75% 0.42% 4.55% + { 384, 1, 32}, // 24 21 2.14% 0.42% 4.35% + { 400, 1, 32}, // 25 20 2.91% 0.42% 4.17% + { 416, 1, 32}, // 26 19 4.08% 0.43% 4.00% + { 448, 1, 32}, // 27 18 2.14% 0.42% 7.69% + { 480, 1, 32}, // 28 17 0.97% 0.42% 7.14% + { 512, 1, 32}, // 29 16 0.58% 0.42% 6.67% + { 576, 1, 32}, // 30 14 2.14% 0.42% 12.50% + { 640, 1, 32}, // 31 12 6.80% 0.43% 11.11% + { 704, 1, 32}, // 32 11 6.02% 0.43% 10.00% + { 768, 1, 32}, // 33 10 6.80% 0.43% 9.09% + { 896, 1, 32}, // 34 9 2.14% 0.43% 16.67% + { 1024, 1, 32}, // 35 8 0.58% 0.42% 14.29% + { 1152, 2, 32}, // 36 14 1.85% 0.43% 12.50% + { 1280, 2, 32}, // 37 12 6.52% 0.43% 11.11% + { 1408, 2, 32}, // 38 11 5.74% 0.43% 10.00% + { 1536, 2, 32}, // 39 10 6.52% 0.43% 9.09% + { 1792, 2, 32}, // 40 9 1.85% 0.43% 16.67% + { 2048, 2, 32}, // 41 8 0.29% 0.42% 14.29% + { 2304, 2, 28}, // 42 7 1.85% 0.43% 12.50% + { 2688, 2, 24}, // 43 6 1.85% 0.43% 16.67% + { 2816, 3, 23}, // 44 8 8.51% 0.44% 4.76% + { 3200, 2, 20}, // 45 5 2.63% 0.43% 13.64% + { 3456, 3, 18}, // 46 7 1.75% 0.43% 8.00% + { 3584, 4, 18}, // 47 9 1.71% 0.43% 3.70% + { 4096, 1, 16}, // 48 2 0.58% 0.43% 14.29% + { 4736, 3, 13}, // 49 5 3.83% 0.43% 15.62% + { 5376, 2, 12}, // 50 3 1.85% 0.43% 13.51% + { 6144, 3, 10}, // 51 4 0.19% 0.42% 14.29% + { 6528, 4, 10}, // 52 5 0.54% 0.43% 6.25% + { 7168, 7, 9}, // 53 8 0.08% 0.42% 9.80% + { 8192, 1, 8}, // 54 1 0.58% 0.03% 14.29% + { 9472, 5, 6}, // 55 4 7.61% 0.85% 15.62% + { 10240, 4, 6}, // 56 3 6.39% 0.85% 8.11% + { 12288, 3, 5}, // 57 2 0.19% 0.82% 20.00% + { 13568, 5, 4}, // 58 3 0.74% 0.82% 10.42% + { 14336, 7, 4}, // 59 4 0.08% 0.82% 5.66% + { 16384, 2, 4}, // 60 1 0.29% 0.03% 14.29% + { 20480, 5, 3}, // 61 2 0.12% 1.21% 25.00% + { 24576, 3, 2}, // 62 1 0.19% 0.03% 20.00% + { 28672, 7, 2}, // 63 2 0.08% 1.60% 16.67% + { 32768, 4, 2}, // 64 1 0.15% 0.03% 14.29% + { 40960, 5, 2}, // 65 1 0.12% 0.03% 25.00% + { 49152, 6, 2}, // 66 1 0.10% 0.03% 20.00% + { 57344, 7, 2}, // 67 1 0.08% 0.03% 16.67% + { 65536, 8, 2}, // 68 1 0.07% 0.03% 14.29% + { 73728, 9, 2}, // 69 1 0.07% 0.03% 12.50% + { 81920, 10, 2}, // 70 1 0.06% 0.03% 11.11% + { 90112, 11, 2}, // 71 1 0.05% 0.03% 10.00% + { 98304, 12, 2}, // 72 1 0.05% 0.03% 9.09% + {106496, 13, 2}, // 73 1 0.05% 0.03% 8.33% + {114688, 14, 2}, // 74 1 0.04% 0.03% 7.69% + {131072, 16, 2}, // 75 1 0.04% 0.03% 14.29% + {139264, 17, 2}, // 76 1 0.03% 0.03% 6.25% + {147456, 18, 2}, // 77 1 0.03% 0.03% 5.88% + {155648, 19, 2}, // 78 1 0.03% 0.03% 5.56% + {172032, 21, 2}, // 79 1 0.03% 0.03% 10.53% + {188416, 23, 2}, // 80 1 0.03% 0.03% 9.52% + {204800, 25, 2}, // 81 1 0.02% 0.03% 8.70% + {221184, 27, 2}, // 82 1 0.02% 0.03% 8.00% + {237568, 29, 2}, // 83 1 0.02% 0.03% 7.41% + {262144, 32, 2}, // 84 1 0.02% 0.03% 10.34% }; #elif TCMALLOC_PAGE_SHIFT == 15 static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 78; -static_assert(kCount <= kNumClasses); -const int SizeMap::kLegacySizeClassesCount = kCount; -const SizeClassInfo 
SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.15% - { 16, 1, 32}, // 0.15% - { 32, 1, 32}, // 0.15% - { 48, 1, 32}, // 0.24% - { 64, 1, 32}, // 0.15% - { 80, 1, 32}, // 0.29% - { 96, 1, 32}, // 0.24% - { 112, 1, 32}, // 0.34% - { 128, 1, 32}, // 0.15% - { 144, 1, 32}, // 0.39% - { 160, 1, 32}, // 0.54% - { 176, 1, 32}, // 0.24% - { 192, 1, 32}, // 0.54% - { 208, 1, 32}, // 0.49% - { 224, 1, 32}, // 0.34% - { 240, 1, 32}, // 0.54% - { 256, 1, 32}, // 0.15% - { 272, 1, 32}, // 0.54% - { 288, 1, 32}, // 0.84% - { 304, 1, 32}, // 0.89% - { 320, 1, 32}, // 0.54% - { 336, 1, 32}, // 0.69% - { 352, 1, 32}, // 0.24% - { 384, 1, 32}, // 0.54% - { 416, 1, 32}, // 1.13% - { 448, 1, 32}, // 0.34% - { 480, 1, 32}, // 0.54% - { 512, 1, 32}, // 0.15% - { 576, 1, 32}, // 1.74% - { 640, 1, 32}, // 0.54% - { 704, 1, 32}, // 1.33% - { 768, 1, 32}, // 1.74% - { 832, 1, 32}, // 1.13% - { 896, 1, 32}, // 1.74% - { 1024, 1, 32}, // 0.15% - { 1152, 1, 32}, // 1.74% - { 1280, 1, 32}, // 2.55% - { 1408, 1, 32}, // 1.33% - { 1536, 1, 32}, // 1.74% - { 1792, 1, 32}, // 1.74% - { 2048, 1, 32}, // 0.15% - { 2176, 1, 30}, // 0.54% - { 2304, 1, 28}, // 1.74% - { 2432, 1, 26}, // 3.80% - { 2688, 1, 24}, // 1.74% - { 2944, 1, 22}, // 1.33% - { 3200, 1, 20}, // 2.55% - { 3584, 1, 18}, // 1.74% - { 4096, 1, 16}, // 0.15% - { 4608, 1, 14}, // 1.74% - { 5376, 1, 12}, // 1.74% - { 6528, 1, 10}, // 0.54% - { 7168, 2, 9}, // 1.66% - { 8192, 1, 8}, // 0.15% - { 9344, 2, 7}, // 0.27% - { 10880, 1, 6}, // 0.54% - { 13056, 2, 5}, // 0.47% - { 13952, 3, 4}, // 0.70% - { 16384, 1, 4}, // 0.15% - { 19072, 3, 3}, // 3.14% - { 21760, 2, 3}, // 0.47% - { 24576, 3, 2}, // 0.05% - { 28032, 6, 2}, // 0.22% - { 32768, 1, 2}, // 0.15% - { 38144, 5, 2}, // 7.41% - { 40960, 4, 2}, // 6.71% - { 49152, 3, 2}, // 0.05% - { 57344, 7, 2}, // 0.02% - { 65536, 2, 2}, // 0.07% - { 81920, 5, 2}, // 0.03% - { 98304, 3, 2}, // 0.05% - { 114688, 7, 2}, // 0.02% - { 131072, 4, 2}, // 0.04% - { 163840, 5, 2}, // 0.03% - { 196608, 6, 2}, // 0.02% - { 229376, 7, 2}, // 0.02% - { 262144, 8, 2}, // 0.02% +static constexpr SizeClassAssumptions Assumptions{ + .has_expanded_classes = true, + .span_size = 48, + .sampling_interval = 2097152, + .large_size = 1024, + .large_size_alignment = 128, +}; +static constexpr SizeClassInfo List[] = { +// | waste | +// bytes pages batch class objs |fixed sampling| inc + { 0, 0, 0}, // 0 0 0.00% 0.00% 0.00% + { 8, 1, 32}, // 0 4096 0.15% 1.60% 0.00% + { 16, 1, 32}, // 1 2048 0.15% 1.60% 100.00% + { 32, 1, 32}, // 2 1024 0.15% 1.60% 100.00% + { 48, 1, 32}, // 3 682 0.24% 1.60% 50.00% + { 64, 1, 32}, // 4 512 0.15% 1.60% 33.33% + { 80, 1, 32}, // 5 409 0.29% 1.60% 25.00% + { 96, 1, 32}, // 6 341 0.24% 1.60% 20.00% + { 112, 1, 32}, // 7 292 0.34% 1.60% 16.67% + { 128, 1, 32}, // 8 256 0.15% 1.60% 14.29% + { 144, 1, 32}, // 9 227 0.39% 1.60% 12.50% + { 160, 1, 32}, // 10 204 0.54% 1.60% 11.11% + { 176, 1, 32}, // 11 186 0.24% 1.60% 10.00% + { 192, 1, 32}, // 12 170 0.54% 1.60% 9.09% + { 208, 1, 32}, // 13 157 0.49% 1.60% 8.33% + { 224, 1, 32}, // 14 146 0.34% 1.60% 7.69% + { 240, 1, 32}, // 15 136 0.54% 1.60% 7.14% + { 256, 1, 32}, // 16 128 0.15% 1.60% 6.67% + { 288, 1, 32}, // 17 113 0.83% 1.60% 12.50% + { 320, 1, 32}, // 18 102 0.54% 1.60% 11.11% + { 352, 1, 32}, // 19 93 0.24% 1.60% 10.00% + { 384, 1, 32}, // 20 85 0.54% 1.60% 9.09% + { 400, 1, 32}, // 21 81 1.27% 1.60% 4.17% + { 448, 1, 32}, // 22 73 0.34% 1.60% 12.00% + { 480, 1, 32}, // 23 68 0.54% 1.60% 
7.14% + { 512, 1, 32}, // 24 64 0.15% 1.60% 6.67% + { 576, 1, 32}, // 25 56 1.71% 1.60% 12.50% + { 640, 1, 32}, // 26 51 0.54% 1.60% 11.11% + { 704, 1, 32}, // 27 46 1.32% 1.60% 10.00% + { 768, 1, 32}, // 28 42 1.71% 1.60% 9.09% + { 832, 1, 32}, // 29 39 1.12% 1.60% 8.33% + { 896, 1, 32}, // 30 36 1.71% 1.60% 7.69% + { 1024, 1, 32}, // 31 32 0.15% 1.60% 14.29% + { 1152, 1, 32}, // 32 28 1.71% 1.60% 12.50% + { 1280, 1, 32}, // 33 25 2.49% 1.60% 11.11% + { 1408, 1, 32}, // 34 23 1.32% 1.60% 10.00% + { 1536, 1, 32}, // 35 21 1.71% 1.60% 9.09% + { 1664, 1, 32}, // 36 19 3.66% 1.60% 8.33% + { 1792, 1, 32}, // 37 18 1.71% 1.60% 7.69% + { 1920, 1, 32}, // 38 17 0.54% 1.60% 7.14% + { 2048, 1, 32}, // 39 16 0.15% 1.60% 6.67% + { 2176, 1, 30}, // 40 15 0.54% 1.60% 6.25% + { 2304, 1, 28}, // 41 14 1.71% 1.60% 5.88% + { 2432, 1, 26}, // 42 13 3.66% 1.60% 5.56% + { 2688, 1, 24}, // 43 12 1.71% 1.60% 10.53% + { 2944, 1, 22}, // 44 11 1.32% 1.60% 9.52% + { 3200, 1, 20}, // 45 10 2.49% 1.60% 8.70% + { 3584, 1, 18}, // 46 9 1.71% 1.60% 12.00% + { 4096, 1, 16}, // 47 8 0.15% 1.60% 14.29% + { 4608, 1, 14}, // 48 7 1.71% 1.60% 12.50% + { 5376, 1, 12}, // 49 6 1.71% 1.60% 16.67% + { 6528, 1, 10}, // 50 5 0.54% 1.60% 21.43% + { 7168, 2, 9}, // 51 9 1.63% 1.60% 9.80% + { 8192, 1, 8}, // 52 4 0.15% 1.60% 14.29% + { 9344, 2, 7}, // 53 7 0.27% 1.60% 14.06% + { 10880, 1, 6}, // 54 3 0.54% 1.60% 16.44% + { 13056, 2, 5}, // 55 5 0.46% 1.60% 20.00% + { 13952, 3, 4}, // 56 7 0.70% 1.60% 6.86% + { 16384, 1, 4}, // 57 2 0.15% 1.60% 17.43% + { 19072, 3, 3}, // 58 5 3.04% 1.62% 16.41% + { 21760, 2, 3}, // 59 3 0.46% 1.60% 14.09% + { 24576, 3, 2}, // 60 4 0.05% 1.60% 12.94% + { 26112, 4, 2}, // 61 5 0.43% 1.60% 6.25% + { 28672, 7, 2}, // 62 8 0.02% 1.60% 9.80% + { 32768, 1, 2}, // 63 1 0.15% 0.03% 14.29% + { 38144, 5, 2}, // 64 4 6.90% 3.28% 16.41% + { 40960, 4, 2}, // 65 3 6.28% 3.28% 7.38% + { 49152, 3, 2}, // 66 2 0.05% 3.16% 20.00% + { 57344, 7, 2}, // 67 4 0.02% 3.16% 16.67% + { 65536, 2, 2}, // 68 1 0.07% 0.03% 14.29% + { 81920, 5, 2}, // 69 2 0.03% 4.72% 25.00% + { 98304, 3, 2}, // 70 1 0.05% 0.03% 20.00% + {114688, 7, 2}, // 71 2 0.02% 6.28% 16.67% + {131072, 4, 2}, // 72 1 0.04% 0.03% 14.29% + {163840, 5, 2}, // 73 1 0.03% 0.03% 25.00% + {196608, 6, 2}, // 74 1 0.02% 0.03% 20.00% + {229376, 7, 2}, // 75 1 0.02% 0.03% 16.67% + {262144, 8, 2}, // 76 1 0.02% 0.03% 14.29% }; #elif TCMALLOC_PAGE_SHIFT == 18 static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 89; -static_assert(kCount <= kNumClasses); -const int SizeMap::kLegacySizeClassesCount = kCount; -const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.02% - { 16, 1, 32}, // 0.02% - { 32, 1, 32}, // 0.02% - { 48, 1, 32}, // 0.02% - { 64, 1, 32}, // 0.02% - { 80, 1, 32}, // 0.04% - { 96, 1, 32}, // 0.04% - { 112, 1, 32}, // 0.04% - { 128, 1, 32}, // 0.02% - { 144, 1, 32}, // 0.04% - { 160, 1, 32}, // 0.04% - { 176, 1, 32}, // 0.05% - { 192, 1, 32}, // 0.04% - { 208, 1, 32}, // 0.04% - { 240, 1, 32}, // 0.04% - { 256, 1, 32}, // 0.02% - { 304, 1, 32}, // 0.05% - { 336, 1, 32}, // 0.04% - { 368, 1, 32}, // 0.07% - { 416, 1, 32}, // 0.04% - { 464, 1, 32}, // 0.19% - { 512, 1, 32}, // 0.02% - { 576, 1, 32}, // 0.04% - { 640, 1, 32}, // 0.17% - { 704, 1, 32}, // 0.12% - { 768, 1, 32}, // 0.12% - { 832, 1, 32}, // 0.04% - { 896, 1, 32}, // 0.21% - { 1024, 1, 32}, // 0.02% - { 1152, 1, 32}, // 0.26% - { 1280, 1, 32}, // 0.41% - { 1408, 1, 32}, // 0.12% - { 
1536, 1, 32}, // 0.41% - { 1664, 1, 32}, // 0.36% - { 1792, 1, 32}, // 0.21% - { 1920, 1, 32}, // 0.41% - { 2048, 1, 32}, // 0.02% - { 2176, 1, 30}, // 0.41% - { 2304, 1, 28}, // 0.71% - { 2432, 1, 26}, // 0.76% - { 2560, 1, 25}, // 0.41% - { 2688, 1, 24}, // 0.56% - { 2816, 1, 23}, // 0.12% - { 2944, 1, 22}, // 0.07% - { 3072, 1, 21}, // 0.41% - { 3200, 1, 20}, // 1.15% - { 3328, 1, 19}, // 1.00% - { 3584, 1, 18}, // 0.21% - { 3840, 1, 17}, // 0.41% - { 4096, 1, 16}, // 0.02% - { 4736, 1, 13}, // 0.66% - { 5504, 1, 11}, // 1.35% - { 6144, 1, 10}, // 1.61% - { 6528, 1, 10}, // 0.41% - { 6784, 1, 9}, // 1.71% - { 7168, 1, 9}, // 1.61% - { 7680, 1, 8}, // 0.41% - { 8192, 1, 8}, // 0.02% - { 8704, 1, 7}, // 0.41% - { 9344, 1, 7}, // 0.21% - { 10368, 1, 6}, // 1.15% - { 11392, 1, 5}, // 0.07% - { 12416, 1, 5}, // 0.56% - { 13696, 1, 4}, // 0.76% - { 14464, 1, 4}, // 0.71% - { 16384, 1, 4}, // 0.02% - { 17408, 1, 3}, // 0.41% - { 20096, 1, 3}, // 0.36% - { 21760, 1, 3}, // 0.41% - { 23808, 1, 2}, // 0.12% - { 26112, 1, 2}, // 0.41% - { 29056, 1, 2}, // 0.26% - { 32768, 1, 2}, // 0.02% - { 37376, 1, 2}, // 0.21% - { 43648, 1, 2}, // 0.12% - { 52352, 1, 2}, // 0.17% - { 56064, 2, 2}, // 3.92% - { 65536, 1, 2}, // 0.02% - { 74880, 2, 2}, // 0.03% - { 87296, 1, 2}, // 0.12% - { 104832, 2, 2}, // 0.03% - { 112256, 3, 2}, // 0.09% - { 131072, 1, 2}, // 0.02% - { 149760, 3, 2}, // 5.03% - { 174720, 2, 2}, // 0.03% - { 196608, 3, 2}, // 0.01% - { 209664, 4, 2}, // 0.03% - { 262144, 1, 2}, // 0.02% +static constexpr SizeClassAssumptions Assumptions{ + .has_expanded_classes = true, + .span_size = 48, + .sampling_interval = 2097152, + .large_size = 1024, + .large_size_alignment = 128, +}; +static constexpr SizeClassInfo List[] = { +// | waste | +// bytes pages batch class objs |fixed sampling| inc + { 0, 0, 0}, // 0 0 0.00% 0.00% 0.00% + { 8, 1, 32}, // 0 32768 0.02% 12.53% 0.00% + { 16, 1, 32}, // 1 16384 0.02% 12.53% 100.00% + { 32, 1, 32}, // 2 8192 0.02% 12.53% 100.00% + { 48, 1, 32}, // 3 5461 0.02% 12.53% 50.00% + { 64, 1, 32}, // 4 4096 0.02% 12.53% 33.33% + { 80, 1, 32}, // 5 3276 0.04% 12.53% 25.00% + { 96, 1, 32}, // 6 2730 0.04% 12.53% 20.00% + { 112, 1, 32}, // 7 2340 0.04% 12.53% 16.67% + { 128, 1, 32}, // 8 2048 0.02% 12.53% 14.29% + { 144, 1, 32}, // 9 1820 0.04% 12.53% 12.50% + { 160, 1, 32}, // 10 1638 0.04% 12.53% 11.11% + { 176, 1, 32}, // 11 1489 0.05% 12.53% 10.00% + { 192, 1, 32}, // 12 1365 0.04% 12.53% 9.09% + { 208, 1, 32}, // 13 1260 0.04% 12.53% 8.33% + { 224, 1, 32}, // 14 1170 0.04% 12.53% 7.69% + { 240, 1, 32}, // 15 1092 0.04% 12.53% 7.14% + { 256, 1, 32}, // 16 1024 0.02% 12.53% 6.67% + { 272, 1, 32}, // 17 963 0.10% 12.53% 6.25% + { 304, 1, 32}, // 18 862 0.05% 12.53% 11.76% + { 336, 1, 32}, // 19 780 0.04% 12.53% 10.53% + { 384, 1, 32}, // 20 682 0.12% 12.53% 14.29% + { 416, 1, 32}, // 21 630 0.04% 12.53% 8.33% + { 448, 1, 32}, // 22 585 0.04% 12.53% 7.69% + { 480, 1, 32}, // 23 546 0.04% 12.53% 7.14% + { 512, 1, 32}, // 24 512 0.02% 12.53% 6.67% + { 576, 1, 32}, // 25 455 0.04% 12.53% 12.50% + { 640, 1, 32}, // 26 409 0.16% 12.53% 11.11% + { 704, 1, 32}, // 27 372 0.12% 12.53% 10.00% + { 768, 1, 32}, // 28 341 0.12% 12.53% 9.09% + { 896, 1, 32}, // 29 292 0.21% 12.53% 16.67% + { 1024, 1, 32}, // 30 256 0.02% 12.53% 14.29% + { 1152, 1, 32}, // 31 227 0.26% 12.53% 12.50% + { 1280, 1, 32}, // 32 204 0.41% 12.53% 11.11% + { 1408, 1, 32}, // 33 186 0.12% 12.53% 10.00% + { 1536, 1, 32}, // 34 170 0.41% 12.53% 9.09% + { 1664, 1, 32}, // 35 157 0.36% 12.53% 8.33% + { 1920, 1, 
32}, // 36 136 0.41% 12.53% 15.38% + { 2048, 1, 32}, // 37 128 0.02% 12.53% 6.67% + { 2304, 1, 28}, // 38 113 0.70% 12.53% 12.50% + { 2560, 1, 25}, // 39 102 0.41% 12.53% 11.11% + { 2688, 1, 24}, // 40 97 0.56% 12.53% 5.00% + { 3072, 1, 21}, // 41 85 0.41% 12.53% 14.29% + { 3328, 1, 19}, // 42 78 0.99% 12.54% 8.33% + { 3584, 1, 18}, // 43 73 0.21% 12.53% 7.69% + { 3840, 1, 17}, // 44 68 0.41% 12.53% 7.14% + { 4096, 1, 16}, // 45 64 0.02% 12.53% 6.67% + { 4224, 1, 15}, // 46 62 0.12% 12.53% 3.12% + { 4480, 1, 14}, // 47 58 0.90% 12.54% 6.06% + { 4736, 1, 13}, // 48 55 0.65% 12.54% 5.71% + { 5120, 1, 12}, // 49 51 0.41% 12.53% 8.11% + { 5504, 1, 11}, // 50 47 1.34% 12.54% 7.50% + { 6144, 1, 10}, // 51 42 1.58% 12.54% 11.63% + { 6528, 1, 10}, // 52 40 0.41% 12.53% 6.25% + { 7168, 1, 9}, // 53 36 1.58% 12.54% 9.80% + { 8192, 1, 8}, // 54 32 0.02% 12.53% 14.29% + { 8704, 1, 7}, // 55 30 0.41% 12.54% 6.25% + { 9344, 1, 7}, // 56 28 0.21% 12.53% 7.35% + { 10368, 1, 6}, // 57 25 1.14% 12.54% 10.96% + { 11392, 1, 5}, // 58 23 0.07% 12.53% 9.88% + { 12416, 1, 5}, // 59 21 0.56% 12.54% 8.99% + { 13056, 1, 5}, // 60 20 0.41% 12.54% 5.15% + { 13696, 1, 4}, // 61 19 0.75% 12.54% 4.90% + { 15360, 1, 4}, // 62 17 0.41% 12.54% 12.15% + { 16384, 1, 4}, // 63 16 0.02% 12.53% 6.67% + { 17408, 1, 3}, // 64 15 0.41% 12.54% 6.25% + { 18688, 1, 3}, // 65 14 0.21% 12.54% 7.35% + { 20096, 1, 3}, // 66 13 0.36% 12.54% 7.53% + { 21760, 1, 3}, // 67 12 0.41% 12.54% 8.28% + { 23808, 1, 2}, // 68 11 0.12% 12.53% 9.41% + { 26112, 1, 2}, // 69 10 0.41% 12.54% 9.68% + { 29056, 1, 2}, // 70 9 0.26% 12.54% 11.27% + { 32768, 1, 2}, // 71 8 0.02% 12.53% 12.78% + { 37376, 1, 2}, // 72 7 0.21% 12.54% 14.06% + { 43648, 1, 2}, // 73 6 0.12% 12.54% 16.78% + { 45568, 2, 2}, // 74 11 4.40% 12.63% 4.40% + { 52352, 1, 2}, // 75 5 0.16% 12.54% 14.89% + { 56064, 2, 2}, // 76 9 3.77% 12.63% 7.09% + { 65536, 1, 2}, // 77 4 0.02% 12.53% 16.89% + { 74880, 2, 2}, // 78 7 0.03% 12.53% 14.26% + { 87296, 1, 2}, // 79 3 0.12% 12.54% 16.58% + {104832, 2, 2}, // 80 5 0.03% 12.54% 20.09% + {112256, 3, 2}, // 81 7 0.09% 12.54% 7.08% + {131072, 1, 2}, // 82 2 0.02% 12.53% 16.76% + {149760, 3, 2}, // 83 5 4.79% 12.88% 14.26% + {174720, 2, 2}, // 84 3 0.03% 12.54% 16.67% + {196608, 3, 2}, // 85 4 0.01% 12.53% 12.53% + {209664, 4, 2}, // 86 5 0.03% 12.54% 6.64% + {262144, 1, 2}, // 87 1 0.02% 0.03% 25.03% }; #elif TCMALLOC_PAGE_SHIFT == 12 static_assert(kMaxSize == 8192, "kMaxSize mismatch"); -static const int kCount = 46; -static_assert(kCount <= kNumClasses); -const int SizeMap::kLegacySizeClassesCount = kCount; -const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount] = { - // , , - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 1.17% - { 16, 1, 32}, // 1.17% - { 32, 1, 32}, // 1.17% - { 48, 1, 32}, // 1.57% - { 64, 1, 32}, // 1.17% - { 80, 1, 32}, // 1.57% - { 96, 1, 32}, // 2.78% - { 112, 1, 32}, // 2.78% - { 128, 1, 32}, // 1.17% - { 144, 1, 32}, // 2.78% - { 160, 1, 32}, // 3.60% - { 176, 1, 32}, // 2.37% - { 192, 1, 32}, // 2.78% - { 208, 1, 32}, // 4.86% - { 224, 1, 32}, // 2.78% - { 240, 1, 32}, // 1.57% - { 256, 1, 32}, // 1.17% - { 272, 1, 32}, // 1.57% - { 288, 1, 32}, // 2.78% - { 304, 1, 32}, // 4.86% - { 336, 1, 32}, // 2.78% - { 368, 1, 32}, // 2.37% - { 400, 1, 32}, // 3.60% - { 448, 1, 32}, // 2.78% - { 512, 1, 32}, // 1.17% - { 576, 2, 32}, // 2.18% - { 640, 2, 32}, // 7.29% - { 704, 2, 32}, // 6.40% - { 768, 2, 32}, // 7.29% - { 896, 2, 32}, // 2.18% - { 1024, 2, 32}, // 0.59% - { 1152, 3, 32}, // 7.08% - { 1280, 3, 
32}, // 7.08% - { 1536, 3, 32}, // 0.39% - { 1792, 4, 32}, // 1.88% - { 2048, 4, 32}, // 0.29% - { 2304, 4, 28}, // 1.88% - { 2688, 4, 24}, // 1.88% - { 3200, 4, 20}, // 2.70% - { 3584, 7, 18}, // 0.17% - { 4096, 4, 16}, // 0.29% - { 5376, 4, 12}, // 1.88% - { 6144, 3, 10}, // 0.39% - { 7168, 7, 9}, // 0.17% - { 8192, 4, 8}, // 0.29% +static constexpr SizeClassAssumptions Assumptions{ + .has_expanded_classes = false, + .span_size = 48, + .sampling_interval = 524288, + .large_size = 1024, + .large_size_alignment = 128, +}; +static constexpr SizeClassInfo List[] = { +// | waste | +// bytes pages batch class objs |fixed sampling| inc + { 0, 0, 0}, // 0 0 0.00% 0.00% 0.00% + { 8, 1, 32}, // 0 512 1.16% 0.92% 0.00% + { 16, 1, 32}, // 1 256 1.16% 0.92% 100.00% + { 32, 1, 32}, // 2 128 1.16% 0.92% 100.00% + { 48, 1, 32}, // 3 85 1.54% 0.92% 50.00% + { 64, 1, 32}, // 4 64 1.16% 0.92% 33.33% + { 80, 1, 32}, // 5 51 1.54% 0.92% 25.00% + { 96, 1, 32}, // 6 42 2.70% 0.92% 20.00% + { 112, 1, 32}, // 7 36 2.70% 0.92% 16.67% + { 128, 1, 32}, // 8 32 1.16% 0.92% 14.29% + { 144, 1, 32}, // 9 28 2.70% 0.92% 12.50% + { 160, 1, 32}, // 10 25 3.47% 0.92% 11.11% + { 176, 1, 32}, // 11 23 2.32% 0.92% 10.00% + { 192, 1, 32}, // 12 21 2.70% 0.92% 9.09% + { 208, 1, 32}, // 13 19 4.63% 0.92% 8.33% + { 224, 1, 32}, // 14 18 2.70% 0.92% 7.69% + { 240, 1, 32}, // 15 17 1.54% 0.92% 7.14% + { 256, 1, 32}, // 16 16 1.16% 0.92% 6.67% + { 272, 1, 32}, // 17 15 1.54% 0.92% 6.25% + { 288, 1, 32}, // 18 14 2.70% 0.92% 5.88% + { 304, 1, 32}, // 19 13 4.63% 0.92% 5.56% + { 336, 1, 32}, // 20 12 2.70% 0.92% 10.53% + { 368, 1, 32}, // 21 11 2.32% 0.92% 9.52% + { 448, 1, 32}, // 22 9 2.70% 0.92% 21.74% + { 512, 1, 32}, // 23 8 1.16% 0.92% 14.29% + { 576, 2, 32}, // 24 14 2.14% 0.92% 12.50% + { 640, 2, 32}, // 25 12 6.80% 0.92% 11.11% + { 704, 2, 32}, // 26 11 6.02% 0.92% 10.00% + { 768, 2, 32}, // 27 10 6.80% 0.93% 9.09% + { 896, 2, 32}, // 28 9 2.14% 0.92% 16.67% + { 1024, 2, 32}, // 29 8 0.58% 0.92% 14.29% + { 1152, 3, 32}, // 30 10 6.61% 0.93% 12.50% + { 1280, 3, 32}, // 31 9 6.61% 0.93% 11.11% + { 1536, 3, 32}, // 32 8 0.39% 0.92% 20.00% + { 1792, 4, 32}, // 33 9 1.85% 0.92% 16.67% + { 2048, 4, 32}, // 34 8 0.29% 0.92% 14.29% + { 2304, 4, 28}, // 35 7 1.85% 0.92% 12.50% + { 2688, 4, 24}, // 36 6 1.85% 0.93% 16.67% + { 3200, 4, 20}, // 37 5 2.63% 0.93% 19.05% + { 3584, 7, 18}, // 38 8 0.17% 0.92% 12.00% + { 4096, 4, 16}, // 39 4 0.29% 0.92% 14.29% + { 4736, 5, 13}, // 40 4 7.72% 1.77% 15.62% + { 5376, 4, 12}, // 41 3 1.85% 1.72% 13.51% + { 6144, 3, 10}, // 42 2 0.39% 1.70% 14.29% + { 7168, 7, 9}, // 43 4 0.17% 1.70% 16.67% + { 8192, 4, 8}, // 44 2 0.29% 1.70% 14.29% }; #else #error "Unsupported TCMALLOC_PAGE_SHIFT value!" 
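
Both branches feed the same kind of {bytes, pages, batch} records into kLegacySizeClasses (see the static_assert and the constexpr definition in the next hunk). A minimal, purely illustrative sketch of what such a table encodes — mapping a requested size to the smallest class that covers it — is below; the names and the linear table here are hypothetical, and tcmalloc's real SizeMap precomputes a lookup array rather than searching the table on every allocation:

    // Illustrative only: how a {bytes, pages, batch} table like List[] relates
    // a requested allocation size to a size class. Not tcmalloc's SizeMap.
    #include <algorithm>
    #include <cstddef>
    #include <cstdio>
    #include <iterator>

    struct SizeClassInfo {
      size_t size;         // bytes column: object size served by this class
      size_t pages;        // pages column: pages per span
      size_t num_to_move;  // batch column: objects transferred between caches
    };

    // A few rows from the 8 KiB-page table above, including the sentinel class 0.
    constexpr SizeClassInfo kClasses[] = {
        {0, 0, 0}, {8, 1, 32}, {16, 1, 32}, {24, 1, 32}, {32, 1, 32},
        {48, 1, 32}, {64, 1, 32}, {80, 1, 32}, {96, 1, 32},
    };

    // Index of the smallest class whose size covers the request
    // (class 0 is the unused sentinel; sizes above 96 are out of range here).
    size_t SizeClassFor(size_t requested) {
      const SizeClassInfo* it = std::lower_bound(
          std::begin(kClasses) + 1, std::end(kClasses), requested,
          [](const SizeClassInfo& info, size_t n) { return info.size < n; });
      return static_cast<size_t>(it - std::begin(kClasses));
    }

    int main() {
      // A 50-byte request lands in the 64-byte class, which is the source of
      // the 28% dynamic overhead example in the table header comment.
      const size_t cls = SizeClassFor(50);
      std::printf("50 bytes -> class %zu (%zu bytes)\n", cls, kClasses[cls].size);
    }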
@@ -706,6 +757,9 @@ const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount #endif // clang-format on +static_assert(sizeof(List) / sizeof(List[0]) <= kNumBaseClasses); +extern constexpr SizeClasses kLegacySizeClasses{List, Assumptions}; + } // namespace tcmalloc_internal } // namespace tcmalloc GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/libc_override.h b/contrib/libs/tcmalloc/tcmalloc/libc_override.h index 89f8e4e5c817..ec99f52567ca 100644 --- a/contrib/libs/tcmalloc/tcmalloc/libc_override.h +++ b/contrib/libs/tcmalloc/tcmalloc/libc_override.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -25,15 +26,193 @@ #define TCMALLOC_LIBC_OVERRIDE_H_ #include +#include +#include -#include "tcmalloc/tcmalloc.h" +#include +#include +#include + +#include "tcmalloc/tcmalloc.h" // IWYU pragma: keep + +#define TCMALLOC_ALIAS(tc_fn) \ + __attribute__((alias(#tc_fn), visibility("default"))) + +// NOLINTBEGIN(misc-definitions-in-headers) #if defined(__GLIBC__) -#include "tcmalloc/libc_override_glibc.h" + +#define TCMALLOC_NOTHROW noexcept + +extern "C" { + +void* __libc_malloc(size_t size) TCMALLOC_NOTHROW + TCMALLOC_ALIAS(TCMallocInternalMalloc); +void __libc_free(void* ptr) TCMALLOC_NOTHROW + TCMALLOC_ALIAS(TCMallocInternalFree); +void* __libc_realloc(void* ptr, size_t size) TCMALLOC_NOTHROW + TCMALLOC_ALIAS(TCMallocInternalRealloc); +void* __libc_calloc(size_t n, size_t size) TCMALLOC_NOTHROW + TCMALLOC_ALIAS(TCMallocInternalCalloc); +void __libc_cfree(void* ptr) TCMALLOC_NOTHROW + TCMALLOC_ALIAS(TCMallocInternalCfree); +void* __libc_memalign(size_t align, size_t s) TCMALLOC_NOTHROW + TCMALLOC_ALIAS(TCMallocInternalMemalign); +void* __libc_valloc(size_t size) TCMALLOC_NOTHROW + TCMALLOC_ALIAS(TCMallocInternalValloc); +void* __libc_pvalloc(size_t size) TCMALLOC_NOTHROW + TCMALLOC_ALIAS(TCMallocInternalPvalloc); +int __posix_memalign(void** r, size_t a, size_t s) TCMALLOC_NOTHROW + TCMALLOC_ALIAS(TCMallocInternalPosixMemalign); + +// We also have to hook libc malloc. While our work with weak symbols +// should make sure libc malloc is never called in most situations, it +// can be worked around by shared libraries with the DEEPBIND +// environment variable set. The below hooks libc to call our malloc +// routines even in that situation. In other situations, this hook +// should never be called. + +static void* glibc_override_malloc(size_t size, const void* caller) { + return TCMallocInternalMalloc(size); +} +static void* glibc_override_realloc(void* ptr, size_t size, + const void* caller) { + return TCMallocInternalRealloc(ptr, size); +} +static void glibc_override_free(void* ptr, const void* caller) { + TCMallocInternalFree(ptr); +} +static void* glibc_override_memalign(size_t align, size_t size, + const void* caller) { + return TCMallocInternalMemalign(align, size); +} + +// We should be using __malloc_initialize_hook here. (See +// http://swoolley.org/man.cgi/3/malloc_hook.) However, this causes weird +// linker errors with programs that link with -static, so instead we just assign +// the vars directly at static-constructor time. That should serve the same +// effect of making sure the hooks are set before the first malloc call the +// program makes. 
+ +// Glibc-2.14 and above make __malloc_hook and friends volatile +#ifndef __MALLOC_HOOK_VOLATILE +#define __MALLOC_HOOK_VOLATILE /**/ +#endif + +void* (*__MALLOC_HOOK_VOLATILE __malloc_hook)(size_t, const void*) = + &glibc_override_malloc; +void* (*__MALLOC_HOOK_VOLATILE __realloc_hook)(void*, size_t, const void*) = + &glibc_override_realloc; +void (*__MALLOC_HOOK_VOLATILE __free_hook)(void*, + const void*) = &glibc_override_free; +void* (*__MALLOC_HOOK_VOLATILE __memalign_hook)(size_t, size_t, const void*) = + &glibc_override_memalign; + +} // extern "C" #else -#include "tcmalloc/libc_override_redefine.h" +#define TCMALLOC_NOTHROW + +#endif // defined(__GLIBC__) + +void* operator new(size_t size) noexcept(false) + TCMALLOC_ALIAS(TCMallocInternalNew); +void operator delete(void* p) noexcept TCMALLOC_ALIAS(TCMallocInternalDelete); +void operator delete(void* p, size_t size) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteSized); +void* operator new[](size_t size) noexcept(false) + TCMALLOC_ALIAS(TCMallocInternalNewArray); +void operator delete[](void* p) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteArray); +void operator delete[](void* p, size_t size) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteArraySized); +void* operator new(size_t size, const std::nothrow_t& nt) noexcept + TCMALLOC_ALIAS(TCMallocInternalNewNothrow); +void* operator new[](size_t size, const std::nothrow_t& nt) noexcept + TCMALLOC_ALIAS(TCMallocInternalNewArrayNothrow); +void operator delete(void* p, const std::nothrow_t& nt) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteNothrow); +void operator delete[](void* p, const std::nothrow_t& nt) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteArrayNothrow); + +void* operator new(size_t size, std::align_val_t alignment) noexcept(false) + TCMALLOC_ALIAS(TCMallocInternalNewAligned); +void* operator new(size_t size, std::align_val_t alignment, + const std::nothrow_t&) noexcept + TCMALLOC_ALIAS(TCMallocInternalNewAlignedNothrow); +void operator delete(void* p, std::align_val_t alignment) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteAligned); +void operator delete(void* p, std::align_val_t alignment, + const std::nothrow_t&) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteAlignedNothrow); +void operator delete(void* p, size_t size, std::align_val_t alignment) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteSizedAligned); +void* operator new[](size_t size, std::align_val_t alignment) noexcept(false) + TCMALLOC_ALIAS(TCMallocInternalNewArrayAligned); +void* operator new[](size_t size, std::align_val_t alignment, + const std::nothrow_t&) noexcept + TCMALLOC_ALIAS(TCMallocInternalNewArrayAlignedNothrow); +void operator delete[](void* p, std::align_val_t alignment) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteArrayAligned); +void operator delete[](void* p, std::align_val_t alignment, + const std::nothrow_t&) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteArrayAlignedNothrow); +void operator delete[](void* p, size_t size, + std::align_val_t alignment) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteArraySizedAligned); + +extern "C" { + +void* malloc(size_t size) TCMALLOC_NOTHROW + TCMALLOC_ALIAS(TCMallocInternalMalloc); +void free(void* ptr) TCMALLOC_NOTHROW TCMALLOC_ALIAS(TCMallocInternalFree); +void free_sized(void* ptr, size_t size) + TCMALLOC_ALIAS(TCMallocInternalFreeSized); +void free_aligned_sized(void* ptr, size_t align, size_t size) + TCMALLOC_ALIAS(TCMallocInternalFreeAlignedSized); +void sdallocx(void* ptr, size_t size, int flags) noexcept + 
TCMALLOC_ALIAS(TCMallocInternalSdallocx); +void* realloc(void* ptr, size_t size) TCMALLOC_NOTHROW + TCMALLOC_ALIAS(TCMallocInternalRealloc); +void* reallocarray(void* ptr, size_t n, size_t size) + TCMALLOC_ALIAS(TCMallocInternalReallocArray); +void* calloc(size_t n, size_t size) TCMALLOC_NOTHROW + TCMALLOC_ALIAS(TCMallocInternalCalloc); +void cfree(void* ptr) TCMALLOC_NOTHROW TCMALLOC_ALIAS(TCMallocInternalCfree); +void* memalign(size_t align, size_t s) TCMALLOC_NOTHROW + TCMALLOC_ALIAS(TCMallocInternalMemalign); +void* aligned_alloc(size_t align, size_t s) TCMALLOC_NOTHROW + TCMALLOC_ALIAS(TCMallocInternalAlignedAlloc); +void* valloc(size_t size) TCMALLOC_NOTHROW + TCMALLOC_ALIAS(TCMallocInternalValloc); +void* pvalloc(size_t size) TCMALLOC_NOTHROW + TCMALLOC_ALIAS(TCMallocInternalPvalloc); +int posix_memalign(void** r, size_t a, size_t s) TCMALLOC_NOTHROW + TCMALLOC_ALIAS(TCMallocInternalPosixMemalign); +void malloc_stats(void) TCMALLOC_NOTHROW + TCMALLOC_ALIAS(TCMallocInternalMallocStats); +int malloc_trim(size_t pad) TCMALLOC_NOTHROW + TCMALLOC_ALIAS(TCMallocInternalMallocTrim); +int mallopt(int cmd, int value) TCMALLOC_NOTHROW + TCMALLOC_ALIAS(TCMallocInternalMallOpt); +#ifdef TCMALLOC_HAVE_STRUCT_MALLINFO +struct mallinfo mallinfo(void) TCMALLOC_NOTHROW + TCMALLOC_ALIAS(TCMallocInternalMallInfo); +#endif +#ifdef TCMALLOC_HAVE_STRUCT_MALLINFO2 +struct mallinfo2 mallinfo2(void) TCMALLOC_NOTHROW + TCMALLOC_ALIAS(TCMallocInternalMallInfo2); #endif +int malloc_info(int opts, FILE* fp) TCMALLOC_NOTHROW + TCMALLOC_ALIAS(TCMallocInternalMallocInfo); +size_t malloc_size(void* p) TCMALLOC_NOTHROW + TCMALLOC_ALIAS(TCMallocInternalMallocSize); +size_t malloc_usable_size(void* p) TCMALLOC_NOTHROW + TCMALLOC_ALIAS(TCMallocInternalMallocSize); + +} // extern "C" + +// NOLINTEND(misc-definitions-in-headers) #endif // TCMALLOC_LIBC_OVERRIDE_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/libc_override_gcc_and_weak.h b/contrib/libs/tcmalloc/tcmalloc/libc_override_gcc_and_weak.h deleted file mode 100644 index 709bcb727fef..000000000000 --- a/contrib/libs/tcmalloc/tcmalloc/libc_override_gcc_and_weak.h +++ /dev/null @@ -1,114 +0,0 @@ -// Copyright 2019 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Used to override malloc routines on systems that define the -// memory allocation routines to be weak symbols in their libc -// (almost all unix-based systems are like this), on gcc, which -// suppports the 'alias' attribute. - -#ifndef TCMALLOC_LIBC_OVERRIDE_GCC_AND_WEAK_INL_H_ -#define TCMALLOC_LIBC_OVERRIDE_GCC_AND_WEAK_INL_H_ - -#include - -#include - -#include "tcmalloc/tcmalloc.h" - -#ifndef __GNUC__ -#error libc_override_gcc_and_weak.h is for gcc distributions only. -#endif - -// visibility("default") ensures that these symbols are always exported, even -// with -fvisibility=hidden. 
-#define TCMALLOC_ALIAS(tc_fn) \ - __attribute__((alias(#tc_fn), visibility("default"))) - -void* operator new(size_t size) noexcept(false) - TCMALLOC_ALIAS(TCMallocInternalNew); -void operator delete(void* p) noexcept TCMALLOC_ALIAS(TCMallocInternalDelete); -void operator delete(void* p, size_t size) noexcept - TCMALLOC_ALIAS(TCMallocInternalDeleteSized); -void* operator new[](size_t size) noexcept(false) - TCMALLOC_ALIAS(TCMallocInternalNewArray); -void operator delete[](void* p) noexcept - TCMALLOC_ALIAS(TCMallocInternalDeleteArray); -void operator delete[](void* p, size_t size) noexcept - TCMALLOC_ALIAS(TCMallocInternalDeleteArraySized); -void* operator new(size_t size, const std::nothrow_t& nt) noexcept - TCMALLOC_ALIAS(TCMallocInternalNewNothrow); -void* operator new[](size_t size, const std::nothrow_t& nt) noexcept - TCMALLOC_ALIAS(TCMallocInternalNewArrayNothrow); -void operator delete(void* p, const std::nothrow_t& nt) noexcept - TCMALLOC_ALIAS(TCMallocInternalDeleteNothrow); -void operator delete[](void* p, const std::nothrow_t& nt) noexcept - TCMALLOC_ALIAS(TCMallocInternalDeleteArrayNothrow); - -void* operator new(size_t size, std::align_val_t alignment) noexcept(false) - TCMALLOC_ALIAS(TCMallocInternalNewAligned); -void* operator new(size_t size, std::align_val_t alignment, - const std::nothrow_t&) noexcept - TCMALLOC_ALIAS(TCMallocInternalNewAligned_nothrow); -void operator delete(void* p, std::align_val_t alignment) noexcept - TCMALLOC_ALIAS(TCMallocInternalDeleteAligned); -void operator delete(void* p, std::align_val_t alignment, - const std::nothrow_t&) noexcept - TCMALLOC_ALIAS(TCMallocInternalDeleteAligned_nothrow); -void operator delete(void* p, size_t size, std::align_val_t alignment) noexcept - TCMALLOC_ALIAS(TCMallocInternalDeleteSizedAligned); -void* operator new[](size_t size, std::align_val_t alignment) noexcept(false) - TCMALLOC_ALIAS(TCMallocInternalNewArrayAligned); -void* operator new[](size_t size, std::align_val_t alignment, - const std::nothrow_t&) noexcept - TCMALLOC_ALIAS(TCMallocInternalNewArrayAligned_nothrow); -void operator delete[](void* p, std::align_val_t alignment) noexcept - TCMALLOC_ALIAS(TCMallocInternalDeleteArrayAligned); -void operator delete[](void* p, std::align_val_t alignment, - const std::nothrow_t&) noexcept - TCMALLOC_ALIAS(TCMallocInternalDeleteArrayAligned_nothrow); -void operator delete[](void* p, size_t size, - std::align_val_t alignemnt) noexcept - TCMALLOC_ALIAS(TCMallocInternalDeleteArraySizedAligned); - -extern "C" { -void* malloc(size_t size) noexcept TCMALLOC_ALIAS(TCMallocInternalMalloc); -void free(void* ptr) noexcept TCMALLOC_ALIAS(TCMallocInternalFree); -void sdallocx(void* ptr, size_t size, int flags) noexcept - TCMALLOC_ALIAS(TCMallocInternalSdallocx); -void* realloc(void* ptr, size_t size) noexcept - TCMALLOC_ALIAS(TCMallocInternalRealloc); -void* calloc(size_t n, size_t size) noexcept - TCMALLOC_ALIAS(TCMallocInternalCalloc); -void cfree(void* ptr) noexcept TCMALLOC_ALIAS(TCMallocInternalCfree); -void* memalign(size_t align, size_t s) noexcept - TCMALLOC_ALIAS(TCMallocInternalMemalign); -void* aligned_alloc(size_t align, size_t s) noexcept - TCMALLOC_ALIAS(TCMallocInternalAlignedAlloc); -void* valloc(size_t size) noexcept TCMALLOC_ALIAS(TCMallocInternalValloc); -void* pvalloc(size_t size) noexcept TCMALLOC_ALIAS(TCMallocInternalPvalloc); -int posix_memalign(void** r, size_t a, size_t s) noexcept - TCMALLOC_ALIAS(TCMallocInternalPosixMemalign); -void malloc_stats(void) noexcept 
TCMALLOC_ALIAS(TCMallocInternalMallocStats); -int mallopt(int cmd, int value) noexcept - TCMALLOC_ALIAS(TCMallocInternalMallOpt); -#ifdef TCMALLOC_HAVE_STRUCT_MALLINFO -struct mallinfo mallinfo(void) noexcept - TCMALLOC_ALIAS(TCMallocInternalMallocInfo); -#endif -size_t malloc_size(void* p) noexcept TCMALLOC_ALIAS(TCMallocInternalMallocSize); -size_t malloc_usable_size(void* p) noexcept - TCMALLOC_ALIAS(TCMallocInternalMallocSize); -} // extern "C" - -#endif // TCMALLOC_LIBC_OVERRIDE_GCC_AND_WEAK_INL_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/libc_override_glibc.h b/contrib/libs/tcmalloc/tcmalloc/libc_override_glibc.h deleted file mode 100644 index 8e23b6eb78a7..000000000000 --- a/contrib/libs/tcmalloc/tcmalloc/libc_override_glibc.h +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright 2019 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Used to override malloc routines on systems that are using glibc. - -#ifndef TCMALLOC_LIBC_OVERRIDE_GLIBC_INL_H_ -#define TCMALLOC_LIBC_OVERRIDE_GLIBC_INL_H_ - -#include -#include - -#include "tcmalloc/tcmalloc.h" - -#ifndef __GLIBC__ -#error libc_override_glibc.h is for glibc distributions only. -#endif - -// In glibc, the memory-allocation methods are weak symbols, so we can -// just override them with our own. If we're using gcc, we can use -// __attribute__((alias)) to do the overriding easily (exception: -// Mach-O, which doesn't support aliases). Otherwise we have to use a -// function call. -#if !defined(__GNUC__) || defined(__MACH__) - -#include "libc_override_redefine.h" - -#else // #if !defined(__GNUC__) || defined(__MACH__) - -// If we get here, we're a gcc system, so do all the overriding we do -// with gcc. This does the overriding of all the 'normal' memory -// allocation. -#include "libc_override_gcc_and_weak.h" - -// We also have to do some glibc-specific overriding. Some library -// routines on RedHat 9 allocate memory using malloc() and free it -// using __libc_free() (or vice-versa). Since we provide our own -// implementations of malloc/free, we need to make sure that the -// __libc_XXX variants (defined as part of glibc) also point to the -// same implementations. Since it only matters for redhat, we -// do it inside the gcc #ifdef, since redhat uses gcc. -// TODO(b/134690953): only do this if we detect we're an old enough glibc? 
- -extern "C" { -void* __libc_malloc(size_t size) noexcept - TCMALLOC_ALIAS(TCMallocInternalMalloc); -void __libc_free(void* ptr) noexcept TCMALLOC_ALIAS(TCMallocInternalFree); -void* __libc_realloc(void* ptr, size_t size) noexcept - TCMALLOC_ALIAS(TCMallocInternalRealloc); -void* __libc_calloc(size_t n, size_t size) noexcept - TCMALLOC_ALIAS(TCMallocInternalCalloc); -void __libc_cfree(void* ptr) noexcept TCMALLOC_ALIAS(TCMallocInternalCfree); -void* __libc_memalign(size_t align, size_t s) noexcept - TCMALLOC_ALIAS(TCMallocInternalMemalign); -void* __libc_valloc(size_t size) noexcept - TCMALLOC_ALIAS(TCMallocInternalValloc); -void* __libc_pvalloc(size_t size) noexcept - TCMALLOC_ALIAS(TCMallocInternalPvalloc); -int __posix_memalign(void** r, size_t a, size_t s) noexcept - TCMALLOC_ALIAS(TCMallocInternalPosixMemalign); -} // extern "C" - -#endif // #if defined(__GNUC__) && !defined(__MACH__) - -// We also have to hook libc malloc. While our work with weak symbols -// should make sure libc malloc is never called in most situations, it -// can be worked around by shared libraries with the DEEPBIND -// environment variable set. The below hooks libc to call our malloc -// routines even in that situation. In other situations, this hook -// should never be called. -extern "C" { -static void* glibc_override_malloc(size_t size, const void* caller) { - return TCMallocInternalMalloc(size); -} -static void* glibc_override_realloc(void* ptr, size_t size, - const void* caller) { - return TCMallocInternalRealloc(ptr, size); -} -static void glibc_override_free(void* ptr, const void* caller) { - TCMallocInternalFree(ptr); -} -static void* glibc_override_memalign(size_t align, size_t size, - const void* caller) { - return TCMallocInternalMemalign(align, size); -} - -// We should be using __malloc_initialize_hook here. (See -// http://swoolley.org/man.cgi/3/malloc_hook.) However, this causes weird -// linker errors with programs that link with -static, so instead we just assign -// the vars directly at static-constructor time. That should serve the same -// effect of making sure the hooks are set before the first malloc call the -// program makes. - -// Glibc-2.14 and above make __malloc_hook and friends volatile -#ifndef __MALLOC_HOOK_VOLATILE -#define __MALLOC_HOOK_VOLATILE /**/ -#endif - -void* (*__MALLOC_HOOK_VOLATILE __malloc_hook)(size_t, const void*) = - &glibc_override_malloc; -void* (*__MALLOC_HOOK_VOLATILE __realloc_hook)(void*, size_t, const void*) = - &glibc_override_realloc; -void (*__MALLOC_HOOK_VOLATILE __free_hook)(void*, - const void*) = &glibc_override_free; -void* (*__MALLOC_HOOK_VOLATILE __memalign_hook)(size_t, size_t, const void*) = - &glibc_override_memalign; - -} // extern "C" - -#endif // TCMALLOC_LIBC_OVERRIDE_GLIBC_INL_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/libc_override_redefine.h b/contrib/libs/tcmalloc/tcmalloc/libc_override_redefine.h deleted file mode 100644 index b1655461c39d..000000000000 --- a/contrib/libs/tcmalloc/tcmalloc/libc_override_redefine.h +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright 2019 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Used on systems that don't have their own definition of -// malloc/new/etc. (Typically this will be a windows msvcrt.dll that -// has been edited to remove the definitions.) We can just define our -// own as normal functions. -// -// This should also work on systems were all the malloc routines are -// defined as weak symbols, and there's no support for aliasing. - -#ifndef TCMALLOC_LIBC_OVERRIDE_REDEFINE_H_ -#define TCMALLOC_LIBC_OVERRIDE_REDEFINE_H_ - -#include -#include - -#include "tcmalloc/tcmalloc.h" - -void* operator new(size_t size) { return TCMallocInternalNew(size); } -void operator delete(void* p) noexcept { TCMallocInternalDelete(p); } -void* operator new[](size_t size) { return TCMallocInternalNewArray(size); } -void operator delete[](void* p) noexcept { TCMallocInternalDeleteArray(p); } -void* operator new(size_t size, const std::nothrow_t& nt) noexcept { - return TCMallocInternalNewNothrow(size, nt); -} -void* operator new[](size_t size, const std::nothrow_t& nt) noexcept { - return TCMallocInternalNewArrayNothrow(size, nt); -} -void operator delete(void* ptr, const std::nothrow_t& nt) noexcept { - return TCMallocInternalDeleteNothrow(ptr, nt); -} -void operator delete[](void* ptr, const std::nothrow_t& nt) noexcept { - return TCMallocInternalDeleteArrayNothrow(ptr, nt); -} - -extern "C" { -void* malloc(size_t s) { return TCMallocInternalMalloc(s); } -void* calloc(size_t n, size_t s) { return TCMallocInternalCalloc(n, s); } -void* realloc(void* p, size_t s) { return TCMallocInternalRealloc(p, s); } -void free(void* p) { TCMallocInternalFree(p); } -void* memalign(size_t a, size_t s) { return TCMallocInternalMemalign(a, s); } -int posix_memalign(void** r, size_t a, size_t s) { - return TCMallocInternalPosixMemalign(r, a, s); -} -size_t malloc_usable_size(void* p) { return TCMallocInternalMallocSize(p); } - -// tcmalloc extension -void sdallocx(void* p, size_t s, int flags) noexcept { - TCMallocInternalSdallocx(p, s, flags); -} - -#if defined(__GLIBC__) || defined(__NEWLIB__) -// SunOS extension -void cfree(void* p) { TCMallocInternalCfree(p); } -#endif - -#if defined(OS_MACOSX) || defined(__BIONIC__) || defined(__GLIBC__) || \ - defined(__NEWLIB__) || defined(__UCLIBC__) -// Obsolete memalign -void* valloc(size_t s) { return TCMallocInternalValloc(s); } -#endif - -#if defined(__BIONIC__) || defined(__GLIBC__) || defined(__NEWLIB__) -// Obsolete memalign -void* pvalloc(size_t s) { return TCMallocInternalPvalloc(s); } -#endif - -#if defined(__GLIBC__) || defined(__NEWLIB__) || defined(__UCLIBC__) -void malloc_stats(void) { TCMallocInternalMallocStats(); } -#endif - -#if defined(__BIONIC__) || defined(__GLIBC__) || defined(__NEWLIB__) || \ - defined(__UCLIBC__) -int mallopt(int cmd, int v) { return TCMallocInternalMallOpt(cmd, v); } -#endif - -#ifdef TCMALLOC_HAVE_STRUCT_MALLINFO -struct mallinfo mallinfo(void) { - return TCMallocInternalMallocInfo(); -} -#endif - -#if defined(__GLIBC__) -size_t malloc_size(void* p) { return TCMallocInternalMallocSize(p); } -#endif -} // extern "C" - -#endif // TCMALLOC_LIBC_OVERRIDE_REDEFINE_H_ diff --git 
a/contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc b/contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc index 5a4ad7004385..dc4aeb5f3b5c 100644 --- a/contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc +++ b/contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc @@ -18,18 +18,108 @@ #include #include +#include #include +#include +#include #include #include +#include #include +#include #include "absl/base/attributes.h" +#include "absl/base/call_once.h" #include "absl/base/internal/low_level_alloc.h" -#include "absl/memory/memory.h" +#include "absl/functional/function_ref.h" +#include "absl/strings/string_view.h" #include "absl/time/time.h" -#include "tcmalloc/internal/parameter_accessors.h" +#include "absl/types/optional.h" +#include "absl/types/span.h" #include "tcmalloc/internal_malloc_extension.h" +#if (defined(ABSL_HAVE_ADDRESS_SANITIZER) || \ + defined(ABSL_HAVE_MEMORY_SANITIZER) || \ + defined(ABSL_HAVE_THREAD_SANITIZER) || \ + defined(ABSL_HAVE_HWADDRESS_SANITIZER) || \ + defined(ABSL_HAVE_DATAFLOW_SANITIZER) || \ + defined(ABSL_HAVE_LEAK_SANITIZER)) && \ + !defined(TCMALLOC_INTERNAL_SELSAN) +#define TCMALLOC_UNDER_SANITIZERS 1 +static constexpr size_t kTerabyte = (size_t)(1ULL << 40); + +#include + +#if defined(ABSL_HAVE_ADDRESS_SANITIZER) +static size_t SanitizerVirtualMemoryOverhead() { return 20 * kTerabyte; } + +static size_t SanitizerMemoryUsageMultiplier() { return 2; } + +static size_t SanitizerStackSizeMultiplier() { return 4; } +#endif + +#if defined(ABSL_HAVE_THREAD_SANITIZER) +static size_t SanitizerVirtualMemoryOverhead() { return 98 * kTerabyte; } + +static size_t SanitizerMemoryUsageMultiplier() { return 5; } + +static size_t SanitizerStackSizeMultiplier() { return 5; } +#endif + +#if defined(ABSL_HAVE_MEMORY_SANITIZER) +#include + +static size_t SanitizerVirtualMemoryOverhead() { + return (__msan_get_track_origins() ? 40 : 20) * kTerabyte; +} + +static size_t SanitizerMemoryUsageMultiplier() { + return __msan_get_track_origins() ? 3 : 2; +} + +static size_t SanitizerStackSizeMultiplier() { + // Very rough estimate based on analysing "sub $.*, %rsp" instructions. + return 2; +} +#endif + +#if defined(ABSL_HAVE_HWADDRESS_SANITIZER) +static size_t SanitizerVirtualMemoryOverhead() { return 20 * kTerabyte; } + +static size_t SanitizerMemoryUsageMultiplier() { return 1; } + +static size_t SanitizerStackSizeMultiplier() { return 1; } +#endif + +#if defined(ABSL_HAVE_DATAFLOW_SANITIZER) +#include + +static size_t SanitizerVirtualMemoryOverhead() { return 40 * kTerabyte; } + +static size_t SanitizerMemoryUsageMultiplier() { + return dfsan_get_track_origins() ? 3 : 2; +} + +static size_t SanitizerStackSizeMultiplier() { + // Very rough estimate based on analysing "sub $.*, %rsp" instructions. + return dfsan_get_track_origins() ? 
3 : 2; +} +#endif + +#if defined(ABSL_HAVE_LEAK_SANITIZER) && \ + !defined(ABSL_HAVE_ADDRESS_SANITIZER) && \ + !defined(ABSL_HAVE_HWADDRESS_SANITIZER) +static size_t SanitizerVirtualMemoryOverhead() { return 0; } + +static size_t SanitizerMemoryUsageMultiplier() { return 1; } + +static size_t SanitizerStackSizeMultiplier() { return 1; } +#endif + +#else +#define TCMALLOC_UNDER_SANITIZERS 0 +#endif + namespace tcmalloc { MallocExtension::AllocationProfilingToken::AllocationProfilingToken( @@ -60,25 +150,31 @@ void Profile::Iterate(absl::FunctionRef f) const { impl_->Iterate(f); } -int64_t Profile::Period() const { +ProfileType Profile::Type() const { if (!impl_) { - return -1; + return ProfileType::kDoNotUse; } - return impl_->Period(); + return impl_->Type(); } -ProfileType Profile::Type() const { +std::optional Profile::StartTime() const { if (!impl_) { - return ProfileType::kDoNotUse; + return std::nullopt; } - return impl_->Type(); + return impl_->StartTime(); } -AddressRegion::~AddressRegion() {} +absl::Duration Profile::Duration() const { + if (!impl_) { + return absl::ZeroDuration(); + } + + return impl_->Duration(); +} -AddressRegionFactory::~AddressRegionFactory() {} +AddressRegion::~AddressRegion() {} size_t AddressRegionFactory::GetStats(absl::Span buffer) { static_cast(buffer); @@ -99,8 +195,12 @@ size_t AddressRegionFactory::InternalBytesAllocated() { void* AddressRegionFactory::MallocInternal(size_t size) { // Use arena without malloc hooks to avoid HeapChecker reporting a leak. - static auto* arena = - absl::base_internal::LowLevelAlloc::NewArena(/*flags=*/0); + ABSL_CONST_INIT static absl::base_internal::LowLevelAlloc::Arena* arena; + ABSL_CONST_INIT static absl::once_flag flag; + + absl::base_internal::LowLevelCallOnce(&flag, [&]() { + arena = absl::base_internal::LowLevelAlloc::NewArena(/*flags=*/0); + }); void* result = absl::base_internal::LowLevelAlloc::AllocWithArena(size, arena); if (result) { @@ -117,13 +217,17 @@ void* AddressRegionFactory::MallocInternal(size_t size) { #endif std::string MallocExtension::GetStats() { - std::string ret; #if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS if (&MallocExtension_Internal_GetStats != nullptr) { + std::string ret; MallocExtension_Internal_GetStats(&ret); + return ret; } #endif - return ret; +#if defined(ABSL_HAVE_THREAD_SANITIZER) + return "NOT IMPLEMENTED"; +#endif + return ""; } void MallocExtension::ReleaseMemoryToSystem(size_t num_bytes) { @@ -186,6 +290,21 @@ MallocExtension::StartAllocationProfiling() { #endif } +MallocExtension::AllocationProfilingToken +MallocExtension::StartLifetimeProfiling() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_Internal_StartLifetimeProfiling == nullptr) { + return {}; + } + + return tcmalloc_internal::AllocationProfilingTokenAccessor::MakeToken( + std::unique_ptr( + MallocExtension_Internal_StartLifetimeProfiling())); +#else + return {}; +#endif +} + void MallocExtension::MarkThreadIdle() { #if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS if (&MallocExtension_Internal_MarkThreadIdle == nullptr) { @@ -194,6 +313,9 @@ void MallocExtension::MarkThreadIdle() { MallocExtension_Internal_MarkThreadIdle(); #endif + // TODO(b/273799005) - move __tsan_on_thread_idle call here from + // testing/tsan/v2/allocator.cconce we have it available in shared tsan + // libraries. 
} void MallocExtension::MarkThreadBusy() { @@ -206,64 +328,65 @@ void MallocExtension::MarkThreadBusy() { #endif } -MallocExtension::MemoryLimit MallocExtension::GetMemoryLimit() { - MemoryLimit ret; +size_t MallocExtension::GetMemoryLimit(LimitKind limit_kind) { #if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS if (&MallocExtension_Internal_GetMemoryLimit != nullptr) { - MallocExtension_Internal_GetMemoryLimit(&ret); + return MallocExtension_Internal_GetMemoryLimit(limit_kind); } #endif - return ret; + return 0; } -void MallocExtension::SetMemoryLimit( - const MallocExtension::MemoryLimit& limit) { +void MallocExtension::SetMemoryLimit(const size_t limit, LimitKind limit_kind) { #if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS if (&MallocExtension_Internal_SetMemoryLimit != nullptr) { - MallocExtension_Internal_SetMemoryLimit(&limit); + // limit == 0 implies no limit. + const size_t new_limit = + (limit > 0) ? limit : std::numeric_limits::max(); + MallocExtension_Internal_SetMemoryLimit(new_limit, limit_kind); } #endif } -int64_t MallocExtension::GetProfileSamplingRate() { +int64_t MallocExtension::GetProfileSamplingInterval() { #if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS - if (&MallocExtension_Internal_GetProfileSamplingRate != nullptr) { - return MallocExtension_Internal_GetProfileSamplingRate(); + if (&MallocExtension_Internal_GetProfileSamplingInterval != nullptr) { + return MallocExtension_Internal_GetProfileSamplingInterval(); } #endif return -1; } -void MallocExtension::SetProfileSamplingRate(int64_t rate) { +void MallocExtension::SetProfileSamplingInterval(int64_t interval) { #if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS - if (&MallocExtension_Internal_SetProfileSamplingRate != nullptr) { - MallocExtension_Internal_SetProfileSamplingRate(rate); + if (&MallocExtension_Internal_SetProfileSamplingInterval != nullptr) { + MallocExtension_Internal_SetProfileSamplingInterval(interval); } #endif - (void)rate; + (void)interval; } -int64_t MallocExtension::GetGuardedSamplingRate() { +int64_t MallocExtension::GetGuardedSamplingInterval() { #if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS - if (MallocExtension_Internal_GetGuardedSamplingRate == nullptr) { + if (MallocExtension_Internal_GetGuardedSamplingInterval == nullptr) { return -1; } - return MallocExtension_Internal_GetGuardedSamplingRate(); + return MallocExtension_Internal_GetGuardedSamplingInterval(); #else return -1; #endif } -void MallocExtension::SetGuardedSamplingRate(int64_t rate) { +void MallocExtension::SetGuardedSamplingInterval(int64_t interval) { #if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS - if (MallocExtension_Internal_SetGuardedSamplingRate == nullptr) { + if (MallocExtension_Internal_SetGuardedSamplingInterval == nullptr) { return; } - MallocExtension_Internal_SetGuardedSamplingRate(rate); + MallocExtension_Internal_SetGuardedSamplingInterval(interval); #else - (void)rate; + (void)interval; #endif } @@ -287,16 +410,6 @@ bool MallocExtension::PerCpuCachesActive() { #endif } -void MallocExtension::DeactivatePerCpuCaches() { -#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS - if (MallocExtension_Internal_DeactivatePerCpuCaches == nullptr) { - return; - } - - MallocExtension_Internal_DeactivatePerCpuCaches(); -#endif -} - int32_t MallocExtension::GetMaxPerCpuCacheSize() { #if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS if (MallocExtension_Internal_GetMaxPerCpuCacheSize == nullptr) { @@ -371,7 +484,161 @@ void MallocExtension::SetSkipSubreleaseInterval(absl::Duration value) { #endif } -absl::optional 
MallocExtension::GetNumericProperty( +bool MallocExtension::GetBackgroundProcessActionsEnabled() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_GetBackgroundProcessActionsEnabled == nullptr) { + return false; + } + + return MallocExtension_Internal_GetBackgroundProcessActionsEnabled(); +#else + return false; +#endif +} + +void MallocExtension::SetBackgroundProcessActionsEnabled(bool value) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_SetBackgroundProcessActionsEnabled == nullptr) { + return; + } + + MallocExtension_Internal_SetBackgroundProcessActionsEnabled(value); +#else + (void)value; +#endif +} + +absl::Duration MallocExtension::GetBackgroundProcessSleepInterval() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_GetBackgroundProcessSleepInterval == nullptr) { + return absl::ZeroDuration(); + } + + absl::Duration value; + MallocExtension_Internal_GetBackgroundProcessSleepInterval(&value); + return value; +#else + return absl::ZeroDuration(); +#endif +} + +void MallocExtension::SetBackgroundProcessSleepInterval(absl::Duration value) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_SetBackgroundProcessSleepInterval == nullptr) { + return; + } + + MallocExtension_Internal_SetBackgroundProcessSleepInterval(value); +#else + (void)value; +#endif +} + +absl::Duration MallocExtension::GetSkipSubreleaseShortInterval() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_GetSkipSubreleaseShortInterval == nullptr) { + return absl::ZeroDuration(); + } + + absl::Duration value; + MallocExtension_Internal_GetSkipSubreleaseShortInterval(&value); + return value; +#else + return absl::ZeroDuration(); +#endif +} + +void MallocExtension::SetSkipSubreleaseShortInterval(absl::Duration value) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_SetSkipSubreleaseShortInterval == nullptr) { + return; + } + + MallocExtension_Internal_SetSkipSubreleaseShortInterval(value); +#else + (void)value; +#endif +} + +absl::Duration MallocExtension::GetSkipSubreleaseLongInterval() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_GetSkipSubreleaseLongInterval == nullptr) { + return absl::ZeroDuration(); + } + + absl::Duration value; + MallocExtension_Internal_GetSkipSubreleaseLongInterval(&value); + return value; +#else + return absl::ZeroDuration(); +#endif +} + +void MallocExtension::SetSkipSubreleaseLongInterval(absl::Duration value) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_SetSkipSubreleaseLongInterval == nullptr) { + return; + } + + MallocExtension_Internal_SetSkipSubreleaseLongInterval(value); +#else + (void)value; +#endif +} + +absl::Duration MallocExtension::GetCacheDemandReleaseShortInterval() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_GetCacheDemandReleaseShortInterval == nullptr) { + return absl::ZeroDuration(); + } + + absl::Duration value; + MallocExtension_Internal_GetCacheDemandReleaseShortInterval(&value); + return value; +#else + return absl::ZeroDuration(); +#endif +} + +void MallocExtension::SetCacheDemandReleaseShortInterval(absl::Duration value) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_SetCacheDemandReleaseShortInterval == nullptr) { + return; + } + + MallocExtension_Internal_SetCacheDemandReleaseShortInterval(value); +#else + (void)value; 
+#endif +} + +absl::Duration MallocExtension::GetCacheDemandReleaseLongInterval() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_GetCacheDemandReleaseLongInterval == nullptr) { + return absl::ZeroDuration(); + } + + absl::Duration value; + MallocExtension_Internal_GetCacheDemandReleaseLongInterval(&value); + return value; +#else + return absl::ZeroDuration(); +#endif +} + +void MallocExtension::SetCacheDemandReleaseLongInterval(absl::Duration value) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_SetCacheDemandReleaseLongInterval == nullptr) { + return; + } + + MallocExtension_Internal_SetCacheDemandReleaseLongInterval(value); +#else + (void)value; +#endif +} + +std::optional MallocExtension::GetNumericProperty( absl::string_view property) { #if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS if (&MallocExtension_Internal_GetNumericProperty != nullptr) { @@ -382,20 +649,59 @@ absl::optional MallocExtension::GetNumericProperty( } } #endif - return absl::nullopt; +#if TCMALLOC_UNDER_SANITIZERS + // TODO(b/273946827): Add local tcmalloc tests for the various sanitizer + // configs as opposed to depending on + // //testing/sanitizer_common:malloc_extension_test + // LINT.IfChange(SanitizerGetProperty) + if (property == "dynamic_tool.virtual_memory_overhead") { + return SanitizerVirtualMemoryOverhead(); + } + if (property == "dynamic_tool.memory_usage_multiplier") { + return SanitizerMemoryUsageMultiplier(); + } + if (property == "dynamic_tool.stack_size_multiplier") { + return SanitizerStackSizeMultiplier(); + } + if (property == "generic.current_allocated_bytes") { + return __sanitizer_get_current_allocated_bytes(); + } + if (property == "generic.heap_size") { + return __sanitizer_get_heap_size(); + } + if (property == "tcmalloc.per_cpu_caches_active") { + // Queried by ReleasePerCpuMemoryToOS(). + return 0; + } + if (property == "tcmalloc.pageheap_free_bytes") { + return __sanitizer_get_free_bytes(); + } + if (property == "tcmalloc.pageheap_unmapped_bytes") { + return __sanitizer_get_unmapped_bytes(); + } + if (property == "tcmalloc.slack_bytes") { + // Kept for backwards compatibility. + return __sanitizer_get_free_bytes() + __sanitizer_get_unmapped_bytes(); + } + // LINT.ThenChange(:SanitizerGetProperties) +#endif // TCMALLOC_UNDER_SANITIZERS + return std::nullopt; } size_t MallocExtension::GetEstimatedAllocatedSize(size_t size) { return nallocx(size, 0); } -absl::optional MallocExtension::GetAllocatedSize(const void* p) { +std::optional MallocExtension::GetAllocatedSize(const void* p) { #if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS if (MallocExtension_Internal_GetAllocatedSize != nullptr) { return MallocExtension_Internal_GetAllocatedSize(p); } #endif - return absl::nullopt; +#if TCMALLOC_UNDER_SANITIZERS + return __sanitizer_get_allocated_size(p); +#endif + return std::nullopt; } MallocExtension::Ownership MallocExtension::GetOwnership(const void* p) { @@ -403,6 +709,11 @@ MallocExtension::Ownership MallocExtension::GetOwnership(const void* p) { if (MallocExtension_Internal_GetOwnership != nullptr) { return MallocExtension_Internal_GetOwnership(p); } +#endif +#if TCMALLOC_UNDER_SANITIZERS + return __sanitizer_get_ownership(p) + ? 
tcmalloc::MallocExtension::Ownership::kOwned + : tcmalloc::MallocExtension::Ownership::kNotOwned; #endif return MallocExtension::Ownership::kUnknown; } @@ -410,10 +721,35 @@ MallocExtension::Ownership MallocExtension::GetOwnership(const void* p) { std::map MallocExtension::GetProperties() { std::map ret; +#if TCMALLOC_UNDER_SANITIZERS + // Unlike other extension points this one fills in sanitizer data before the + // weak function is called so that the weak function can override as needed. + // LINT.IfChange(SanitizerGetProperties) + const std::array properties = {"dynamic_tool.virtual_memory_overhead", + "dynamic_tool.memory_usage_multiplier", + "dynamic_tool.stack_size_multiplier", + "generic.current_allocated_bytes", + "generic.heap_size", + "tcmalloc.per_cpu_caches_active", + "tcmalloc.pageheap_free_bytes", + "tcmalloc.pageheap_unmapped_bytes", + "tcmalloc.slack_bytes"}; + // LINT.ThenChange(:SanitizerGetProperty) + + for (const auto& p : properties) { + const auto& value = GetNumericProperty(p); + if (value) { + ret[p].value = *value; + } + } +#endif // TCMALLOC_UNDER_SANITIZERS #if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS if (&MallocExtension_Internal_GetProperties != nullptr) { MallocExtension_Internal_GetProperties(&ret); } + if (&MallocExtension_Internal_GetExperiments != nullptr) { + MallocExtension_Internal_GetExperiments(&ret); + } #endif return ret; } @@ -504,6 +840,9 @@ void MallocExtension::SetSampleUserDataCallbacks( // this weak function with a better implementation. ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE size_t nallocx(size_t size, int) noexcept { +#if TCMALLOC_UNDER_SANITIZERS + return __sanitizer_get_estimated_allocated_size(size); +#endif return size; } @@ -515,8 +854,29 @@ ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE void sdallocx(void* ptr, size_t, free(ptr); } +ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE void free_sized(void* ptr, size_t) { + free(ptr); +} + +ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE void free_aligned_sized(void* ptr, + size_t, + size_t) { + free(ptr); +} + +ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE tcmalloc::sized_ptr_t +__size_returning_new(size_t size) { + return {::operator new(size), size}; +} + +// TODO(b/283856455): Remove once experiment is done. +ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE tcmalloc::sized_ptr_t +__size_returning_new_experiment(size_t size) { + return {::operator new(size), size}; +} + ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE tcmalloc::sized_ptr_t -tcmalloc_size_returning_operator_new(size_t size) { +__size_returning_new_hot_cold(size_t size, __hot_cold_t) { return {::operator new(size), size}; } @@ -526,11 +886,24 @@ tcmalloc_size_returning_operator_new_nothrow(size_t size) noexcept { return {p, p ? size : 0}; } +ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE tcmalloc::sized_ptr_t +tcmalloc_size_returning_operator_new_hot_cold_nothrow(size_t size, + __hot_cold_t) noexcept { + void* p = ::operator new(size, std::nothrow); + return {p, p ? size : 0}; +} + #if defined(_LIBCPP_VERSION) && defined(__cpp_aligned_new) ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE tcmalloc::sized_ptr_t -tcmalloc_size_returning_operator_new_aligned(size_t size, - std::align_val_t alignment) { +__size_returning_new_aligned(size_t size, std::align_val_t alignment) { + return {::operator new(size, alignment), size}; +} + +// TODO(b/283856455): Remove once experiment is done. 
+ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE tcmalloc::sized_ptr_t +__size_returning_new_aligned_experiment(size_t size, + std::align_val_t alignment) { return {::operator new(size, alignment), size}; } @@ -541,4 +914,63 @@ tcmalloc_size_returning_operator_new_aligned_nothrow( return {p, p ? size : 0}; } +ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE tcmalloc::sized_ptr_t +__size_returning_new_aligned_hot_cold(size_t size, std::align_val_t alignment, + __hot_cold_t) { + return {::operator new(size, alignment), size}; +} + +ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE tcmalloc::sized_ptr_t +tcmalloc_size_returning_operator_new_aligned_hot_cold_nothrow( + size_t size, std::align_val_t alignment, __hot_cold_t) noexcept { + void* p = ::operator new(size, alignment, std::nothrow); + return {p, p ? size : 0}; +} + #endif // _LIBCPP_VERSION && __cpp_aligned_new + +ABSL_ATTRIBUTE_WEAK void* operator new(size_t size, + tcmalloc::hot_cold_t) noexcept(false) { + return ::operator new(size); +} + +ABSL_ATTRIBUTE_WEAK void* operator new(size_t size, const std::nothrow_t&, + tcmalloc::hot_cold_t) noexcept { + return ::operator new(size, std::nothrow); +} + +ABSL_ATTRIBUTE_WEAK void* operator new[](size_t size, + tcmalloc::hot_cold_t) noexcept(false) { + return ::operator new[](size); +} + +ABSL_ATTRIBUTE_WEAK void* operator new[](size_t size, const std::nothrow_t&, + tcmalloc::hot_cold_t) noexcept { + return ::operator new[](size, std::nothrow); +} + +#ifdef __cpp_aligned_new +ABSL_ATTRIBUTE_WEAK void* operator new(size_t size, std::align_val_t alignment, + tcmalloc::hot_cold_t) noexcept(false) { + return ::operator new(size, alignment); +} + +ABSL_ATTRIBUTE_WEAK void* operator new(size_t size, std::align_val_t alignment, + const std::nothrow_t&, + tcmalloc::hot_cold_t) noexcept { + return ::operator new(size, alignment, std::nothrow); +} + +ABSL_ATTRIBUTE_WEAK void* operator new[](size_t size, + std::align_val_t alignment, + tcmalloc::hot_cold_t) noexcept(false) { + return ::operator new[](size, alignment); +} + +ABSL_ATTRIBUTE_WEAK void* operator new[](size_t size, + std::align_val_t alignment, + const std::nothrow_t&, + tcmalloc::hot_cold_t) noexcept { + return ::operator new[](size, alignment, std::nothrow); +} +#endif // __cpp_aligned_new diff --git a/contrib/libs/tcmalloc/tcmalloc/malloc_extension.h b/contrib/libs/tcmalloc/tcmalloc/malloc_extension.h index 19b68ba8acf2..ea57e3b0460d 100644 --- a/contrib/libs/tcmalloc/tcmalloc/malloc_extension.h +++ b/contrib/libs/tcmalloc/tcmalloc/malloc_extension.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2019 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,32 +18,76 @@ // tuning the internal implementation of TCMalloc. The internal implementation // functions use weak linkage, allowing an application to link against the // extensions without always linking against TCMalloc. +// +// Many of these APIs are also supported when built with sanitizers. 
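The weak definitions above give the hot/cold and size-returning allocation entry points safe fallbacks, so callers can use them unconditionally and simply lose the access hint or the extra capacity information when TCMalloc is not linked in. A brief caller-side sketch; the function name UsageSketch is hypothetical, everything else comes from the declarations in malloc_extension.h:

#include <cstddef>
#include <new>

#include "tcmalloc/malloc_extension.h"

void UsageSketch() {
  // Hint that this buffer will be accessed rarely; the weak fallback above
  // ignores the hint and forwards to the plain operator new.
  void* cold = ::operator new(4096, static_cast<tcmalloc::hot_cold_t>(0));
  ::operator delete(cold);

  // __size_returning_new reports the granted capacity (r.n >= 128); a
  // container could record r.n as its capacity instead of querying nallocx.
  __sized_ptr_t r = __size_returning_new(128);
  ::operator delete(r.p);
}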
#ifndef TCMALLOC_MALLOC_EXTENSION_H_ #define TCMALLOC_MALLOC_EXTENSION_H_ -#include #include #include -#include #include #include #include #include +#include #include #include -#include #include "absl/base/attributes.h" #include "absl/base/macros.h" -#include "absl/base/policy_checks.h" -#include "absl/base/port.h" #include "absl/functional/function_ref.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" #include "absl/time/time.h" #include "absl/types/optional.h" #include "absl/types/span.h" +// Not all versions of Abseil provide this macro. +// TODO(b/323943471): Remove on upgrading to version that provides the macro. +#ifndef ABSL_DEPRECATE_AND_INLINE +#define ABSL_DEPRECATE_AND_INLINE() +#endif + +// Indicates how frequently accessed the allocation is expected to be. +// 0 - The allocation is rarely accessed. +// ... +// 255 - The allocation is accessed very frequently. +enum class __hot_cold_t : uint8_t; + +// TODO(ckennelly): Lifetimes + +namespace tcmalloc { + +// Alias to the newer type in the global namespace, so that existing code works +// as is. +using hot_cold_t = __hot_cold_t; + +constexpr hot_cold_t kDefaultMinHotAccessHint = + static_cast(2); + +} // namespace tcmalloc + +inline bool AbslParseFlag(absl::string_view text, tcmalloc::hot_cold_t* hotness, + std::string* /* error */) { + uint32_t value; + if (!absl::SimpleAtoi(text, &value)) { + return false; + } + // hot_cold_t is a uint8_t, so make sure the flag is within the allowable + // range before casting. + if (value > std::numeric_limits::max()) { + return false; + } + *hotness = static_cast(value); + return true; +} + +inline std::string AbslUnparseFlag(tcmalloc::hot_cold_t hotness) { + return absl::StrCat(hotness); +} + namespace tcmalloc { namespace tcmalloc_internal { class AllocationProfilingTokenAccessor; @@ -66,6 +111,9 @@ enum class ProfileType { // the profile was terminated with Stop(). kAllocations, + // Lifetimes of sampled objects that are live during the profiling session. + kLifetimes, + // Only present to prevent switch statements without a default clause so that // we can extend this enumeration without breaking code. kDoNotUse, @@ -86,23 +134,127 @@ class Profile final { static constexpr int kMaxStackDepth = 64; int64_t sum; - int64_t count; // Total added with this + // The reported count of samples, with possible rounding up for unsample. + // A given sample typically corresponds to some allocated objects, and the + // number of objects is the quotient of weight (number of bytes requested + // between previous and current samples) divided by the requested size. + int64_t count; size_t requested_size; size_t requested_alignment; size_t allocated_size; + // Return whether the allocation was returned with + // tcmalloc_size_returning_operator_new or its variants. + bool requested_size_returning; + + enum class Access : uint8_t { + Hot, + Cold, + + // Only present to prevent switch statements without a default clause so + // that we can extend this enumeration without breaking code. + kDoNotUse, + }; + hot_cold_t access_hint; + Access access_allocated; + + // Whether this sample captures allocations where the deallocation event + // was not observed. Thus the measurements are censored in the statistical + // sense, see https://en.wikipedia.org/wiki/Censoring_(statistics)#Types. + bool is_censored = false; + + // Provide the status of GWP-ASAN guarding for a given sample. 
+ enum class GuardedStatus : int8_t { + // Conditions which represent why a sample was not guarded: + // + // The requested_size of the allocation sample is larger than the + // available pages which are guardable. + LargerThanOnePage = -1, + // By flag, the guarding of samples has been disabled. + Disabled = -2, + // Too many guards have been placed, any further guards will cause + // unexpected load on binary. + RateLimited = -3, + // The requested_size of the allocation sample is too small (= 0) to be + // guarded. + TooSmall = -4, + // Too many samples are already guarded. + NoAvailableSlots = -5, + // Perhaps the only true error, when the mprotect call fails. + MProtectFailed = -6, + // Used in an improved guarding selection algorithm. + Filtered = -7, + // An unexpected state, which represents that branch for selection was + // missed. + Unknown = -100, + // When guarding is not even considered on a sample. + NotAttempted = 0, + // The following values do not represent final states, but rather intent + // based on the applied algorithm for selecting guarded samples: + // + // Request guard: may still not be guarded for other reasons (see + // above) + Requested = 1, + // Unused. + Required = 2, + // The result when a sample is actually guarded by GWP-ASAN. + Guarded = 10, + }; + GuardedStatus guarded_status = GuardedStatus::Unknown; + + // How the memory was allocated (new/malloc/etc.). + enum class AllocationType : uint8_t { + New, + Malloc, + AlignedMalloc, + }; + + AllocationType type; + int depth; void* stack[kMaxStackDepth]; void* user_data; + + // The following vars are used by the lifetime (deallocation) profiler. + uint64_t profile_id; + + // Timestamp of allocation. + absl::Time allocation_time; + + // Aggregated lifetime statistics per callstack. + absl::Duration avg_lifetime; + absl::Duration stddev_lifetime; + absl::Duration min_lifetime; + absl::Duration max_lifetime; + + // For the *_matched vars below we use true = "same", false = "different". + // When the value is unavailable the profile contains "none". For + // right-censored observations, CPU and thread matched values are "none". + std::optional allocator_deallocator_physical_cpu_matched; + std::optional allocator_deallocator_virtual_cpu_matched; + std::optional allocator_deallocator_l3_matched; + std::optional allocator_deallocator_numa_matched; + std::optional allocator_deallocator_thread_matched; + + // The start address of the sampled allocation, used to calculate the + // residency info for the objects represented by this sampled allocation. + void* span_start_address; }; void Iterate(absl::FunctionRef f) const; - int64_t Period() const; ProfileType Type() const; + // Time stamp when the profile collection started. Returns std::nullopt if + // this is not available. + std::optional StartTime() const; + + // The duration the profile was collected for. For instantaneous profiles + // (heap, peakheap, etc.), this returns absl::ZeroDuration(). + absl::Duration Duration() const; + private: explicit Profile(std::unique_ptr); @@ -133,10 +285,17 @@ class AddressRegionFactory { // frequently than normal regions. kInfrequent ABSL_DEPRECATED("Use kInfrequentAllocation") = kInfrequentAllocation, + kMetadata, // Metadata for TCMalloc not returned via new/malloc. + kInfrequentAccess, // TCMalloc places cold allocations in these regions. + // Usage of the below implies numa_aware is enabled. tcmalloc will mbind the + // address region to the hinted socket, but also passes the hint in case + // mbind is not sufficient (e.g. 
when dealing with pre-faulted memory). + kNormalNumaAwareS0, // Normal usage intended for NUMA S0 under numa_aware. + kNormalNumaAwareS1, // Normal usage intended for NUMA S1 under numa_aware. }; - AddressRegionFactory() {} - virtual ~AddressRegionFactory(); + constexpr AddressRegionFactory() = default; + virtual ~AddressRegionFactory() = default; // Returns an AddressRegion with the specified start address and size. hint // indicates how the caller intends to use the returned region (helpful for @@ -229,7 +388,7 @@ class MallocExtension final { // ------------------------------------------------------------------- // Gets the named property's value or a nullopt if the property is not valid. - static absl::optional GetNumericProperty(absl::string_view property); + static std::optional GetNumericProperty(absl::string_view property); // Marks the current thread as "idle". This function may optionally be called // by threads as a hint to the malloc implementation that any thread-specific @@ -287,42 +446,50 @@ class MallocExtension final { // back in. static void ReleaseMemoryToSystem(size_t num_bytes); - struct MemoryLimit { - // Make a best effort attempt to prevent more than limit bytes of memory - // from being allocated by the system. In particular, if satisfying a given - // malloc call would require passing this limit, release as much memory to - // the OS as needed to stay under it if possible. - // - // If hard is set, crash if returning memory is unable to get below the - // limit. - // - // Note: limit=SIZE_T_MAX implies no limit. - size_t limit = std::numeric_limits::max(); - bool hard = false; - - // Explicitly declare the ctor to put it in the google_malloc section. - MemoryLimit() = default; - }; + enum class LimitKind { kSoft, kHard }; - static MemoryLimit GetMemoryLimit(); - static void SetMemoryLimit(const MemoryLimit& limit); + // Make a best effort attempt to prevent more than limit bytes of memory + // from being allocated by the system. In particular, if satisfying a given + // malloc call would require passing this limit, release as much memory to + // the OS as needed to stay under it if possible. + // + // If limit_kind == kHard, crash if returning memory is unable to get below + // the limit. + static size_t GetMemoryLimit(LimitKind limit_kind); + static void SetMemoryLimit(size_t limit, LimitKind limit_kind); - // Gets the sampling rate. Returns a value < 0 if unknown. - static int64_t GetProfileSamplingRate(); - // Sets the sampling rate for heap profiles. TCMalloc samples approximately - // every rate bytes allocated. - static void SetProfileSamplingRate(int64_t rate); + // Gets the sampling interval. Returns a value < 0 if unknown. + static int64_t GetProfileSamplingInterval(); + // Sets the sampling interval for heap profiles. TCMalloc samples + // approximately every interval bytes allocated. + static void SetProfileSamplingInterval(int64_t interval); // Gets the guarded sampling rate. Returns a value < 0 if unknown. - static int64_t GetGuardedSamplingRate(); - // Sets the guarded sampling rate for sampled allocations. TCMalloc samples - // approximately every rate bytes allocated, subject to implementation - // limitations in GWP-ASan. + static int64_t GetGuardedSamplingInterval(); + // Sets the guarded sampling interval for sampled allocations. TCMalloc + // samples approximately every interval bytes allocated, subject to + // implementation limitations in GWP-ASan. 
// - // Guarded samples provide probablistic protections against buffer underflow, + // Guarded samples provide probabilistic protections against buffer underflow, // overflow, and use-after-free when GWP-ASan is active (via calling // ActivateGuardedSampling). - static void SetGuardedSamplingRate(int64_t rate); + static void SetGuardedSamplingInterval(int64_t interval); + + // The old names to get and set profile sampling intervals used "rate" to + // refer to intervals. Use of the below is deprecated to avoid confusion. + static int64_t GetProfileSamplingRate() { + return GetProfileSamplingInterval(); + } + static void SetProfileSamplingRate(int64_t rate) { + SetProfileSamplingInterval(rate); + } + ABSL_DEPRECATE_AND_INLINE() + static int64_t GetGuardedSamplingRate() { + return GetGuardedSamplingInterval(); + } + static void SetGuardedSamplingRate(int64_t rate) { + SetGuardedSamplingInterval(rate); + } // Switches TCMalloc to guard sampled allocations for underflow, overflow, and // use-after-free according to the guarded sample parameter value. @@ -331,11 +498,6 @@ class MallocExtension final { // Gets whether TCMalloc is using per-CPU caches. static bool PerCpuCachesActive(); - // Extension for unified agent. - // - // Should be removed in the future https://st.yandex-team.ru/UNIFIEDAGENT-321 - static void DeactivatePerCpuCaches(); - // Gets the current maximum cache size per CPU cache. static int32_t GetMaxPerCpuCacheSize(); // Sets the maximum cache size per CPU cache. This is a per-core limit. @@ -346,10 +508,34 @@ class MallocExtension final { // Sets the maximum thread cache size. This is a whole-process limit. static void SetMaxTotalThreadCacheBytes(int64_t value); - // Gets the delayed subrelease interval (0 if delayed subrelease is disabled) + // Enables or disables background processes. + static bool GetBackgroundProcessActionsEnabled(); + static void SetBackgroundProcessActionsEnabled(bool value); + + // Gets and sets background process sleep time. This controls the interval + // granularity at which the actions are invoked. + static absl::Duration GetBackgroundProcessSleepInterval(); + static void SetBackgroundProcessSleepInterval(absl::Duration value); + + // Gets and sets intervals used for finding recent demand peak, short-term + // demand fluctuation, and long-term demand trend. Zero duration means not + // considering corresponding demand history for delayed subrelease. Delayed + // subrelease is disabled if all intervals are zero. static absl::Duration GetSkipSubreleaseInterval(); - // Sets the delayed subrelease interval (0 to disable delayed subrelease) static void SetSkipSubreleaseInterval(absl::Duration value); + static absl::Duration GetSkipSubreleaseShortInterval(); + static void SetSkipSubreleaseShortInterval(absl::Duration value); + static absl::Duration GetSkipSubreleaseLongInterval(); + static void SetSkipSubreleaseLongInterval(absl::Duration value); + + // Gets and sets intervals used for finding the recent short-term demand + // fluctuation and long-term demand trend in HugeCache. Zero duration means + // not considering corresponding demand history for delayed (demand-based) + // hugepage release. The feature is disabled if both intervals are zero. 
+ static absl::Duration GetCacheDemandReleaseShortInterval(); + static void SetCacheDemandReleaseShortInterval(absl::Duration value); + static absl::Duration GetCacheDemandReleaseLongInterval(); + static void SetCacheDemandReleaseLongInterval(absl::Duration value); // Returns the estimated number of bytes that will be allocated for a request // of "size" bytes. This is an estimate: an allocation of "size" bytes may @@ -372,7 +558,7 @@ class MallocExtension final { // -- that is, must be exactly the pointer returned to by malloc() et al., not // some offset from that -- and should not have been freed yet. p may be // null. - static absl::optional GetAllocatedSize(const void* p); + static std::optional GetAllocatedSize(const void* p); // Returns // * kOwned if TCMalloc allocated the memory pointed to by p, or @@ -444,6 +630,10 @@ class MallocExtension final { // null if the implementation does not support profiling. static AllocationProfilingToken StartAllocationProfiling(); + // Start recording lifetimes of objects live during this profiling + // session. Returns null if the implementation does not support profiling. + static AllocationProfilingToken StartLifetimeProfiling(); + // Runs housekeeping actions for the allocator off of the main allocation path // of new/delete. As of 2020, this includes: // * Inspecting the current CPU mask and releasing memory from inaccessible @@ -498,6 +688,9 @@ class MallocExtension final { // Default weak implementation returns size unchanged, but tcmalloc overrides it // and returns rounded up size. See the following link for details: // http://www.unix.com/man-page/freebsd/3/nallocx/ +// NOTE: prefer using tcmalloc_size_returning_operator_new over nallocx. +// tcmalloc_size_returning_operator_new is more efficienct and provides tcmalloc +// with better telemetry. extern "C" size_t nallocx(size_t size, int flags) noexcept; // The sdallocx function deallocates memory allocated by malloc or memalign. It @@ -507,16 +700,27 @@ extern "C" size_t nallocx(size_t size, int flags) noexcept; // uses the size to improve deallocation performance. extern "C" void sdallocx(void* ptr, size_t size, int flags) noexcept; -namespace tcmalloc { +#if !defined(__STDC_VERSION_STDLIB_H__) || __STDC_VERSION_STDLIB_H__ < 202311L +// Frees ptr allocated with malloc(size) introduced in C23. +extern "C" void free_sized(void* ptr, size_t size); + +// Frees ptr allocated with aligned_alloc/posix_memalign with the specified size +// and alignment introduced in C23. +extern "C" void free_aligned_sized(void* ptr, size_t alignment, size_t size); +#endif -// Pointer / capacity information as returned by -// tcmalloc_size_returning_operator_new(). See -// tcmalloc_size_returning_operator_new() for more information. -struct sized_ptr_t { +// Define __sized_ptr_t in the global namespace so that it can be named by the +// __size_returning_new implementations defined in tcmalloc.cc. +struct __sized_ptr_t { void* p; size_t n; }; +namespace tcmalloc { +// sized_ptr_t constains pointer / capacity information as returned +// by `tcmalloc_size_returning_operator_new()`. +// See `tcmalloc_size_returning_operator_new()` for more information. +using sized_ptr_t = __sized_ptr_t; } // namespace tcmalloc // Allocates memory of at least the requested size. 
@@ -548,25 +752,73 @@ struct sized_ptr_t { // new" proposal: // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2019/p0901r5.html extern "C" { -tcmalloc::sized_ptr_t tcmalloc_size_returning_operator_new(size_t size); -tcmalloc::sized_ptr_t tcmalloc_size_returning_operator_new_nothrow( +// The following declarations provide an alternative spelling which should be +// used so that the compiler can identify these as allocator functions. +__sized_ptr_t __size_returning_new(size_t size); +__sized_ptr_t __size_returning_new_hot_cold(size_t, __hot_cold_t); +__sized_ptr_t __size_returning_new_aligned(size_t, std::align_val_t); +__sized_ptr_t __size_returning_new_aligned_hot_cold(size_t, std::align_val_t, + __hot_cold_t); + +ABSL_DEPRECATE_AND_INLINE() +inline __sized_ptr_t tcmalloc_size_returning_operator_new(size_t size) { + return __size_returning_new(size); +} +__sized_ptr_t tcmalloc_size_returning_operator_new_nothrow( size_t size) noexcept; +ABSL_DEPRECATE_AND_INLINE() +inline __sized_ptr_t tcmalloc_size_returning_operator_new_hot_cold( + size_t size, tcmalloc::hot_cold_t hot_cold) { + return __size_returning_new_hot_cold(size, hot_cold); +} +__sized_ptr_t tcmalloc_size_returning_operator_new_hot_cold_nothrow( + size_t size, tcmalloc::hot_cold_t hot_cold) noexcept; -// Aligned size returning new is only supported for libc++ because of issues -// with libstdcxx.so linkage. See http://b/110969867 for background. #if defined(__cpp_aligned_new) // Identical to `tcmalloc_size_returning_operator_new` except that the returned // memory is aligned according to the `alignment` argument. -tcmalloc::sized_ptr_t tcmalloc_size_returning_operator_new_aligned( - size_t size, std::align_val_t alignment); -tcmalloc::sized_ptr_t tcmalloc_size_returning_operator_new_aligned_nothrow( +ABSL_DEPRECATE_AND_INLINE() +inline __sized_ptr_t tcmalloc_size_returning_operator_new_aligned( + size_t size, std::align_val_t alignment) { + return __size_returning_new_aligned(size, alignment); +} +__sized_ptr_t tcmalloc_size_returning_operator_new_aligned_nothrow( size_t size, std::align_val_t alignment) noexcept; +ABSL_DEPRECATE_AND_INLINE() +inline __sized_ptr_t tcmalloc_size_returning_operator_new_aligned_hot_cold( + size_t size, std::align_val_t alignment, tcmalloc::hot_cold_t hot_cold) { + return __size_returning_new_aligned_hot_cold(size, alignment, hot_cold); +} +__sized_ptr_t tcmalloc_size_returning_operator_new_aligned_hot_cold_nothrow( + size_t size, std::align_val_t alignment, + tcmalloc::hot_cold_t hot_cold) noexcept; #endif // __cpp_aligned_new } // extern "C" +void* operator new(size_t size, tcmalloc::hot_cold_t hot_cold) noexcept(false); +void* operator new(size_t size, const std::nothrow_t&, + tcmalloc::hot_cold_t hot_cold) noexcept; +void* operator new[](size_t size, + tcmalloc::hot_cold_t hot_cold) noexcept(false); +void* operator new[](size_t size, const std::nothrow_t&, + tcmalloc::hot_cold_t hot_cold) noexcept; + +#ifdef __cpp_aligned_new +void* operator new(size_t size, std::align_val_t alignment, + tcmalloc::hot_cold_t hot_cold) noexcept(false); +void* operator new(size_t size, std::align_val_t alignment, + const std::nothrow_t&, + tcmalloc::hot_cold_t hot_cold) noexcept; +void* operator new[](size_t size, std::align_val_t alignment, + tcmalloc::hot_cold_t hot_cold) noexcept(false); +void* operator new[](size_t size, std::align_val_t alignment, + const std::nothrow_t&, + tcmalloc::hot_cold_t hot_cold) noexcept; +#endif // __cpp_aligned_new + #ifndef MALLOCX_LG_ALIGN #define MALLOCX_LG_ALIGN(la) (la) 
#endif @@ -607,14 +859,58 @@ class ProfileBase { virtual void Iterate( absl::FunctionRef f) const = 0; - // The approximate interval between recorded samples of the event of interest. - // A period of 1 means every sample was recorded. - virtual int64_t Period() const = 0; - // The type of profile (live objects, allocated, etc.). virtual ProfileType Type() const = 0; + + virtual std::optional StartTime() const = 0; + + // The duration the profile was collected for. For instantaneous profiles + // (heap, peakheap, etc.), this returns absl::ZeroDuration(). + virtual absl::Duration Duration() const = 0; +}; + +enum class MadvisePreference { + kNever = 0x0, + kDontNeed = 0x1, + kFreeAndDontNeed = 0x3, + kFreeOnly = 0x2, }; +inline bool AbslParseFlag(absl::string_view text, MadvisePreference* preference, + std::string* /* error */) { + if (text == "NEVER") { + *preference = MadvisePreference::kNever; + return true; + } else if (text == "DONTNEED") { + *preference = MadvisePreference::kDontNeed; + return true; + } else if (text == "FREE_AND_DONTNEED") { + *preference = MadvisePreference::kFreeAndDontNeed; + return true; + } else if (text == "FREE_ONLY") { + *preference = MadvisePreference::kFreeOnly; + return true; + } else { + return false; + } +} + +inline std::string AbslUnparseFlag(MadvisePreference preference) { + switch (preference) { + case MadvisePreference::kNever: + return "NEVER"; + case MadvisePreference::kDontNeed: + return "DONTNEED"; + case MadvisePreference::kFreeAndDontNeed: + return "FREE_AND_DONTNEED"; + case MadvisePreference::kFreeOnly: + return "FREE_ONLY"; + } + + ABSL_UNREACHABLE(); + return ""; +} + } // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/malloc_extension_fuzz.cc b/contrib/libs/tcmalloc/tcmalloc/malloc_extension_fuzz.cc index 26335bdef825..2a7d268cc852 100644 --- a/contrib/libs/tcmalloc/tcmalloc/malloc_extension_fuzz.cc +++ b/contrib/libs/tcmalloc/tcmalloc/malloc_extension_fuzz.cc @@ -14,23 +14,26 @@ #include +#include #include +#include #include +#include "fuzztest/fuzztest.h" #include "absl/types/optional.h" #include "tcmalloc/malloc_extension.h" -extern "C" int LLVMFuzzerTestOneInput(const uint8_t* d, size_t size) { - using tcmalloc::MallocExtension; +namespace tcmalloc { +namespace { - const std::string property(reinterpret_cast(d), size); - absl::optional val = MallocExtension::GetNumericProperty(property); +void FuzzGetProperty(const std::string& property) { + std::optional val = MallocExtension::GetNumericProperty(property); if (!val.has_value()) { // Rather than inspect the result of MallocExtension::GetProperties, we - // defer to the test in //tcmalloc/malloc_extension_test.cc to - // ensure that every key in GetProperties has a value returned by - // GetNumericProperty. - return 0; + // defer to the test in + // //tcmalloc/testing/malloc_extension_test.cc to ensure that + // every key in GetProperties has a value returned by GetNumericProperty. 
+ return; } std::map properties = @@ -38,5 +41,10 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* d, size_t size) { if (properties.find(property) == properties.end()) { __builtin_trap(); } - return 0; } + +FUZZ_TEST(MallocExtensionTest, FuzzGetProperty) + ; + +} // namespace +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/malloc_extension_test.cc b/contrib/libs/tcmalloc/tcmalloc/malloc_extension_test.cc deleted file mode 100644 index 5088806ff8fe..000000000000 --- a/contrib/libs/tcmalloc/tcmalloc/malloc_extension_test.cc +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2019 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Test for TCMalloc implementation of MallocExtension - -#include "tcmalloc/malloc_extension.h" - -#include "gmock/gmock.h" -#include "gtest/gtest.h" -#include "absl/time/time.h" - -namespace tcmalloc { -namespace { - -TEST(MallocExtension, BackgroundReleaseRate) { - - // Mutate via MallocExtension. - MallocExtension::SetBackgroundReleaseRate( - MallocExtension::BytesPerSecond{100 << 20}); - - EXPECT_EQ(static_cast(MallocExtension::GetBackgroundReleaseRate()), - 100 << 20); - - // Disable release - MallocExtension::SetBackgroundReleaseRate(MallocExtension::BytesPerSecond{0}); - - EXPECT_EQ(static_cast(MallocExtension::GetBackgroundReleaseRate()), - 0); -} - -TEST(MallocExtension, SkipSubreleaseInterval) { - - // Mutate via MallocExtension. - MallocExtension::SetSkipSubreleaseInterval(absl::Seconds(10)); - EXPECT_EQ(MallocExtension::GetSkipSubreleaseInterval(), absl::Seconds(10)); - - // Disable skip subrelease - MallocExtension::SetSkipSubreleaseInterval(absl::ZeroDuration()); - EXPECT_EQ(MallocExtension::GetSkipSubreleaseInterval(), absl::ZeroDuration()); -} - -TEST(MallocExtension, Properties) { - // Verify that every property under GetProperties also works with - // GetNumericProperty. - const auto properties = MallocExtension::GetProperties(); - for (const auto& property : properties) { - absl::optional scalar = - MallocExtension::GetNumericProperty(property.first); - // The value of the property itself may have changed, so just check that it - // is present. - EXPECT_THAT(scalar, testing::Ne(absl::nullopt)) << property.first; - } -} - -} // namespace -} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/malloc_tracing_extension.cc b/contrib/libs/tcmalloc/tcmalloc/malloc_tracing_extension.cc new file mode 100644 index 000000000000..a0756f5963e1 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/malloc_tracing_extension.cc @@ -0,0 +1,41 @@ +// Copyright 2022 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
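The rewritten fuzz target above and the deleted malloc_extension_test.cc rely on the same invariant between GetProperties() and GetNumericProperty(). A standalone sketch of that check (illustrative only, not part of this patch; the function name is hypothetical):

#include <cstddef>
#include <optional>

#include "tcmalloc/malloc_extension.h"

// Every key reported by GetProperties() should also resolve through
// GetNumericProperty(); the numeric values may differ between the two calls.
inline bool PropertiesAreConsistent() {
  for (const auto& kv : tcmalloc::MallocExtension::GetProperties()) {
    std::optional<size_t> value =
        tcmalloc::MallocExtension::GetNumericProperty(kv.first);
    if (!value.has_value()) return false;
  }
  return true;
}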
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Extra extensions exported by some malloc implementations. These +// extensions are accessed through a virtual base class so an +// application can link against a malloc that does not implement these +// extensions, and it will get default versions that do nothing. + +#include "tcmalloc/malloc_tracing_extension.h" + +#include "absl/base/attributes.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "tcmalloc/internal_malloc_tracing_extension.h" + +namespace tcmalloc { +namespace malloc_tracing_extension { + +absl::StatusOr GetAllocatedAddressRanges() { +#if ABSL_HAVE_ATTRIBUTE_WEAK && !defined(__APPLE__) && !defined(__EMSCRIPTEN__) + if (&MallocTracingExtension_Internal_GetAllocatedAddressRanges != nullptr) { + return MallocTracingExtension_Internal_GetAllocatedAddressRanges(); + } +#endif + return absl::UnimplementedError( + "malloc_tracing_extension routines not exported by the current malloc."); +} + +} // namespace malloc_tracing_extension +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/malloc_tracing_extension.h b/contrib/libs/tcmalloc/tcmalloc/malloc_tracing_extension.h new file mode 100644 index 000000000000..ed020512fd14 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/malloc_tracing_extension.h @@ -0,0 +1,55 @@ +#pragma clang system_header +// Copyright 2022 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Extra extensions exported by some malloc implementations. These +// extensions are accessed through a virtual base class so an +// application can link against a malloc that does not implement these +// extensions, and it will get default versions that do nothing. + +#ifndef TCMALLOC_MALLOC_TRACING_EXTENSION_H_ +#define TCMALLOC_MALLOC_TRACING_EXTENSION_H_ + +#include +#include +#include + +#include "absl/status/statusor.h" + +namespace tcmalloc { +namespace malloc_tracing_extension { + +// Type used by GetAllocatedAddressRanges. Contains details of address ranges +// that have a corresponding Span in TCMalloc. +struct AllocatedAddressRanges { + struct SpanDetails { + uintptr_t start_addr; + size_t size; + // For Spans with objects that fit into some size-class, object_size is + // actually the size-class bytes, not the exact object size bytes. + // This is zero for non-size-class objects that are objects larger than + // kMaxSize. + size_t object_size; + }; + // Note that any subset of size-class-sized objects may be currently + // allocated from each Span. + std::vector spans; +}; +// Returns the address ranges currently allocated by TCMalloc. 
+absl::StatusOr GetAllocatedAddressRanges(); + +} // namespace malloc_tracing_extension +} // namespace tcmalloc + +#endif // TCMALLOC_MALLOC_TRACING_EXTENSION_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/metadata_allocator.h b/contrib/libs/tcmalloc/tcmalloc/metadata_allocator.h new file mode 100644 index 000000000000..e0cbaa78711b --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/metadata_allocator.h @@ -0,0 +1,41 @@ +#pragma clang system_header +// Copyright 2023 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_METADATA_ALLOCATOR_H_ +#define TCMALLOC_METADATA_ALLOCATOR_H_ + +#include + +#include "absl/base/attributes.h" + +namespace tcmalloc::tcmalloc_internal { + +class MetadataAllocator { + public: + MetadataAllocator() = default; + virtual ~MetadataAllocator() = default; + + MetadataAllocator(const MetadataAllocator&) = delete; + MetadataAllocator(MetadataAllocator&&) = delete; + MetadataAllocator& operator=(const MetadataAllocator&) = delete; + MetadataAllocator& operator=(MetadataAllocator&&) = delete; + + // Allocates bytes suitable for metadata. + [[nodiscard]] virtual void* operator()(size_t bytes) = 0; +}; + +} // namespace tcmalloc::tcmalloc_internal + +#endif // TCMALLOC_METADATA_ALLOCATOR_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/metadata_object_allocator.h b/contrib/libs/tcmalloc/tcmalloc/metadata_object_allocator.h new file mode 100644 index 000000000000..b6d34650f59b --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/metadata_object_allocator.h @@ -0,0 +1,142 @@ +#pragma clang system_header +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
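GetAllocatedAddressRanges() above returns an absl::StatusOr and falls back to an UnimplementedError when the running malloc does not export the weak hook. An illustrative call site under that assumption (not part of this patch; the helper name is hypothetical):

#include <cstddef>

#include "absl/status/statusor.h"
#include "tcmalloc/malloc_tracing_extension.h"

// Sum the sizes of all spans reported by the tracing extension, treating an
// unimplemented extension as zero tracked bytes.
inline size_t TotalTrackedSpanBytes() {
  absl::StatusOr<tcmalloc::malloc_tracing_extension::AllocatedAddressRanges>
      ranges = tcmalloc::malloc_tracing_extension::GetAllocatedAddressRanges();
  if (!ranges.ok()) return 0;
  size_t total = 0;
  for (const auto& span : ranges->spans) total += span.size;
  return total;
}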
+ +#ifndef TCMALLOC_METADATA_OBJECT_ALLOCATOR_H_ +#define TCMALLOC_METADATA_OBJECT_ALLOCATOR_H_ + +#include + +#include + +#include "absl/base/attributes.h" +#include "absl/base/const_init.h" +#include "absl/base/dynamic_annotations.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/optimization.h" +#include "absl/base/thread_annotations.h" +#include "tcmalloc/arena.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/allocation_guard.h" +#include "tcmalloc/internal/config.h" + +#ifdef ABSL_HAVE_ADDRESS_SANITIZER +#include +#endif + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +struct AllocatorStats { + // Number of allocated but unfreed objects + size_t in_use; + // Number of objects created (both free and allocated) + size_t total; +}; + +// Simple allocator for objects of a specified type. External locking +// is required before accessing one of these objects. +template +class MetadataObjectAllocator { + public: + constexpr explicit MetadataObjectAllocator( + Arena& arena ABSL_ATTRIBUTE_LIFETIME_BOUND) + : arena_(&arena), free_list_(nullptr), stats_{0, 0} {} + + // Allocates storage for a T. + // + // Once New() has been invoked to allocate storage, it is no longer safe to + // request an overaligned instance via NewWithSize as the underaligned result + // may be freelisted. + template + [[nodiscard]] ABSL_ATTRIBUTE_RETURNS_NONNULL T* New(Args&&... args) { + return NewWithSize(sizeof(T), static_cast(alignof(T)), + std::forward(args)...); + } + + template + [[nodiscard]] ABSL_ATTRIBUTE_RETURNS_NONNULL T* NewWithSize( + size_t size, std::align_val_t align, Args&&... args) { + T* ret = LockAndAllocMemory(size, align); + return new (ret) T(std::forward(args)...); + } + + void Delete(T* p) ABSL_ATTRIBUTE_NONNULL() { + p->~T(); + LockAndDeleteMemory(p); + } + + AllocatorStats stats() const { + AllocationGuardSpinLockHolder l(&metadata_lock_); + + return stats_; + } + + private: + ABSL_ATTRIBUTE_RETURNS_NONNULL T* LockAndAllocMemory(size_t size, + std::align_val_t align) { + TC_ASSERT_GE(static_cast(align), alignof(T)); + + AllocationGuardSpinLockHolder l(&metadata_lock_); + + // Consult free list + T* result = free_list_; + stats_.in_use++; + if (ABSL_PREDICT_FALSE(result == nullptr)) { + stats_.total++; + result = reinterpret_cast(arena_->Alloc(size, align)); + ABSL_ANNOTATE_MEMORY_IS_UNINITIALIZED(result, size); + return result; + } else { +#ifdef ABSL_HAVE_ADDRESS_SANITIZER + // Unpoison the object on the freelist. + ASAN_UNPOISON_MEMORY_REGION(result, size); +#endif + } + free_list_ = *(reinterpret_cast(free_list_)); + ABSL_ANNOTATE_MEMORY_IS_UNINITIALIZED(result, size); + return result; + } + + void LockAndDeleteMemory(T* p) ABSL_ATTRIBUTE_NONNULL() { + AllocationGuardSpinLockHolder l(&metadata_lock_); + + *(reinterpret_cast(p)) = free_list_; +#ifdef ABSL_HAVE_ADDRESS_SANITIZER + // Poison the object on the freelist. We do not dereference it after this + // point. 
+ ASAN_POISON_MEMORY_REGION(p, sizeof(*p)); +#endif + free_list_ = p; + stats_.in_use--; + } + + // Arena from which to allocate memory + Arena* arena_; + + mutable absl::base_internal::SpinLock metadata_lock_{ + absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY}; + + // Free list of already carved objects + T* free_list_ ABSL_GUARDED_BY(metadata_lock_); + + AllocatorStats stats_ ABSL_GUARDED_BY(metadata_lock_); +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_METADATA_OBJECT_ALLOCATOR_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.cc b/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.cc index 13308b947a9c..9fd3d91fb4fe 100644 --- a/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.cc +++ b/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.cc @@ -15,17 +15,34 @@ #include "tcmalloc/mock_central_freelist.h" #include "absl/base/internal/spinlock.h" +#include "absl/types/span.h" #include "tcmalloc/internal/logging.h" namespace tcmalloc { namespace tcmalloc_internal { -void MinimalFakeCentralFreeList::AllocateBatch(void** batch, int n) { - for (int i = 0; i < n; ++i) batch[i] = &batch[i]; +void RealCentralFreeListForTesting::AllocateBatch(absl::Span batch) { + int total = 0; + + while (total < batch.size()) { + const int to_remove = batch.size() - total; + const int removed = RemoveRange(batch.subspan(total)); + ASSERT_GT(removed, 0); + ASSERT_LE(removed, to_remove); + total += removed; + } +} + +void RealCentralFreeListForTesting::FreeBatch(absl::Span batch) { + InsertRange(batch); +} + +void MinimalFakeCentralFreeList::AllocateBatch(absl::Span batch) { + for (void*& v : batch) v = &v; } void MinimalFakeCentralFreeList::FreeBatch(absl::Span batch) { - for (void* x : batch) CHECK_CONDITION(x != nullptr); + for (void* x : batch) TC_CHECK_NE(x, nullptr); } void MinimalFakeCentralFreeList::InsertRange(absl::Span batch) { @@ -33,14 +50,14 @@ void MinimalFakeCentralFreeList::InsertRange(absl::Span batch) { FreeBatch(batch); } -int MinimalFakeCentralFreeList::RemoveRange(void** batch, int n) { +int MinimalFakeCentralFreeList::RemoveRange(absl::Span batch) { absl::base_internal::SpinLockHolder h(&lock_); - AllocateBatch(batch, n); - return n; + AllocateBatch(batch); + return batch.size(); } -void FakeCentralFreeList::AllocateBatch(void** batch, int n) { - for (int i = 0; i < n; ++i) { +void FakeCentralFreeList::AllocateBatch(absl::Span batch) { + for (int i = 0; i < batch.size(); ++i) { batch[i] = ::operator new(4); } } @@ -55,9 +72,9 @@ void FakeCentralFreeList::InsertRange(absl::Span batch) { FreeBatch(batch); } -int FakeCentralFreeList::RemoveRange(void** batch, int n) { - AllocateBatch(batch, n); - return n; +int FakeCentralFreeList::RemoveRange(absl::Span batch) { + AllocateBatch(batch); + return batch.size(); } } // namespace tcmalloc_internal diff --git a/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.h b/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.h index c2a56c0c6088..f70690727e9b 100644 --- a/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.h +++ b/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2020 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -20,10 +21,23 @@ #include "gmock/gmock.h" #include "absl/base/internal/spinlock.h" #include "absl/types/span.h" +#include "tcmalloc/central_freelist.h" namespace tcmalloc { namespace tcmalloc_internal { +// 
CentralFreeList implementation that uses a real central freelist to allocate +// objects. It implements additional methods used by benchmarks and tests. +// +// This is useful for benchmarking in cases where, for instance, we can test the +// efficiency of TCMalloc's frontend as it has to access real central freelist +// upon a miss. +class RealCentralFreeListForTesting : public CentralFreeList { + public: + void AllocateBatch(absl::Span batch); + void FreeBatch(absl::Span batch); +}; + class FakeCentralFreeListBase { public: FakeCentralFreeListBase() {} @@ -40,9 +54,9 @@ class FakeCentralFreeListBase { class FakeCentralFreeList : public FakeCentralFreeListBase { public: void InsertRange(absl::Span batch); - int RemoveRange(void** batch, int N); + [[nodiscard]] int RemoveRange(absl::Span batch); - void AllocateBatch(void** batch, int n); + void AllocateBatch(absl::Span batch); void FreeBatch(absl::Span batch); }; @@ -53,9 +67,9 @@ class FakeCentralFreeList : public FakeCentralFreeListBase { class MinimalFakeCentralFreeList : public FakeCentralFreeListBase { public: void InsertRange(absl::Span batch); - int RemoveRange(void** batch, int N); + [[nodiscard]] int RemoveRange(absl::Span batch); - void AllocateBatch(void** batch, int n); + void AllocateBatch(absl::Span batch); void FreeBatch(absl::Span batch); private: @@ -72,13 +86,13 @@ class RawMockCentralFreeList : public FakeCentralFreeList { ON_CALL(*this, InsertRange).WillByDefault([this](absl::Span batch) { return static_cast(this)->InsertRange(batch); }); - ON_CALL(*this, RemoveRange).WillByDefault([this](void** batch, int n) { - return static_cast(this)->RemoveRange(batch, n); + ON_CALL(*this, RemoveRange).WillByDefault([this](absl::Span batch) { + return static_cast(this)->RemoveRange(batch); }); } MOCK_METHOD(void, InsertRange, (absl::Span batch)); - MOCK_METHOD(int, RemoveRange, (void** batch, int N)); + MOCK_METHOD(int, RemoveRange, (absl::Span batch)); }; using MockCentralFreeList = testing::NiceMock; diff --git a/contrib/libs/tcmalloc/tcmalloc/mock_huge_page_static_forwarder.cc b/contrib/libs/tcmalloc/tcmalloc/mock_huge_page_static_forwarder.cc new file mode 100644 index 000000000000..3625d623510d --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/mock_huge_page_static_forwarder.cc @@ -0,0 +1,13 @@ +// Copyright 2023 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. diff --git a/contrib/libs/tcmalloc/tcmalloc/mock_huge_page_static_forwarder.h b/contrib/libs/tcmalloc/tcmalloc/mock_huge_page_static_forwarder.h new file mode 100644 index 000000000000..e1da0f38eca8 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/mock_huge_page_static_forwarder.h @@ -0,0 +1,243 @@ +#pragma clang system_header +// Copyright 2023 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
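The mock_central_freelist changes above migrate the batch interface from (void**, int) to absl::Span<void*>. A short sketch of driving the new span-based signatures with the minimal fake (illustrative only, not part of this patch; the function name is hypothetical):

#include <cstddef>
#include <vector>

#include "absl/types/span.h"
#include "tcmalloc/mock_central_freelist.h"

// Pull a batch out through the span-based RemoveRange and hand the same
// objects back via InsertRange.
inline void RoundTripBatch(
    tcmalloc::tcmalloc_internal::MinimalFakeCentralFreeList& freelist) {
  std::vector<void*> batch(8);
  int removed = freelist.RemoveRange(absl::MakeSpan(batch));
  freelist.InsertRange(
      absl::MakeSpan(batch.data(), static_cast<size_t>(removed)));
}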
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_MOCK_HUGE_PAGE_STATIC_FORWARDER_H_ +#define TCMALLOC_MOCK_HUGE_PAGE_STATIC_FORWARDER_H_ + +#include +#include +#include +#include +#include + +#include "absl/base/attributes.h" +#include "absl/base/call_once.h" +#include "absl/base/internal/low_level_alloc.h" +#include "absl/base/thread_annotations.h" +#include "absl/container/flat_hash_map.h" +#include "absl/hash/hash.h" +#include "absl/numeric/bits.h" +#include "absl/time/time.h" +#include "tcmalloc/arena.h" +#include "tcmalloc/common.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/pages.h" +#include "tcmalloc/span.h" +#include "tcmalloc/system-alloc.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { +namespace huge_page_allocator_internal { + +class FakeStaticForwarder { + public: + // Runtime parameters. This can change between calls. + absl::Duration filler_skip_subrelease_interval() { + return subrelease_interval_; + } + absl::Duration filler_skip_subrelease_short_interval() { + return short_interval_; + } + absl::Duration filler_skip_subrelease_long_interval() { + return long_interval_; + } + absl::Duration cache_demand_release_short_interval() { + return cache_demand_release_short_interval_; + } + absl::Duration cache_demand_release_long_interval() { + return cache_demand_release_long_interval_; + } + bool release_partial_alloc_pages() { return release_partial_alloc_pages_; } + bool hpaa_subrelease() const { return hpaa_subrelease_; } + + void set_filler_skip_subrelease_interval(absl::Duration value) { + subrelease_interval_ = value; + } + void set_filler_skip_subrelease_short_interval(absl::Duration value) { + short_interval_ = value; + } + void set_filler_skip_subrelease_long_interval(absl::Duration value) { + long_interval_ = value; + } + void set_cache_demand_release_short_interval(absl::Duration value) { + cache_demand_release_short_interval_ = value; + } + void set_cache_demand_release_long_interval(absl::Duration value) { + cache_demand_release_long_interval_ = value; + } + void set_release_partial_alloc_pages(bool value) { + release_partial_alloc_pages_ = value; + } + void set_hpaa_subrelease(bool value) { hpaa_subrelease_ = value; } + bool release_succeeds() const { return release_succeeds_; } + void set_release_succeeds(bool value) { release_succeeds_ = value; } + + bool huge_region_demand_based_release() const { + return huge_region_demand_based_release_; + } + void set_huge_region_demand_based_release(bool value) { + huge_region_demand_based_release_ = value; + } + + bool huge_cache_demand_based_release() const { + return huge_cache_demand_based_release_; + } + void set_huge_cache_demand_based_release(bool value) { + huge_cache_demand_based_release_ = value; + } + + // Arena state. + Arena& arena() { return arena_; } + + // PageAllocator state. + + // Check page heap memory limit. `n` indicates the size of the allocation + // currently being made, which will not be included in the sampled memory heap + // for realized fragmentation estimation. 
+ void ShrinkToUsageLimit(Length n) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) {} + + // PageMap state. + [[nodiscard]] void* GetHugepage(HugePage p) { + auto it = trackers_.find(p); + if (it == trackers_.end()) { + return nullptr; + } + return it->second; + } + [[nodiscard]] bool Ensure(Range r) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + return true; + } + void Set(PageId page, Span* span) {} + void SetHugepage(HugePage p, void* pt) { trackers_[p] = pt; } + + // SpanAllocator state. + [[nodiscard]] Span* NewSpan(Range r) +#ifdef TCMALLOC_INTERNAL_LEGACY_LOCKING + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) +#endif // TCMALLOC_INTERNAL_LEGACY_LOCKING + ABSL_ATTRIBUTE_RETURNS_NONNULL { + Span* span; + void* result = absl::base_internal::LowLevelAlloc::AllocWithArena( + sizeof(*span) + alignof(Span) + sizeof(void*), ll_arena()); + span = new (reinterpret_cast( + (reinterpret_cast(result) + alignof(Span) - 1u) & + ~(alignof(Span) - 1u))) Span(r); + *(reinterpret_cast(span + 1)) = + reinterpret_cast(result); + return span; + } + void DeleteSpan(Span* span) +#ifdef TCMALLOC_INTERNAL_LEGACY_LOCKING + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) +#endif // TCMALLOC_INTERNAL_LEGACY_LOCKING + ABSL_ATTRIBUTE_NONNULL() { + absl::base_internal::LowLevelAlloc::Free( + reinterpret_cast(*(reinterpret_cast(span + 1)))); + } + + // SystemAlloc state. + [[nodiscard]] AddressRange AllocatePages(size_t bytes, size_t align, + MemoryTag tag) { + TC_CHECK(absl::has_single_bit(align), "align=%v", align); + uintptr_t allocation, aligned_allocation, new_allocation; + do { + allocation = fake_allocation_.load(std::memory_order_relaxed); + aligned_allocation = (allocation + align - 1u) & ~(align - 1u); + new_allocation = aligned_allocation + bytes; + } while (!fake_allocation_.compare_exchange_weak( + allocation, new_allocation, std::memory_order_relaxed)); + + AddressRange ret{ + reinterpret_cast(aligned_allocation | + (static_cast(tag) << kTagShift)), + bytes}; + return ret; + } + void Back(Range r) {} + [[nodiscard]] bool ReleasePages(Range r) { + const uintptr_t start = + reinterpret_cast(r.p.start_addr()) & ~kTagMask; + const uintptr_t end = start + r.n.in_bytes(); + TC_CHECK_LE(end, fake_allocation_); + + return release_succeeds_; + } + + private: + static absl::base_internal::LowLevelAlloc::Arena* ll_arena() { + ABSL_CONST_INIT static absl::base_internal::LowLevelAlloc::Arena* a; + ABSL_CONST_INIT static absl::once_flag flag; + absl::base_internal::LowLevelCallOnce(&flag, [&]() { + a = absl::base_internal::LowLevelAlloc::NewArena( + absl::base_internal::LowLevelAlloc::kAsyncSignalSafe); + }); + return a; + } + absl::Duration subrelease_interval_; + absl::Duration short_interval_ = absl::Seconds(60); + absl::Duration long_interval_ = absl::Seconds(300); + absl::Duration cache_demand_release_short_interval_ = absl::Seconds(10); + absl::Duration cache_demand_release_long_interval_ = absl::Seconds(30); + bool release_partial_alloc_pages_ = false; + bool hpaa_subrelease_ = true; + bool release_succeeds_ = true; + bool huge_region_demand_based_release_ = false; + bool huge_cache_demand_based_release_ = false; + Arena arena_; + + std::atomic fake_allocation_ = 0x1000; + + template + class AllocAdaptor final { + public: + using value_type = T; + + AllocAdaptor() = default; + AllocAdaptor(const AllocAdaptor&) = default; + + template + using rebind = AllocAdaptor; + + template + explicit AllocAdaptor(const AllocAdaptor&) {} + + T* allocate(size_t n) { + // Check if n is too big to allocate. 
+ TC_ASSERT_EQ((n * sizeof(T)) / sizeof(T), n); + return static_cast(absl::base_internal::LowLevelAlloc::AllocWithArena( + n * sizeof(T), ll_arena())); + } + void deallocate(T* p, size_t n) { + absl::base_internal::LowLevelAlloc::Free(p); + } + }; + + absl::flat_hash_map, + std::equal_to, + AllocAdaptor>> + trackers_; +}; + +} // namespace huge_page_allocator_internal +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_MOCK_HUGE_PAGE_STATIC_FORWARDER_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/mock_metadata_allocator.h b/contrib/libs/tcmalloc/tcmalloc/mock_metadata_allocator.h new file mode 100644 index 000000000000..eee60193bf90 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/mock_metadata_allocator.h @@ -0,0 +1,47 @@ +#pragma clang system_header +// Copyright 2023 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_MOCK_METADATA_ALLOCATOR_H_ +#define TCMALLOC_MOCK_METADATA_ALLOCATOR_H_ + +#include +#include + +#include "absl/base/attributes.h" +#include "tcmalloc/metadata_allocator.h" + +namespace tcmalloc::tcmalloc_internal { + +class FakeMetadataAllocator final : public MetadataAllocator { + public: + ~FakeMetadataAllocator() override { + for (void* p : metadata_allocs_) { + free(p); + } + } + + [[nodiscard]] void* operator()(size_t size) override { + void* ptr = malloc(size); + metadata_allocs_.push_back(ptr); + return ptr; + } + + private: + std::vector metadata_allocs_; +}; + +} // namespace tcmalloc::tcmalloc_internal + +#endif // TCMALLOC_MOCK_METADATA_ALLOCATOR_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/mock_static_forwarder.h b/contrib/libs/tcmalloc/tcmalloc/mock_static_forwarder.h new file mode 100644 index 000000000000..5854dbcc6fab --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/mock_static_forwarder.h @@ -0,0 +1,242 @@ +#pragma clang system_header +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
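FakeMetadataAllocator above is a malloc-backed MetadataAllocator that releases everything it handed out in its destructor, so tests need no explicit cleanup. A sketch of the intended call pattern (illustrative only, not part of this patch; the function name is hypothetical):

#include "tcmalloc/mock_metadata_allocator.h"

// The allocator is invoked as a callable and returns raw metadata bytes; the
// block is freed when the fake goes out of scope.
inline void UseFakeMetadataAllocator() {
  tcmalloc::tcmalloc_internal::FakeMetadataAllocator metadata_allocator;
  void* block = metadata_allocator(256);  // MetadataAllocator::operator()(size_t)
  (void)block;
}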
+ +#ifndef TCMALLOC_MOCK_STATIC_FORWARDER_H_ +#define TCMALLOC_MOCK_STATIC_FORWARDER_H_ + +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/time.h" +#include "absl/types/span.h" +#include "tcmalloc/pages.h" +#include "tcmalloc/span.h" + +namespace tcmalloc { +namespace tcmalloc_internal { + +class FakeStaticForwarder { + public: + FakeStaticForwarder() : class_size_(0), pages_() {} + void Init(size_t class_size, size_t pages, size_t num_objects_to_move, + bool use_large_spans) { + class_size_ = class_size; + pages_ = Length(pages); + num_objects_to_move_ = num_objects_to_move; + use_large_spans_ = use_large_spans; + TC_ASSERT_LE(max_span_cache_size(), max_span_cache_array_size()); + clock_ = 1234; + } + uint64_t clock_now() const { return clock_; } + double clock_frequency() const { + return absl::ToDoubleNanoseconds(absl::Seconds(2)); + } + void AdvanceClock(absl::Duration d) { + clock_ += absl::ToDoubleSeconds(d) * clock_frequency(); + } + + size_t class_to_size(int size_class) const { return class_size_; } + Length class_to_pages(int size_class) const { return pages_; } + size_t num_objects_to_move() const { return num_objects_to_move_; } + uint32_t max_span_cache_size() const { + return use_large_spans_ ? Span::kLargeCacheSize : Span::kCacheSize; + } + uint32_t max_span_cache_array_size() const { + return use_large_spans_ ? Span::kLargeCacheArraySize : Span::kCacheSize; + } + + void MapObjectsToSpans(absl::Span batch, Span** spans, + int expected_size_class) { + for (size_t i = 0; i < batch.size(); ++i) { + spans[i] = MapObjectToSpan(batch[i]); + } + } + + [[nodiscard]] Span* MapObjectToSpan(const void* object) { + const PageId page = PageIdContaining(object); + + absl::MutexLock l(&mu_); + auto it = map_.lower_bound(page); + if (it->first != page && it != map_.begin()) { + --it; + } + + if (it->first <= page && page <= it->second.span->last_page()) { + return it->second.span; + } + + return nullptr; + } + + [[nodiscard]] Span* AllocateSpan(int, size_t objects_per_span, + Length pages_per_span) { + void* backing = + ::operator new(pages_per_span.in_bytes(), std::align_val_t(kPageSize)); + PageId page = PageIdContaining(backing); + + void* span_buf = + ::operator new(Span::CalcSizeOf(max_span_cache_array_size()), + Span::CalcAlignOf(max_span_cache_array_size())); + TC_ASSERT_LE(max_span_cache_size(), max_span_cache_array_size()); + + auto* span = new (span_buf) Span(Range(page, pages_per_span)); + + absl::MutexLock l(&mu_); + SpanInfo info; + info.span = span; + SpanAllocInfo span_alloc_info = { + .objects_per_span = objects_per_span, + .density = AccessDensityPrediction::kSparse}; + info.span_alloc_info = span_alloc_info; + map_.emplace(page, info); + return span; + } + + void DeallocateSpans(size_t, absl::Span free_spans) { + { + absl::MutexLock l(&mu_); + for (Span* span : free_spans) { + auto it = map_.find(span->first_page()); + EXPECT_NE(it, map_.end()); + map_.erase(it); + } + } + + const std::align_val_t span_alignment = + Span::CalcAlignOf(max_span_cache_array_size()); + + for (Span* span : free_spans) { + ::operator delete(span->start_address(), std::align_val_t(kPageSize)); + + span->~Span(); + ::operator delete(span, span_alignment); + } + } + + private: + struct SpanInfo { + Span* span; + SpanAllocInfo span_alloc_info; + }; + + absl::Mutex mu_; + std::map map_ ABSL_GUARDED_BY(mu_); + size_t class_size_; + Length pages_; + size_t num_objects_to_move_; + bool use_large_spans_; + uint64_t clock_; 
+}; + +class RawMockStaticForwarder : public FakeStaticForwarder { + public: + RawMockStaticForwarder() { + ON_CALL(*this, class_to_size).WillByDefault([this](int size_class) { + return FakeStaticForwarder::class_to_size(size_class); + }); + ON_CALL(*this, class_to_pages).WillByDefault([this](int size_class) { + return FakeStaticForwarder::class_to_pages(size_class); + }); + ON_CALL(*this, num_objects_to_move).WillByDefault([this]() { + return FakeStaticForwarder::num_objects_to_move(); + }); + ON_CALL(*this, Init) + .WillByDefault([this](size_t size_class, size_t pages, + size_t num_objects_to_move, + bool use_large_spans) { + FakeStaticForwarder::Init(size_class, pages, num_objects_to_move, + use_large_spans); + }); + + ON_CALL(*this, MapObjectsToSpans) + .WillByDefault([this](absl::Span batch, Span** spans, + int expected_size_class) { + return FakeStaticForwarder::MapObjectsToSpans(batch, spans, + expected_size_class); + }); + ON_CALL(*this, AllocateSpan) + .WillByDefault([this](int size_class, size_t objects_per_span, + Length pages_per_span) { + return FakeStaticForwarder::AllocateSpan(size_class, objects_per_span, + pages_per_span); + }); + ON_CALL(*this, DeallocateSpans) + .WillByDefault([this](size_t objects_per_span, + absl::Span free_spans) { + FakeStaticForwarder::DeallocateSpans(objects_per_span, free_spans); + }); + } + + MOCK_METHOD(size_t, class_to_size, (int size_class)); + MOCK_METHOD(Length, class_to_pages, (int size_class)); + MOCK_METHOD(size_t, num_objects_to_move, ()); + MOCK_METHOD(void, Init, + (size_t class_size, size_t pages, size_t num_objects_to_move, + bool use_large_spans)); + MOCK_METHOD(void, MapObjectsToSpans, + (absl::Span batch, Span** spans, int expected_size_class)); + MOCK_METHOD(Span*, AllocateSpan, + (int size_class, size_t objects_per_span, Length pages_per_span)); + MOCK_METHOD(void, DeallocateSpans, + (size_t object_per_span, absl::Span free_spans)); +}; + +using MockStaticForwarder = testing::NiceMock; + +// Wires up a largely functional CentralFreeList + MockStaticForwarder. +// +// By default, it fills allocations and responds sensibly. Because it backs +// onto malloc/free, it will detect leaks and memory misuse when run under +// sanitizers. +// +// Exposes the underlying mocks to allow for more whitebox tests. 
+template +class FakeCentralFreeListEnvironment { + public: + using CentralFreeList = CentralFreeListT; + using Forwarder = typename CentralFreeListT::Forwarder; + + static constexpr int kSizeClass = 1; + size_t objects_per_span() { + return forwarder().class_to_pages(kSizeClass).in_bytes() / + forwarder().class_to_size(kSizeClass); + } + size_t batch_size() { return forwarder().num_objects_to_move(); } + + explicit FakeCentralFreeListEnvironment(size_t class_size, size_t pages, + size_t num_objects_to_move, + bool use_large_spans) { + forwarder().Init(class_size, pages, num_objects_to_move, use_large_spans); + cache_.Init(kSizeClass); + } + + ~FakeCentralFreeListEnvironment() { EXPECT_EQ(cache_.length(), 0); } + + CentralFreeList& central_freelist() { return cache_; } + + Forwarder& forwarder() { return cache_.forwarder(); } + + private: + CentralFreeList cache_; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc + +#endif // TCMALLOC_MOCK_STATIC_FORWARDER_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.cc b/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.cc index b8b2bcf13102..075928919849 100644 --- a/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.cc +++ b/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.cc @@ -17,8 +17,12 @@ namespace tcmalloc { namespace tcmalloc_internal { -int FakeTransferCacheManager::DetermineSizeClassToEvict() { return 3; } -bool FakeTransferCacheManager::ShrinkCache(int) { return true; } - +ABSL_CONST_INIT bool + ArenaBasedFakeTransferCacheManager::partial_legacy_transfer_cache_(false); +ABSL_CONST_INIT bool FakeShardedTransferCacheManager::enable_generic_cache_( + false); +ABSL_CONST_INIT bool + FakeShardedTransferCacheManager::enable_cache_for_large_classes_only_( + false); } // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.h b/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.h index 5b5192f6dc9b..511ec8d9b4a4 100644 --- a/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.h +++ b/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.h @@ -1,3 +1,4 @@ +#pragma clang system_header // Copyright 2020 The TCMalloc Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -19,13 +20,17 @@ #include #include +#include #include +#include #include "gmock/gmock.h" +#include "gtest/gtest.h" #include "absl/random/distributions.h" #include "absl/random/random.h" #include "tcmalloc/common.h" #include "tcmalloc/mock_central_freelist.h" +#include "tcmalloc/transfer_cache.h" #include "tcmalloc/transfer_cache_internals.h" namespace tcmalloc { @@ -33,66 +38,91 @@ namespace tcmalloc_internal { inline constexpr size_t kClassSize = 8; inline constexpr size_t kNumToMove = 32; -inline constexpr int kSizeClass = 0; +inline constexpr int kSizeClass = 1; -class FakeTransferCacheManagerBase { +// TransferCacheManager with basic stubs for everything. +// +// Useful for benchmarks where you want to unrelated expensive operations. 
+class FakeTransferCacheManager { public: constexpr static size_t class_to_size(int size_class) { return kClassSize; } constexpr static size_t num_objects_to_move(int size_class) { // TODO(b/170732338): test with multiple different num_objects_to_move return kNumToMove; } - void* Alloc(size_t size) { - memory_.emplace_back(::operator new(size)); - return memory_.back().get(); + void* Alloc(size_t size, std::align_val_t alignment = kAlignment) { + memory_.push_back(std::make_unique( + ::operator new(size, alignment), alignment)); + return memory_.back()->ptr; } - struct Free { - void operator()(void* b) { ::operator delete(b); } - }; private: - std::vector> memory_; + struct AlignedPtr { + AlignedPtr(void* ptr, std::align_val_t alignment) + : ptr(ptr), alignment(alignment) {} + ~AlignedPtr() { ::operator delete(ptr, alignment); } + void* ptr; + std::align_val_t alignment; + }; + std::vector> memory_; }; -// TransferCacheManager with basic stubs for everything. +// A transfer cache manager which wraps malloc. // -// Useful for benchmarks where you want to unrelated expensive operations. -class FakeTransferCacheManager : public FakeTransferCacheManagerBase { +// TODO(b/175334169): Remove this once the locks are no longer used. +class ArenaBasedFakeTransferCacheManager { public: - int DetermineSizeClassToEvict(); - bool ShrinkCache(int); + ArenaBasedFakeTransferCacheManager() = default; + constexpr static size_t class_to_size(int size_class) { + // Chosen >= min size for the sharded transfer cache to kick in. + if (size_class == kSizeClass) return 4096; + return 0; + } + constexpr static size_t num_objects_to_move(int size_class) { + if (size_class == kSizeClass) return kNumToMove; + return 0; + } + void* Alloc(size_t size, std::align_val_t alignment = kAlignment) { + { + // Bounce pageheap_lock to verify we can take it. + // + // TODO(b/175334169): Remove this. + PageHeapSpinLockHolder l; + } + used_ += size; + return ::operator new(size, alignment); + } + size_t used() const { return used_; } + + static void SetPartialLegacyTransferCache(bool value) { + partial_legacy_transfer_cache_ = value; + } + + private: + size_t used_ = 0; + static bool partial_legacy_transfer_cache_; }; -// TransferCacheManager which allows intercepting intersting methods. -// -// Useful for intrusive unit tests that want to verify internal behavior. -class RawMockTransferCacheManager : public FakeTransferCacheManagerBase { +// A manager that may provide different configurations of sharded transfer +// cache. +class FakeShardedTransferCacheManager + : public ArenaBasedFakeTransferCacheManager { public: - RawMockTransferCacheManager() : FakeTransferCacheManagerBase() { - // We want single threaded tests to be deterministic, so we use a - // deterministic generator. Because we don't know about the threading for - // our tests we cannot keep the generator in a local variable. 
- ON_CALL(*this, ShrinkCache).WillByDefault([]() { - thread_local std::mt19937 gen{0}; - return absl::Bernoulli(gen, 0.8); - }); - ON_CALL(*this, GrowCache).WillByDefault([]() { - thread_local std::mt19937 gen{0}; - return absl::Bernoulli(gen, 0.8); - }); - ON_CALL(*this, DetermineSizeClassToEvict).WillByDefault([]() { - thread_local std::mt19937 gen{0}; - return absl::Uniform(gen, 1, kNumClasses); - }); + static void Init() {} + static bool UseGenericCache() { return enable_generic_cache_; } + static void SetGenericCache(bool value) { enable_generic_cache_ = value; } + static bool EnableCacheForLargeClassesOnly() { + return enable_cache_for_large_classes_only_; + } + static void SetCacheForLargeClassesOnly(bool value) { + enable_cache_for_large_classes_only_ = value; } - MOCK_METHOD(int, DetermineSizeClassToEvict, ()); - MOCK_METHOD(bool, ShrinkCache, (int size_class)); - MOCK_METHOD(bool, GrowCache, (int size_class)); + private: + static bool enable_generic_cache_; + static bool enable_cache_for_large_classes_only_; }; -using MockTransferCacheManager = testing::NiceMock; - // Wires up a largely functional TransferCache + TransferCacheManager + // MockCentralFreeList. // @@ -114,30 +144,30 @@ class FakeTransferCacheEnvironment { ::tcmalloc::tcmalloc_internal::kMaxObjectsToMove; static constexpr int kBatchSize = Manager::num_objects_to_move(1); - FakeTransferCacheEnvironment() : manager_(), cache_(&manager_, 1) {} + FakeTransferCacheEnvironment() : manager_(), cache_(&manager_, 1) { Init(); } ~FakeTransferCacheEnvironment() { Drain(); } - void Shrink() { cache_.ShrinkCache(kSizeClass); } - void Grow() { cache_.GrowCache(kSizeClass); } + bool Shrink() { return cache_.ShrinkCache(kSizeClass); } + bool Grow() { return cache_.IncreaseCacheCapacity(kSizeClass); } - void Insert(int n) { + void Insert(int n, int batch = kBatchSize) { std::vector bufs; while (n > 0) { - int b = std::min(n, kBatchSize); + int b = std::min(n, batch); bufs.resize(b); - central_freelist().AllocateBatch(&bufs[0], b); + central_freelist().AllocateBatch(absl::MakeSpan(bufs)); cache_.InsertRange(kSizeClass, absl::MakeSpan(bufs)); n -= b; } } - void Remove(int n) { + void Remove(int n, int batch = kBatchSize) { std::vector bufs; while (n > 0) { - int b = std::min(n, kBatchSize); + int b = std::min(n, batch); bufs.resize(b); - int removed = cache_.RemoveRange(kSizeClass, &bufs[0], b); + int removed = cache_.RemoveRange(kSizeClass, absl::MakeSpan(bufs)); // Ensure we make progress. ASSERT_GT(removed, 0); ASSERT_LE(removed, b); @@ -146,6 +176,8 @@ class FakeTransferCacheEnvironment { } } + void TryPlunder() { cache_.TryPlunder(kSizeClass); } + void Drain() { Remove(cache_.tc_length()); } void RandomlyPoke() { @@ -160,20 +192,26 @@ class FakeTransferCacheEnvironment { Grow(); } else if (choice < 0.3) { cache_.HasSpareCapacity(kSizeClass); - } else if (choice < 0.65) { + } else if (choice < 0.4) { Insert(absl::Uniform(gen, 1, kBatchSize)); - } else { + } else if (choice < 0.5) { Remove(absl::Uniform(gen, 1, kBatchSize)); + } else if (choice < 0.7) { + Insert(kBatchSize); + } else if (choice < 0.9) { + Remove(kBatchSize); + } else { + TryPlunder(); } } TransferCache& transfer_cache() { return cache_; } - Manager& transfer_cache_manager() { return manager_; } - FreeList& central_freelist() { return cache_.freelist(); } private: + void Init() {}; + Manager manager_; TransferCache cache_; }; @@ -183,23 +221,22 @@ class FakeTransferCacheEnvironment { // inside the cache manager, like in production code. 
template class TransferCacheT> -class TwoSizeClassManager : public FakeTransferCacheManagerBase { +class TwoSizeClassManager : public FakeTransferCacheManager { public: using FreeList = FreeListT; using TransferCache = TransferCacheT; - // This is 3 instead of 2 because we hard code cl == 0 to be invalid in many - // places. We only use cl 1 and 2 here. - static constexpr int kSizeClasses = 3; + // This is 3 instead of 2 because we hard code size_class == 0 to be invalid + // in many places. We only use size_class 1 and 2 here. static constexpr size_t kClassSize1 = 8; - static constexpr size_t kClassSize2 = 16; + static constexpr size_t kClassSize2 = 16 << 10; static constexpr size_t kNumToMove1 = 32; - static constexpr size_t kNumToMove2 = 16; + static constexpr size_t kNumToMove2 = 2; TwoSizeClassManager() { - caches_.push_back(absl::make_unique(this, 0)); - caches_.push_back(absl::make_unique(this, 1)); - caches_.push_back(absl::make_unique(this, 2)); + caches_.push_back(std::make_unique(this, 0)); + caches_.push_back(std::make_unique(this, 1)); + caches_.push_back(std::make_unique(this, 2)); } constexpr static size_t class_to_size(int size_class) { @@ -223,87 +260,187 @@ class TwoSizeClassManager : public FakeTransferCacheManagerBase { } } - int DetermineSizeClassToEvict() { return evicting_from_; } - - bool ShrinkCache(int size_class) { - return caches_[size_class]->ShrinkCache(size_class); + void InsertRange(int size_class, absl::Span batch) { + caches_[size_class]->InsertRange(size_class, batch); } - FreeList& central_freelist(int cl) { return caches_[cl]->freelist(); } - - void InsertRange(int cl, absl::Span batch) { - caches_[cl]->InsertRange(cl, batch); + int RemoveRange(int size_class, absl::Span batch) { + return caches_[size_class]->RemoveRange(size_class, batch); } - int RemoveRange(int cl, void** batch, int N) { - return caches_[cl]->RemoveRange(cl, batch, N); + size_t tc_length(int size_class) { return caches_[size_class]->tc_length(); } + TransferCacheStats GetStats(int size_class) { + return caches_[size_class]->GetStats(); } - bool HasSpareCapacity(int cl) { return caches_[cl]->HasSpareCapacity(cl); } + std::vector> caches_; +}; + +class FakeCpuLayout { + public: + static constexpr int kNumCpus = 6; + static constexpr int kCpusPerShard = 2; - size_t tc_length(int cl) { return caches_[cl]->tc_length(); } + void Init(int shards) { + TC_ASSERT_GT(shards, 0); + TC_ASSERT_LE(shards * kCpusPerShard, kNumCpus); + num_shards_ = shards; + } - std::vector> caches_; + void SetCurrentCpu(int cpu) { + TC_ASSERT_GE(cpu, 0); + TC_ASSERT_LT(cpu, kNumCpus); + current_cpu_ = cpu; + } + + unsigned NumShards() { return num_shards_; } + int CurrentCpu() { return current_cpu_; } + unsigned CpuShard(int cpu) { + return std::min(cpu / kCpusPerShard, num_shards_ - 1); + } - // From which size class to evict. - int evicting_from_ = 1; + private: + int current_cpu_ = 0; + int num_shards_ = 0; }; -template