Skip to content

Commit

Permalink
[#1509]: Take stack traces in parallel
Browse files Browse the repository at this point in the history
Summary:
When the stack traces of a thread group are requested via the web interface, we get the stack trace of each thread individually.
Since there are usually hundreds of threads, this takes a significant amount of time.
To speed up the process, we can request the stack traces concurrently and then collect all the results at once.

Test Plan:
Launch local cluster.
Go to http://127.0.0.1:7000/threadz?group=all
Check all stack traces are shown.

Reviewers: mikhail, bogdan

Reviewed By: bogdan

Subscribers: ybase

Differential Revision: https://phabricator.dev.yugabyte.com/D6729
  • Loading branch information
spolitov committed Jun 13, 2019
1 parent 10190af commit 67b4a5b
Show file tree
Hide file tree
Showing 12 changed files with 362 additions and 153 deletions.
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1438,7 +1438,11 @@ if (NOT APPLE)
ADD_THIRDPARTY_LIB(unwind
STATIC_LIB "${UNWIND_STATIC_LIB}"
SHARED_LIB "${UNWIND_SHARED_LIB}")
ADD_THIRDPARTY_LIB(unwind-arch
STATIC_LIB "${UNWIND_STATIC_ARCH_LIB}"
SHARED_LIB "${UNWIND_SHARED_ARCH_LIB}")
list(APPEND YB_BASE_LIBS unwind)
list(APPEND YB_BASE_LIBS unwind-arch)
endif()

# libuuid
Expand Down
21 changes: 20 additions & 1 deletion cmake_modules/FindLibUnwind.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -36,16 +36,35 @@
# UNWIND_SHARED_LIB, path to libunwind's shared library
# UNWIND_STATIC_LIB, path to libunwind's static library

# For some reason, we have to link to two libunwind shared object files:
# one arch-specific and one not.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")
SET(LIBUNWIND_ARCH "arm")
elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64")
SET(LIBUNWIND_ARCH "x86_64")
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^i.86$")
SET(LIBUNWIND_ARCH "x86")
endif()

find_path(UNWIND_INCLUDE_DIR libunwind.h
NO_CMAKE_SYSTEM_PATH
NO_SYSTEM_ENVIRONMENT_PATH)

find_library(UNWIND_SHARED_LIB unwind
NO_CMAKE_SYSTEM_PATH
NO_SYSTEM_ENVIRONMENT_PATH)
find_library(UNWIND_SHARED_ARCH_LIB unwind-${LIBUNWIND_ARCH}
NO_CMAKE_SYSTEM_PATH
NO_SYSTEM_ENVIRONMENT_PATH)

find_library(UNWIND_STATIC_LIB libunwind.a
NO_CMAKE_SYSTEM_PATH
NO_SYSTEM_ENVIRONMENT_PATH)
find_library(UNWIND_STATIC_ARCH_LIB libunwind-${LIBUNWIND_ARCH}.a
NO_CMAKE_SYSTEM_PATH
NO_SYSTEM_ENVIRONMENT_PATH)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(UNWIND REQUIRED_VARS
UNWIND_SHARED_LIB UNWIND_STATIC_LIB UNWIND_INCLUDE_DIR)
UNWIND_SHARED_LIB UNWIND_SHARED_ARCH_LIB UNWIND_STATIC_LIB UNWIND_STATIC_ARCH_LIB
UNWIND_INCLUDE_DIR)
4 changes: 3 additions & 1 deletion src/yb/util/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,9 @@ if(NOT APPLE)
set(UTIL_LIBS
${UTIL_LIBS}
rt
libbacktrace)
libbacktrace
unwind
unwind-arch)
endif()

if(${YB_TCMALLOC_AVAILABLE})
Expand Down
57 changes: 35 additions & 22 deletions src/yb/util/debug-util-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ namespace yb {

class DebugUtilTest : public YBTest {
protected:
void WaitForSleeperThreadNameInStackTrace(int64_t thread_id) {
void WaitForSleeperThreadNameInStackTrace(ThreadIdForStack thread_id) {
string stack;
for (int i = 0; i < 10000; i++) {
stack = DumpThreadStack(thread_id);
Expand Down Expand Up @@ -133,9 +133,33 @@ TEST_F(DebugUtilTest, TestGetStackTrace) {
// on the tgkill syscall which is not portable.
//
// TODO: it might be possible to enable other tests in this section to work on macOS.

TEST_F(DebugUtilTest, TestStackTraceInvalidTid) {
#if defined(__linux__)
ThreadIdForStack bad_tid = 1;
#else
ThreadIdForStack bad_tid = reinterpret_cast<ThreadIdForStack>(1);
#endif
string s = DumpThreadStack(bad_tid);
ASSERT_STR_CONTAINS(s, "Unable to deliver signal");
}

TEST_F(DebugUtilTest, TestStackTraceSelf) {
string s = DumpThreadStack(Thread::CurrentThreadIdForStack());
ASSERT_STR_CONTAINS(s, "yb::DebugUtilTest_TestStackTraceSelf_Test::TestBody()");
}

#if defined(__linux__)

TEST_F(DebugUtilTest, TestStackTraceMainThread) {
string s = DumpThreadStack(getpid());
ASSERT_STR_CONTAINS(s, "yb::DebugUtilTest_TestStackTraceMainThread_Test::TestBody()");
}

#endif

namespace {

void SleeperThread(CountDownLatch* l) {
// We use an infinite loop around WaitFor() instead of a normal Wait()
// so that this test passes in TSAN. Without this, we run into this TSAN
Expand All @@ -152,22 +176,8 @@ bool IsSignalHandlerRegistered(int signum) {
CHECK_EQ(0, sigaction(signum, nullptr, &cur_action));
return cur_action.sa_handler != SIG_DFL;
}
} // anonymous namespace

TEST_F(DebugUtilTest, TestStackTraceInvalidTid) {
string s = DumpThreadStack(1);
ASSERT_STR_CONTAINS(s, "Unable to deliver signal");
}

TEST_F(DebugUtilTest, TestStackTraceSelf) {
string s = DumpThreadStack(Thread::CurrentThreadId());
ASSERT_STR_CONTAINS(s, "yb::DebugUtilTest_TestStackTraceSelf_Test::TestBody()");
}

TEST_F(DebugUtilTest, TestStackTraceMainThread) {
string s = DumpThreadStack(getpid());
ASSERT_STR_CONTAINS(s, "yb::DebugUtilTest_TestStackTraceMainThread_Test::TestBody()");
}
} // anonymous namespace

TEST_F(DebugUtilTest, TestSignalStackTrace) {
CountDownLatch l(1);
Expand All @@ -176,7 +186,7 @@ TEST_F(DebugUtilTest, TestSignalStackTrace) {

// We have to loop a little bit because it takes a little while for the thread
// to start up and actually call our function.
WaitForSleeperThreadNameInStackTrace(t->tid());
WaitForSleeperThreadNameInStackTrace(t->tid_for_stack());

// Test that we can change the signal and that the stack traces still work,
// on the new signal.
Expand All @@ -191,7 +201,7 @@ TEST_F(DebugUtilTest, TestSignalStackTrace) {

// Stack traces should work using the new handler. We've had a test failure here when we just had
// a one-time check, so we do the same waiting loop as in the beginning of the test.
WaitForSleeperThreadNameInStackTrace(t->tid());
WaitForSleeperThreadNameInStackTrace(t->tid_for_stack());

// Switch back to SIGUSR2 and ensure it changes back.
ASSERT_OK(SetStackTraceSignal(SIGUSR2));
Expand All @@ -200,7 +210,7 @@ TEST_F(DebugUtilTest, TestSignalStackTrace) {

// Stack traces should work using the new handler. We also had a test failure here, so we use a
// retry loop.
WaitForSleeperThreadNameInStackTrace(t->tid());
WaitForSleeperThreadNameInStackTrace(t->tid_for_stack());

// Register our own signal handler on SIGUSR1, and ensure that
// we get a bad Status if we try to use it.
Expand All @@ -210,7 +220,7 @@ TEST_F(DebugUtilTest, TestSignalStackTrace) {
signal(SIGUSR1, SIG_IGN);

// Stack traces should be disabled
ASSERT_STR_CONTAINS(DumpThreadStack(t->tid()), "Unable to take thread stack");
ASSERT_STR_CONTAINS(DumpThreadStack(t->tid_for_stack()), "Unable to take thread stack");

// Re-enable so that other tests pass.
ASSERT_OK(SetStackTraceSignal(SIGUSR2));
Expand All @@ -220,17 +230,21 @@ TEST_F(DebugUtilTest, TestSignalStackTrace) {
t->Join();
}

#if defined(__linux__)

// Test which dumps all known threads within this process.
// We don't validate the results in any way -- but this verifies that we can
// dump library threads such as the libc timer_thread and properly time out.
TEST_F(DebugUtilTest, TestDumpAllThreads) {
vector<pid_t> tids;
std::vector<pid_t> tids;
ASSERT_OK(ListThreads(&tids));
for (pid_t tid : tids) {
LOG(INFO) << DumpThreadStack(tid);
}
}

#endif

// This will probably be really slow on Mac OS X, so only enabling on Linux.
TEST_F(DebugUtilTest, TestGetStackTraceInALoop) {
for (int i = 1; i <= 10000; ++i) {
Expand Down Expand Up @@ -258,7 +272,6 @@ TEST_F(DebugUtilTest, TestConcurrentStackTrace) {
thread.join();
}
}
#endif

TEST_F(DebugUtilTest, LongOperationTracker) {
struct TestLogSink : public google::LogSink {
Expand Down
Loading

0 comments on commit 67b4a5b

Please sign in to comment.