From 23a160aa13c2f15a6de17da8bb0786b8287cc222 Mon Sep 17 00:00:00 2001 From: Jonathan Hu Date: Fri, 4 Aug 2023 19:58:36 +0000 Subject: [PATCH] [BACKPORT 2.16.6][#18465] docdb: Dump PeerManager to tserver debug ui consensus page Summary: Original commit: 12a44ba1620045aee5dc328c2e605056114f7ee1 / D27568 To debug a case where a follower is possibly not tracked on the leader side, we can dump PeerManager to the consensus state page and get a clear view of it. When dumping the peer manager, we need to acquire the peer manager lock and the peer lock; if the peer is in a bad state (deadlock, etc.), the HTTP request might time out. Jira: DB-7438 Test Plan: Manual test: Start rf-3 universe, create a tablet. curl `LEADER_IP:9000/tablet-consensus-status?id=TABLET_ID` to check the result. Reviewers: qhu, bogdan Reviewed By: qhu, bogdan Subscribers: bogdan, ybase Differential Revision: https://phorge.dev.yugabyte.com/D28077 --- src/yb/consensus/consensus_peers.cc | 14 ++++++++++++++ src/yb/consensus/consensus_peers.h | 2 ++ src/yb/consensus/peer_manager.cc | 12 ++++++++++++ src/yb/consensus/peer_manager.h | 2 ++ src/yb/consensus/raft_consensus.cc | 2 ++ 5 files changed, 32 insertions(+) diff --git a/src/yb/consensus/consensus_peers.cc b/src/yb/consensus/consensus_peers.cc index 69949f3979b..670f8ec70f2 100644 --- a/src/yb/consensus/consensus_peers.cc +++ b/src/yb/consensus/consensus_peers.cc @@ -69,6 +69,7 @@ #include "yb/util/status_format.h" #include "yb/util/threadpool.h" #include "yb/util/tsan_util.h" +#include "yb/util/url-coding.h" using namespace std::literals; using namespace std::placeholders; @@ -199,6 +200,19 @@ Status Peer::SignalRequest(RequestTriggerMode trigger_mode) { return status; } +void Peer::DumpToHtml(std::ostream& out) const { + const auto peer_pb_str = EscapeForHtmlToString("Peer PB: " + peer_pb_.DebugString()); + out << "Peer:" << std::endl; + std::lock_guard lock(peer_lock_); + out << Format( + "", + EscapeForHtmlToString(Format("State: $0", state_)), 
EscapeForHtmlToString(Format("Current Heartbeat Id: $0", cur_heartbeat_id_)), + EscapeForHtmlToString(Format("Failed Attempts: $0", failed_attempts_)), + peer_pb_str) + << std::endl; +} + void Peer::SendNextRequest(RequestTriggerMode trigger_mode) { auto retain_self = shared_from_this(); DCHECK(performing_update_mutex_.is_locked()) << "Cannot send request"; diff --git a/src/yb/consensus/consensus_peers.h b/src/yb/consensus/consensus_peers.h index f1fe0477426..7e4e7ef8eea 100644 --- a/src/yb/consensus/consensus_peers.h +++ b/src/yb/consensus/consensus_peers.h @@ -179,6 +179,8 @@ class Peer : public std::enable_shared_from_this { return failed_attempts_; } + void DumpToHtml(std::ostream& out) const; + private: void SendNextRequest(RequestTriggerMode trigger_mode); diff --git a/src/yb/consensus/peer_manager.cc b/src/yb/consensus/peer_manager.cc index e8720507819..7da618cc633 100644 --- a/src/yb/consensus/peer_manager.cc +++ b/src/yb/consensus/peer_manager.cc @@ -153,6 +153,18 @@ void PeerManager::ClosePeersNotInConfig(const RaftConfigPB& config) { } } +void PeerManager::DumpToHtml(std::ostream& out) const { + out << "

Peer Manager

" << std::endl; + out << "" << std::endl; +} + std::string PeerManager::LogPrefix() const { return MakeTabletLogPrefix(tablet_id_, local_uuid_); } diff --git a/src/yb/consensus/peer_manager.h b/src/yb/consensus/peer_manager.h index 2a74f56c7ec..d0ce9123491 100644 --- a/src/yb/consensus/peer_manager.h +++ b/src/yb/consensus/peer_manager.h @@ -91,6 +91,8 @@ class PeerManager { // Closes connections to those peers that are not in config. virtual void ClosePeersNotInConfig(const RaftConfigPB& config); + virtual void DumpToHtml(std::ostream& out) const; + private: std::string LogPrefix() const; diff --git a/src/yb/consensus/raft_consensus.cc b/src/yb/consensus/raft_consensus.cc index 8b6e3c705ea..5e7e00eec58 100644 --- a/src/yb/consensus/raft_consensus.cc +++ b/src/yb/consensus/raft_consensus.cc @@ -3206,6 +3206,8 @@ void RaftConsensus::DumpStatusHtml(std::ostream& out) const { role = state_->GetActiveRoleUnlocked(); } if (role == PeerRole::LEADER) { + peer_manager_->DumpToHtml(out); + out << "
" << std::endl; out << "

Queue overview

" << std::endl; out << "
" << EscapeForHtmlToString(queue_->ToString()) << "
" << std::endl; out << "
" << std::endl;