Skip to content

Commit

Permalink
[#12768] DocDB: Add TServer with faulty drive to LB blacklist
Browse files Browse the repository at this point in the history
Summary:
A TServer with faulty drive has fewer tablets than other TServers, which can cause Load Balancer to move tablets to this TServer, but moving tablets to TServer with faulty drive leads to tablet count disbalances on leftover healthy drives. Also, the replaced drive starts with 0 tablets while other drives keep tablets.
With this change, the Load Balancer effectively blacklists the TServer. The corresponding platform change ensures that the user is alerted about it and would fix the underlying issue. Once the user fixes the underlying disk issue and brings the TServer back up, then the TServer no longer reports faulty drive issues to the master and as a result the blacklist gets removed.

Test Plan: ybd --cxx-test load_balancer_mini_cluster-test

Reviewers: skedia, jhe, rthallam

Reviewed By: jhe

Subscribers: ybase, bogdan

Differential Revision: https://phabricator.dev.yugabyte.com/D17680
  • Loading branch information
mikhpolitov committed Jun 17, 2022
1 parent 1dcbebf commit 2549499
Show file tree
Hide file tree
Showing 13 changed files with 307 additions and 68 deletions.
45 changes: 2 additions & 43 deletions src/yb/fs/fs_manager-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@

#include "yb/gutil/strings/util.h"

#include "yb/util/multi_drive_test_env.h"
#include "yb/util/status.h"
#include "yb/util/result.h"
#include "yb/util/test_macros.h"
Expand Down Expand Up @@ -308,52 +309,10 @@ TEST_F(FsManagerTestBase, MultiDriveWithoutMeta) {
ASSERT_OK(fs_manager()->ListTabletIds());
}

class FailedEmuEnv : public EnvWrapper {
public:
FailedEmuEnv() : EnvWrapper(Env::Default()) { }

Status NewRandomAccessFile(const std::string& f,
std::unique_ptr<RandomAccessFile>* r) override {
if (IsFailed(f)) {
return STATUS(IOError, "Test Error");
}
return target()->NewRandomAccessFile(f, r);
}

Status NewTempWritableFile(const WritableFileOptions& o, const std::string& t,
std::string* f, std::unique_ptr<WritableFile>* r) override {
if (IsFailed(t)) {
return STATUS(IOError, "Test Error");
}
return target()->NewTempWritableFile(o, t, f, r);
}

void AddFailedPath(const std::string& path) {
std::lock_guard<std::mutex> lock(data_mutex_);
failed_set_.emplace(path);
}

private:
bool IsFailed(const std::string& filename) const {
std::lock_guard<std::mutex> lock(data_mutex_);
if (failed_set_.empty()) {
return false;
}
auto it = failed_set_.lower_bound(filename);
if ((it == failed_set_.end() || *it != filename) && it != failed_set_.begin()) {
--it;
}
return boost::starts_with(filename, *it);
}

std::set<std::string> failed_set_ GUARDED_BY(data_mutex_);
mutable std::mutex data_mutex_;
};

class FsManagerTestDriveFault : public YBTest {
public:
void SetUp() override {
FailedEmuEnv* new_env = new FailedEmuEnv();
MultiDriveTestEnv* new_env = new MultiDriveTestEnv();
env_.reset(new_env);
new_env->AddFailedPath(GetTestPath(kFailedDrive));
YBTest::SetUp();
Expand Down
7 changes: 3 additions & 4 deletions src/yb/fs/fs_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,7 @@ FsManager::FsManager(Env* env, const string& root_path, const std::string& serve
wal_fs_roots_({ root_path }),
data_fs_roots_({ root_path }),
server_type_(server_type),
metric_registry_(nullptr),
initted_(false) {
metric_registry_(nullptr) {
}

FsManager::FsManager(Env* env,
Expand All @@ -163,8 +162,7 @@ FsManager::FsManager(Env* env,
data_fs_roots_(opts.data_paths),
server_type_(opts.server_type),
metric_registry_(opts.metric_registry),
parent_mem_tracker_(opts.parent_mem_tracker),
initted_(false) {
parent_mem_tracker_(opts.parent_mem_tracker) {
}

FsManager::~FsManager() {
Expand Down Expand Up @@ -271,6 +269,7 @@ Status FsManager::CheckAndOpenFileSystemRoots() {
<< " Write Result: " << write_result;
canonicalized_wal_fs_roots_.erase(root);
canonicalized_data_fs_roots_.erase(root);
has_faulty_drive_ = true;
CreateAndSetFaultDriveMetric(root);
continue;
}
Expand Down
11 changes: 10 additions & 1 deletion src/yb/fs/fs_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,10 @@ class FsManager {
return read_only_;
}

bool has_faulty_drive() const {
return has_faulty_drive_;
}

// ==========================================================================
// file-system helpers
// ==========================================================================
Expand Down Expand Up @@ -300,7 +304,12 @@ class FsManager {

std::unique_ptr<InstanceMetadataPB> metadata_;

bool initted_;
// Keep references to counters, counters without reference will be retired.
std::vector<scoped_refptr<Counter>> counters_;

bool initted_ = false;

bool has_faulty_drive_ = false;

DISALLOW_COPY_AND_ASSIGN(FsManager);
};
Expand Down
67 changes: 49 additions & 18 deletions src/yb/integration-tests/load_balancer_mini_cluster-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "yb/tserver/tablet_server_options.h"

#include "yb/util/monotime.h"
#include "yb/util/multi_drive_test_env.h"

DECLARE_bool(enable_load_balancing);
DECLARE_bool(load_balancer_drive_aware);
Expand Down Expand Up @@ -179,31 +180,13 @@ class LoadBalancerMiniClusterTestWithoutData : public LoadBalancerMiniClusterTes

class LoadBalancerMiniClusterTest : public LoadBalancerMiniClusterTestBase {
protected:
void SetUp() override {
emu_env = new StatEmuEnv();
ts_env_.reset(emu_env);
ts_rocksdb_env_ = std::make_unique<rocksdb::EnvWrapper>(rocksdb::Env::Default());
YBTableTestBase::SetUp();
}

void BeforeStartCluster() override {
for (size_t i = 0; i < num_tablet_servers(); ++i) {
// ts (free, used, total)
emu_env->AddPathStats(mini_cluster()->GetTabletServerDrive(i, 0), {150, 50, 200});
emu_env->AddPathStats(mini_cluster()->GetTabletServerDrive(i, 1), {100, 100, 200});
emu_env->AddPathStats(mini_cluster()->GetTabletServerDrive(i, 2), { 50, 150, 200});
}
}

int num_drives() override {
return 3;
}

int num_tablets() override {
return 4;
}

StatEmuEnv* emu_env;
};

// See issue #6278. This test tests the segfault that used to occur during a rare race condition,
Expand Down Expand Up @@ -478,5 +461,53 @@ TEST_F(LoadBalancerMiniClusterTest, CheckLoadBalanceDriveAware) {
ASSERT_TRUE(found);
}

class LoadBalancerFailedDrive : public LoadBalancerMiniClusterTestBase {
protected:
void SetUp() override {
ts_env_.reset(new MultiDriveTestEnv());
ts_rocksdb_env_.reset(new rocksdb::MultiDriveTestEnv());
YBTableTestBase::SetUp();
}

void BeforeStartCluster() override {
auto ts1_drive0 = mini_cluster()->GetTabletServerDrive(0, 0);
dynamic_cast<MultiDriveTestEnv*>(ts_env_.get())->AddFailedPath(ts1_drive0);
dynamic_cast<rocksdb::MultiDriveTestEnv*>(ts_rocksdb_env_.get())->AddFailedPath(ts1_drive0);
}

int num_drives() override {
return 3;
}

int num_tablets() override {
return 4;
}

size_t num_tablet_servers() override {
return 4;
}
};

TEST_F(LoadBalancerFailedDrive, CheckTabletSizeData) {
WaitLoadBalancerActive(client_.get());
WaitLoadBalancerIdle(client_.get());

auto& catalog_manager =
ASSERT_RESULT(mini_cluster()->GetLeaderMiniMaster())->catalog_manager();

scoped_refptr<master::TableInfo> tbl_info = catalog_manager.
GetTableInfoFromNamespaceNameAndTableName(table_name().namespace_type(),
table_name().namespace_name(),
table_name().table_name());
auto tablets = tbl_info->GetTablets();
const auto ts1_uuid = mini_cluster_->mini_tablet_server(0)->server()->permanent_uuid();
for (const auto& tablet : tablets) {
auto replica_map = tablet->GetReplicaLocations();
for (const auto& replica : *replica_map.get()) {
EXPECT_NE(ts1_uuid, replica.first);
}
}
}

} // namespace integration_tests
} // namespace yb
6 changes: 6 additions & 0 deletions src/yb/master/cluster_balance_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,12 @@ void PerTableLoadState::UpdateTabletServer(std::shared_ptr<TSDescriptor> ts_desc
}
}

// Mark as blacklisted if ts has faulty drive.
if (!is_blacklisted && ts_meta.descriptor->has_faulty_drive()) {
blacklisted_servers_.insert(ts_uuid);
is_blacklisted = true;
}

// Mark as blacklisted leader if it matches.
bool is_leader_blacklisted = false;
for (const auto& hp : leader_blacklist_.hosts()) {
Expand Down
3 changes: 3 additions & 0 deletions src/yb/master/master_heartbeat.proto
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,9 @@ message TSHeartbeatRequestPB {
reserved 13;

repeated TabletDriveStorageMetadataPB storage_metadata = 14;

// Tablet server has at least one faulty drive.
optional bool faulty_drive = 15;
}

message TSHeartbeatResponsePB {
Expand Down
9 changes: 9 additions & 0 deletions src/yb/master/ts_descriptor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ TSDescriptor::TSDescriptor(std::string perm_id)
: permanent_uuid_(std::move(perm_id)),
last_heartbeat_(MonoTime::Now()),
has_tablet_report_(false),
has_faulty_drive_(false),
recent_replica_creations_(0),
last_replica_creations_decay_(MonoTime::Now()),
num_live_replicas_(0) {
Expand Down Expand Up @@ -171,6 +172,9 @@ void TSDescriptor::UpdateHeartbeat(const TSHeartbeatRequestPB* req) {
physical_time_ = req->ts_physical_time();
hybrid_time_ = HybridTime::FromPB(req->ts_hybrid_time());
heartbeat_rtt_ = MonoDelta::FromMicroseconds(req->rtt_us());
if (req->has_faulty_drive()) {
has_faulty_drive_ = req->faulty_drive();
}
}
}

Expand All @@ -195,6 +199,11 @@ void TSDescriptor::set_has_tablet_report(bool has_report) {
has_tablet_report_ = has_report;
}

bool TSDescriptor::has_faulty_drive() const {
SharedLock<decltype(lock_)> l(lock_);
return has_faulty_drive_;
}

bool TSDescriptor::registered_through_heartbeat() const {
SharedLock<decltype(lock_)> l(lock_);
return registered_through_heartbeat_;
Expand Down
5 changes: 5 additions & 0 deletions src/yb/master/ts_descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,8 @@ class TSDescriptor {
bool has_tablet_report() const;
void set_has_tablet_report(bool has_report);

bool has_faulty_drive() const;

bool registered_through_heartbeat() const;

// Returns TSRegistrationPB for this TSDescriptor.
Expand Down Expand Up @@ -369,6 +371,9 @@ class TSDescriptor {
// Set to true once this instance has reported all of its tablets.
bool has_tablet_report_;

// Tablet server has at least one faulty drive.
bool has_faulty_drive_;

// The number of times this tablet server has recently been selected to create a
// tablet replica. This value decays back to 0 over time.
double recent_replica_creations_;
Expand Down
3 changes: 3 additions & 0 deletions src/yb/tserver/heartbeater.cc
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,9 @@ Status Heartbeater::Thread::TryHeartbeat() {
req.set_config_index(server_->GetCurrentMasterIndex());
req.set_cluster_config_version(server_->cluster_config_version());
req.set_rtt_us(heartbeat_rtt_.ToMicroseconds());
if (server_->has_faulty_drive()) {
req.set_faulty_drive(true);
}

// Include the hybrid time of this tablet server in the heartbeat.
auto* hybrid_clock = dynamic_cast<server::HybridClock*>(server_->Clock());
Expand Down
2 changes: 2 additions & 0 deletions src/yb/tserver/tablet_server.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ class TabletServer : public DbServerBase, public TabletServerIf {

const std::string& permanent_uuid() const override { return fs_manager_->uuid(); }

bool has_faulty_drive() const { return fs_manager_->has_faulty_drive(); }

// Returns the proxy to call this tablet server locally.
const std::shared_ptr<TabletServerServiceProxy>& proxy() const { return proxy_; }

Expand Down
4 changes: 2 additions & 2 deletions src/yb/util/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -307,8 +307,8 @@ add_dependencies(yb_util gen_version_info)

ADD_YB_TEST_LIBRARY(
yb_test_util
SRCS logging_test_util.cc memory/memory_usage_test_util.cc test_util.cc test_thread_holder.cc
DEPS gflags glog gmock yb_util)
SRCS logging_test_util.cc memory/memory_usage_test_util.cc test_util.cc test_thread_holder.cc multi_drive_test_env.cc
DEPS gflags glog gmock rocksdb yb_util)

#######################################
# yb_test_main
Expand Down

0 comments on commit 2549499

Please sign in to comment.