
fix task in recovered balance plan is still marked as FAILED #528

Merged

merged 3 commits on Jul 21, 2021
4 changes: 4 additions & 0 deletions src/meta/processors/admin/AdminClient.cpp
@@ -36,6 +36,10 @@ folly::Future<Status> AdminClient::transLeader(GraphSpaceID spaceId,
     if (it == peers.end()) {
         return Status::PartNotFound();
     }
+    if (peers.size() == 1 && peers.front() == leader) {
Contributor:

make sure the leader is being selected?

Contributor (Author):

No, transfer leader will fail in some cases when the replica count is 1. See #458.

+        // if there is only one replica, skip transfer leader phase
+        return Status::OK();
+    }
     auto target = dst;
     if (dst == kRandomPeer) {
         for (auto& p : peers) {
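Aside: the new guard, extracted as a standalone sketch. `HostAddr` as a `std::string` and the bare `Status` enum are simplified stand-ins for illustration, not nebula's real types:

```cpp
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Simplified stand-ins for nebula's HostAddr and Status, for illustration only.
using HostAddr = std::string;
enum class Status { OK, PartNotFound };

// Sketch of the guard added above: with a single replica, that replica is
// already the leader, so a leader transfer could only fail (see #458);
// report success and let the balance task move on to the next phase.
Status transLeaderSketch(const std::vector<HostAddr>& peers, const HostAddr& leader) {
    auto it = std::find(peers.begin(), peers.end(), leader);
    if (it == peers.end()) {
        return Status::PartNotFound;
    }
    if (peers.size() == 1 && peers.front() == leader) {
        // if there is only one replica, skip the transfer leader phase
        return Status::OK;
    }
    // ... otherwise pick a target peer and send the real transfer request ...
    return Status::OK;
}

int main() {
    std::vector<HostAddr> peers{"192.168.0.1:9779"};
    std::cout << (transLeaderSketch(peers, peers.front()) == Status::OK) << "\n";  // prints 1
}
```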
7 changes: 2 additions & 5 deletions src/meta/processors/admin/Balancer.cpp
@@ -174,7 +174,8 @@ nebula::cpp2::ErrorCode Balancer::recovery() {
             return recRet;
         }
     }
-    return nebula::cpp2::ErrorCode::SUCCEEDED;
+    // save the balance plan again because FAILED tasks would be marked as IN_PROGRESS again
+    return plan_->saveInStore();
 }
 
 nebula::cpp2::ErrorCode
@@ -303,10 +304,6 @@ Balancer::genTasks(GraphSpaceID spaceId,
         }
     }
 
-    if (confirmedHostParts.size() < 2) {
-        LOG(INFO) << "Too few hosts, no need for balance!";
-        return nebula::cpp2::ErrorCode::E_NO_VALID_HOST;
-    }
     // 2. Make all hosts in confirmedHostParts balanced
     if (balanceParts(plan_->id_, spaceId, confirmedHostParts, totalParts, tasks)) {
         return tasks;
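The reasoning behind replacing the plain SUCCEEDED return: `recovery()` flips FAILED tasks back to IN_PROGRESS in memory, so unless the plan is written back, the meta store keeps reporting those tasks as FAILED — the symptom in this PR's title. A minimal sketch of that flow, with `BalancePlan` and `saveInStore()` reduced to hypothetical simplifications of the real classes:

```cpp
#include <iostream>
#include <vector>

enum class TaskStatus { SUCCEEDED, FAILED, IN_PROGRESS };

// Hypothetical, heavily simplified stand-in for the real BalancePlan.
struct BalancePlan {
    std::vector<TaskStatus> tasks_;

    bool saveInStore() {
        // Stand-in for persisting the plan and its task statuses to the meta
        // kvstore; the real method serializes every task.
        std::cout << "plan persisted\n";
        return true;
    }

    bool recovery() {
        for (auto& status : tasks_) {
            if (status == TaskStatus::FAILED) {
                status = TaskStatus::IN_PROGRESS;  // the task will be retried
            }
        }
        // Without this second save, the store would still show the tasks as
        // FAILED even though they are being retried.
        return saveInStore();
    }
};

int main() {
    BalancePlan plan{{TaskStatus::SUCCEEDED, TaskStatus::FAILED}};
    plan.recovery();
    std::cout << (plan.tasks_[1] == TaskStatus::IN_PROGRESS) << "\n";  // prints 1
}
```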
4 changes: 2 additions & 2 deletions src/meta/processors/partsMan/ListHostsProcessor.cpp
@@ -200,7 +200,7 @@ nebula::cpp2::ErrorCode ListHostsProcessor::fillLeaders() {
     }
     auto it = std::find(activeHosts.begin(), activeHosts.end(), host);
     if (it == activeHosts.end()) {
-        LOG(INFO) << "skip inactive host: " << host;
+        VLOG(1) << "skip inactive host: " << host;
         continue;  // skip inactive host
     }

@@ -209,7 +209,7 @@ nebula::cpp2::ErrorCode ListHostsProcessor::fillLeaders() {
     });
 
     if (hostIt == hostItems_.end()) {
-        LOG(INFO) << "skip inactive host";
+        VLOG(1) << "skip inactive host";
         continue;
     }

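On the LOG(INFO) → VLOG(1) switch: with glog, LOG(INFO) always emits, whereas VLOG(1) is suppressed unless the process runs with verbosity 1 or higher (the --v flag), so a temporarily inactive host no longer floods the INFO log on every ListHosts call. A small standalone demonstration of the glog behavior:

```cpp
#include <glog/logging.h>

int main(int argc, char* argv[]) {
    FLAGS_logtostderr = true;  // print to stderr instead of log files
    google::InitGoogleLogging(argv[0]);

    LOG(INFO) << "always emitted";   // INFO severity, unconditional
    VLOG(1) << "suppressed";         // hidden at the default verbosity (--v=0)

    FLAGS_v = 1;                     // equivalent to running with --v=1
    VLOG(1) << "skip inactive host"; // now emitted
    return 0;
}
```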
2 changes: 1 addition & 1 deletion src/storage/StorageFlags.cpp
@@ -15,7 +15,7 @@ DEFINE_int32(waiting_catch_up_retry_times, 30, "retry times when waiting for cat
 DEFINE_int32(waiting_catch_up_interval_in_secs, 30,
              "interval between two requests for catching up state");
 
-DEFINE_int32(waiting_new_leader_retry_times, 30, "retry times when waiting for catching up data");
+DEFINE_int32(waiting_new_leader_retry_times, 5, "retry times when waiting for new leader");
 
 DEFINE_int32(waiting_new_leader_interval_in_secs, 5,
              "interval between two requests for catching up state");
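The consumer of this flag is not part of the diff; as a rough sketch of how such a retry/interval flag pair is typically used (`waitForNewLeader` and `leaderElected` are hypothetical stand-ins, and the FLAGS_ variables below are plain ints rather than real gflags definitions), the change bounds the worst-case wait at 5 * 5 = 25 seconds instead of the 150 seconds implied by the old retry count of 30:

```cpp
#include <chrono>
#include <iostream>
#include <thread>

// Plain-int stand-ins for the gflags above, using the values from this change.
static int FLAGS_waiting_new_leader_retry_times = 5;
static int FLAGS_waiting_new_leader_interval_in_secs = 5;

// Hypothetical probe; real code would ask the raft part for its current leader.
bool leaderElected() {
    static int probesLeft = 1;
    return --probesLeft <= 0;
}

// Poll for a new leader, giving up after the configured number of retries.
bool waitForNewLeader() {
    for (int i = 0; i < FLAGS_waiting_new_leader_retry_times; ++i) {
        if (leaderElected()) {
            return true;
        }
        std::this_thread::sleep_for(
            std::chrono::seconds(FLAGS_waiting_new_leader_interval_in_secs));
    }
    return false;
}

int main() {
    std::cout << (waitForNewLeader() ? "leader found" : "gave up") << "\n";
}
```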