From c0a424e7ecdd5edaccb0031bc867f40f051332b0 Mon Sep 17 00:00:00 2001 From: Lei Zhao Date: Tue, 7 Jan 2020 18:21:10 +0800 Subject: [PATCH] deadlock: more solid role change observer (#6415) (#6431) Signed-off-by: youjiali1995 --- src/import/kv_service.rs | 2 +- src/import/metrics.rs | 2 +- src/import/mod.rs | 2 +- src/import/sst_importer.rs | 2 +- src/import/sst_service.rs | 2 +- src/server/lock_manager/deadlock.rs | 253 ++++++++++++++++++++-- src/server/lock_manager/mod.rs | 57 ++--- tests/integrations/server/lock_manager.rs | 194 +++++++++++++---- 8 files changed, 406 insertions(+), 108 deletions(-) diff --git a/src/import/kv_service.rs b/src/import/kv_service.rs index 45049e674bc9..e047e1ae18c6 100644 --- a/src/import/kv_service.rs +++ b/src/import/kv_service.rs @@ -17,7 +17,7 @@ use tikv_util::time::Instant; use super::client::*; use super::metrics::*; use super::service::*; -use super::{Config, Error, KVImporter, error_inc}; +use super::{error_inc, Config, Error, KVImporter}; #[derive(Clone)] pub struct ImportKVService { diff --git a/src/import/metrics.rs b/src/import/metrics.rs index c054848f86bf..0855f3a59abe 100644 --- a/src/import/metrics.rs +++ b/src/import/metrics.rs @@ -88,7 +88,7 @@ lazy_static! { "Counter of wait store available", &["store_id"] ) - .unwrap(); + .unwrap(); pub static ref IMPORTER_DOWNLOAD_DURATION: HistogramVec = register_histogram_vec!( "tikv_import_download_duration", "Bucketed histogram of importer download duration", diff --git a/src/import/mod.rs b/src/import/mod.rs index f0f3e0968378..a7fce65903e0 100644 --- a/src/import/mod.rs +++ b/src/import/mod.rs @@ -39,7 +39,7 @@ mod sst_service; pub mod test_helpers; pub use self::config::Config; -pub use self::errors::{Error, Result, error_inc}; +pub use self::errors::{error_inc, Error, Result}; pub use self::kv_importer::KVImporter; pub use self::kv_server::ImportKVServer; pub use self::kv_service::ImportKVService; diff --git a/src/import/sst_importer.rs b/src/import/sst_importer.rs index 6d618c81ec95..38e2bd6e87f6 100644 --- a/src/import/sst_importer.rs +++ b/src/import/sst_importer.rs @@ -21,8 +21,8 @@ use engine::rocks::{IngestExternalFileOptions, SeekKey, SstReader, SstWriter, DB use engine::CF_WRITE; use external_storage::{create_storage, url_of_backend}; -use super::{Error, Result}; use super::metrics::*; +use super::{Error, Result}; use crate::raftstore::store::keys; /// SSTImporter manages SST files that are waiting for ingesting. diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 020a7c61dd82..6097e3633385 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -21,7 +21,7 @@ use tikv_util::time::Instant; use super::import_mode::*; use super::metrics::*; use super::service::*; -use super::{Config, Error, SSTImporter, error_inc}; +use super::{error_inc, Config, Error, SSTImporter}; /// ImportSSTService provides tikv-server with the ability to ingest SST files. 
/// diff --git a/src/server/lock_manager/deadlock.rs b/src/server/lock_manager/deadlock.rs index 24c0b8c92e6c..9e999387b06f 100644 --- a/src/server/lock_manager/deadlock.rs +++ b/src/server/lock_manager/deadlock.rs @@ -6,7 +6,10 @@ use super::metrics::*; use super::waiter_manager::Scheduler as WaiterMgrScheduler; use super::{Error, Result}; use crate::pd::{PdClient, INVALID_ID}; -use crate::raftstore::coprocessor::{Coprocessor, ObserverContext, RoleObserver}; +use crate::raftstore::coprocessor::{ + Coprocessor, CoprocessorHost, ObserverContext, RegionChangeEvent, RegionChangeObserver, + RoleObserver, +}; use crate::server::resolve::StoreAddrResolver; use crate::storage::lock_manager::Lock; use futures::{Future, Sink, Stream}; @@ -22,7 +25,7 @@ use raft::StateRole; use std::cell::RefCell; use std::fmt::{self, Display, Formatter}; use std::rc::Rc; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; use tikv_util::collections::{HashMap, HashSet}; use tikv_util::future::paired_future_callback; use tikv_util::security::SecurityManager; @@ -215,16 +218,6 @@ impl DetectTable { } } -/// The leader of the region containing the LEADER_KEY is the leader of deadlock detector. -const LEADER_KEY: &[u8] = b""; - -/// Returns true if the region containing the LEADER_KEY. -fn is_leader_region(region: &'_ Region) -> bool { - !region.get_peers().is_empty() - && region.get_start_key() <= LEADER_KEY - && (region.get_end_key().is_empty() || LEADER_KEY < region.get_end_key()) -} - /// The role of the detector. #[derive(Debug, PartialEq, Clone, Copy)] pub enum Role { @@ -273,6 +266,9 @@ pub enum Task { /// /// It's the only way to change the node from leader to follower, and vice versa. ChangeRole(Role), + /// Task only used for test + #[cfg(test)] + GetRole(Box), } impl Display for Task { @@ -285,6 +281,8 @@ impl Display for Task { ), Task::DetectRpc { .. } => write!(f, "Detect Rpc"), Task::ChangeRole(role) => write!(f, "ChangeRole {{ role: {:?} }}", role), + #[cfg(test)] + Task::GetRole(_) => write!(f, "Get role of the deadlock detector"), } } } @@ -334,20 +332,99 @@ impl Scheduler { fn change_role(&self, role: Role) { self.notify_scheduler(Task::ChangeRole(role)); } + + #[cfg(test)] + pub fn get_role(&self, f: Box) { + self.notify_scheduler(Task::GetRole(f)); + } } -impl Coprocessor for Scheduler {} +/// The leader region is the region containing the LEADER_KEY and the leader of the +/// leader region is also the leader of the deadlock detector. +const LEADER_KEY: &[u8] = b""; + +/// `RoleChangeNotifier` observes region or role change events of raftstore. If the +/// region is the leader region and the role of this node is changed, a `ChangeRole` +/// task will be scheduled to the deadlock detector. It's the only way to change the +/// node from the leader of deadlock detector to follower, and vice versa. +#[derive(Clone)] +pub(crate) struct RoleChangeNotifier { + /// The id of the valid leader region. + // raftstore.coprocessor needs it to be Sync + Send. + leader_region_id: Arc>, + scheduler: Scheduler, +} -/// Implements observer traits for `Scheduler`. -/// If the role of the node in the leader region changes, notifys the deadlock detector. -/// -/// If the leader region is merged or splited in the node, the role of the node won't change. -/// If the leader region is removed and the node is the leader, it will change to follower first. -/// So there is no need to observe region change events. 
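// A minimal self-contained sketch of the test-only `Task::GetRole(Box<dyn FnOnce(Role)>)` hook
// introduced above: the role query travels through the same task queue as `ChangeRole`, so the
// reply always reflects every event the detector handled before it. Everything below is a
// simplified stand-in (std::sync::mpsc and a plain thread instead of FutureWorker and
// paired_future_callback), not the patch's actual types.
use std::sync::mpsc;
use std::thread;

#[derive(Debug, PartialEq, Clone, Copy)]
enum Role {
    Leader,
    Follower,
}

enum Task {
    ChangeRole(Role),
    GetRole(mpsc::Sender<Role>),
}

fn main() {
    let (tx, rx) = mpsc::channel::<Task>();
    let worker = thread::spawn(move || {
        // The worker owns its role; tasks are handled strictly in order.
        let mut role = Role::Follower;
        for task in rx {
            match task {
                Task::ChangeRole(r) => role = r,
                Task::GetRole(cb) => cb.send(role).unwrap(),
            }
        }
    });

    tx.send(Task::ChangeRole(Role::Leader)).unwrap();
    // The query is queued behind the role change, so it observes the updated role.
    let (reply_tx, reply_rx) = mpsc::channel();
    tx.send(Task::GetRole(reply_tx)).unwrap();
    assert_eq!(reply_rx.recv().unwrap(), Role::Leader);

    drop(tx);
    worker.join().unwrap();
}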
-impl RoleObserver for Scheduler { +impl RoleChangeNotifier { + fn is_leader_region(region: &Region) -> bool { + // The key range of a new created region is empty which misleads the leader + // of the deadlock detector stepping down. + // + // If the peers of a region is not empty, the region info is complete. + !region.get_peers().is_empty() + && region.get_start_key() <= LEADER_KEY + && (region.get_end_key().is_empty() || LEADER_KEY < region.get_end_key()) + } + + pub(crate) fn new(scheduler: Scheduler) -> Self { + Self { + leader_region_id: Arc::new(Mutex::new(INVALID_ID)), + scheduler, + } + } + + pub(crate) fn register(self, host: &mut CoprocessorHost) { + host.registry + .register_role_observer(1, Box::new(self.clone())); + host.registry + .register_region_change_observer(1, Box::new(self)); + } +} + +impl Coprocessor for RoleChangeNotifier {} + +impl RoleObserver for RoleChangeNotifier { fn on_role_change(&self, ctx: &mut ObserverContext<'_>, role: StateRole) { - if is_leader_region(ctx.region()) { - self.change_role(role.into()); + let region = ctx.region(); + // A region is created first, so the leader region id must be valid. + if Self::is_leader_region(region) + && *self.leader_region_id.lock().unwrap() == region.get_id() + { + self.scheduler.change_role(role.into()); + } + } +} + +impl RegionChangeObserver for RoleChangeNotifier { + fn on_region_changed( + &self, + ctx: &mut ObserverContext<'_>, + event: RegionChangeEvent, + role: StateRole, + ) { + let region = ctx.region(); + if Self::is_leader_region(region) { + match event { + RegionChangeEvent::Create | RegionChangeEvent::Update => { + *self.leader_region_id.lock().unwrap() = region.get_id(); + self.scheduler.change_role(role.into()); + } + RegionChangeEvent::Destroy => { + // When one region is merged to target region, it will be destroyed. + // If the leader region is merged to the target region and the node + // is also the leader of the target region, the RoleChangeNotifier will + // receive one RegionChangeEvent::Update of the target region and one + // RegionChangeEvent::Destroy of the leader region. To prevent the + // destroy event misleading the leader stepping down, it saves the + // valid leader region id and only when the id equals to the destroyed + // region id, it sends a ChangeRole(Follower) task to the deadlock detector. 
+ let mut leader_region_id = self.leader_region_id.lock().unwrap(); + if *leader_region_id == region.get_id() { + *leader_region_id = INVALID_ID; + self.scheduler.change_role(Role::Follower); + } + } + } } } } @@ -730,6 +807,8 @@ where self.handle_detect_rpc(handle, stream, sink); } Task::ChangeRole(role) => self.handle_change_role(role), + #[cfg(test)] + Task::GetRole(f) => f(self.inner.borrow().role), } } } @@ -793,8 +872,11 @@ impl deadlock_grpc::Deadlock for Service { } #[cfg(test)] -mod tests { +pub mod tests { use super::*; + use crate::server::resolve::Callback; + use tikv_util::security::SecurityConfig; + use tikv_util::worker::FutureWorker; #[test] fn test_detect_table() { @@ -936,4 +1018,129 @@ mod tests { assert!(detect_table.detect(3, 1, 3).is_none()); assert_eq!(detect_table.wait_for_map.len(), 1); } + + pub(crate) struct MockPdClient; + + impl PdClient for MockPdClient {} + + #[derive(Clone)] + pub(crate) struct MockResolver; + + impl StoreAddrResolver for MockResolver { + fn resolve(&self, _store_id: u64, _cb: Callback) -> Result<()> { + Err(Error::Other(box_err!("unimplemented"))) + } + } + + fn start_deadlock_detector(host: &mut CoprocessorHost) -> (FutureWorker, Scheduler) { + let waiter_mgr_worker = FutureWorker::new("dummy-waiter-mgr"); + let waiter_mgr_scheduler = WaiterMgrScheduler::new(waiter_mgr_worker.scheduler()); + let mut detector_worker = FutureWorker::new("test-deadlock-detector"); + let detector_runner = Detector::new( + 1, + Arc::new(MockPdClient {}), + MockResolver {}, + Arc::new(SecurityManager::new(&SecurityConfig::default()).unwrap()), + waiter_mgr_scheduler, + &Config::default(), + ); + let detector_scheduler = Scheduler::new(detector_worker.scheduler()); + let role_change_notifier = RoleChangeNotifier::new(detector_scheduler.clone()); + role_change_notifier.register(host); + detector_worker.start(detector_runner).unwrap(); + (detector_worker, detector_scheduler) + } + + // Region with non-empty peers is valid. 
+ fn new_region(id: u64, start_key: &[u8], end_key: &[u8], valid: bool) -> Region { + let mut region = Region::default(); + region.set_id(id); + region.set_start_key(start_key.to_vec()); + region.set_end_key(end_key.to_vec()); + if valid { + region.set_peers(vec![kvproto::metapb::Peer::default()].into()); + } + region + } + + #[test] + fn test_role_change_notifier() { + let mut host = CoprocessorHost::default(); + let (mut worker, scheduler) = start_deadlock_detector(&mut host); + + let mut region = new_region(1, b"", b"", true); + let invalid = new_region(2, b"", b"", false); + let other = new_region(3, b"0", b"", true); + let follower_roles = [ + StateRole::Follower, + StateRole::PreCandidate, + StateRole::Candidate, + ]; + let events = [ + RegionChangeEvent::Create, + RegionChangeEvent::Update, + RegionChangeEvent::Destroy, + ]; + let check_role = |role| { + let (tx, f) = paired_future_callback(); + scheduler.get_role(tx); + assert_eq!(f.wait().unwrap(), role); + }; + + // Region changed + for &event in &events[..2] { + for &follower_role in &follower_roles { + host.on_region_changed(®ion, event, follower_role); + check_role(Role::Follower); + host.on_region_changed(&invalid, event, StateRole::Leader); + check_role(Role::Follower); + host.on_region_changed(&other, event, StateRole::Leader); + check_role(Role::Follower); + host.on_region_changed(®ion, event, StateRole::Leader); + check_role(Role::Leader); + host.on_region_changed(&invalid, event, follower_role); + check_role(Role::Leader); + host.on_region_changed(&other, event, follower_role); + check_role(Role::Leader); + host.on_region_changed(®ion, event, follower_role); + check_role(Role::Follower); + } + } + host.on_region_changed(®ion, RegionChangeEvent::Create, StateRole::Leader); + host.on_region_changed(&invalid, RegionChangeEvent::Destroy, StateRole::Leader); + host.on_region_changed(&other, RegionChangeEvent::Destroy, StateRole::Leader); + check_role(Role::Leader); + host.on_region_changed(®ion, RegionChangeEvent::Destroy, StateRole::Leader); + check_role(Role::Follower); + // Leader region id is changed. + region.set_id(2); + host.on_region_changed(®ion, RegionChangeEvent::Update, StateRole::Leader); + // Destroy the previous leader region. 
+ region.set_id(1); + host.on_region_changed(®ion, RegionChangeEvent::Destroy, StateRole::Leader); + check_role(Role::Leader); + + // Role changed + let region = new_region(1, b"", b"", true); + host.on_region_changed(®ion, RegionChangeEvent::Create, StateRole::Follower); + check_role(Role::Follower); + for &follower_role in &follower_roles { + host.on_role_change(®ion, follower_role); + check_role(Role::Follower); + host.on_role_change(&invalid, StateRole::Leader); + check_role(Role::Follower); + host.on_role_change(&other, StateRole::Leader); + check_role(Role::Follower); + host.on_role_change(®ion, StateRole::Leader); + check_role(Role::Leader); + host.on_role_change(&invalid, follower_role); + check_role(Role::Leader); + host.on_role_change(&other, follower_role); + check_role(Role::Leader); + host.on_role_change(®ion, follower_role); + check_role(Role::Follower); + } + + worker.stop(); + } } diff --git a/src/server/lock_manager/mod.rs b/src/server/lock_manager/mod.rs index b1e8c792aea5..f1f72f211f81 100644 --- a/src/server/lock_manager/mod.rs +++ b/src/server/lock_manager/mod.rs @@ -9,21 +9,22 @@ pub mod waiter_manager; pub use self::config::Config; pub use self::deadlock::Service as DeadlockService; -use self::deadlock::{Detector, Scheduler as DetectorScheduler}; +use self::deadlock::{Detector, RoleChangeNotifier, Scheduler as DetectorScheduler}; use self::waiter_manager::{Scheduler as WaiterMgrScheduler, WaiterManager}; +use std::collections::hash_map::DefaultHasher; +use std::hash::{Hash, Hasher}; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::{Arc, Mutex}; +use std::thread::JoinHandle; + use crate::pd::PdClient; use crate::raftstore::coprocessor::CoprocessorHost; use crate::server::resolve::StoreAddrResolver; use crate::server::{Error, Result}; use crate::storage::txn::{execute_callback, ProcessResult}; use crate::storage::{lock_manager::Lock, LockMgr, StorageCb}; -use std::collections::hash_map::DefaultHasher; -use std::hash::{Hash, Hasher}; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::Arc; -use std::sync::Mutex; -use std::thread::JoinHandle; + use tikv_util::collections::HashSet; use tikv_util::security::SecurityManager; use tikv_util::worker::FutureWorker; @@ -175,11 +176,11 @@ impl LockManager { } } - /// Creates a `Scheduler` of the deadlock detector worker and registers it to + /// Creates a `RoleChangeNotifier` of the deadlock detector worker and registers it to /// the `CoprocessorHost` to observe the role change events of the leader region. pub fn register_detector_role_change_observer(&self, host: &mut CoprocessorHost) { - host.registry - .register_role_observer(1, Box::new(self.detector_scheduler.clone())); + let role_change_notifier = RoleChangeNotifier::new(self.detector_scheduler.clone()); + role_change_notifier.register(host); } /// Creates a `DeadlockService` to handle deadlock detect requests from other nodes. 
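// A minimal sketch of the `RoleChangeNotifier::is_leader_region` check above: the leader
// region is simply the region whose key range contains LEADER_KEY (the empty key) and whose
// peer list is non-empty, so an uninitialized region snapshot cannot be mistaken for it.
// `SketchRegion` is a simplified stand-in for kvproto::metapb::Region.
const LEADER_KEY: &[u8] = b"";

struct SketchRegion {
    peer_count: usize,
    start_key: Vec<u8>,
    end_key: Vec<u8>,
}

fn is_leader_region(r: &SketchRegion) -> bool {
    // A peer-less region has an incomplete (empty) key range and must be ignored.
    r.peer_count > 0
        && r.start_key.as_slice() <= LEADER_KEY
        && (r.end_key.is_empty() || LEADER_KEY < r.end_key.as_slice())
}

fn main() {
    // The region covering the empty key hosts the deadlock detector leader.
    assert!(is_leader_region(&SketchRegion {
        peer_count: 3,
        start_key: b"".to_vec(),
        end_key: b"k1".to_vec(),
    }));
    // A region starting after the empty key can never be the leader region.
    assert!(!is_leader_region(&SketchRegion {
        peer_count: 3,
        start_key: b"k1".to_vec(),
        end_key: Vec::new(),
    }));
    // A freshly created region with no peers yet is ignored.
    assert!(!is_leader_region(&SketchRegion {
        peer_count: 0,
        start_key: Vec::new(),
        end_key: Vec::new(),
    }));
}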
@@ -258,12 +259,11 @@ impl LockMgr for LockManager { #[cfg(test)] mod tests { + use self::deadlock::tests::*; use self::metrics::*; use self::waiter_manager::tests::*; use super::*; - use crate::pd::{RegionInfo, Result as PdResult}; - use crate::raftstore::coprocessor::Config as CopConfig; - use crate::server::resolve::Callback; + use crate::raftstore::coprocessor::RegionChangeEvent; use tikv_util::security::SecurityConfig; use std::sync::mpsc; @@ -274,29 +274,8 @@ mod tests { use kvproto::metapb::{Peer, Region}; use raft::StateRole; - struct MockPdClient; - - impl PdClient for MockPdClient { - fn get_region_info(&self, _key: &[u8]) -> PdResult { - unimplemented!(); - } - } - - #[derive(Clone)] - struct MockResolver; - - impl StoreAddrResolver for MockResolver { - fn resolve(&self, _store_id: u64, _cb: Callback) -> Result<()> { - Err(Error::Other(box_err!("unimplemented"))) - } - } - fn start_lock_manager() -> LockManager { - use protobuf::RepeatedField; - - let (tx, _rx) = mpsc::sync_channel(100); - let mut coprocessor_host = CoprocessorHost::new(CopConfig::default(), tx); - + let mut coprocessor_host = CoprocessorHost::default(); let mut lock_mgr = LockManager::new(); lock_mgr.register_detector_role_change_observer(&mut coprocessor_host); lock_mgr @@ -313,8 +292,12 @@ mod tests { let mut leader_region = Region::new(); leader_region.set_start_key(b"".to_vec()); leader_region.set_end_key(b"foo".to_vec()); - leader_region.set_peers(RepeatedField::from_vec(vec![Peer::new()])); - coprocessor_host.on_role_change(&leader_region, StateRole::Leader); + leader_region.set_peers(vec![Peer::default()].into()); + coprocessor_host.on_region_changed( + &leader_region, + RegionChangeEvent::Create, + StateRole::Leader, + ); thread::sleep(Duration::from_millis(100)); lock_mgr diff --git a/tests/integrations/server/lock_manager.rs b/tests/integrations/server/lock_manager.rs index c77cd81b6ad7..ef493acf0da7 100644 --- a/tests/integrations/server/lock_manager.rs +++ b/tests/integrations/server/lock_manager.rs @@ -39,26 +39,49 @@ fn must_acquire_pessimistic_lock(client: &TikvClient, ctx: Context, key: Vec assert!(resp.errors.is_empty(), "{:?}", resp.get_errors()); } +fn delete_pessimistic_lock( + client: &TikvClient, + ctx: Context, + key: Vec, + ts: u64, +) -> PessimisticRollbackResponse { + let mut req = PessimisticRollbackRequest::default(); + req.set_context(ctx); + req.set_keys(vec![key].into_iter().collect()); + req.start_version = ts; + req.for_update_ts = ts; + client.kv_pessimistic_rollback(&req).unwrap() +} + +fn must_delete_pessimistic_lock(client: &TikvClient, ctx: Context, key: Vec, ts: u64) { + let resp = delete_pessimistic_lock(client, ctx, key, ts); + assert!(!resp.has_region_error(), "{:?}", resp.get_region_error()); + assert!(resp.errors.is_empty(), "{:?}", resp.get_errors()); +} + fn must_deadlock(client: &TikvClient, ctx: Context, key1: &[u8], ts: u64) { let key1 = key1.to_vec(); let mut key2 = key1.clone(); - key2.push(1); + key2.push(0); must_acquire_pessimistic_lock(client, ctx.clone(), key1.clone(), ts); must_acquire_pessimistic_lock(client, ctx.clone(), key2.clone(), ts + 1); - let client1 = client.clone(); - let ctx1 = ctx.clone(); + let (client_clone, ctx_clone, key1_clone) = (client.clone(), ctx.clone(), key1.clone()); let (tx, rx) = mpsc::sync_channel(1); thread::spawn(move || { - let _ = acquire_pessimistic_lock(&client1, ctx1, key1, ts + 1); + let _ = acquire_pessimistic_lock(&client_clone, ctx_clone, key1_clone, ts + 1); tx.send(1).unwrap(); }); // Sleep to make sure txn(ts+1) 
is waiting for txn(ts) thread::sleep(Duration::from_millis(500)); - let resp = acquire_pessimistic_lock(client, ctx, key2, ts); + let resp = acquire_pessimistic_lock(client, ctx.clone(), key2.clone(), ts); assert_eq!(resp.errors.len(), 1); - assert!(resp.errors[0].has_deadlock()); + assert!(resp.errors[0].has_deadlock(), "{:?}", resp.get_errors()); rx.recv().unwrap(); + + // Clean up + must_delete_pessimistic_lock(client, ctx.clone(), key1, ts); + must_delete_pessimistic_lock(client, ctx, key2, ts); } fn build_leader_client(cluster: &mut Cluster, key: &[u8]) -> (TikvClient, Context) { @@ -79,6 +102,12 @@ fn build_leader_client(cluster: &mut Cluster, key: &[u8]) -> (Tik (client, ctx) } +/// Creates a deadlock on the store containing key. +fn must_detect_deadlock(cluster: &mut Cluster, key: &[u8], ts: u64) { + let (client, ctx) = build_leader_client(cluster, key); + must_deadlock(&client, ctx, key, ts); +} + fn deadlock_detector_leader_must_be(cluster: &mut Cluster, store_id: u64) { let leader_region = cluster.get_region(b""); assert_eq!( @@ -104,6 +133,41 @@ fn must_transfer_leader(cluster: &mut Cluster, region_key: &[u8], cluster.must_get(region_key); } +/// Transfers the region containing region_key from source store to target peer. +/// +/// REQUIRE: The source store must be the leader the region and the target store must not have +/// this region. +fn must_transfer_region( + cluster: &mut Cluster, + region_key: &[u8], + source_store_id: u64, + target_store_id: u64, + target_peer_id: u64, +) { + let target_peer = new_peer(target_store_id, target_peer_id); + let region = cluster.get_region(region_key); + cluster + .pd_client + .must_add_peer(region.get_id(), target_peer.clone()); + must_transfer_leader(cluster, region_key, target_store_id); + let source_peer = find_peer_of_store(®ion, source_store_id); + cluster + .pd_client + .must_remove_peer(region.get_id(), source_peer); +} + +fn must_merge_region( + cluster: &mut Cluster, + source_region_key: &[u8], + target_region_key: &[u8], +) { + let (source_id, target_id) = ( + cluster.get_region(source_region_key).get_id(), + cluster.get_region(target_region_key).get_id(), + ); + cluster.pd_client.must_merge(source_id, target_id); +} + fn find_peer_of_store(region: &Region, store_id: u64) -> Peer { region .get_peers() @@ -113,56 +177,100 @@ fn find_peer_of_store(region: &Region, store_id: u64) -> Peer { .clone() } -#[test] -fn test_detect_deadlock_when_shuffle_region() { - let mut cluster = new_server_cluster(0, 4); +/// Creates a cluster with only one region and store(1) is the leader of the region. +fn new_cluster_for_deadlock_test(count: usize) -> Cluster { + let mut cluster = new_server_cluster(0, count); let pd_client = Arc::clone(&cluster.pd_client); // Disable default max peer count check. pd_client.disable_default_operator(); - // Region 1 has 3 peers. And peer(1, 1) is the leader of region 1. let region_id = cluster.run_conf_change(); pd_client.must_add_peer(region_id, new_peer(2, 2)); pd_client.must_add_peer(region_id, new_peer(3, 3)); - - // The leader of deadlock detector is the leader of region 1. deadlock_detector_leader_must_be(&mut cluster, 1); - let (client, ctx) = build_leader_client(&mut cluster, b"k1"); - must_deadlock(&client, ctx, b"k1", 10); + must_detect_deadlock(&mut cluster, b"k", 10); + cluster +} - // The leader of region 1 has transfered. The leader of deadlock detector should also transfer. 
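// A minimal sketch of why `must_deadlock` above expects a deadlock error: every blocked
// pessimistic lock adds a wait-for edge from the waiting txn to the lock holder, and the
// detector reports a deadlock as soon as a new edge would close a cycle. The hand-rolled
// graph below is a simplified stand-in for the detector's DetectTable.
use std::collections::HashMap;

#[derive(Default)]
struct WaitForGraph {
    // waiter start_ts -> the lock holders it is waiting for
    edges: HashMap<u64, Vec<u64>>,
}

impl WaitForGraph {
    /// Returns true if adding `waiter -> holder` closes a cycle, i.e. a deadlock.
    fn detect(&mut self, waiter: u64, holder: u64) -> bool {
        // Walk the existing edges from `holder`; reaching `waiter` means the new edge
        // would complete a cycle, so it is reported instead of being recorded.
        let mut stack = vec![holder];
        while let Some(ts) = stack.pop() {
            if ts == waiter {
                return true;
            }
            if let Some(next) = self.edges.get(&ts) {
                stack.extend(next.iter().copied());
            }
        }
        self.edges.entry(waiter).or_default().push(holder);
        false
    }
}

fn main() {
    let mut graph = WaitForGraph::default();
    // txn(ts + 1) blocks on key1, which txn(ts) holds: an edge is recorded, no deadlock yet.
    assert!(!graph.detect(11, 10));
    // txn(ts) then blocks on key2, which txn(ts + 1) holds: the cycle is detected.
    assert!(graph.detect(10, 11));
}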
+#[test] +fn test_detect_deadlock_when_transfer_leader() { + let mut cluster = new_cluster_for_deadlock_test(3); + // Transfer the leader of region 1 to store(2). + // The leader of deadlock detector should also be transfered to store(2). must_transfer_leader(&mut cluster, b"", 2); deadlock_detector_leader_must_be(&mut cluster, 2); - let (client, ctx) = build_leader_client(&mut cluster, b"k2"); - must_deadlock(&client, ctx, b"k2", 20); + must_detect_deadlock(&mut cluster, b"k", 10); +} - // Split region and transfer the leader of new region to store(3). +#[test] +fn test_detect_deadlock_when_split_region() { + let mut cluster = new_cluster_for_deadlock_test(3); let region = cluster.get_region(b""); - cluster.must_split(®ion, b"k10"); - must_transfer_leader(&mut cluster, b"k10", 3); - // Deadlock occurs on store(3) and the leader of deadlock detector is store(2). - deadlock_detector_leader_must_be(&mut cluster, 2); - let (client, ctx) = build_leader_client(&mut cluster, b"k10"); - must_deadlock(&client, ctx, b"k10", 30); - - // Transfer the new region from store(1, 2, 3) to store(2, 3, 4). - let new_region = cluster.get_region(b"k10"); - pd_client.must_add_peer(new_region.get_id(), new_peer(4, 4)); - must_transfer_leader(&mut cluster, b"k10", 4); - let peer = find_peer_of_store(&new_region, 1); - pd_client.must_remove_peer(region_id, peer); - - // Transfer the leader of deadlock detector to store(1) and - // deadlock occours on store(4) again. - must_transfer_leader(&mut cluster, b"", 1); + cluster.must_split(®ion, b"k1"); + // After split, the leader is still store(1). deadlock_detector_leader_must_be(&mut cluster, 1); - let (client, ctx) = build_leader_client(&mut cluster, b"k11"); - must_deadlock(&client, ctx, b"k11", 30); - - // Add store(1) back again which will send a role change message with empty region key range to - // the deadlock detector. It misleads the leader of deadlock detector stepping down. - pd_client.must_add_peer(new_region.get_id(), new_peer(1, 5)); + must_detect_deadlock(&mut cluster, b"k", 10); + // Transfer the new region's leader to store(2) and deadlock occours on it. + must_transfer_leader(&mut cluster, b"k1", 2); deadlock_detector_leader_must_be(&mut cluster, 1); - let (client, ctx) = build_leader_client(&mut cluster, b"k3"); - must_deadlock(&client, ctx, b"k3", 10); + must_detect_deadlock(&mut cluster, b"k1", 10); +} + +#[test] +fn test_detect_deadlock_when_transfer_region() { + let mut cluster = new_cluster_for_deadlock_test(4); + // Transfer the leader region to store(4) and the leader of deadlock detector should be + // also transfered. + must_transfer_region(&mut cluster, b"k", 1, 4, 4); + deadlock_detector_leader_must_be(&mut cluster, 4); + must_detect_deadlock(&mut cluster, b"k", 10); + + let region = cluster.get_region(b""); + cluster.must_split(®ion, b"k1"); + // Transfer the new region to store(1). It shouldn't affect deadlock detector. + must_transfer_region(&mut cluster, b"k1", 4, 1, 5); + deadlock_detector_leader_must_be(&mut cluster, 4); + must_detect_deadlock(&mut cluster, b"k", 10); + must_detect_deadlock(&mut cluster, b"k1", 10); + + // Transfer the new region back to store(4) which will send a role change message with empty + // key range. It shouldn't affect deadlock detector. 
+ must_transfer_region(&mut cluster, b"k1", 1, 4, 6); + deadlock_detector_leader_must_be(&mut cluster, 4); + must_detect_deadlock(&mut cluster, b"k", 10); + must_detect_deadlock(&mut cluster, b"k1", 10); +} + +#[test] +fn test_detect_deadlock_when_merge_region() { + let mut cluster = new_cluster_for_deadlock_test(3); + + // Source region will be destroyed. + for as_target in &[false, true] { + let region = cluster.get_region(b""); + cluster.must_split(®ion, b"k1"); + if *as_target { + must_merge_region(&mut cluster, b"k1", b""); + } else { + must_merge_region(&mut cluster, b"", b"k1"); + } + deadlock_detector_leader_must_be(&mut cluster, 1); + must_detect_deadlock(&mut cluster, b"k", 10); + } + + // Leaders of two regions are on different store. + for as_target in &[false, true] { + let region = cluster.get_region(b""); + cluster.must_split(®ion, b"k1"); + must_transfer_leader(&mut cluster, b"k1", 2); + if *as_target { + must_merge_region(&mut cluster, b"k1", b""); + deadlock_detector_leader_must_be(&mut cluster, 1); + } else { + must_merge_region(&mut cluster, b"", b"k1"); + deadlock_detector_leader_must_be(&mut cluster, 2); + } + must_detect_deadlock(&mut cluster, b"k", 10); + must_transfer_leader(&mut cluster, b"", 1); + } }
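// A minimal sketch of the guarded step-down that the merge cases above exercise:
// `RoleChangeNotifier` records the id of the current leader region on Create/Update, and a
// Destroy event may demote the detector only when its region id matches the recorded one, so
// destroying a merged-away source region cannot make the detector on the new leader region
// step down. `LeaderGuard` and this `INVALID_ID` constant are simplified stand-ins.
use std::sync::{Arc, Mutex};

const INVALID_ID: u64 = 0;

#[derive(Clone)]
struct LeaderGuard {
    // Shared between the role observer and the region-change observer.
    leader_region_id: Arc<Mutex<u64>>,
}

impl LeaderGuard {
    fn on_create_or_update(&self, region_id: u64) {
        // Create/Update of the leader region records the currently valid id.
        *self.leader_region_id.lock().unwrap() = region_id;
    }

    /// Returns true if the caller should schedule ChangeRole(Follower).
    fn on_destroy(&self, region_id: u64) -> bool {
        let mut id = self.leader_region_id.lock().unwrap();
        if *id == region_id {
            *id = INVALID_ID;
            true
        } else {
            false
        }
    }
}

fn main() {
    let guard = LeaderGuard {
        leader_region_id: Arc::new(Mutex::new(INVALID_ID)),
    };
    // The merge target (id 2) takes over the leader key range.
    guard.on_create_or_update(2);
    // Destroying the merged-away source region (id 1) must not demote the detector.
    assert!(!guard.on_destroy(1));
    // Destroying the recorded leader region itself does.
    assert!(guard.on_destroy(2));
}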