Skip to content

Commit a751a0b

Browse files
author
Hemant Dangi
committed
Bug#26024253: GR MEMBER STOP MUST STOP ALL ASYNCHRONOUS CHANNELS THAT ARE FEEDING DATA INTO IT
Issue ===== If a member that has asynchronous channels feeding data into it - either in a multi-primary deployment or as the primary in a single-primary deployment - stops, all of its asynchronous channels must be stopped as well, to prevent changes from being made locally (that is, not replicated to the group). Even though super_read_only is set on STOP GROUP_REPLICATION, that does not affect asynchronous channels that are already running, which can still make changes. Solution ======== When group replication stops, either due to an error or due to executing the STOP GROUP_REPLICATION command, all asynchronous channels are stopped.
1 parent 0bfb6a9 commit a751a0b

10 files changed

+421
-15
lines changed

rapid/plugin/group_replication/include/gcs_event_handlers.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,8 @@ class Plugin_gcs_events_handler: public Gcs_communication_event_listener,
6767
Plugin_gcs_events_handler(Applier_module_interface* applier_module,
6868
Recovery_module* recovery_module,
6969
Plugin_gcs_view_modification_notifier* vc_notifier,
70-
Compatibility_module* compatibility_manager);
70+
Compatibility_module* compatibility_manager,
71+
ulong components_stop_timeout);
7172
virtual ~Plugin_gcs_events_handler();
7273

7374
/*
@@ -80,6 +81,14 @@ class Plugin_gcs_events_handler: public Gcs_communication_event_listener,
8081
void on_suspicions(const std::vector<Gcs_member_identifier>& members,
8182
const std::vector<Gcs_member_identifier>& unreachable) const;
8283

84+
/**
85+
Sets the component stop timeout.
86+
87+
@param[in] timeout the timeout
88+
*/
89+
void set_stop_wait_timeout (ulong timeout){
90+
stop_wait_timeout= timeout;
91+
}
8392

8493
private:
8594
/*
@@ -275,6 +284,9 @@ class Plugin_gcs_events_handler: public Gcs_communication_event_listener,
275284
/**The status of this member when it joins*/
276285
st_compatibility_types* joiner_compatibility_status;
277286

287+
/* Component stop timeout on shutdown */
288+
ulong stop_wait_timeout;
289+
278290
#ifndef DBUG_OFF
279291
bool set_number_of_members_on_view_changed_to_10;
280292
#endif

rapid/plugin/group_replication/src/applier.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -680,6 +680,16 @@ void Applier_module::leave_group_on_failure()
680680
bool set_read_mode= false;
681681
Gcs_operations::enum_leave_state state= gcs_module->leave();
682682

683+
int error= channel_stop_all(CHANNEL_APPLIER_THREAD|CHANNEL_RECEIVER_THREAD,
684+
stop_wait_timeout);
685+
if (error)
686+
{
687+
log_message(MY_ERROR_LEVEL,
688+
"Error stopping all replication channels while server was"
689+
" leaving the group. Please check the error log for additional"
690+
" details. Got error: %d", error);
691+
}
692+
683693
std::stringstream ss;
684694
plugin_log_level log_severity= MY_WARNING_LEVEL;
685695
switch (state)

rapid/plugin/group_replication/src/gcs_event_handlers.cc

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,12 @@ Plugin_gcs_events_handler::
2929
Plugin_gcs_events_handler(Applier_module_interface* applier_module,
3030
Recovery_module* recovery_module,
3131
Plugin_gcs_view_modification_notifier* vc_notifier,
32-
Compatibility_module* compatibility_module)
32+
Compatibility_module* compatibility_module,
33+
ulong components_stop_timeout)
3334
: applier_module(applier_module), recovery_module(recovery_module),
3435
view_change_notifier(vc_notifier),
35-
compatibility_manager(compatibility_module)
36+
compatibility_manager(compatibility_module),
37+
stop_wait_timeout(components_stop_timeout)
3638
{
3739
this->temporary_states= new std::set<Group_member_info*,
3840
Group_member_info_pointer_comparator>();
@@ -1530,6 +1532,16 @@ void
15301532
Plugin_gcs_events_handler::leave_group_on_error() const
15311533
{
15321534
Gcs_operations::enum_leave_state state= gcs_module->leave();
1535+
int error= channel_stop_all(CHANNEL_APPLIER_THREAD|CHANNEL_RECEIVER_THREAD,
1536+
stop_wait_timeout);
1537+
if (error)
1538+
{
1539+
log_message(MY_ERROR_LEVEL,
1540+
"Error stopping all replication channels while server was"
1541+
" leaving the group. Please check the error log for additional"
1542+
" details. Got error: %d", error);
1543+
}
1544+
15331545
std::stringstream ss;
15341546
plugin_log_level log_severity= MY_WARNING_LEVEL;
15351547
switch (state)

rapid/plugin/group_replication/src/plugin.cc

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ void initialize_group_partition_handler();
234234
int start_group_communication();
235235
void declare_plugin_running();
236236
int leave_group();
237-
int terminate_plugin_modules();
237+
int terminate_plugin_modules(bool flag_stop_async_channel= false);
238238
int terminate_applier_module();
239239
int terminate_recovery_module();
240240
void terminate_asynchronous_channels_observer();
@@ -813,7 +813,7 @@ int plugin_group_replication_stop()
813813
/* first leave all joined groups (currently one) */
814814
leave_group();
815815

816-
int error= terminate_plugin_modules();
816+
int error= terminate_plugin_modules(true);
817817

818818
group_replication_running= false;
819819
shared_plugin_stop_lock->release_write_lock();
@@ -836,7 +836,7 @@ int plugin_group_replication_stop()
836836
DBUG_RETURN(error);
837837
}
838838

839-
int terminate_plugin_modules()
839+
int terminate_plugin_modules(bool flag_stop_async_channel)
840840
{
841841

842842
if(terminate_recovery_module())
@@ -867,6 +867,23 @@ int terminate_plugin_modules()
867867

868868
terminate_asynchronous_channels_observer();
869869

870+
if (flag_stop_async_channel)
871+
{
872+
int channel_err= channel_stop_all(CHANNEL_APPLIER_THREAD|CHANNEL_RECEIVER_THREAD,
873+
components_stop_timeout_var);
874+
if (channel_err)
875+
{
876+
log_message(MY_ERROR_LEVEL,
877+
"Error stopping all replication channels while server was"
878+
" leaving the group. Please check the error log for "
879+
"additional details. Got error: %d", channel_err);
880+
if (!error)
881+
{
882+
error= GROUP_REPLICATION_CONFIGURATION_ERROR;
883+
}
884+
}
885+
}
886+
870887
delete group_partition_handler;
871888
group_partition_handler= NULL;
872889

@@ -1357,7 +1374,8 @@ int start_group_communication()
13571374
events_handler= new Plugin_gcs_events_handler(applier_module,
13581375
recovery_module,
13591376
view_change_notifier,
1360-
compatibility_mgr);
1377+
compatibility_mgr,
1378+
components_stop_timeout_var);
13611379

13621380
view_change_notifier->start_view_modification();
13631381

@@ -1870,6 +1888,10 @@ static void update_component_timeout(MYSQL_THD thd, SYS_VAR *var,
18701888
{
18711889
recovery_module->set_stop_wait_timeout(in_val);
18721890
}
1891+
if (events_handler != NULL)
1892+
{
1893+
events_handler->set_stop_wait_timeout(in_val);
1894+
}
18731895

18741896
DBUG_VOID_RETURN;
18751897
}

rapid/plugin/group_replication/src/recovery.cc

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
1+
/* Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved.
22
33
This program is free software; you can redistribute it and/or modify
44
it under the terms of the GNU General Public License as published by
@@ -187,6 +187,16 @@ void Recovery_module::leave_group_on_recovery_failure()
187187

188188
Gcs_operations::enum_leave_state state= gcs_module->leave();
189189

190+
int error= channel_stop_all(CHANNEL_APPLIER_THREAD|CHANNEL_RECEIVER_THREAD,
191+
stop_wait_timeout);
192+
if (error)
193+
{
194+
log_message(MY_ERROR_LEVEL,
195+
"Error stopping all replication channels while server was"
196+
" leaving the group. Please check the error log for additional"
197+
" details. Got error: %d", error);
198+
}
199+
190200
std::stringstream ss;
191201
plugin_log_level log_severity= MY_WARNING_LEVEL;
192202
switch (state)
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
include/group_replication.inc
2+
Warnings:
3+
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
4+
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
5+
[connection server1]
6+
7+
############################################################
8+
# 1. Setup Group Replication on server 1.
9+
[connection server1]
10+
include/start_and_bootstrap_group_replication.inc
11+
12+
############################################################
13+
# 2. Setup a asynchronous replication connection from server 2
14+
# into group (server 1)
15+
[connection server1]
16+
CHANGE MASTER TO MASTER_HOST='localhost', MASTER_USER='root', MASTER_PORT=SERVER_2_PORT, MASTER_AUTO_POSITION=1 for channel 'ch2_1';
17+
Warnings:
18+
Note 1759 Sending passwords in plain text without SSL/TLS is extremely insecure.
19+
Note 1760 Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
20+
include/start_slave.inc
21+
22+
############################################################
23+
# 3. Execute some transactions on server 2.
24+
[connection server2]
25+
CREATE TABLE t1 (c1 INT NOT NULL PRIMARY KEY) ENGINE=InnoDB;
26+
INSERT INTO t1 VALUES (0);
27+
28+
############################################################
29+
# 4. Wait until transactions executed on server 2 are applied
30+
# on group.
31+
include/sync_slave_sql_with_master.inc
32+
33+
############################################################
34+
# 5. Activate group_replication_stop_all_channels_failure
35+
# debug sync point, which will return
36+
# ER_GROUP_REPLICATION_CONFIGURATION error, when stop
37+
# group replication calls to stop all replication channels.
38+
[connection server1]
39+
# 5.1. Stop Group Replication
40+
SET @debug_save= @@GLOBAL.DEBUG;
41+
SET @@GLOBAL.DEBUG='d,group_replication_stop_all_channels_failure';
42+
STOP GROUP_REPLICATION;
43+
ERROR HY000: The server is not configured properly to be an active member of the group. Please see more details on error log.
44+
SET @@GLOBAL.DEBUG= @debug_save;
45+
# 5.2. Verify member is OFFLINE
46+
include/gr_wait_for_member_state.inc
47+
# 5.3. Verify occurrence of error message
48+
Occurrences of 'Error stopping all replication channels while server was leaving the group. Please check the error log for additional details.' in the input file: 1
49+
Occurrences of 'Error stopping channel: ch2_1. Got error: 1, Error_code: 1' in the input file: 1
50+
Occurrences of 'Error stopping channel: . Got error: 1, Error_code: 1' in the input file: 1
51+
52+
############################################################
53+
# 6. Execute data on server 2 and ensure it is not accepted on server 1.
54+
[connection server2]
55+
INSERT INTO t1 VALUES (1);
56+
57+
############################################################
58+
# 7. Verify channel ch2_1 IO and SQL THREADS are OFF
59+
[connection server1]
60+
include/wait_for_slave_to_stop.inc
61+
include/assert.inc [Verify channel ch2_1 IO_THREAD is OFF]
62+
include/assert.inc [Verify channel ch2_1 SQL_THREAD is OFF]
63+
64+
############################################################
65+
# 8. Verify data isn't replicated to group i.e. server1
66+
include/assert.inc ['There is no value 1 in table t1']
67+
68+
############################################################
69+
# 9. Clean Up
70+
[connection server1]
71+
SET GLOBAL read_only= 0;
72+
RESET SLAVE ALL FOR CHANNEL 'ch2_1';
73+
DROP TABLE test.t1;
74+
[connection server2]
75+
DROP TABLE test.t1;
76+
include/group_replication_end.inc
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
--no-console --log_error=$MYSQLTEST_VARDIR/tmp/gr_stop_async_on_stop_gr.err

0 commit comments

Comments
 (0)