|
| 1 | +################################################################################ |
| 2 | +# === Purpose === |
| 3 | +# |
| 4 | +# This test checks that the waiting block for commit tickets is responsive in case |
| 5 | +# of missed signals and stop instructions. |
| 6 | +# |
| 7 | +# ==== Requirements ==== |
| 8 | +# |
| 9 | +# When multiple views are logged in a member, the system should never be stuck waiting for |
| 10 | +# a signal in order to apply one of these views. |
| 11 | +# |
| 12 | +# ==== Implementation ==== |
| 13 | +# |
| 14 | +# 0. There are 3 members that will form a group (servers 1, 2 and 4). |
| 15 | +# There is an asynchronous replication connection from server 3 to server 1 |
| 16 | +# 1. Start GR on server 1. Create an asynchronous connection to server 3 |
| 17 | +# Add some data to server 3 that will be replicated to server 1 |
| 18 | +# 2. Insert one last transaction on server 3 that will block on commit on server 1 |
| 19 | +# Use a point that blocks the transaction after certification but before commit |
| 20 | +# Wait for the transaction to block |
| 21 | +# 3. Join member 2 to the group |
| 22 | +# Wait for the view change log event (VCLE) to reach application where it will be stuck waiting for its ticket |
| 23 | +# 4. Unblock the transaction from the async channel, but stop it again before it pops the ticket |
| 24 | +# 5. Join member 4. The new VCLE will pop the ticket with no broadcast |
| 25 | +# Wait for this new VCLE to be queued |
| 26 | +# 6. Unblock the stuck ticket |
| 27 | +# All members should now be online |
| 28 | +# 7. Cleaning up |
| 29 | +# |
| 30 | +# === References === |
| 31 | +# |
| 32 | +# Bug#35392640: Group Replication primary with replica blocked by view change |
| 33 | +# |
| 34 | +# Prerequisites and topology: needs debug sync and the GR plugin; 4 servers are configured and each step below starts GR explicitly |
| 35 | +--source include/have_debug_sync.inc |
| 36 | +--source include/have_group_replication_plugin.inc |
| 37 | +--let $rpl_skip_group_replication_start= 1 |
| 38 | +--let $rpl_server_count= 4 |
| 39 | +--source include/group_replication.inc |
| 40 | + |
| 41 | +--echo # |
| 42 | +--echo # 1. Start GR on server 1. Create an asynchronous connection to server 3 |
| 43 | +--echo # Add some data to server 3 that will be replicated to server 1 |
| 44 | + |
| 45 | +--let $rpl_connection_name= server1 |
| 46 | +--source include/rpl_connection.inc |
| 47 | +# Save the current view change UUID so that the cleanup step can restore it |
| 48 | +--let $sysvars_to_save = [ "GLOBAL.group_replication_view_change_uuid" ] |
| 49 | +--source include/save_sysvars.inc |
| 50 | +# Use a fixed view change UUID: step 3 waits for the view change transaction by its GTID ("aaaaaaaa-...:2") |
| 51 | +SET GLOBAL group_replication_view_change_uuid = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"; |
| 52 | +--source include/start_and_bootstrap_group_replication.inc |
| 53 | + |
| 54 | +# Configure the asynchronous replication channel 'ch1' on server 1, with server 3 as its source |
| 55 | +--replace_result $SERVER_MYPORT_3 SERVER_3_PORT |
| 56 | +--eval CHANGE REPLICATION SOURCE TO SOURCE_HOST='127.0.0.1', SOURCE_USER='root', SOURCE_AUTO_POSITION=1, SOURCE_PORT=$SERVER_MYPORT_3 FOR CHANNEL 'ch1' |
| 57 | +# Start channel 'ch1', then clear $rpl_channel_name so that it does not leak into later includes |
| 58 | +--let $rpl_channel_name='ch1' |
| 59 | +--source include/start_slave.inc |
| 60 | +--let $rpl_channel_name= |
| 61 | + |
| 62 | +--echo # Add some data on 3 and sync |
| 63 | + |
| 64 | +--let $rpl_connection_name= server3 |
| 65 | +--source include/rpl_connection.inc |
| 66 | + |
| 67 | +CREATE TABLE t1 (c1 INT NOT NULL PRIMARY KEY) ENGINE=InnoDB; |
| 68 | +INSERT INTO t1 VALUES (1); |
| 69 | +# Make sure server 1 has applied the table and the first row before any blocking point is armed |
| 70 | +--let $sync_slave_connection=server1 |
| 71 | +--source include/sync_slave_sql_with_master.inc |
| 72 | + |
| 73 | +--echo # |
| 74 | +--echo # 2. Insert one last transaction on server 3 that will block on commit on server 1 |
| 75 | +--echo # Use a point that blocks the transaction after certification but before commit |
| 76 | +--echo # Wait for the transaction to block |
| 77 | + |
| 78 | +--let $rpl_connection_name= server1 |
| 79 | +--source include/rpl_connection.inc |
| 80 | + |
| 81 | +# Arm the 'ordered_commit_blocked' debug point on server 1 to stop the next transaction from completing. |
| 82 | +# The transaction is blocked when it is already registered/certified but not yet committed. |
| 83 | +--let $debug_point = ordered_commit_blocked |
| 84 | +--source include/add_debug_point.inc |
| 85 | + |
| 86 | +--let $rpl_connection_name= server3 |
| 87 | +--source include/rpl_connection.inc |
| 88 | +# This row reaches server 1 through the asynchronous channel and is the transaction that blocks |
| 89 | +INSERT INTO t1 VALUES (2); |
| 90 | + |
| 91 | +--let $rpl_connection_name= server1 |
| 92 | +--source include/rpl_connection.inc |
| 93 | +# The transaction stays blocked until signal.ordered_commit_continue is sent in step 4 |
| 94 | +# Wait for the blocked transaction to signal that it reached the sync point, then remove the debug point ($debug_point is unchanged since it was added). |
| 95 | +SET DEBUG_SYNC= "now WAIT_FOR signal.ordered_commit_waiting"; |
| 96 | +--source include/remove_debug_point.inc |
| 97 | + |
| 98 | +--echo # |
| 99 | +--echo # 3. Join member 2 to the group |
| 100 | +--echo # Wait for the VCLE to reach application where it will be stuck waiting for its ticket |
| 101 | + |
| 102 | +--let $rpl_connection_name= server2 |
| 103 | +--source include/rpl_connection.inc |
| 104 | +# Same fixed view change UUID as on server 1; the original value is saved for the cleanup step |
| 105 | +--let $sysvars_to_save = [ "GLOBAL.group_replication_view_change_uuid" ] |
| 106 | +--source include/save_sysvars.inc |
| 107 | +SET GLOBAL group_replication_view_change_uuid = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"; |
| 108 | + |
| 109 | +# Start GR on server 2. The join is expected to get stuck in RECOVERY state, so only the start command is issued here (presumably without waiting for ONLINE - confirm in start_group_replication_command.inc) |
| 110 | +--replace_result $group_replication_group_name GROUP_REPLICATION_GROUP_NAME |
| 111 | +--eval SET GLOBAL group_replication_group_name= "$group_replication_group_name" |
| 112 | +--source include/start_group_replication_command.inc |
| 113 | + |
| 114 | +--let $rpl_connection_name= server1 |
| 115 | +--source include/rpl_connection.inc |
| 116 | + |
| 117 | +# Wait for the view change log event (VCLE) of this join, GTID <view change UUID>:2, to be reported as applying by a group_replication_applier worker on server 1 |
| 118 | +--let $wait_condition= SELECT COUNT(*) = 1 FROM performance_schema.replication_applier_status_by_worker WHERE channel_name = "group_replication_applier" AND applying_transaction= "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa:2" |
| 119 | +--source include/wait_condition.inc |
| 120 | +# Exactly one replica worker thread must report that it is waiting for a Binlog Group Commit ticket |
| 121 | +--let $assert_text= 'There is a worker whose stage reports it is waiting on a ticket' |
| 122 | +--let $assert_cond= [SELECT COUNT(*) AS count FROM performance_schema.threads WHERE name="thread/sql/replica_worker" AND processlist_state="Waiting for Binlog Group Commit ticket", count, 1] = 1 |
| 123 | +--source include/assert.inc |
| 124 | + |
| 125 | +--echo # |
| 126 | +--echo # 4. Unblock the transaction from the async channel, but stop it again before it pops the ticket |
| 127 | +# Still on server 1: arm the second debug point before releasing the transaction blocked in step 2 |
| 128 | +--let $debug_point = rpl_end_of_ticket_blocked |
| 129 | +--source include/add_debug_point.inc |
| 130 | +# Release the transaction that has been waiting on signal.ordered_commit_continue since step 2 |
| 131 | +SET DEBUG_SYNC= "now SIGNAL signal.ordered_commit_continue"; |
| 132 | + |
| 133 | +# Wait for it to block again after already acknowledging the transaction was processed, but before popping the ticket |
| 134 | +SET DEBUG_SYNC= "now WAIT_FOR signal.end_of_ticket_waiting"; |
| 135 | +# The transaction is now held at the second sync point; remove the debug point while it stays blocked |
| 136 | +--source include/remove_debug_point.inc |
| 137 | + |
| 138 | +--echo # |
| 139 | +--echo # 5. Join member 4. The new VCLE will pop the ticket with no broadcast |
| 140 | +--echo # Wait for this new VCLE to be queued |
| 141 | + |
| 142 | +--let $rpl_connection_name= server4 |
| 143 | +--source include/rpl_connection.inc |
| 144 | +# Same fixed view change UUID as on servers 1 and 2; the original value is saved for the cleanup step |
| 145 | +--let $sysvars_to_save = [ "GLOBAL.group_replication_view_change_uuid" ] |
| 146 | +--source include/save_sysvars.inc |
| 147 | +SET GLOBAL group_replication_view_change_uuid = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"; |
| 148 | + |
| 149 | +# Start GR on server 4. The new view change will cause a ticket pop with no signal being broadcast |
| 150 | +--replace_result $group_replication_group_name GROUP_REPLICATION_GROUP_NAME |
| 151 | +--eval SET GLOBAL group_replication_group_name= "$group_replication_group_name" |
| 152 | +--source include/start_group_replication_command.inc |
| 153 | + |
| 154 | +--let $rpl_connection_name= server1 |
| 155 | +--source include/rpl_connection.inc |
| 156 | + |
| 157 | +# Wait for the VCLE for the member 4 join to be queued on server 1 (one remote transaction in its applier queue) |
| 158 | +# Although not stuck waiting for the ticket, the VCLE is still stuck waiting for the flush stage lock |
| 159 | +--let $wait_condition= SELECT COUNT_TRANSACTIONS_REMOTE_IN_APPLIER_QUEUE = 1 from performance_schema.replication_group_member_stats where member_id in (SELECT @@server_uuid) |
| 160 | +--source include/wait_condition.inc |
| 161 | + |
| 162 | +--echo # |
| 163 | +--echo # 6. Unblock the stuck ticket |
| 164 | +--echo # All members should now be online |
| 165 | +# Release the transaction that has been held before popping its ticket since step 4 |
| 166 | +SET DEBUG_SYNC= "now SIGNAL signal.end_of_ticket_continue"; |
| 167 | +# All 3 group members must reach ONLINE; this wait timing out would mean a view change is stuck waiting for a signal |
| 168 | +--let $wait_condition=SELECT COUNT(*)=3 FROM performance_schema.replication_group_members where MEMBER_STATE="ONLINE" |
| 169 | +--source include/wait_condition.inc |
| 170 | + |
| 171 | +--echo # |
| 172 | +--echo # 7. Cleaning up |
| 173 | +# Drop the test table on the asynchronous source (server 3) and wait for server 1 to apply it |
| 174 | +--let $rpl_connection_name= server3 |
| 175 | +--source include/rpl_connection.inc |
| 176 | + |
| 177 | +DROP TABLE t1; |
| 178 | + |
| 179 | +--let $sync_slave_connection=server1 |
| 180 | +--source include/sync_slave_sql_with_master.inc |
| 181 | + |
| 182 | +--let $rpl_connection_name= server1 |
| 183 | +--source include/rpl_connection.inc |
| 184 | +# Clear the DEBUG_SYNC state used by the signals in steps 2, 4 and 6 |
| 185 | +SET DEBUG_SYNC= 'RESET'; |
| 186 | +# NOTE(review): $rpl_channel_name was cleared in step 1, so stop_slave.inc runs without a channel name here - confirm it stops 'ch1' as intended |
| 187 | +--source include/stop_slave.inc |
| 188 | +CHANGE REPLICATION SOURCE TO SOURCE_AUTO_POSITION=0 FOR CHANNEL "ch1"; |
| 189 | +# On each group member: stop GR, then restore the view change UUID saved before it was overridden |
| 190 | +--source include/stop_group_replication.inc |
| 191 | +--source include/restore_sysvars.inc |
| 192 | + |
| 193 | +--let $rpl_connection_name= server2 |
| 194 | +--source include/rpl_connection.inc |
| 195 | + |
| 196 | +--source include/stop_group_replication.inc |
| 197 | +--source include/restore_sysvars.inc |
| 198 | + |
| 199 | +--let $rpl_connection_name= server4 |
| 200 | +--source include/rpl_connection.inc |
| 201 | + |
| 202 | +--source include/stop_group_replication.inc |
| 203 | +--source include/restore_sysvars.inc |
| 204 | + |
| 205 | +--source include/group_replication_end.inc |
0 commit comments