Skip to content

Commit dee4952

Browse files
committed
BUG#33405699: After secondary node is killed, it can not rejoined
It was detected that when a group was running with `group_replication_consistency = AFTER`, when a secondary failed due to external conditions (like unstable network), when it rejoined sometimes the secondary could face the error Plugin group_replication reported: 'Transaction 'GTID' does not exist on Group Replication consistency manager while receiving remote transaction prepare.' The root cause of this issue is that the primary may log the View_change_log_event on which the secondary rejoined out of order, which when the secondary uses the primary as the group donor may cause the secondary to improperly catch-up with the group and eventually generate incorrect GTIDs for the group transactions. The primary on Group Replication does ensure that the View_change_log_event is logged after all precedent transactions, but there is a window on which transactions ordered after the View_change_log_event on the group global order can be logged before the View_change_log_event. In order to solve this issue, the primary must ensure: 1) transactions ordered before a view are always logged into binary log before the View_change_log_event; 2) transactions ordered after a view are always logged into binary log after the View_change_log_event; That guarantee is now provided by the Binlog Ticket Manager, which ensures a given order on which transactions on the Binlog Group Commit do commit. Components on the server, in this case Group Replication, do assign a commit ticket order to each transaction, the binlog ordered commit does follow that order. Example: * The ticket manager is committing ticket 5, all transactions will be assigned to ticket 5, meaning that there is no guarantee in the commit order among them. * When a new member joins, the ticket is incremented to 6 and assigned to the view. The ticket is incremented again, so the new transactions will be assigned to ticket 7. * This will ensure that the order between previous transactions, view and future transactions is respected. This means that transactions with ticket 7 cannot be committed until all previous transactions are committed. -------------------------> | T1 T2 | V | T4 T3 | +--------+-----+---------+ | 5 5 | 6 | 7 7 | tickets +--------+-----+---------+ Change-Id: Ifa881894a65626ae8102761f23c6ddfe205e80bc
1 parent 75146d5 commit dee4952

File tree

75 files changed

+3095
-2295
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

75 files changed

+3095
-2295
lines changed

include/mysql/group_replication_priv.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,14 @@
2929

3030
#include "my_sys.h"
3131
#include "my_thread.h"
32+
#include "sql/binlog/group_commit/bgc_ticket_manager.h"
3233
#include "sql/binlog_ostream.h"
3334
#include "sql/binlog_reader.h"
3435
#include "sql/debug_sync.h"
3536
#include "sql/log_event.h"
3637
#include "sql/replication.h"
3738
#include "sql/rpl_channel_service_interface.h"
39+
#include "sql/rpl_commit_stage_manager.h"
3840
#include "sql/rpl_gtid.h"
3941
#include "sql/rpl_write_set_handler.h"
4042

libbinlogevents/include/control_events.h

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1042,6 +1042,18 @@ class Gtid_event : public Binary_log_event {
10421042
void print_event_info(std::ostream &) override {}
10431043
void print_long_info(std::ostream &) override {}
10441044
#endif
1045+
/*
1046+
Commit group ticket consists of: 1st bit, used internally for
1047+
synchronization purposes ("is in use"), followed by 63 bits for
1048+
the ticket value.
1049+
*/
1050+
static constexpr int COMMIT_GROUP_TICKET_LENGTH = 8;
1051+
/*
1052+
Default value of commit_group_ticket, which means it is not
1053+
being used.
1054+
*/
1055+
static constexpr std::uint64_t kGroupTicketUnset = 0;
1056+
10451057
protected:
10461058
static const int ENCODED_FLAG_LENGTH = 1;
10471059
static const int ENCODED_SID_LENGTH = 16; // Uuid::BYTE_LENGTH;
@@ -1115,9 +1127,10 @@ class Gtid_event : public Binary_log_event {
11151127
On the originating master, the event has only one timestamp as the two
11161128
timestamps are equal. On every other server we have two timestamps.
11171129
*/
1118-
static const int MAX_DATA_LENGTH = FULL_COMMIT_TIMESTAMP_LENGTH +
1119-
TRANSACTION_LENGTH_MAX_LENGTH +
1120-
FULL_SERVER_VERSION_LENGTH;
1130+
static const int MAX_DATA_LENGTH =
1131+
FULL_COMMIT_TIMESTAMP_LENGTH + TRANSACTION_LENGTH_MAX_LENGTH +
1132+
FULL_SERVER_VERSION_LENGTH +
1133+
COMMIT_GROUP_TICKET_LENGTH; /* 64-bit unsigned integer */
11211134

11221135
static const int MAX_EVENT_LENGTH =
11231136
LOG_EVENT_HEADER_LEN + POST_HEADER_LENGTH + MAX_DATA_LENGTH;
@@ -1137,6 +1150,28 @@ class Gtid_event : public Binary_log_event {
11371150
uint32_t original_server_version;
11381151
/** The version of the immediate server */
11391152
uint32_t immediate_server_version;
1153+
1154+
/** Ticket number used to group sessions together during the BGC. */
1155+
std::uint64_t commit_group_ticket{kGroupTicketUnset};
1156+
1157+
/**
1158+
Returns the length of the packed `commit_group_ticket` field. It may be
1159+
8 bytes or 0 bytes, depending on whether or not the value is
1160+
instantiated.
1161+
1162+
@return The length of the packed `commit_group_ticket` field
1163+
*/
1164+
int get_commit_group_ticket_length() const;
1165+
1166+
/**
1167+
Set the commit_group_ticket and update the transaction length if
1168+
needed, that is, if the commit_group_ticket was not set already
1169+
account it on the transaction size.
1170+
1171+
@param value The commit_group_ticket value.
1172+
*/
1173+
void set_commit_group_ticket_and_update_transaction_length(
1174+
std::uint64_t value);
11401175
};
11411176

11421177
/**

libbinlogevents/src/control_events.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,8 @@ Gtid_event::Gtid_event(const char *buf, const Format_description_event *fde)
448448
+------------+
449449
| 4/8 bytes| original/immediate_server_version (see timestamps*)
450450
+------------+
451+
| 8 bytes| Commit group ticket
452+
+------------+
451453
452454
The 'Flags' field contains gtid flags.
453455
@@ -558,6 +560,10 @@ Gtid_event::Gtid_event(const char *buf, const Format_description_event *fde)
558560
ORIGINAL_SERVER_VERSION_LENGTH);
559561
} else
560562
original_server_version = immediate_server_version;
563+
564+
if (READER_CALL(can_read, COMMIT_GROUP_TICKET_LENGTH)) {
565+
READER_TRY_SET(this->commit_group_ticket, read<uint64_t>);
566+
}
561567
}
562568
}
563569
}
@@ -567,6 +573,26 @@ Gtid_event::Gtid_event(const char *buf, const Format_description_event *fde)
567573
BAPI_VOID_RETURN;
568574
}
569575

576+
int Gtid_event::get_commit_group_ticket_length() const {
577+
if (kGroupTicketUnset != commit_group_ticket) {
578+
return COMMIT_GROUP_TICKET_LENGTH;
579+
}
580+
return 0;
581+
}
582+
583+
void Gtid_event::set_commit_group_ticket_and_update_transaction_length(
584+
std::uint64_t value) {
585+
/*
586+
Add the commit_group_ticket length to the transaction length if
587+
it was not yet considered.
588+
*/
589+
assert(value > 0);
590+
set_trx_length(transaction_length + (kGroupTicketUnset == commit_group_ticket
591+
? COMMIT_GROUP_TICKET_LENGTH
592+
: 0));
593+
commit_group_ticket = value;
594+
}
595+
570596
Previous_gtids_event::Previous_gtids_event(const char *buffer,
571597
const Format_description_event *fde)
572598
: Binary_log_event(&buffer, fde) {
Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
# ==== Purpose ====
2+
#
3+
# Resume a statement that was previously executed up until a conditional
4+
# timestamp sync point.
5+
#
6+
# ==== About conditional sync points for timestamp ====
7+
#
8+
# Conditional timestamp sync points are used to control the execution flow
9+
# of a statement for which the session variable `TIMESTAMP` as been set to
10+
# a given value, meaning, the given named sync point will only be activated
11+
# when the transaction being executed has a given session `TIMESTAMP` value
12+
# associated.
13+
#
14+
# Note that the usage of the `TIMESTAMP` session variable doens't imply
15+
# that the conditional execution is dependent on time measuring or the
16+
# measured value of any clock. The `TIMESTAMP` variable is used for mere
17+
# convenience and due to low impact on server behavior.
18+
#
19+
# Usage example for executing a statement and triggering a conditional sync
20+
# point for timestamp on a single server:
21+
#
22+
# - On the server code, setup the conditional timestamp using
23+
# `CONDITIONAL_SYNC_POINT_FOR_TIMESTAMP`, as an example, just before the
24+
# transaction is committed in the storage engines:
25+
#
26+
# bool trx_coordinator::commit_in_engines(THD *thd, bool all,
27+
# bool run_after_commit) {
28+
# if (all) CONDITIONAL_SYNC_POINT_FOR_TIMESTAMP("before_commit_in_engines");
29+
# ...
30+
#
31+
# - On the MTR test, include this script, passing the connection used to
32+
# execute the statement and the connection used to activate the sync
33+
# point.
34+
#
35+
# --connect(con1, localhost, root,,)
36+
# --connection con1
37+
#
38+
# --let $timestamp = 100
39+
# --let $auxiliary_connection = default
40+
# --let $statement_connection = con1
41+
# --let $statement = INSERT INTO t VALUES (100);
42+
# --let $sync_point = before_commit_in_engines
43+
# --source include/execute_to_conditional_timestamp_sync_point.inc
44+
# ...
45+
#
46+
# Usage example for executing a statement on the source server and
47+
# triggering a conditional sync point for timestamp on a replica:
48+
#
49+
# - On the server code, setup the conditional timestamp using
50+
# `CONDITIONAL_SYNC_POINT_FOR_TIMESTAMP`, as an example, just before the
51+
# transaction is committed in the storage engines:
52+
#
53+
# bool trx_coordinator::commit_in_engines(THD *thd, bool all,
54+
# bool run_after_commit) {
55+
# if (all) CONDITIONAL_SYNC_POINT_FOR_TIMESTAMP("before_commit_in_engines");
56+
# ...
57+
#
58+
# - On the MTR test, include this script, passing the connection used to
59+
# execute the statement and the connection used to activate the sync
60+
# point.
61+
#
62+
# --let $timestamp = 100
63+
# --let $auxiliary_connection = slave
64+
# --let $statement_connection = master
65+
# --let $statement = INSERT INTO t VALUES (100);
66+
# --let $sync_point = before_commit_in_engines
67+
# --source include/execute_to_conditional_timestamp_sync_point.inc
68+
# ...
69+
#
70+
# ==== Usage ====
71+
#
72+
# --let $statement_connection = {connection name}
73+
# --let $statement = {statement to execute}
74+
# --let $sync_point = {name of the sync point}
75+
# [--let $timestamp = {timestamp to set}]
76+
# [--let $auxiliary_connection = {connection name}]
77+
# [--let $reap_statement_connection = 0|1]
78+
# --source include/execute_from_conditional_timestamp_sync_point.inc
79+
#
80+
# $statement_connection
81+
# The connection on which $statement is to be executed.
82+
# This must be different from $auxiliary_connection.
83+
#
84+
# $statement
85+
# The statement that will be paused on the sync point.
86+
#
87+
# $sync_point
88+
# The name of the sync point (not quoted).
89+
#
90+
# $timestamp
91+
# By default, the current unix timestamp, it's the value used in `SET
92+
# TIMESTAMP = ...` in order to be used with the conditional timestamp
93+
# sync point.
94+
#
95+
# $auxiliary_connection
96+
# The connection used to set the debug sync points and
97+
# symbols. $auxiliary_connection is set to the currenct active
98+
# connection, if no value is passed. It must be different from the
99+
# $statement connection.
100+
#
101+
# $reap_statement_connection
102+
# If the statement was executed using `--send`, reap the
103+
# $statement_connection.
104+
#
105+
# Side effects:
106+
# If $auxiliary_connection and $statement_connection target the same
107+
# server, the $statement is executed using `--send` and without invoking
108+
# `--reap`. Default value is 0.
109+
#
110+
111+
if ($sync_point == '') {
112+
--die ERROR IN TEST: You must set $sync_point before sourcing execute_to_conditional_timestamp_sync_point.inc
113+
}
114+
115+
if ($timestamp == '') {
116+
--let $timestamp = 1
117+
}
118+
119+
--let $include_filename= execute_from_conditional_timestamp_sync_point.inc [$timestamp@$sync_point]
120+
--source include/begin_include_file.inc
121+
122+
if ($statement_connection == '') {
123+
--die ERROR IN TEST: You must set $statement_connection before sourcing execute_to_conditional_timestamp_sync_point.inc
124+
}
125+
126+
--let $_eftsp_auxiliary_connection = $CURRENT_CONNECTION
127+
if ($auxiliary_connection != '') {
128+
--let $_eftsp_auxiliary_connection = $auxiliary_connection
129+
}
130+
131+
if ($statement_connection == $_eftsp_auxiliary_connection) {
132+
--echo statement_connection=$statement_connection
133+
--echo auxiliary_connection=$_eftsp_auxiliary_connection
134+
--die ERROR IN TEST: You must set $statement_connection to something else than $auxiliary_connection
135+
}
136+
137+
--let $underscore = _
138+
139+
--connection $_eftsp_auxiliary_connection
140+
if ($to_sync_point != '') {
141+
--let $debug_point_silent = 1
142+
--let $debug_point = syncpoint_$to_sync_point$underscore$timestamp
143+
--source include/add_debug_point.inc
144+
}
145+
146+
--disable_query_log
147+
--eval SET @@SESSION.DEBUG_SYNC = "now SIGNAL continue_$sync_point$underscore$timestamp"
148+
--enable_query_log
149+
150+
if ($to_sync_point != '') {
151+
--connection $_eftsp_auxiliary_connection
152+
--disable_query_log
153+
--eval SET @@SESSION.DEBUG_SYNC = "now WAIT_FOR reached_$to_sync_point$underscore$timestamp"
154+
--enable_query_log
155+
156+
--source include/remove_debug_point.inc
157+
--let $debug_point_silent = 0
158+
}
159+
160+
if ($reap_statement_connection == 1) {
161+
--connection $statement_connection
162+
if ($_ectsp_server_aux == $_ectsp_server_stmt) {
163+
--reap
164+
}
165+
}
166+
167+
--let $skip_restore_connection= 1
168+
--let $include_filename= execute_from_conditional_timestamp_sync_point.inc
169+
--source include/end_include_file.inc
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
RESET MASTER;
2+
CREATE TABLE t (
3+
c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
4+
c2 INT,
5+
c3 DATETIME DEFAULT CURRENT_TIMESTAMP);
6+
# Configuration:
7+
# · MYSQL_SLAP theads: 100
8+
# · MYSQL_SLAP iterations: 200
9+
# · BGC tickets: 100
10+
# · Expected table count: 20100
11+
# · Sleep: 0.201
12+
# Adding debug point 'begin_new_bgc_ticket' to @@SESSION.debug
13+
# Removing debug point 'begin_new_bgc_ticket' from @@SESSION.debug
14+
DROP TABLE t;
15+
RESET MASTER;
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
--source include/big_test.inc
2+
--source include/have_debug.inc
3+
--source include/have_binlog_format_row.inc
4+
5+
RESET MASTER;
6+
CREATE TABLE t (
7+
c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
8+
c2 INT,
9+
c3 DATETIME DEFAULT CURRENT_TIMESTAMP);
10+
11+
--let $time_unit = 0.00001
12+
--let $threads = 100
13+
--let $iterations = 200
14+
--let $n_tickets = 100
15+
--expr $records = $threads * $iterations
16+
--expr $records = $records + $n_tickets
17+
--expr $sleep = $records * $time_unit
18+
19+
--echo # Configuration:
20+
--echo # · MYSQL_SLAP theads: $threads
21+
--echo # · MYSQL_SLAP iterations: $iterations
22+
--echo # · BGC tickets: $n_tickets
23+
--echo # · Expected table count: $records
24+
--echo # · Sleep: $sleep
25+
26+
exec_in_background $MYSQL_SLAP
27+
--silent
28+
--delimiter=";"
29+
--create-schema="test"
30+
-uroot
31+
--password=""
32+
--query="INSERT INTO t (c2) VALUES (0)"
33+
--concurrency=$threads
34+
--iterations=$iterations
35+
--commit=1;
36+
37+
--let $debug_point = begin_new_bgc_ticket
38+
--let $debug_type = SESSION
39+
--source include/add_debug_point.inc
40+
41+
--let $new_ticket = 0
42+
while ($new_ticket != $n_tickets) {
43+
--disable_query_log
44+
--eval INSERT INTO t (c2) VALUES ($new_ticket + 1)
45+
--enable_query_log
46+
--inc $new_ticket
47+
--sleep $sleep
48+
}
49+
50+
--source include/remove_debug_point.inc
51+
52+
--let $wait_condition= SELECT COUNT(1) = $records FROM t
53+
--let $wait_timeout= 3000
54+
--source include/wait_condition.inc
55+
56+
DROP TABLE t;
57+
RESET MASTER;
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
include/only_with_option.inc [GLOBAL.binlog_order_commits = 1]
2+
CREATE TABLE t (
3+
c1 INT NOT NULL PRIMARY KEY,
4+
c2 INT
5+
);
6+
include/save_binlog_position.inc
7+
include/execute_to_conditional_timestamp_sync_point.inc [before_assign_session_to_bgc_ticket]
8+
SET gtid_next = "435aeb3e-cc91-11ec-a715-6ba04eb9487e:1"; INSERT INTO t VALUES (1, 1);
9+
include/execute_to_conditional_timestamp_sync_point.inc [before_wait_on_ticket]
10+
SET gtid_next = "435aeb3e-cc91-11ec-a715-6ba04eb9487e:2"; INSERT INTO t VALUES (2, 2);
11+
include/execute_to_conditional_timestamp_sync_point.inc [before_wait_on_ticket]
12+
SET gtid_next = "435aeb3e-cc91-11ec-a715-6ba04eb9487e:3"; INSERT INTO t VALUES (3, 3);
13+
include/execute_from_conditional_timestamp_sync_point.inc [1@before_assign_session_to_bgc_ticket]
14+
include/execute_from_conditional_timestamp_sync_point.inc [1@inside_wait_on_ticket]
15+
include/execute_from_conditional_timestamp_sync_point.inc [2@before_wait_on_ticket]
16+
include/execute_from_conditional_timestamp_sync_point.inc [3@before_wait_on_ticket]
17+
include/assert_binlog_events.inc [Gtid/.*GTID_NEXT= '435aeb3e-cc91-11ec-a715-6ba04eb9487e:2' # !Begin # !Insert # !Commit # Gtid/.*GTID_NEXT= '435aeb3e-cc91-11ec-a715-6ba04eb9487e:3' # !Begin # !Insert # !Commit # Gtid/.*GTID_NEXT= '435aeb3e-cc91-11ec-a715-6ba04eb9487e:1' # !Begin # !Insert # !Commit]
18+
DROP TABLE t;
19+
RESET MASTER;

0 commit comments

Comments
 (0)