Skip to content

Commit 31c0adf

Browse files
committed
Bug#34930219 Missing synchronization of access to THD::m_protocol causing SIGSEGV
A stack allocated protocol instance was popped while i_s.processlist was about to check whether the client connection was still alive. When the protocol instance went out of scope, the call to connection_alive() accessed an invalid pointer, causing SIGSEGV. The fix is to cache the return value from the current protocol's connection_alive() method when pushing, popping or getting the protocol. This might leave results that are slightly out of sync with reality, but a better synchronization is likely to cause performance degradation. Change-Id: I3d512fadaa0df145af3f25d4cc03fa20143c5310 (cherry picked from commit 99c1919e0596bb6fe0292e602299e6da880fa352)
1 parent d1a860a commit 31c0adf

11 files changed

+174
-28
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# I. Reproduce race condition between popping the protocol and selecting
2+
# from I_S processlist (Bug#34930219):
3+
# 1. Create X Protocol session (X1) using "x_root" account and hang it
4+
# before popping the protocol (sync point: wait_before_popping_protocol)
5+
# 2. Create classic protocol session(C1) and list all sessions,
6+
# hang it at sync point 'wait_before_returning_protocol'.
7+
# 3. Wait until C1 is at the expected sync point, then wakeup X1.
8+
9+
10+
#
11+
# Connect X1, send signal.
12+
13+
#
14+
# Connect Cl, wait for signal.
15+
SET DEBUG_SYNC= 'now WAIT_FOR waiting_popping';
16+
17+
#
18+
# Use C1 and list all sessions, wait before returning the protocol.
19+
SET DEBUG_SYNC= 'wait_before_checking_alive SIGNAL waiting_checking WAIT_FOR continue_checking';
20+
SELECT COUNT(*) > 0 from information_schema.processlist;
21+
22+
#
23+
# From the default connection, wait for threads being halted, then
24+
# let both clients continue.
25+
SET DEBUG_SYNC= 'now WAIT_FOR waiting_checking';
26+
SET DEBUG_SYNC= 'now SIGNAL continue_popping';
27+
SET DEBUG_SYNC= 'now SIGNAL continue_checking';
28+
29+
#
30+
# Go back to C1 and reap.
31+
COUNT(*) > 0
32+
1
33+
Warnings:
34+
Warning 1287 'INFORMATION_SCHEMA.PROCESSLIST' is deprecated and will be removed in a future release. Please use performance_schema.processlist instead
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# This test is created to reproduce a race condition where the session is popping
2+
# the protocol while I_S processlist is accessing the protocol pointer.
3+
4+
--source include/have_debug_sync.inc
5+
--source include/have_debug.inc
6+
7+
--source include/xplugin_preamble.inc
8+
--source include/xplugin_create_user.inc
9+
10+
--echo # I. Reproduce race condition between popping the protocol and selecting
11+
--echo # from I_S processlist (Bug#34930219):
12+
--echo # 1. Create X Protocol session (X1) using "x_root" account and hang it
13+
--echo # before popping the protocol (sync point: wait_before_popping_protocol)
14+
--echo # 2. Create classic protocol session(C1) and list all sessions,
15+
--echo # hang it at sync point 'wait_before_returning_protocol'.
16+
--echo # 3. Wait until C1 is at the expected sync point, then wakeup X1.
17+
--echo
18+
19+
## Test setup.
20+
--let $xtest_file= $MYSQL_TMP_DIR/is_processlist_mysqlx.xpl
21+
--write_file $xtest_file
22+
23+
SET DEBUG_SYNC= 'wait_before_popping_protocol SIGNAL waiting_popping WAIT_FOR continue_popping';
24+
SELECT 1;
25+
EOF
26+
27+
## Start the X client.
28+
--echo
29+
--echo #
30+
--echo # Connect X1, send signal.
31+
--exec_in_background $MYSQLXTEST -ux_root --password='' --file=$xtest_file 2>&1
32+
33+
## Connect C1
34+
--echo
35+
--echo #
36+
--echo # Connect Cl, wait for signal.
37+
--connect(con1,localhost,root,,)
38+
SET DEBUG_SYNC= 'now WAIT_FOR waiting_popping';
39+
40+
## Let C1 wait while returning the protocol.
41+
--echo
42+
--echo #
43+
--echo # Use C1 and list all sessions, wait before returning the protocol.
44+
SET DEBUG_SYNC= 'wait_before_checking_alive SIGNAL waiting_checking WAIT_FOR continue_checking';
45+
send SELECT COUNT(*) > 0 from information_schema.processlist;
46+
47+
## Both C1 and X1 are now halted. First let X1 continue, then C1.
48+
--echo
49+
--echo #
50+
--echo # From the default connection, wait for threads being halted, then
51+
--echo # let both clients continue.
52+
--connection default
53+
SET DEBUG_SYNC= 'now WAIT_FOR waiting_checking';
54+
SET DEBUG_SYNC= 'now SIGNAL continue_popping';
55+
--sleep 1
56+
SET DEBUG_SYNC= 'now SIGNAL continue_checking';
57+
58+
--echo
59+
--echo #
60+
--echo # Go back to C1 and reap.
61+
--connection con1
62+
reap;
63+
64+
## Cleanup
65+
--connection default
66+
--disconnect con1
67+
--source ../include/xplugin_cleanup.inc

plugin/test_service_sql_api/test_sql_sleep_is_connected.cc

+10
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ static void ensure_api_ok(const char *function, MYSQL_SESSION result) {
6464
struct Callback_data {
6565
bool limit_is_connected{false};
6666
int is_connected_calls{0};
67+
int is_disconnected_calls{2}; // Calls after disconnect.
68+
int is_preamble_calls{4}; // Calls before starting query.
6769
int handle_ok_calls{0};
6870
};
6971

@@ -210,6 +212,14 @@ static bool sql_connection_alive(void *ctx) {
210212
Callback_data *cbd = static_cast<Callback_data *>(ctx);
211213

212214
if (cbd->limit_is_connected) {
215+
// Patch for bug#34930219 changes the way connection_alive() is called.
216+
// Hence, we must adjust the expected number of calls.
217+
if (cbd->is_preamble_calls-- > 0) return true;
218+
if (cbd->is_connected_calls == 0 && cbd->is_disconnected_calls > 0) {
219+
cbd->is_disconnected_calls--;
220+
return false;
221+
}
222+
213223
// Connection is disconnected
214224
// after concrete number of calls
215225
cbd->is_connected_calls--;

sql/mdl.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ class MDL_context_owner {
126126
/**
127127
Does the owner still have connection to the client?
128128
*/
129-
virtual bool is_connected() = 0;
129+
virtual bool is_connected(bool = false) = 0;
130130

131131
/**
132132
Indicates that owner thread might have some commit order (non-MDL) waits

sql/mdl_context_backup.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ class MDL_context_backup_manager::MDL_context_backup
8282

8383
uint get_rand_seed() const override { return 1; }
8484

85-
bool is_connected() override { return true; }
85+
bool is_connected(bool = false) override { return true; }
8686
// End implementation of MDL_context_owner interface.
8787

8888
private:

sql/protocol_callback.cc

+4-1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "m_ctype.h"
3131
#include "mysql_com.h"
3232
#include "sql/current_thd.h"
33+
#include "sql/debug_sync.h"
3334
#include "sql/field.h"
3435
#include "sql/item.h"
3536
#include "sql/item_func.h"
@@ -277,8 +278,10 @@ int Protocol_callback::shutdown(bool server_shutdown) {
277278
false disconnected
278279
*/
279280
bool Protocol_callback::connection_alive() const {
280-
if (callbacks.connection_alive)
281+
if (callbacks.connection_alive) {
282+
DEBUG_SYNC(current_thd, "wait_before_checking_alive");
281283
return callbacks.connection_alive(callbacks_ctx);
284+
}
282285

283286
return true;
284287
}

sql/sql_class.cc

+24-1
Original file line numberDiff line numberDiff line change
@@ -814,6 +814,7 @@ THD::THD(bool enable_plugins)
814814
protocol_text->init(this);
815815
protocol_binary->init(this);
816816
protocol_text->set_client_capabilities(0); // minimalistic client
817+
m_cached_is_connection_alive.store(m_protocol->connection_alive());
817818

818819
/*
819820
Make sure thr_lock_info_init() is called for threads which do not get
@@ -3146,32 +3147,54 @@ bool THD::is_classic_protocol() const {
31463147
get_protocol()->type() == Protocol::PROTOCOL_TEXT;
31473148
}
31483149

3149-
bool THD::is_connected() {
3150+
bool THD::is_connected(bool use_cached_connection_alive) {
31503151
/*
31513152
All system threads (e.g., the slave IO thread) are connected but
31523153
not using vio. So this function always returns true for all
31533154
system threads.
31543155
*/
31553156
if (system_thread) return true;
31563157

3158+
/*
3159+
In some situations, e.g. when generating information_schema.processlist,
3160+
we can live with a cached value to avoid introducing mutex usage.
3161+
*/
3162+
if (use_cached_connection_alive) {
3163+
DEBUG_SYNC(current_thd, "wait_before_checking_alive");
3164+
return m_cached_is_connection_alive.load();
3165+
}
3166+
31573167
if (is_classic_protocol())
31583168
return get_protocol()->connection_alive() &&
31593169
vio_is_connected(get_protocol_classic()->get_vio());
31603170

31613171
return get_protocol()->connection_alive();
31623172
}
31633173

3174+
Protocol *THD::get_protocol() {
3175+
m_cached_is_connection_alive.store(m_protocol->connection_alive());
3176+
return m_protocol;
3177+
}
3178+
3179+
Protocol_classic *THD::get_protocol_classic() {
3180+
assert(is_classic_protocol());
3181+
m_cached_is_connection_alive.store(m_protocol->connection_alive());
3182+
return pointer_cast<Protocol_classic *>(m_protocol);
3183+
}
3184+
31643185
void THD::push_protocol(Protocol *protocol) {
31653186
assert(m_protocol != nullptr);
31663187
assert(protocol != nullptr);
31673188
m_protocol->push_protocol(protocol);
31683189
m_protocol = protocol;
3190+
m_cached_is_connection_alive.store(m_protocol->connection_alive());
31693191
}
31703192

31713193
void THD::pop_protocol() {
31723194
assert(m_protocol != nullptr);
31733195
m_protocol = m_protocol->pop_protocol();
31743196
assert(m_protocol != nullptr);
3197+
m_cached_is_connection_alive.store(m_protocol->connection_alive());
31753198
}
31763199

31773200
void THD::set_time() {

sql/sql_class.h

+11-6
Original file line numberDiff line numberDiff line change
@@ -1245,6 +1245,14 @@ class THD : public MDL_context_owner,
12451245
private:
12461246
mysql_mutex_t LOCK_query_plan;
12471247

1248+
/**
1249+
Keep a cached value saying whether the connection is alive. Update when
1250+
pushing, popping or getting the protocol. Used by
1251+
information_schema.processlist to avoid locking mutexes that might
1252+
affect performance.
1253+
*/
1254+
std::atomic<bool> m_cached_is_connection_alive;
1255+
12481256
public:
12491257
/// Locks the query plan of this THD
12501258
void lock_query_plan() { mysql_mutex_lock(&LOCK_query_plan); }
@@ -1296,7 +1304,7 @@ class THD : public MDL_context_owner,
12961304

12971305
const Protocol *get_protocol() const { return m_protocol; }
12981306

1299-
Protocol *get_protocol() { return m_protocol; }
1307+
Protocol *get_protocol();
13001308

13011309
SSL_handle get_ssl() const {
13021310
#ifndef NDEBUG
@@ -1322,10 +1330,7 @@ class THD : public MDL_context_owner,
13221330
return pointer_cast<const Protocol_classic *>(m_protocol);
13231331
}
13241332

1325-
Protocol_classic *get_protocol_classic() {
1326-
assert(is_classic_protocol());
1327-
return pointer_cast<Protocol_classic *>(m_protocol);
1328-
}
1333+
Protocol_classic *get_protocol_classic();
13291334

13301335
private:
13311336
Protocol *m_protocol; // Current protocol
@@ -3179,7 +3184,7 @@ class THD : public MDL_context_owner,
31793184
bool is_classic_protocol() const;
31803185

31813186
/** Return false if connection to client is broken. */
3182-
bool is_connected() final;
3187+
bool is_connected(bool use_cached_connection_alive = false) final;
31833188

31843189
/**
31853190
Mark the current error as fatal. Warning: this does not

sql/sql_show.cc

+20-17
Original file line numberDiff line numberDiff line change
@@ -2841,16 +2841,16 @@ class List_process_list : public Do_THD_Impl {
28412841
LEX_CSTRING inspect_sctx_host = inspect_sctx->host();
28422842
LEX_CSTRING inspect_sctx_host_or_ip = inspect_sctx->host_or_ip();
28432843

2844-
{
2845-
MUTEX_LOCK(grd, &inspect_thd->LOCK_thd_protocol);
2846-
2847-
if ((!(inspect_thd->get_protocol() &&
2848-
inspect_thd->get_protocol()->connection_alive()) &&
2849-
!inspect_thd->system_thread) ||
2850-
(m_user && (inspect_thd->system_thread || !inspect_sctx_user.str ||
2851-
strcmp(inspect_sctx_user.str, m_user)))) {
2852-
return;
2853-
}
2844+
/*
2845+
Since we only access a cached value of connection_alive, which is
2846+
also an atomic, we do not need to lock LOCK_thd_protocol here. We
2847+
may get a value that is slightly outdated, but we will not get a crash
2848+
due to reading invalid memory at least.
2849+
*/
2850+
if (!inspect_thd->is_connected(true) ||
2851+
(m_user && (inspect_thd->system_thread || !inspect_sctx_user.str ||
2852+
strcmp(inspect_sctx_user.str, m_user)))) {
2853+
return;
28542854
}
28552855

28562856
thd_info = new (m_client_thd->mem_root) thread_info;
@@ -3089,13 +3089,16 @@ class Fill_process_list : public Do_THD_Impl {
30893089
? NullS
30903090
: client_priv_user;
30913091

3092-
{
3093-
MUTEX_LOCK(grd, &inspect_thd->LOCK_thd_protocol);
3094-
if ((!inspect_thd->get_protocol()->connection_alive() &&
3095-
!inspect_thd->system_thread) ||
3096-
(user && (inspect_thd->system_thread || !inspect_sctx_user.str ||
3097-
strcmp(inspect_sctx_user.str, user))))
3098-
return;
3092+
/*
3093+
Since we only access a cached value of connection_alive, which is
3094+
also an atomic, we do not need to lock LOCK_thd_protocol here. We
3095+
may get a value that is slightly outdated, but we will not get a crash
3096+
due to reading invalid memory at least.
3097+
*/
3098+
if (!inspect_thd->is_connected(true) ||
3099+
(user && (inspect_thd->system_thread || !inspect_sctx_user.str ||
3100+
strcmp(inspect_sctx_user.str, user)))) {
3101+
return;
30993102
}
31003103

31013104
DBUG_EXECUTE_IF(

sql/srv_session.cc

+1
Original file line numberDiff line numberDiff line change
@@ -1181,6 +1181,7 @@ int Srv_session::execute_command(enum enum_server_command command,
11811181
}
11821182
int ret = dispatch_command(m_thd, data, command);
11831183

1184+
DEBUG_SYNC(m_thd, "wait_before_popping_protocol");
11841185
m_thd->pop_protocol();
11851186
assert(m_thd->get_protocol() == &m_protocol_error);
11861187
return ret;

unittest/gunit/test_mdl_context_owner.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class Test_MDL_context_owner : public MDL_context_owner {
3636
int) override {}
3737

3838
int is_killed() const final { return 0; }
39-
bool is_connected() override { return true; }
39+
bool is_connected(bool = false) override { return true; }
4040
bool might_have_commit_order_waiters() const override { return false; }
4141

4242
THD *get_thd() override {

0 commit comments

Comments
 (0)