Skip to content

Commit c81d2c8

Browse files
committed
Bug#34930219 Missing synchronization of access to THD::m_protocol causing SIGSEGV
A stack allocated protocol instance was popped while i_s.processlist was about to check whether the client connection was still alive. When the protocol instance went out of scope, the call to connection_alive() accessed an invalid pointer, causing SIGSEGV. The fix is to cache the return value from the current protocol's connection_alive() method when pushing, popping or getting the protocol. This might leave results that are slightly out of sync with reality, but a better synchronization is likely to cause performance degradation. Change-Id: I3d512fadaa0df145af3f25d4cc03fa20143c5310
1 parent 34be526 commit c81d2c8

11 files changed

+174
-28
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# I. Reproduce race condition between popping the protocol and selecting
2+
# from I_S processlist (Bug#34930219):
3+
# 1. Create X Protocol session (X1) using "x_root" account and hang it
4+
# before popping the protocol (sync point: wait_before_popping_protocol)
5+
# 2. Create classic protocol session(C1) and list all sessions,
6+
# hang it at sync point 'wait_before_returning_protocol'.
7+
# 3. Wait until C1 is at the expected sync point, then wakeup X1.
8+
9+
10+
#
11+
# Connect X1, send signal.
12+
13+
#
14+
# Connect Cl, wait for signal.
15+
SET DEBUG_SYNC= 'now WAIT_FOR waiting_popping';
16+
17+
#
18+
# Use C1 and list all sessions, wait before returning the protocol.
19+
SET DEBUG_SYNC= 'wait_before_checking_alive SIGNAL waiting_checking WAIT_FOR continue_checking';
20+
SELECT COUNT(*) > 0 from information_schema.processlist;
21+
22+
#
23+
# From the default connection, wait for threads being halted, then
24+
# let both clients continue.
25+
SET DEBUG_SYNC= 'now WAIT_FOR waiting_checking';
26+
SET DEBUG_SYNC= 'now SIGNAL continue_popping';
27+
SET DEBUG_SYNC= 'now SIGNAL continue_checking';
28+
29+
#
30+
# Go back to C1 and reap.
31+
COUNT(*) > 0
32+
1
33+
Warnings:
34+
Warning 1287 'INFORMATION_SCHEMA.PROCESSLIST' is deprecated and will be removed in a future release. Please use performance_schema.processlist instead
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# This test is created to reproduce a race condition where the session is popping
2+
# the protocol while I_S processlist is accessing the protocol pointer.
3+
4+
--source include/have_debug_sync.inc
5+
--source include/have_debug.inc
6+
7+
--source include/xplugin_preamble.inc
8+
--source include/xplugin_create_user.inc
9+
10+
--echo # I. Reproduce race condition between popping the protocol and selecting
11+
--echo # from I_S processlist (Bug#34930219):
12+
--echo # 1. Create X Protocol session (X1) using "x_root" account and hang it
13+
--echo # before popping the protocol (sync point: wait_before_popping_protocol)
14+
--echo # 2. Create classic protocol session(C1) and list all sessions,
15+
--echo # hang it at sync point 'wait_before_returning_protocol'.
16+
--echo # 3. Wait until C1 is at the expected sync point, then wakeup X1.
17+
--echo
18+
19+
## Test setup.
20+
--let $xtest_file= $MYSQL_TMP_DIR/is_processlist_mysqlx.xpl
21+
--write_file $xtest_file
22+
23+
SET DEBUG_SYNC= 'wait_before_popping_protocol SIGNAL waiting_popping WAIT_FOR continue_popping';
24+
SELECT 1;
25+
EOF
26+
27+
## Start the X client.
28+
--echo
29+
--echo #
30+
--echo # Connect X1, send signal.
31+
--exec_in_background $MYSQLXTEST -ux_root --password='' --file=$xtest_file 2>&1
32+
33+
## Connect C1
34+
--echo
35+
--echo #
36+
--echo # Connect Cl, wait for signal.
37+
--connect(con1,localhost,root,,)
38+
SET DEBUG_SYNC= 'now WAIT_FOR waiting_popping';
39+
40+
## Let C1 wait while returning the protocol.
41+
--echo
42+
--echo #
43+
--echo # Use C1 and list all sessions, wait before returning the protocol.
44+
SET DEBUG_SYNC= 'wait_before_checking_alive SIGNAL waiting_checking WAIT_FOR continue_checking';
45+
send SELECT COUNT(*) > 0 from information_schema.processlist;
46+
47+
## Both C1 and X1 are now halted. First let X1 continue, then C1.
48+
--echo
49+
--echo #
50+
--echo # From the default connection, wait for threads being halted, then
51+
--echo # let both clients continue.
52+
--connection default
53+
SET DEBUG_SYNC= 'now WAIT_FOR waiting_checking';
54+
SET DEBUG_SYNC= 'now SIGNAL continue_popping';
55+
--sleep 1
56+
SET DEBUG_SYNC= 'now SIGNAL continue_checking';
57+
58+
--echo
59+
--echo #
60+
--echo # Go back to C1 and reap.
61+
--connection con1
62+
reap;
63+
64+
## Cleanup
65+
--connection default
66+
--disconnect con1
67+
--source ../include/xplugin_cleanup.inc

plugin/test_service_sql_api/test_sql_sleep_is_connected.cc

+10
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ static void ensure_api_ok(const char *function, MYSQL_SESSION result) {
6767
struct Callback_data {
6868
bool limit_is_connected{false};
6969
int is_connected_calls{0};
70+
int is_disconnected_calls{2}; // Calls after disconnect.
71+
int is_preamble_calls{4}; // Calls before starting query.
7072
int handle_ok_calls{0};
7173
};
7274

@@ -213,6 +215,14 @@ static bool sql_connection_alive(void *ctx) {
213215
Callback_data *cbd = static_cast<Callback_data *>(ctx);
214216

215217
if (cbd->limit_is_connected) {
218+
// Patch for bug#34930219 changes the way connection_alive() is called.
219+
// Hence, we must adjust the expected number of calls.
220+
if (cbd->is_preamble_calls-- > 0) return true;
221+
if (cbd->is_connected_calls == 0 && cbd->is_disconnected_calls > 0) {
222+
cbd->is_disconnected_calls--;
223+
return false;
224+
}
225+
216226
// Connection is disconnected
217227
// after concrete number of calls
218228
cbd->is_connected_calls--;

sql/mdl.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ class MDL_context_owner {
126126
/**
127127
Does the owner still have connection to the client?
128128
*/
129-
virtual bool is_connected() = 0;
129+
virtual bool is_connected(bool = false) = 0;
130130

131131
/**
132132
Indicates that owner thread might have some commit order (non-MDL) waits

sql/mdl_context_backup.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ class MDL_context_backup_manager::MDL_context_backup
8282

8383
uint get_rand_seed() const override { return 1; }
8484

85-
bool is_connected() override { return true; }
85+
bool is_connected(bool = false) override { return true; }
8686
// End implementation of MDL_context_owner interface.
8787

8888
private:

sql/protocol_callback.cc

+4-1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "mysql_com.h"
3232
#include "sql-common/my_decimal.h"
3333
#include "sql/current_thd.h"
34+
#include "sql/debug_sync.h"
3435
#include "sql/field.h"
3536
#include "sql/item.h"
3637
#include "sql/item_func.h"
@@ -277,8 +278,10 @@ int Protocol_callback::shutdown(bool server_shutdown) {
277278
false disconnected
278279
*/
279280
bool Protocol_callback::connection_alive() const {
280-
if (callbacks.connection_alive)
281+
if (callbacks.connection_alive) {
282+
DEBUG_SYNC(current_thd, "wait_before_checking_alive");
281283
return callbacks.connection_alive(callbacks_ctx);
284+
}
282285

283286
return true;
284287
}

sql/sql_class.cc

+24-1
Original file line numberDiff line numberDiff line change
@@ -824,6 +824,7 @@ THD::THD(bool enable_plugins)
824824
protocol_text->init(this);
825825
protocol_binary->init(this);
826826
protocol_text->set_client_capabilities(0); // minimalistic client
827+
m_cached_is_connection_alive.store(m_protocol->connection_alive());
827828

828829
/*
829830
Make sure thr_lock_info_init() is called for threads which do not get
@@ -3173,32 +3174,54 @@ bool THD::is_classic_protocol() const {
31733174
get_protocol()->type() == Protocol::PROTOCOL_TEXT;
31743175
}
31753176

3176-
bool THD::is_connected() {
3177+
bool THD::is_connected(bool use_cached_connection_alive) {
31773178
/*
31783179
All system threads (e.g., the slave IO thread) are connected but
31793180
not using vio. So this function always returns true for all
31803181
system threads.
31813182
*/
31823183
if (system_thread) return true;
31833184

3185+
/*
3186+
In some situations, e.g. when generating information_schema.processlist,
3187+
we can live with a cached value to avoid introducing mutex usage.
3188+
*/
3189+
if (use_cached_connection_alive) {
3190+
DEBUG_SYNC(current_thd, "wait_before_checking_alive");
3191+
return m_cached_is_connection_alive.load();
3192+
}
3193+
31843194
if (is_classic_protocol())
31853195
return get_protocol()->connection_alive() &&
31863196
vio_is_connected(get_protocol_classic()->get_vio());
31873197

31883198
return get_protocol()->connection_alive();
31893199
}
31903200

3201+
Protocol *THD::get_protocol() {
3202+
m_cached_is_connection_alive.store(m_protocol->connection_alive());
3203+
return m_protocol;
3204+
}
3205+
3206+
Protocol_classic *THD::get_protocol_classic() {
3207+
assert(is_classic_protocol());
3208+
m_cached_is_connection_alive.store(m_protocol->connection_alive());
3209+
return pointer_cast<Protocol_classic *>(m_protocol);
3210+
}
3211+
31913212
void THD::push_protocol(Protocol *protocol) {
31923213
assert(m_protocol != nullptr);
31933214
assert(protocol != nullptr);
31943215
m_protocol->push_protocol(protocol);
31953216
m_protocol = protocol;
3217+
m_cached_is_connection_alive.store(m_protocol->connection_alive());
31963218
}
31973219

31983220
void THD::pop_protocol() {
31993221
assert(m_protocol != nullptr);
32003222
m_protocol = m_protocol->pop_protocol();
32013223
assert(m_protocol != nullptr);
3224+
m_cached_is_connection_alive.store(m_protocol->connection_alive());
32023225
}
32033226

32043227
void THD::set_time() {

sql/sql_class.h

+11-6
Original file line numberDiff line numberDiff line change
@@ -1284,6 +1284,14 @@ class THD : public MDL_context_owner,
12841284
private:
12851285
mysql_mutex_t LOCK_query_plan;
12861286

1287+
/**
1288+
Keep a cached value saying whether the connection is alive. Update when
1289+
pushing, popping or getting the protocol. Used by
1290+
information_schema.processlist to avoid locking mutexes that might
1291+
affect performance.
1292+
*/
1293+
std::atomic<bool> m_cached_is_connection_alive;
1294+
12871295
public:
12881296
/// Locks the query plan of this THD
12891297
void lock_query_plan() { mysql_mutex_lock(&LOCK_query_plan); }
@@ -1335,7 +1343,7 @@ class THD : public MDL_context_owner,
13351343

13361344
const Protocol *get_protocol() const { return m_protocol; }
13371345

1338-
Protocol *get_protocol() { return m_protocol; }
1346+
Protocol *get_protocol();
13391347

13401348
SSL_handle get_ssl() const {
13411349
#ifndef NDEBUG
@@ -1361,10 +1369,7 @@ class THD : public MDL_context_owner,
13611369
return pointer_cast<const Protocol_classic *>(m_protocol);
13621370
}
13631371

1364-
Protocol_classic *get_protocol_classic() {
1365-
assert(is_classic_protocol());
1366-
return pointer_cast<Protocol_classic *>(m_protocol);
1367-
}
1372+
Protocol_classic *get_protocol_classic();
13681373

13691374
private:
13701375
Protocol *m_protocol; // Current protocol
@@ -3240,7 +3245,7 @@ class THD : public MDL_context_owner,
32403245
bool is_classic_protocol() const;
32413246

32423247
/** Return false if connection to client is broken. */
3243-
bool is_connected() final;
3248+
bool is_connected(bool use_cached_connection_alive = false) final;
32443249

32453250
/**
32463251
Mark the current error as fatal. Warning: this does not

sql/sql_show.cc

+20-17
Original file line numberDiff line numberDiff line change
@@ -2783,16 +2783,16 @@ class List_process_list : public Do_THD_Impl {
27832783
const LEX_CSTRING inspect_sctx_host = inspect_sctx->host();
27842784
const LEX_CSTRING inspect_sctx_host_or_ip = inspect_sctx->host_or_ip();
27852785

2786-
{
2787-
MUTEX_LOCK(grd, &inspect_thd->LOCK_thd_protocol);
2788-
2789-
if ((!(inspect_thd->get_protocol() &&
2790-
inspect_thd->get_protocol()->connection_alive()) &&
2791-
!inspect_thd->system_thread) ||
2792-
(m_user && (inspect_thd->system_thread || !inspect_sctx_user.str ||
2793-
strcmp(inspect_sctx_user.str, m_user)))) {
2794-
return;
2795-
}
2786+
/*
2787+
Since we only access a cached value of connection_alive, which is
2788+
also an atomic, we do not need to lock LOCK_thd_protocol here. We
2789+
may get a value that is slightly outdated, but we will not get a crash
2790+
due to reading invalid memory at least.
2791+
*/
2792+
if (!inspect_thd->is_connected(true) ||
2793+
(m_user && (inspect_thd->system_thread || !inspect_sctx_user.str ||
2794+
strcmp(inspect_sctx_user.str, m_user)))) {
2795+
return;
27962796
}
27972797

27982798
thd_info = new (m_client_thd->mem_root) thread_info;
@@ -3031,13 +3031,16 @@ class Fill_process_list : public Do_THD_Impl {
30313031
? NullS
30323032
: client_priv_user;
30333033

3034-
{
3035-
MUTEX_LOCK(grd, &inspect_thd->LOCK_thd_protocol);
3036-
if ((!inspect_thd->get_protocol()->connection_alive() &&
3037-
!inspect_thd->system_thread) ||
3038-
(user && (inspect_thd->system_thread || !inspect_sctx_user.str ||
3039-
strcmp(inspect_sctx_user.str, user))))
3040-
return;
3034+
/*
3035+
Since we only access a cached value of connection_alive, which is
3036+
also an atomic, we do not need to lock LOCK_thd_protocol here. We
3037+
may get a value that is slightly outdated, but we will not get a crash
3038+
due to reading invalid memory at least.
3039+
*/
3040+
if (!inspect_thd->is_connected(true) ||
3041+
(user && (inspect_thd->system_thread || !inspect_sctx_user.str ||
3042+
strcmp(inspect_sctx_user.str, user)))) {
3043+
return;
30413044
}
30423045

30433046
DBUG_EXECUTE_IF(

sql/srv_session.cc

+1
Original file line numberDiff line numberDiff line change
@@ -1182,6 +1182,7 @@ int Srv_session::execute_command(enum enum_server_command command,
11821182
}
11831183
int ret = dispatch_command(m_thd, data, command);
11841184

1185+
DEBUG_SYNC(m_thd, "wait_before_popping_protocol");
11851186
m_thd->pop_protocol();
11861187
assert(m_thd->get_protocol() == &m_protocol_error);
11871188
return ret;

unittest/gunit/test_mdl_context_owner.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class Test_MDL_context_owner : public MDL_context_owner {
3636
int) override {}
3737

3838
int is_killed() const final { return 0; }
39-
bool is_connected() override { return true; }
39+
bool is_connected(bool = false) override { return true; }
4040
bool might_have_commit_order_waiters() const override { return false; }
4141

4242
THD *get_thd() override {

0 commit comments

Comments
 (0)