Skip to content

Commit

Permalink
Fix: auto-resize hash table destroy deadlock
Browse files Browse the repository at this point in the history
Fix a deadlock for auto-resize hash tables when cds_lfht_destroy
is called with RCU read-side lock held.

Example stack trace of a hang:

  Thread 2 (Thread 0x7f21ba876700 (LWP 26114)):
  #0  syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38
  #1  0x00007f21beba7aa0 in futex (val3=0, uaddr2=0x0, timeout=0x0, val=-1, op=0, uaddr=0x7f21bedac308 <urcu_memb_gp+8>) at ../include/urcu/futex.h:81
  #2  futex_noasync (timeout=0x0, uaddr2=0x0, val3=0, val=-1, op=0, uaddr=0x7f21bedac308 <urcu_memb_gp+8>) at ../include/urcu/futex.h:90
  #3  wait_gp () at urcu.c:265
  #4  wait_for_readers (input_readers=input_readers@entry=0x7f21ba8751b0, cur_snap_readers=cur_snap_readers@entry=0x0,
      qsreaders=qsreaders@entry=0x7f21ba8751c0) at urcu.c:357
  #5  0x00007f21beba8339 in urcu_memb_synchronize_rcu () at urcu.c:498
  #6  0x00007f21be99f93f in fini_table (last_order=<optimized out>, first_order=13, ht=0x5651cec75400) at rculfhash.c:1489
  #7  _do_cds_lfht_shrink (new_size=<optimized out>, old_size=<optimized out>, ht=0x5651cec75400) at rculfhash.c:2001
  #8  _do_cds_lfht_resize (ht=ht@entry=0x5651cec75400) at rculfhash.c:2023
  #9  0x00007f21be99fa26 in do_resize_cb (work=0x5651e20621a0) at rculfhash.c:2063
  #10 0x00007f21be99dbfd in workqueue_thread (arg=0x5651cec74a00) at workqueue.c:234
  #11 0x00007f21bd7c06db in start_thread (arg=0x7f21ba876700) at pthread_create.c:463
  #12 0x00007f21bd4e961f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95

  Thread 1 (Thread 0x7f21bf285300 (LWP 26098)):
  #0  syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38
  #1  0x00007f21be99d8b7 in futex (val3=0, uaddr2=0x0, timeout=0x0, val=-1, op=0, uaddr=0x5651d8b38584) at ../include/urcu/futex.h:81
  #2  futex_async (timeout=0x0, uaddr2=0x0, val3=0, val=-1, op=0, uaddr=0x5651d8b38584) at ../include/urcu/futex.h:113
  #3  futex_wait (futex=futex@entry=0x5651d8b38584) at workqueue.c:135
  #4  0x00007f21be99e2c8 in urcu_workqueue_wait_completion (completion=completion@entry=0x5651d8b38580) at workqueue.c:423
  #5  0x00007f21be99e3f9 in urcu_workqueue_flush_queued_work (workqueue=0x5651cec74a00) at workqueue.c:452
  #6  0x00007f21be9a0c83 in cds_lfht_destroy (ht=0x5651d8b2fcf0, attr=attr@entry=0x0) at rculfhash.c:1906

This deadlock is easy to reproduce when rapidly adding a large number of
entries in the cds_lfht, removing them, and calling cds_lfht_destroy().

The deadlock will occur if the call to cds_lfht_destroy() takes place
while a resize of the hash table is ongoing.

Fix this by moving the teardown of the lfht worker thread to the libcds
library destructor, so cds_lfht_destroy no longer needs to wait, from
within a read-side critical section, on a resize callback that invokes
synchronize_rcu. As a
consequence, the atfork callbacks are left registered within each urcu
flavor for which a resizeable hash table is created until the end of the
executable lifetime.

The other part of the fix is to move the hash table destruction to the
worker thread for auto-resize hash tables. This prevents having to wait
for resize callbacks from an RCU read-side critical section. This is
guaranteed by the fact that the worker thread serializes previously
queued resize callbacks before the destroy callback.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Change-Id: If8b1c3c8063dc7b9846dc5c3fc452efd917eab4d
  • Loading branch information
compudj committed Feb 10, 2023
1 parent 5159c1c commit b047e7a
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 39 deletions.
8 changes: 4 additions & 4 deletions include/urcu/rculfhash.h
Expand Up @@ -247,16 +247,16 @@ struct cds_lfht *cds_lfht_new(unsigned long init_size,
*
* Return 0 on success, negative error value on error.
 *
*
* Prior to liburcu 0.10:
* - Threads calling this API need to be registered RCU read-side
* threads.
* - cds_lfht_destroy should *not* be called from a RCU read-side
* critical section. It should *not* be called from a call_rcu thread
* context neither.
*
* Starting from liburcu 0.10, rculfhash implements its own worker
* thread to handle resize operations, which removes RCU requirements on
* cds_lfht_destroy.
* thread to handle resize operations, which removes the above RCU
* read-side critical section requirement on cds_lfht_destroy.
*/
extern
int cds_lfht_destroy(struct cds_lfht *ht, pthread_attr_t **attr);
Expand Down
8 changes: 6 additions & 2 deletions src/rculfhash-internal.h
Expand Up @@ -29,6 +29,8 @@
#include <stdio.h>
#include <stdlib.h>

#include "workqueue.h"

#ifdef DEBUG
#define dbg_printf(fmt, args...) printf("[debug rculfhash] " fmt, ## args)
#else
Expand Down Expand Up @@ -82,11 +84,13 @@ struct cds_lfht {
* therefore cause grace-period deadlock if we hold off RCU G.P.
* completion.
*/
pthread_mutex_t resize_mutex; /* resize mutex: add/del mutex */
pthread_attr_t *resize_attr; /* Resize threads attributes */
pthread_mutex_t resize_mutex; /* resize mutex: add/del mutex */
pthread_attr_t *caller_resize_attr; /* resize threads attributes from lfht_new caller */
pthread_attr_t resize_attr;
unsigned int in_progress_destroy;
unsigned long resize_target;
int resize_initiated;
struct urcu_work destroy_work;

/*
* Variables needed for add and remove fast-paths.
Expand Down
105 changes: 83 additions & 22 deletions src/rculfhash.c
Expand Up @@ -363,7 +363,6 @@ struct partition_resize_work {
};

static struct urcu_workqueue *cds_lfht_workqueue;
static unsigned long cds_lfht_workqueue_user_count;

/*
* Mutex ensuring mutual exclusion between workqueue initialization and
Expand All @@ -380,8 +379,8 @@ static struct urcu_atfork cds_lfht_atfork;
*/
static int cds_lfht_workqueue_atfork_nesting;

static void __attribute__((destructor)) cds_lfht_exit(void);
static void cds_lfht_init_worker(const struct rcu_flavor_struct *flavor);
static void cds_lfht_fini_worker(const struct rcu_flavor_struct *flavor);

#ifdef CONFIG_CDS_LFHT_ITER_DEBUG

Expand Down Expand Up @@ -1286,7 +1285,8 @@ void partition_resize_helper(struct cds_lfht *ht, unsigned long i,
work[thread].len = partition_len;
work[thread].start = thread * partition_len;
work[thread].fct = fct;
ret = pthread_create(&(work[thread].thread_id), ht->resize_attr,
ret = pthread_create(&(work[thread].thread_id),
ht->caller_resize_attr ? &ht->resize_attr : NULL,
partition_resize_thread, &work[thread]);
if (ret == EAGAIN) {
/*
Expand Down Expand Up @@ -1643,7 +1643,9 @@ struct cds_lfht *_cds_lfht_new(unsigned long init_size,

ht->flags = flags;
ht->flavor = flavor;
ht->resize_attr = attr;
ht->caller_resize_attr = attr;
if (attr)
ht->resize_attr = *attr;
alloc_split_items_count(ht);
/* this mutex should not nest in read-side C.S. */
pthread_mutex_init(&ht->resize_mutex, NULL);
Expand Down Expand Up @@ -1857,6 +1859,35 @@ int cds_lfht_is_node_deleted(const struct cds_lfht_node *node)
return is_removed(CMM_LOAD_SHARED(node->next));
}

static
bool cds_lfht_is_empty(struct cds_lfht *ht)
{
	struct cds_lfht_node *iter, *iter_next;
	bool result = true;
	bool was_online;

	/*
	 * Make sure we run the traversal from within an RCU read-side
	 * critical section; put the thread online temporarily if the
	 * caller is not already a reader.
	 */
	was_online = ht->flavor->read_ongoing();
	if (!was_online) {
		ht->flavor->thread_online();
		ht->flavor->read_lock();
	}
	/*
	 * Walk the chain starting at the first bucket: encountering any
	 * non-bucket node means the table still holds user entries.
	 */
	for (iter = bucket_at(ht, 0);;) {
		iter_next = rcu_dereference(iter->next);
		if (!is_bucket(iter_next)) {
			result = false;
			break;
		}
		iter = clear_flag(iter_next);
		if (is_end(iter))
			break;
	}
	if (!was_online) {
		ht->flavor->read_unlock();
		ht->flavor->thread_offline();
	}
	return result;
}

static
int cds_lfht_delete_bucket(struct cds_lfht *ht)
{
Expand Down Expand Up @@ -1891,6 +1922,24 @@ int cds_lfht_delete_bucket(struct cds_lfht *ht)
return 0;
}

/*
 * Final teardown of an auto-resize hash table, executed on the lfht
 * worker thread. Because the worker serializes work items, all resize
 * callbacks queued before this one have already completed when it runs,
 * so the table can be torn down without waiting on them from the
 * caller's context.
 */
static
void do_auto_resize_destroy_cb(struct urcu_work *work)
{
	/* Recover the enclosing hash table from its embedded work item. */
	struct cds_lfht *ht = caa_container_of(work, struct cds_lfht, destroy_work);
	int ret;

	/*
	 * The worker thread is not an RCU reader by default; register it
	 * with this table's flavor so bucket deletion can run RCU
	 * operations.
	 */
	ht->flavor->register_thread();
	ret = cds_lfht_delete_bucket(ht);
	if (ret)
		urcu_die(ret);
	free_split_items_count(ht);
	ret = pthread_mutex_destroy(&ht->resize_mutex);
	if (ret)
		urcu_die(ret);
	ht->flavor->unregister_thread();
	/* Poison before freeing to help catch use-after-free. */
	poison_free(ht);
}

/*
* Should only be called when no more concurrent readers nor writers can
* possibly access the table.
Expand All @@ -1900,22 +1949,38 @@ int cds_lfht_destroy(struct cds_lfht *ht, pthread_attr_t **attr)
int ret;

if (ht->flags & CDS_LFHT_AUTO_RESIZE) {
/*
* Perform error-checking for emptiness before queuing
* work, so we can return error to the caller. This runs
* concurrently with ongoing resize.
*/
if (!cds_lfht_is_empty(ht))
return -EPERM;
/* Cancel ongoing resize operations. */
_CMM_STORE_SHARED(ht->in_progress_destroy, 1);
/* Wait for in-flight resize operations to complete */
urcu_workqueue_flush_queued_work(cds_lfht_workqueue);
if (attr) {
*attr = ht->caller_resize_attr;
ht->caller_resize_attr = NULL;
}
/*
* Queue destroy work after prior queued resize
* operations. Given there are no concurrent writers
* accessing the hash table at this point, no resize
* operations can be queued after this destroy work.
*/
urcu_workqueue_queue_work(cds_lfht_workqueue,
&ht->destroy_work, do_auto_resize_destroy_cb);
return 0;
}
ret = cds_lfht_delete_bucket(ht);
if (ret)
return ret;
free_split_items_count(ht);
if (attr)
*attr = ht->resize_attr;
*attr = ht->caller_resize_attr;
ret = pthread_mutex_destroy(&ht->resize_mutex);
if (ret)
ret = -EBUSY;
if (ht->flags & CDS_LFHT_AUTO_RESIZE)
cds_lfht_fini_worker(ht->flavor);
poison_free(ht);
return ret;
}
Expand Down Expand Up @@ -2179,23 +2244,19 @@ static void cds_lfht_init_worker(const struct rcu_flavor_struct *flavor)
flavor->register_rculfhash_atfork(&cds_lfht_atfork);

mutex_lock(&cds_lfht_fork_mutex);
if (cds_lfht_workqueue_user_count++)
goto end;
cds_lfht_workqueue = urcu_workqueue_create(0, -1, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL);
end:
if (!cds_lfht_workqueue)
cds_lfht_workqueue = urcu_workqueue_create(0, -1, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL);
mutex_unlock(&cds_lfht_fork_mutex);
}

static void cds_lfht_fini_worker(const struct rcu_flavor_struct *flavor)
static void cds_lfht_exit(void)
{
mutex_lock(&cds_lfht_fork_mutex);
if (--cds_lfht_workqueue_user_count)
goto end;
urcu_workqueue_destroy(cds_lfht_workqueue);
cds_lfht_workqueue = NULL;
end:
if (cds_lfht_workqueue) {
urcu_workqueue_flush_queued_work(cds_lfht_workqueue);
urcu_workqueue_destroy(cds_lfht_workqueue);
cds_lfht_workqueue = NULL;
}
mutex_unlock(&cds_lfht_fork_mutex);

flavor->unregister_rculfhash_atfork(&cds_lfht_atfork);
}
21 changes: 10 additions & 11 deletions src/urcu-call-rcu-impl.h
Expand Up @@ -102,7 +102,6 @@ static pthread_mutex_t call_rcu_mutex = PTHREAD_MUTEX_INITIALIZER;
static struct call_rcu_data *default_call_rcu_data;

static struct urcu_atfork *registered_rculfhash_atfork;
static unsigned long registered_rculfhash_atfork_refcount;

/*
* If the sched_getcpu() and sysconf(_SC_NPROCESSORS_CONF) calls are
Expand Down Expand Up @@ -1028,20 +1027,20 @@ void call_rcu_after_fork_child(void)

/*
 * Register the rculfhash atfork handler. Registration is permanent for
 * the executable lifetime (no refcounting): only the first registered
 * handler is kept.
 */
void urcu_register_rculfhash_atfork(struct urcu_atfork *atfork)
{
	/* Fast path: already registered, nothing to do. */
	if (CMM_LOAD_SHARED(registered_rculfhash_atfork))
		return;
	call_rcu_lock(&call_rcu_mutex);
	/* Re-check under the lock to keep only the first registration. */
	if (!registered_rculfhash_atfork)
		registered_rculfhash_atfork = atfork;
	call_rcu_unlock(&call_rcu_mutex);
}

/*
 * This unregistration function is deprecated, meant only for internal
 * use by rculfhash. The atfork handler now stays registered for the
 * executable lifetime, so calling this is a fatal programming error.
 */
__attribute__((noreturn))
void urcu_unregister_rculfhash_atfork(struct urcu_atfork *atfork __attribute__((unused)))
{
	/* Must not return: the function is declared noreturn. */
	urcu_die(EPERM);
}

0 comments on commit b047e7a

Please sign in to comment.