
Bluetooth: Mesh: Rx buffer exhaustion causes deadlock #16864

Closed
xiaoliang314 opened this issue Jun 17, 2019 · 33 comments · Fixed by #17137
Assignees
Labels
area: Bluetooth · bug · priority: medium

Comments

@xiaoliang314

In the Bluetooth Mesh environment, the following sequence of conditions causes a deadlock:

  1. Advertising data reporting exhausts the Rx buffer pool.
  2. The Mesh protocol stack invokes ACL to send data. This blocks the RX thread, because it waits for the ACL numCompleted event.
  3. The HCI driver cannot report the numCompleted event because the Rx buffer pool is exhausted, and the RX thread cannot release any Rx buffers because it is blocked.

rx_thread call stack:
#0 __swap (key=0, key@entry=16965509) at /home/ubuntu/zephyr/arch/arm/core/swap.c:68
#1 z_swap_irqlock (key=16965509) at /home/ubuntu/zephyr/kernel/include/kswap.h:128
#2 z_swap (key=..., lock=0x2000346c <k_sys_work_q>) at /home/ubuntu/zephyr/kernel/include/kswap.h:145
#3 z_pend_curr (lock=lock@entry=0x2000346c <k_sys_work_q>, key=..., key@entry=..., wait_q=wait_q@entry=0x0, timeout=timeout@entry=-1) at /home/ubuntu/zephyr/kernel/sched.c:448
#4 z_impl_k_sem_take (sem=sem@entry=0x0, timeout=timeout@entry=-1) at /home/ubuntu/zephyr/kernel/sem.c:160
#5 k_sem_take (timeout=-1, sem=0x0) at /home/ubuntu/zephyr/samples/bluetooth/mesh_test_platform/build/xxxx/zephyr/include/generated/syscalls/kernel.h:103
#6 bt_att_send (conn=0x0, conn@entry=0x20000ae4 , buf=buf@entry=0x200083a8 <net_buf_acl_tx_pool+48>, cb=cb@entry=0x0) at /home/ubuntu/zephyr/subsys/bluetooth/host/att.c:2222
#7 gatt_notify (conn=conn@entry=0x20000ae4 , handle=, data=0x0, data@entry=0x20004980 <rx_thread_stack+3120>, len=len@entry=28, cb=cb@entry=0x0) at /home/ubuntu/zephyr/subsys/bluetooth/host/gatt.c:1167
#8 bt_gatt_notify_cb (conn=conn@entry=0x20000ae4 , attr=0x20009320 <proxy_attrs+80>, attr@entry=0x2000930c <proxy_attrs+60>, data=0x20004980 <rx_thread_stack+3120>, len=, func=func@entry=0x0) at /home/ubuntu/zephyr/subsys/bluetooth/host/gatt.c:1379
#9 bt_gatt_notify (len=, data=, attr=0x2000930c <proxy_attrs+60>, conn=0x20000ae4 ) at /home/ubuntu/zephyr/include/bluetooth/gatt.h:759
#10 proxy_send (conn=conn@entry=0x20000ae4 , data=, len=) at /home/ubuntu/zephyr/subsys/bluetooth/host/mesh/proxy.c:893
#11 proxy_segment_and_send (conn=conn@entry=0x20000ae4 , type=type@entry=0 '\000', msg=msg@entry=0x20004974 <rx_thread_stack+3108>) at /home/ubuntu/zephyr/subsys/bluetooth/host/mesh/proxy.c:918
#12 bt_mesh_proxy_send (conn=0x20000ae4 , type=type@entry=0 '\000', msg=msg@entry=0x20004974 <rx_thread_stack+3108>) at /home/ubuntu/zephyr/subsys/bluetooth/host/mesh/proxy.c:955
#13 bt_mesh_proxy_relay (buf=buf@entry=0x200086b4 <net_buf_adv_buf_pool+56>, dst=) at /home/ubuntu/zephyr/subsys/bluetooth/host/mesh/proxy.c:878
#14 bt_mesh_net_send (tx=tx@entry=0x20004a64 <rx_thread_stack+3348>, buf=buf@entry=0x200086ac <net_buf_adv_buf_pool+48>, cb=0x1030210 <seg_sent_cb>, cb_data=cb_data@entry=0x200001d8 <seg_tx>) at /home/ubuntu/zephyr/subsys/bluetooth/host/mesh/net.c:885
#15 send_seg (net_tx=net_tx@entry=0x20004a64 <rx_thread_stack+3348>, sdu=sdu@entry=0x20004a90 <rx_thread_stack+3392>, cb=cb@entry=0x0, cb_data=cb_data@entry=0x0) at /home/ubuntu/zephyr/subsys/bluetooth/host/mesh/transport.c:411
#16 bt_mesh_trans_send (tx=tx@entry=0x20004a64 <rx_thread_stack+3348>, msg=msg@entry=0x20004a90 <rx_thread_stack+3392>, cb=cb@entry=0x0, cb_data=cb_data@entry=0x0) at /home/ubuntu/zephyr/subsys/bluetooth/host/mesh/transport.c:507
#17 model_send (model=model@entry=0x20008a6c <root_models>, tx=tx@entry=0x20004a64 <rx_thread_stack+3348>, implicit_bind=implicit_bind@entry=false, msg=msg@entry=0x20004a90 <rx_thread_stack+3392>, cb=cb@entry=0x0, cb_data=cb_data@entry=0x0) at /home/ubuntu/zephyr/subsys/bluetooth/host/mesh/access.c:638
#18 bt_mesh_model_send (model=model@entry=0x20008a6c <root_models>, ctx=ctx@entry=0x20004be8 <rx_thread_stack+3736>, msg=msg@entry=0x20004a90 <rx_thread_stack+3392>, cb=cb@entry=0x0, cb_data=cb_data@entry=0x0) at /home/ubuntu/zephyr/subsys/bluetooth/host/mesh/access.c:654
#19 dev_comp_data_get (model=0x20008a6c <root_models>, ctx=0x20004be8 <rx_thread_stack+3736>, buf=) at /home/ubuntu/zephyr/subsys/bluetooth/host/mesh/cfg_srv.c:198
#20 bt_mesh_model_recv (rx=rx@entry=0x20004be4 <rx_thread_stack+3732>, buf=buf@entry=0x20004b28 <rx_thread_stack+3544>) at /home/ubuntu/zephyr/subsys/bluetooth/host/mesh/access.c:579
#21 sdu_recv (rx=rx@entry=0x20004be4 <rx_thread_stack+3732>, seq=1, hdr=, aszmic=aszmic@entry=0 '\000', buf=buf@entry=0x20004bfc <rx_thread_stack+3756>) at /home/ubuntu/zephyr/subsys/bluetooth/host/mesh/transport.c:627
#22 trans_unseg (buf=buf@entry=0x20004bfc <rx_thread_stack+3756>, rx=rx@entry=0x20004be4 <rx_thread_stack+3732>, seq_auth=seq_auth@entry=0x20004bc0 <rx_thread_stack+3696>) at /home/ubuntu/zephyr/subsys/bluetooth/host/mesh/transport.c:898
#23 bt_mesh_trans_recv (buf=buf@entry=0x20004bfc <rx_thread_stack+3756>, rx=rx@entry=0x20004be4 <rx_thread_stack+3732>) at /home/ubuntu/zephyr/subsys/bluetooth/host/mesh/transport.c:1400
#24 bt_mesh_net_recv (data=data@entry=0x20009218 <clients+24>, rssi=rssi@entry=0 '\000', net_if=net_if@entry=BT_MESH_NET_IF_PROXY) at /home/ubuntu/zephyr/subsys/bluetooth/host/mesh/net.c:1324
#25 proxy_complete_pdu (client=client@entry=0x20009200 ) at /home/ubuntu/zephyr/subsys/bluetooth/host/mesh/proxy.c:405
#26 proxy_recv (conn=, attr=, buf=0x20008261 <net_buf_data_hci_rx_pool+109>, len=, offset=0, flags=2 '\002') at /home/ubuntu/zephyr/subsys/bluetooth/host/mesh/proxy.c:467
#27 write_cb (attr=0x200092f8 <proxy_attrs+40>, user_data=0x20004c98 <rx_thread_stack+3912>) at /home/ubuntu/zephyr/subsys/bluetooth/host/att.c:1225
#28 bt_gatt_foreach_attr (start_handle=start_handle@entry=25, end_handle=end_handle@entry=25, func=func@entry=0x10287f5 <write_cb>, user_data=user_data@entry=0x20004c98 <rx_thread_stack+3912>) at /home/ubuntu/zephyr/subsys/bluetooth/host/gatt.c:935
#29 att_write_rsp (conn=conn@entry=0x20000ae4 , req=req@entry=0 '\000', rsp=rsp@entry=0 '\000', handle=, offset=offset@entry=0, value=0x20008261 <net_buf_data_hci_rx_pool+109>, len=22 '\026') at /home/ubuntu/zephyr/subsys/bluetooth/host/att.c:1268
#30 att_write_cmd (att=, buf=0x200081ac <net_buf_hci_rx_pool+24>) at /home/ubuntu/zephyr/subsys/bluetooth/host/att.c:1500
#31 bt_att_recv (chan=0x20000c18 <bt_req_pool>, buf=0x200081ac <net_buf_hci_rx_pool+24>) at /home/ubuntu/zephyr/subsys/bluetooth/host/att.c:1947
#32 l2cap_chan_recv (chan=, buf=buf@entry=0x200081ac <net_buf_hci_rx_pool+24>) at /home/ubuntu/zephyr/subsys/bluetooth/host/l2cap.c:1573
#33 bt_l2cap_recv (conn=conn@entry=0x20000ae4 , buf=buf@entry=0x200081ac <net_buf_hci_rx_pool+24>) at /home/ubuntu/zephyr/subsys/bluetooth/host/l2cap.c:1606
#34 bt_conn_recv (conn=conn@entry=0x20000ae4 , buf=buf@entry=0x200081ac <net_buf_hci_rx_pool+24>, flags=flags@entry=2 '\002') at /home/ubuntu/zephyr/subsys/bluetooth/host/conn.c:1149
#35 hci_acl (buf=buf@entry=0x200081ac <net_buf_hci_rx_pool+24>) at /home/ubuntu/zephyr/subsys/bluetooth/host/hci_core.c:551
#36 hci_rx_thread () at /home/ubuntu/zephyr/subsys/bluetooth/host/hci_core.c:4683
#37 z_thread_entry (entry=0x1005b11 <hci_rx_thread>, p1=, p2=, p3=) at /home/ubuntu/zephyr/lib/os/thread_entry.c:29
#38 0xaaaaaaaa in ?? ()

@xiaoliang314 xiaoliang314 added the bug label Jun 17, 2019
@jhedberg
Member

Hi @xiaoliang314. Could you describe your setup a bit more? What hardware is this on? Which version of Zephyr are you using? Which HCI driver are you using? What Mesh features are you using? It sounds like you're not just using native (advertising-based) Mesh but also the GATT compatibility support (GATT Proxy), correct?

In the Bluetooth Mesh environment, the following sequence of conditions causes a deadlock:

  1. Advertising data reporting exhausts the Rx buffer pool.
  2. The Mesh protocol stack invokes ACL to send data. This blocks the RX thread, because it waits for the ACL numCompleted event.
  3. The HCI driver cannot report the numCompleted event because the Rx buffer pool is exhausted, and the RX thread cannot release any Rx buffers because it is blocked.

Just to be clear: the numCompleted HCI event is handled in the RX priority thread, not the normal RX thread. So the processing of the event should not be blocked as such. The inability to allocate another RX buffer sounds like a real issue, however, since both advertising events and num_completed_packets are allocated from the same buffer pool.

Do you have any suggestions for a potential fix? I'm asking since it seems you've done a fairly thorough analysis of the situation. For other critical HCI events, such as cmd_status and cmd_complete, the stack tries to reuse the original HCI command buffer to ensure that an available buffer is found. I'm thinking we should perhaps have a separate pool for num_completed_packets. Another idea would be to group all the priority events (command status, command complete & num completed packets) into a dedicated pool. I think this pool might even work with a single buffer, since I think bt_recv_prio() consumes buffers given to it synchronously, meaning you should be able to trust that the buffer goes back to the pool after calling bt_recv_prio().
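
A minimal sketch of what such a dedicated pool could look like, assuming the standard net_buf APIs of that era; the pool name, the helper and the use of BT_BUF_RX_SIZE are illustrative assumptions, not the actual patch:

NET_BUF_POOL_FIXED_DEFINE(num_complete_pool, 1, BT_BUF_RX_SIZE, NULL);

static struct net_buf *get_num_complete_buf(void)
{
	/* bt_recv_prio() consumes the buffer synchronously, so a single
	 * buffer is enough as long as the pool is used only for this
	 * event. K_NO_WAIT keeps the HCI driver from blocking here.
	 */
	return net_buf_alloc(&num_complete_pool, K_NO_WAIT);
}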

@joerchan @Vudentz @carlescufi FYI

@jhedberg jhedberg self-assigned this Jun 17, 2019
@jhedberg jhedberg added the area: Bluetooth and priority: medium labels Jun 17, 2019
@jhedberg
Member

@xiaoliang314 wrote:

The Mesh protocol stack invokes ACL to send data. This blocks the RX thread, because it waits for the ACL numCompleted event.

This is something I don't understand. The host stack has a dedicated TX thread that waits on the num_completed_packets HCI events. So if you try to send ACL data from the RX thread, it should only block on new TX buffer allocation, but not directly on the num_completed_pkts event.

@xiaoliang314
Author

@jhedberg
The Zephyr version is commit 4278bf5.

My device is a 16M microprocessor that integrates our own Bluetooth controller. The HCI driver is based on the Bluetooth H4 driver. I have enabled the PB-GATT and GATT Proxy features.

I share your opinion: I think we should use a separate pool for bt_recv_prio().

@xiaoliang314
Author

xiaoliang314 commented Jun 17, 2019

@jhedberg
Because the RX thread is blocked, it cannot continue processing other HCI events, so no buffers are released back to the Rx pool, and the HCI driver cannot get a buffer to report the numCompleted event.

@jhedberg
Member

@xiaoliang314 could you test if #16870 fixes this issue?

@xiaoliang314
Author

@jhedberg OK, I will test this fix tomorrow; I am already off work now.

@xiaoliang314
Author

@jhedberg Sorry, I think using a separate pool may not solve this problem. We overlooked one factor: the controller may also accumulate HCI events, and in the controller's report queue the numCompleted event may come after advertising events. What do you think?

@jhedberg
Member

@xiaoliang314 advertising events and num_completed_pkts events are handled by different threads. Priority events, like num_completed_pkts, are passed to the host using bt_recv_prio(), whereas other events use bt_recv(). The priority events are consumed synchronously, whereas other events go through a queue. So even if num_completed is behind many advertising events, it should still get processed before them, especially if the RX thread is blocked on something.
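
For illustration, the split an HCI driver is expected to make might look roughly like the sketch below; evt_is_priority() is a placeholder for the driver's own check (command status, command complete, num completed packets), not a specific Zephyr API:

static void report_to_host(struct net_buf *buf, u8_t evt_opcode)
{
	/* Priority events bypass the RX queue and are consumed
	 * synchronously by the host; everything else is queued for
	 * the RX thread.
	 */
	if (bt_buf_get_type(buf) == BT_BUF_EVT && evt_is_priority(evt_opcode)) {
		bt_recv_prio(buf);
	} else {
		bt_recv(buf);
	}
}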

@xiaoliang314
Author

@jhedberg The communication between the controller and the host is serial. If the normal event reception is blocked, the priority events after it will not be received.

@jhedberg
Member

@xiaoliang314 yes, that's serial. I was talking about the communication between the HCI driver and the host stack. But now that you mentioned it, there might be issues with specific HCI drivers like the h4.c one since I think they stop UART flow control when they're unable to allocate a buffer. I'll need to double check that. For other drivers, like the one for the Zephyr-internal controller this shouldn't be an issue since the "flow of HW events" isn't halted because of something like this.

@jhedberg
Member

@xiaoliang314 I double-checked drivers/bluetooth/hci/h4.c. When it comes to advertising events, those are considered discardable, so they will simply be discarded if allocation fails, meaning they will not block the inflow of more data from the controller:

rx.buf = get_rx(K_NO_WAIT);
if (!rx.buf) {
	if (rx.discardable) {
		BT_WARN("Discarding event 0x%02x", rx.evt.evt);
		rx.discard = rx.remaining;
		reset_rx();
		return;
	}
	/* ... */
}

@xiaoliang314
Author

@jhedberg We look forward to a solution to this problem in the H4 driver; we are considering separating the controller and the host.

@jhedberg
Member

@xiaoliang314 based on my previous comment, at this point it'd be best if you just test my PR in practice, so we don't need to speculate too much if it helps or not :)

@xiaoliang314
Author

@jhedberg Thank you, I will test this fix tomorrow.

@xiaoliang314
Author

@jhedberg Your PR solved my problem.
But I see this warning in the log output:
Failed to allocate, deferring to rx_thread

Could it block communication between the controller and the host?
It comes from line 250 of h4.c:

if (!rx.buf) {
	if (rx.discardable) {
		BT_WARN("Discarding event 0x%02x", rx.evt.evt);
		rx.discard = rx.remaining;
		reset_rx();
		return;
	}

	BT_WARN("Failed to allocate, deferring to rx_thread");
	uart_irq_rx_disable(h4_dev);
	return;
}

@xiaoliang314
Author

@jhedberg If the HCI RX thread is blocked, is a deadlock possible?

@xiaoliang314
Author

xiaoliang314 commented Jun 18, 2019

@jhedberg I looked at these events and there were three types:

  1. LE Remote Connection Parameter Request Event
  2. LE Data Length Change Event
  3. ACL Data

Could the first two block communication?

@jhedberg
Member

Failed to allocate, deferring to rx_thread

This is a warning message that indicates that the RX buffer pool has run empty and the HCI driver is forced to wait for buffers to become available. It's mainly intended as an indication that you should try to increase the number of RX buffers, but doesn't automatically mean that you're risking a deadlock.

  1. LE Remote Connection Parameter Request Event
  2. LE Data Length Change Event
  3. ACL Data

Could the first two block communication?

@xiaoliang314 in theory yes, since they come from the same buffer pool and are processed by the same thread as any other non-priority packets. There's no easy structural solution I can think of for this, since the HCI driver is supposed to be a reliable component that doesn't lose data, with the exception of advertising report events, which are considered OK to drop when out of buffers. One option would be to iterate the existing RX queue and free up any discardable buffers (advertising events) from there, in order to free up memory for non-discardable events (like the first two you mentioned). This is however quite inefficient, and the k_fifo API we're currently using doesn't even provide such a possibility. Now that I'm thinking about this, one solution would be to have two RX queues: one for discardable packets and another for everything else. We'd then have to use k_poll instead of k_fifo_get to block in the RX thread, since it would have to wait on two different k_fifo objects. This is mainly an issue for the h4.c HCI driver design and e.g. the Zephyr controller doesn't suffer from this, so I'm not suggesting that we change anything on the host stack side for this.

If I understood right, your code may end up blocking the RX thread due to low resources on the TX path. I think one way to mitigate this would be to configure your L2CAP TX buffers (CONFIG_BT_L2CAP_TX_BUF_COUNT) to always be at least as many as your controller's ACL buffer count. Once you've done that, you should probably try to increase CONFIG_BT_RX_BUF_COUNT to be as large as possible (i.e. as long as you don't run out of RAM).

Btw, do I understand right that your concern is mainly a theoretical one now, i.e. with my PR you haven't been able to reproduce any deadlock in practice (yet)?

@jhedberg
Member

Now that I'm thinking about this, one solution would be to have two RX queues: one for discardable packets and another for everything else. We'd then have to use k_poll instead of k_fifo_get to block in the RX thread since it would have to wait on two different k_fifo objects.

Just to be clear, I'm suggesting this since it would then be easier to free up discardable packets: you'd just pick the first one from the discardable RX queue, rather than having to iterate a single RX queue with potentially many non-discardable packets, which would be inefficient.
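
For reference, a rough sketch of that two-queue idea (which, as discussed further down, was later dropped because packet ordering between the two queues would be lost). The names are illustrative; the driver-side RX thread would wait on both FIFOs with k_poll():

K_FIFO_DEFINE(rx_queue);        /* non-discardable packets */
K_FIFO_DEFINE(rx_discardable);  /* advertising reports etc. */

static struct k_poll_event rx_events[] = {
	K_POLL_EVENT_STATIC_INITIALIZER(K_POLL_TYPE_FIFO_DATA_AVAILABLE,
					K_POLL_MODE_NOTIFY_ONLY,
					&rx_queue, 0),
	K_POLL_EVENT_STATIC_INITIALIZER(K_POLL_TYPE_FIFO_DATA_AVAILABLE,
					K_POLL_MODE_NOTIFY_ONLY,
					&rx_discardable, 0),
};

static void rx_thread(void)
{
	for (;;) {
		struct net_buf *buf;

		k_poll(rx_events, ARRAY_SIZE(rx_events), K_FOREVER);

		/* Serve non-discardable packets first; when allocation
		 * fails elsewhere, buffers could also be reclaimed
		 * directly from rx_discardable.
		 */
		while ((buf = net_buf_get(&rx_queue, K_NO_WAIT)) != NULL) {
			bt_recv(buf);
		}
		while ((buf = net_buf_get(&rx_discardable, K_NO_WAIT)) != NULL) {
			bt_recv(buf);
		}

		rx_events[0].state = K_POLL_STATE_NOT_READY;
		rx_events[1].state = K_POLL_STATE_NOT_READY;
	}
}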

@xiaoliang314
Author

xiaoliang314 commented Jun 18, 2019

@jhedberg Your PR is working well now. You understood it right: I think there is a legitimate sequence of events that can cause a deadlock on the host.

Now that I'm thinking about this, one solution would be to have two RX queues: one for discardable packets and another for everything else. We'd then have to use k_poll instead of k_fifo_get to block in the RX thread since it would have to wait on two different k_fifo objects.

I think this is feasible. But I think the best approach is not to block the RX thread at all: an API like att_send should not be invoked synchronously from the RX thread. What do you think?

@jhedberg
Member

@xiaoliang314 the stack is intentionally designed so that you should be able to block in the RX thread, since this simplifies the code (asynchronous, event-driven code is often quite hard to follow). This is the reason we have bt_recv_prio() and why many blocking operations, like bt_hci_cmd_send_sync(), are possible from the RX thread. I'm currently experimenting with making the h4.c driver discard buffers from the RX queue. I've abandoned the idea with k_poll since we would potentially lose ordering of packets with it. The single-queue based discarding is actually not that bad thanks to the k_fifo_put_slist() API: we are still forced to iterate through the entire RX queue, but inserting the non-discarded buffers back into the RX queue is an O(1) operation.
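
A rough sketch of that single-queue traversal, assuming the RX queue holds struct net_buf items as in h4.c. The buf_is_discardable() helper is hypothetical, and for simplicity the survivors are put back one by one rather than with the O(1) k_fifo_put_slist() call mentioned above:

static bool evict_one_discardable(struct k_fifo *rx_queue)
{
	struct net_buf *buf;
	sys_slist_t keep;
	bool freed = false;

	sys_slist_init(&keep);

	/* Drain the queue, dropping the first discardable buffer found. */
	while ((buf = net_buf_get(rx_queue, K_NO_WAIT)) != NULL) {
		if (!freed && bt_buf_get_type(buf) == BT_BUF_EVT &&
		    buf_is_discardable(buf)) {  /* hypothetical check */
			net_buf_unref(buf);
			freed = true;
		} else {
			net_buf_slist_put(&keep, buf);
		}
	}

	/* Put the surviving buffers back in their original order. */
	while ((buf = net_buf_slist_get(&keep)) != NULL) {
		net_buf_put(rx_queue, buf);
	}

	return freed;
}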

@jhedberg
Member

@xiaoliang314 regarding the HCI events you mentioned, the set of such events should have a strict upper bound, I think (it depends on how many connections you support, but it is still bounded). This is not the case for advertising reports. So if we know that the non-discardable events have an upper bound (i.e. a definite maximum of how many such events can be pending at any given moment), we should be able to know that setting the RX buffer count high enough will guarantee that we cannot deadlock, as long as we can always go and free up buffers used by events which don't have an upper bound (i.e. advertising reports). Does this make sense?
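
As a purely illustrative back-of-the-envelope bound (not from the Zephyr tree): if each connection can contribute at most a handful of pending non-discardable events, the RX pool can be sized so that those alone never exhaust it, and advertising reports only ever use the remainder:

/* Assumed worst case per connection; the numbers are illustrative. */
#define NON_DISCARDABLE_PER_CONN  4
#define RX_BUF_HEADROOM           2
#define RX_BUF_COUNT (CONFIG_BT_MAX_CONN * NON_DISCARDABLE_PER_CONN + \
		      RX_BUF_HEADROOM)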

@xiaoliang314
Author

@jhedberg Yes, it can solve this issue.

@xiaoliang314
Author

This should be an effective approach. Non-discardable HCI events do not occur frequently, so traversing the entire queue will not have a significant impact on overall performance.

jhedberg added a commit to jhedberg/zephyr that referenced this issue Jun 18, 2019
This event is a priority one, so it's not safe to have it use the RX
buffer pool which may be depleted due to non-priority events (e.g.
advertising events). Since the event is consumed synchronously it's
safe to have a single-buffer pool for it. Also introduce a new
bt_buf_get_evt() API for HCI drivers to simplify the driver-side code,
this effectively also deprecates bt_buf_get_cmd_complete() which now
has no in-tree HCI driver users anymore.

Fixes zephyrproject-rtos#16864

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
@jhedberg
Member

@xiaoliang314 I've pushed a second patch to my PR to add this feature. Let me know what you think. Since iterating the RX queue has performance implications I added an initial wait before commencing the iteration. I used an arbitrary 100ms for this but made it configurable. Does this make sense? Note that the patch is only compile-tested so I hope you could do some stress testing for it.
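
In h4.c terms, the behaviour described above might look roughly like the fragment below; the 100 ms value, the evict_one_discardable() helper (the queue-traversal idea sketched a few comments earlier) and the use of rx.fifo as the driver's RX queue are assumptions for illustration, not the actual patch:

struct net_buf *buf;

/* Try a normal blocking allocation first, with the configurable
 * initial wait; only if it still fails, evict one discardable
 * buffer from the RX queue and retry immediately.
 */
buf = bt_buf_get_rx(BT_BUF_EVT, K_MSEC(100));
if (!buf && evict_one_discardable(&rx.fifo)) {
	buf = bt_buf_get_rx(BT_BUF_EVT, K_NO_WAIT);
}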

@jhedberg
Member

jhedberg commented Jun 18, 2019

@xiaoliang314 I just realised my patch needed a tweak: when controller to host ACL flow control is enabled then ACL data and events use different pools, and we should only try the discard traversal if the incoming packet is an event. I've updated the second patch accordingly. Btw, you should consider enabling CONFIG_BT_HCI_ACL_FLOW_CONTROL if your controller supports controller-to-host ACL flow control, since that way you guarantee that the controller will not cause you to run out of ACL buffers for incoming data.

@xiaoliang314
Author

I've updated the second patch accordingly.

@jhedberg I don't seem to see the third commit.

@xiaoliang314
Author

@jhedberg I see your modification, and I'll test it.

@xiaoliang314
Author

@jhedberg Hi, your fix is working well. But I can't verify your latest patch, because it's hard for me to create the conditions needed to reproduce this problem.

jhedberg added a commit to jhedberg/zephyr that referenced this issue Jun 19, 2019
@jhedberg
Member

@jhedberg Hi, your fix is working well. But I can't verify your latest patch, because it's hard for me to create the conditions needed to reproduce this problem.

@xiaoliang314 thanks! Could you also post a comment in the pull request saying that you've tested it and it works well? That way the reviewers can see it and we make it easier to get the PR eventually merged.

@jhedberg
Member

@xiaoliang314 regarding reproducing, the only trick I can think of is to try to reduce the number of available buffers.

@xiaoliang314
Author

@jhedberg I've reduced the number of buffers to a very small value. The reason it is difficult to reproduce is that the LE Remote Connection Parameter Request and LE Data Length Change events rarely occur between sending ACL data and receiving the numCompleted reply.

jhedberg added a commit to jhedberg/zephyr that referenced this issue Jul 1, 2019
jhedberg added a commit that referenced this issue Jul 1, 2019
jhedberg added a commit to jhedberg/zephyr that referenced this issue Jul 1, 2019
nashif pushed a commit that referenced this issue Jul 2, 2019