Skip to content

Commit

Permalink
net: implement threaded-able napi poll loop support
Browse files Browse the repository at this point in the history
This patch allows running each napi poll loop inside its own
kernel thread.
The kthread is created during netif_napi_add() if dev->threaded
is set, and threaded mode is enabled in napi_enable(). A way to
set dev->threaded and enable threaded mode without a device
up/down is provided in the following patch.

Once threaded mode is enabled and the kthread is
started, napi_schedule() will wake up that thread instead
of scheduling the softirq.

The threaded poll loop behaves much like net_rx_action(),
but it does not have to manipulate local irqs and uses
an explicit scheduling point based on netdev_budget.

Co-developed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Co-developed-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Co-developed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Wei Wang <weiwan@google.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
tracywwnj authored and davem330 committed Feb 9, 2021
1 parent 898f801 commit 29863d4
Show file tree
Hide file tree
Showing 2 changed files with 119 additions and 14 deletions.
21 changes: 7 additions & 14 deletions include/linux/netdevice.h
Expand Up @@ -347,6 +347,7 @@ struct napi_struct {
struct list_head dev_list;
struct hlist_node napi_hash_node;
unsigned int napi_id;
struct task_struct *thread;
};

enum {
Expand All @@ -358,6 +359,7 @@ enum {
NAPI_STATE_NO_BUSY_POLL, /* Do not add in napi_hash, no busy polling */
NAPI_STATE_IN_BUSY_POLL, /* sk_busy_loop() owns this NAPI */
NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/
NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/
};

enum {
Expand All @@ -369,6 +371,7 @@ enum {
NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
NAPIF_STATE_PREFER_BUSY_POLL = BIT(NAPI_STATE_PREFER_BUSY_POLL),
NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
};

enum gro_result {
Expand Down Expand Up @@ -503,20 +506,7 @@ static inline bool napi_complete(struct napi_struct *n)
*/
void napi_disable(struct napi_struct *n);

/**
* napi_enable - enable NAPI scheduling
* @n: NAPI context
*
* Allow this NAPI context to be scheduled again.
* Must be paired with napi_disable.
*
* The context must have NAPI_STATE_SCHED set on entry; this is enforced
* with BUG_ON(). NOTE(review): presumably napi_disable() leaves that bit
* set - confirm against its implementation.
*/
static inline void napi_enable(struct napi_struct *n)
{
/* Enabling a context that does not hold SCHED is a caller bug. */
BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
/* Order earlier memory accesses before the bit clears below. */
smp_mb__before_atomic();
clear_bit(NAPI_STATE_SCHED, &n->state);
clear_bit(NAPI_STATE_NPSVC, &n->state);
}
void napi_enable(struct napi_struct *n);

/**
* napi_synchronize - wait until NAPI is not running
Expand Down Expand Up @@ -1827,6 +1817,8 @@ enum netdev_priv_flags {
*
* @wol_enabled: Wake-on-LAN is enabled
*
* @threaded: napi threaded mode is enabled
*
* @net_notifier_list: List of per-net netdev notifier block
* that follow this device when it is moved
* to another network namespace.
Expand Down Expand Up @@ -2145,6 +2137,7 @@ struct net_device {
struct lock_class_key *qdisc_running_key;
bool proto_down;
unsigned wol_enabled:1;
unsigned threaded:1;

struct list_head net_notifier_list;

Expand Down
112 changes: 112 additions & 0 deletions net/core/dev.c
Expand Up @@ -91,6 +91,7 @@
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/skbuff.h>
#include <linux/kthread.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <net/net_namespace.h>
Expand Down Expand Up @@ -1494,6 +1495,27 @@ void netdev_notify_peers(struct net_device *dev)
}
EXPORT_SYMBOL(netdev_notify_peers);

static int napi_threaded_poll(void *data);

/*
 * Spawn the polling kthread for @n and record it in n->thread.
 *
 * Returns 0 on success. On failure the error is logged, n->thread is
 * reset to NULL and the negative errno extracted from kthread_run()'s
 * result is returned.
 */
static int napi_kthread_create(struct napi_struct *n)
{
	int rc;

	/* kthread_run() both creates the thread and wakes it up once;
	 * that first wake-up lets it reach TASK_INTERRUPTIBLE, which
	 * avoids the blocked task warning and works with loadavg.
	 */
	n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d",
				n->dev->name, n->napi_id);
	if (!IS_ERR(n->thread))
		return 0;

	rc = PTR_ERR(n->thread);
	pr_err("kthread_run failed with err %d\n", rc);
	/* Never leave an ERR_PTR behind in n->thread. */
	n->thread = NULL;

	return rc;
}

static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
{
const struct net_device_ops *ops = dev->netdev_ops;
Expand Down Expand Up @@ -4265,6 +4287,21 @@ int gro_normal_batch __read_mostly = 8;
static inline void ____napi_schedule(struct softnet_data *sd,
struct napi_struct *napi)
{
struct task_struct *thread;

if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
/* Paired with smp_mb__before_atomic() in
* napi_enable(). Use READ_ONCE() to guarantee
* a complete read on napi->thread. Only call
* wake_up_process() when it's not NULL.
*/
thread = READ_ONCE(napi->thread);
if (thread) {
wake_up_process(thread);
return;
}
}

list_add_tail(&napi->poll_list, &sd->poll_list);
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
Expand Down Expand Up @@ -6728,6 +6765,12 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
set_bit(NAPI_STATE_NPSVC, &napi->state);
list_add_rcu(&napi->dev_list, &dev->napi_list);
napi_hash_add(napi);
/* Create kthread for this napi if dev->threaded is set.
* Clear dev->threaded if kthread creation failed so that
* threaded mode will not be enabled in napi_enable().
*/
if (dev->threaded && napi_kthread_create(napi))
dev->threaded = 0;
}
EXPORT_SYMBOL(netif_napi_add);

Expand All @@ -6745,9 +6788,28 @@ void napi_disable(struct napi_struct *n)

clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state);
clear_bit(NAPI_STATE_DISABLE, &n->state);
clear_bit(NAPI_STATE_THREADED, &n->state);
}
EXPORT_SYMBOL(napi_disable);

/**
* napi_enable - enable NAPI scheduling
* @n: NAPI context
*
* Allow this NAPI context to be scheduled again.
* Must be paired with napi_disable.
*
* The context must have NAPI_STATE_SCHED set on entry; this is enforced
* with BUG_ON(). If the owning device has ->threaded set and a kthread
* has been created for @n, NAPI_STATE_THREADED is set as well, so that
* ____napi_schedule() wakes that kthread instead of raising the softirq.
*/
void napi_enable(struct napi_struct *n)
{
/* Enabling a context that does not hold SCHED is a caller bug. */
BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
/* Order earlier memory accesses before the bit clears below;
* ____napi_schedule() documents its THREADED/thread read as paired
* with this barrier.
*/
smp_mb__before_atomic();
clear_bit(NAPI_STATE_SCHED, &n->state);
clear_bit(NAPI_STATE_NPSVC, &n->state);
/* Turn on threaded mode only when there is a thread to wake up. */
if (n->dev->threaded && n->thread)
set_bit(NAPI_STATE_THREADED, &n->state);
}
EXPORT_SYMBOL(napi_enable);

static void flush_gro_hash(struct napi_struct *napi)
{
int i;
Expand All @@ -6773,6 +6835,11 @@ void __netif_napi_del(struct napi_struct *napi)

flush_gro_hash(napi);
napi->gro_bitmask = 0;

if (napi->thread) {
kthread_stop(napi->thread);
napi->thread = NULL;
}
}
EXPORT_SYMBOL(__netif_napi_del);

Expand Down Expand Up @@ -6867,6 +6934,51 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
return work;
}

/**
 * napi_thread_wait - park the NAPI kthread until there is work to poll
 * @napi: NAPI context served by the calling kthread
 *
 * Sleeps in TASK_INTERRUPTIBLE until NAPI_STATE_SCHED is observed on
 * @napi or the kthread is asked to stop.
 *
 * A pending napi_disable() is deliberately NOT a reason to leave the
 * loop. Returning -1 makes napi_threaded_poll() return, i.e. the kthread
 * exits, while napi->thread keeps pointing at it: a later napi_enable()
 * would set NAPI_STATE_THREADED again and ____napi_schedule() would try
 * to wake a task that no longer exists. Upstream additionally reported
 * that bailing out here with NAPI_STATE_SCHED still set makes
 * napi_disable() hang waiting for that bit. The thread therefore only
 * terminates through kthread_stop().
 *
 * Return: 0 when @napi is scheduled and should be polled, -1 when
 * kthread_should_stop() is true and the thread must exit.
 */
static int napi_thread_wait(struct napi_struct *napi)
{
	/* Mark ourselves sleeping before testing the conditions so that a
	 * wake-up arriving between the test and schedule() is not lost.
	 */
	set_current_state(TASK_INTERRUPTIBLE);

	while (!kthread_should_stop()) {
		if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
			/* A threaded NAPI must not sit on a softirq poll list. */
			WARN_ON(!list_empty(&napi->poll_list));
			__set_current_state(TASK_RUNNING);
			return 0;
		}

		schedule();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);

	return -1;
}

/*
 * Thread function of the per-NAPI kthread; @data is the napi_struct it
 * serves.
 *
 * Each time napi_thread_wait() reports work (returns 0), the NAPI is
 * polled repeatedly with bottom halves disabled until __napi_poll() no
 * longer requests a repoll. Between two consecutive polls the thread
 * offers the CPU through cond_resched(). The function returns 0 once
 * napi_thread_wait() returns non-zero.
 */
static int napi_threaded_poll(void *data)
{
	struct napi_struct *napi = data;

	while (napi_thread_wait(napi) == 0) {
		bool again;

		do {
			void *lock_cookie;

			again = false;

			local_bh_disable();

			lock_cookie = netpoll_poll_lock(napi);
			__napi_poll(napi, &again);
			netpoll_poll_unlock(lock_cookie);

			__kfree_skb_flush();
			local_bh_enable();

			/* Explicit scheduling point before polling again. */
			if (again)
				cond_resched();
		} while (again);
	}

	return 0;
}

static __latent_entropy void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
Expand Down

0 comments on commit 29863d4

Please sign in to comment.