Skip to content

Commit

Permalink
netfilter: nf_tables: adapt set backend to use GC transaction API
Browse files Browse the repository at this point in the history
commit f6c383b upstream.

Use the GC transaction API to replace the old and buggy gc API and the
busy mark approach.

No set elements are removed from async garbage collection anymore,
instead the _DEAD bit is set on so the set element is not visible from
lookup path anymore. Async GC enqueues transaction work that might be
aborted and retried later.

rbtree and pipapo set backends does not set on the _DEAD bit from the
sync GC path since this runs in control plane path where mutex is held.
In this case, set elements are deactivated, removed and then released
via RCU callback, sync GC never fails.

Fixes: 3c4287f ("nf_tables: Add set type for arbitrary concatenation of ranges")
Fixes: 8d8540c ("netfilter: nft_set_rbtree: add timeout support")
Fixes: 9d09829 ("netfilter: nft_hash: add support for timeouts")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
  • Loading branch information
ummakynes authored and gregkh committed Aug 16, 2023
1 parent 0624f19 commit e4d71d6
Show file tree
Hide file tree
Showing 4 changed files with 173 additions and 103 deletions.
7 changes: 2 additions & 5 deletions net/netfilter/nf_tables_api.c
Expand Up @@ -6357,7 +6357,6 @@ static void nft_setelem_activate(struct net *net, struct nft_set *set,

if (nft_setelem_is_catchall(set, elem)) {
nft_set_elem_change_active(net, set, ext);
nft_set_elem_clear_busy(ext);
} else {
set->ops->activate(net, set, elem);
}
Expand All @@ -6372,8 +6371,7 @@ static int nft_setelem_catchall_deactivate(const struct net *net,

list_for_each_entry(catchall, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
if (!nft_is_active(net, ext) ||
nft_set_elem_mark_busy(ext))
if (!nft_is_active(net, ext))
continue;

kfree(elem->priv);
Expand Down Expand Up @@ -7083,8 +7081,7 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx,

list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
if (!nft_set_elem_active(ext, genmask) ||
nft_set_elem_mark_busy(ext))
if (!nft_set_elem_active(ext, genmask))
continue;

elem.priv = catchall->elem;
Expand Down
77 changes: 48 additions & 29 deletions net/netfilter/nft_set_hash.c
Expand Up @@ -59,6 +59,8 @@ static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg,

if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
return 1;
if (nft_set_elem_is_dead(&he->ext))
return 1;
if (nft_set_elem_expired(&he->ext))
return 1;
if (!nft_set_elem_active(&he->ext, x->genmask))
Expand Down Expand Up @@ -188,20 +190,16 @@ static void nft_rhash_activate(const struct net *net, const struct nft_set *set,
struct nft_rhash_elem *he = elem->priv;

nft_set_elem_change_active(net, set, &he->ext);
nft_set_elem_clear_busy(&he->ext);
}

static bool nft_rhash_flush(const struct net *net,
const struct nft_set *set, void *priv)
{
struct nft_rhash_elem *he = priv;

if (!nft_set_elem_mark_busy(&he->ext) ||
!nft_is_active(net, &he->ext)) {
nft_set_elem_change_active(net, set, &he->ext);
return true;
}
return false;
nft_set_elem_change_active(net, set, &he->ext);

return true;
}

static void *nft_rhash_deactivate(const struct net *net,
Expand All @@ -218,9 +216,8 @@ static void *nft_rhash_deactivate(const struct net *net,

rcu_read_lock();
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
if (he != NULL &&
!nft_rhash_flush(net, set, he))
he = NULL;
if (he)
nft_set_elem_change_active(net, set, &he->ext);

rcu_read_unlock();

Expand Down Expand Up @@ -312,52 +309,75 @@ static bool nft_rhash_expr_needs_gc_run(const struct nft_set *set,

static void nft_rhash_gc(struct work_struct *work)
{
struct nftables_pernet *nft_net;
struct nft_set *set;
struct nft_rhash_elem *he;
struct nft_rhash *priv;
struct nft_set_gc_batch *gcb = NULL;
struct rhashtable_iter hti;
struct nft_trans_gc *gc;
struct net *net;
u32 gc_seq;

priv = container_of(work, struct nft_rhash, gc_work.work);
set = nft_set_container_of(priv);
net = read_pnet(&set->net);
nft_net = nft_pernet(net);
gc_seq = READ_ONCE(nft_net->gc_seq);

gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
if (!gc)
goto done;

rhashtable_walk_enter(&priv->ht, &hti);
rhashtable_walk_start(&hti);

while ((he = rhashtable_walk_next(&hti))) {
if (IS_ERR(he)) {
if (PTR_ERR(he) != -EAGAIN)
break;
if (PTR_ERR(he) != -EAGAIN) {
nft_trans_gc_destroy(gc);
gc = NULL;
goto try_later;
}
continue;
}

/* Ruleset has been updated, try later. */
if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
nft_trans_gc_destroy(gc);
gc = NULL;
goto try_later;
}

if (nft_set_elem_is_dead(&he->ext))
goto dead_elem;

if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_EXPRESSIONS) &&
nft_rhash_expr_needs_gc_run(set, &he->ext))
goto needs_gc_run;

if (!nft_set_elem_expired(&he->ext))
continue;
needs_gc_run:
if (nft_set_elem_mark_busy(&he->ext))
continue;
nft_set_elem_dead(&he->ext);
dead_elem:
gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
if (!gc)
goto try_later;

gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
if (gcb == NULL)
break;
rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
atomic_dec(&set->nelems);
nft_set_gc_batch_add(gcb, he);
nft_trans_gc_elem_add(gc, he);
}

gc = nft_trans_gc_catchall(gc, gc_seq);

try_later:
/* catchall list iteration requires rcu read side lock. */
rhashtable_walk_stop(&hti);
rhashtable_walk_exit(&hti);

he = nft_set_catchall_gc(set);
if (he) {
gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
if (gcb)
nft_set_gc_batch_add(gcb, he);
}
nft_set_gc_batch_complete(gcb);
if (gc)
nft_trans_gc_queue_async_done(gc);

done:
queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
nft_set_gc_interval(set));
}
Expand Down Expand Up @@ -420,7 +440,6 @@ static void nft_rhash_destroy(const struct nft_ctx *ctx,
};

cancel_delayed_work_sync(&priv->gc_work);
rcu_barrier();
rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
(void *)&rhash_ctx);
}
Expand Down
48 changes: 36 additions & 12 deletions net/netfilter/nft_set_pipapo.c
Expand Up @@ -1537,16 +1537,34 @@ static void pipapo_drop(struct nft_pipapo_match *m,
}
}

static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
struct nft_pipapo_elem *e)

{
struct nft_set_elem elem = {
.priv = e,
};

nft_setelem_data_deactivate(net, set, &elem);
}

/**
* pipapo_gc() - Drop expired entries from set, destroy start and end elements
* @set: nftables API set representation
* @m: Matching data
*/
static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
{
struct nft_set *set = (struct nft_set *) _set;
struct nft_pipapo *priv = nft_set_priv(set);
struct net *net = read_pnet(&set->net);
int rules_f0, first_rule = 0;
struct nft_pipapo_elem *e;
struct nft_trans_gc *gc;

gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
if (!gc)
return;

while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
Expand All @@ -1570,13 +1588,20 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
f--;
i--;
e = f->mt[rulemap[i].to].e;
if (nft_set_elem_expired(&e->ext) &&
!nft_set_elem_mark_busy(&e->ext)) {

/* synchronous gc never fails, there is no need to set on
* NFT_SET_ELEM_DEAD_BIT.
*/
if (nft_set_elem_expired(&e->ext)) {
priv->dirty = true;
pipapo_drop(m, rulemap);

rcu_barrier();
nft_set_elem_destroy(set, e, true);
gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
if (!gc)
break;

nft_pipapo_gc_deactivate(net, set, e);
pipapo_drop(m, rulemap);
nft_trans_gc_elem_add(gc, e);

/* And check again current first rule, which is now the
* first we haven't checked.
Expand All @@ -1586,11 +1611,11 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
}
}

e = nft_set_catchall_gc(set);
if (e)
nft_set_elem_destroy(set, e, true);

priv->last_gc = jiffies;
gc = nft_trans_gc_catchall(gc, 0);
if (gc) {
nft_trans_gc_queue_sync_done(gc);
priv->last_gc = jiffies;
}
}

/**
Expand Down Expand Up @@ -1715,7 +1740,6 @@ static void nft_pipapo_activate(const struct net *net,
return;

nft_set_elem_change_active(net, set, &e->ext);
nft_set_elem_clear_busy(&e->ext);
}

/**
Expand Down

0 comments on commit e4d71d6

Please sign in to comment.