Skip to content

Commit

Permalink
ipv6: elide flowlabel check if no exclusive leases exist
Browse files Browse the repository at this point in the history
Processes can request ipv6 flowlabels with cmsg IPV6_FLOWINFO.
If not set, by default an autogenerated flowlabel is selected.

Explicit flowlabels require a control operation per label plus a
datapath check on every connection (every datagram if unconnected).
This is particularly expensive on unconnected sockets multiplexing
many flows, such as QUIC.

In the common case, where no lease is exclusive, the check can be
safely elided, as both lease request and check trivially succeed.
Indeed, autoflowlabel does the same even with exclusive leases.

Elide the check if no process has requested an exclusive lease.

fl6_sock_lookup previously returned either a reference to a lease or
NULL to denote failure. Modify it to return a real error and update
all callers. On a NULL return, they can use the label and will elide
the atomic_dec in fl6_sock_release.

This is an optimization. Robust applications still have to revert to
requesting leases if the fast path fails due to an exclusive lease.

Changes RFC->v1:
  - use static_key_false_deferred to rate limit jump label operations
    - call static_key_deferred_flush to stop timers on exit
  - move decrement out of RCU context
  - defer optimization also if opt data is associated with a lease
  - updated all fl6_sock_lookup callers, not just udp

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
wdebruij authored and davem330 committed Jul 9, 2019
1 parent ee4f56f commit 59c820b
Show file tree
Hide file tree
Showing 8 changed files with 45 additions and 14 deletions.
14 changes: 13 additions & 1 deletion include/net/ipv6.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <linux/hardirq.h>
#include <linux/jhash.h>
#include <linux/refcount.h>
#include <linux/jump_label_ratelimit.h>
#include <net/if_inet6.h>
#include <net/ndisc.h>
#include <net/flow.h>
Expand Down Expand Up @@ -389,7 +390,18 @@ static inline void txopt_put(struct ipv6_txoptions *opt)
kfree_rcu(opt, rcu);
}

struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label);
struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label);

extern struct static_key_false_deferred ipv6_flowlabel_exclusive;
/*
 * Look up the flow label lease that @sk holds for @label.
 *
 * While the ipv6_flowlabel_exclusive static key is disabled the lookup is
 * skipped entirely and NULL is returned; callers treat NULL as "no lease
 * reference taken, label may be used".
 *
 * When the key is enabled the real lookup runs:
 *   - a lease pointer is returned if __fl6_sock_lookup() finds one;
 *   - ERR_PTR(-ENOENT) is returned if it does not.
 *
 * Callers must therefore test the result with IS_ERR(), not against NULL.
 */
static inline struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk,
						    __be32 label)
{
	struct ip6_flowlabel *lease;

	if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key)) {
		lease = __fl6_sock_lookup(sk, label);
		return lease ? lease : ERR_PTR(-ENOENT);
	}

	return NULL;
}

struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
struct ip6_flowlabel *fl,
struct ipv6_txoptions *fopt);
Expand Down
2 changes: 1 addition & 1 deletion net/dccp/ipv6.c
Original file line number Diff line number Diff line change
Expand Up @@ -830,7 +830,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
struct ip6_flowlabel *flowlabel;
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (flowlabel == NULL)
if (IS_ERR(flowlabel))
return -EINVAL;
fl6_sock_release(flowlabel);
}
Expand Down
27 changes: 23 additions & 4 deletions net/ipv6/ip6_flowlabel.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/pid_namespace.h>
#include <linux/jump_label_ratelimit.h>

#include <net/net_namespace.h>
#include <net/sock.h>
Expand Down Expand Up @@ -53,6 +54,9 @@ static DEFINE_SPINLOCK(ip6_fl_lock);

static DEFINE_SPINLOCK(ip6_sk_fl_lock);

DEFINE_STATIC_KEY_DEFERRED_FALSE(ipv6_flowlabel_exclusive, HZ);
EXPORT_SYMBOL(ipv6_flowlabel_exclusive);

#define for_each_fl_rcu(hash, fl) \
for (fl = rcu_dereference_bh(fl_ht[(hash)]); \
fl != NULL; \
Expand Down Expand Up @@ -90,6 +94,13 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
return fl;
}

/*
 * Report whether @fl uses one of the restricted sharing modes
 * (IPV6_FL_S_EXCL, IPV6_FL_S_PROCESS or IPV6_FL_S_USER).
 * Every other value of fl->share yields false.
 */
static bool fl_shared_exclusive(struct ip6_flowlabel *fl)
{
	switch (fl->share) {
	case IPV6_FL_S_EXCL:
	case IPV6_FL_S_PROCESS:
	case IPV6_FL_S_USER:
		return true;
	default:
		return false;
	}
}

static void fl_free_rcu(struct rcu_head *head)
{
struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu);
Expand All @@ -103,8 +114,13 @@ static void fl_free_rcu(struct rcu_head *head)

static void fl_free(struct ip6_flowlabel *fl)
{
if (fl)
call_rcu(&fl->rcu, fl_free_rcu);
if (!fl)
return;

if (fl_shared_exclusive(fl) || fl->opt)
static_branch_slow_dec_deferred(&ipv6_flowlabel_exclusive);

call_rcu(&fl->rcu, fl_free_rcu);
}

static void fl_release(struct ip6_flowlabel *fl)
Expand Down Expand Up @@ -240,7 +256,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net,

/* Socket flowlabel lists */

struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label)
struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label)
{
struct ipv6_fl_socklist *sfl;
struct ipv6_pinfo *np = inet6_sk(sk);
Expand All @@ -260,7 +276,7 @@ struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label)
rcu_read_unlock_bh();
return NULL;
}
EXPORT_SYMBOL_GPL(fl6_sock_lookup);
EXPORT_SYMBOL_GPL(__fl6_sock_lookup);

void fl6_free_socklist(struct sock *sk)
{
Expand Down Expand Up @@ -419,6 +435,8 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
}
fl->dst = freq->flr_dst;
atomic_set(&fl->users, 1);
if (fl_shared_exclusive(fl) || fl->opt)
static_branch_deferred_inc(&ipv6_flowlabel_exclusive);
switch (fl->share) {
case IPV6_FL_S_EXCL:
case IPV6_FL_S_ANY:
Expand Down Expand Up @@ -854,6 +872,7 @@ int ip6_flowlabel_init(void)

/*
 * Tear down the flow label subsystem: flush the deferred static key,
 * delete the ip6_fl_gc_timer timer and unregister the per-net operations.
 */
void ip6_flowlabel_cleanup(void)
{
/* Flush deferred work on the ipv6_flowlabel_exclusive key before exit. */
static_key_deferred_flush(&ipv6_flowlabel_exclusive);
del_timer(&ip6_fl_gc_timer);
unregister_pernet_subsys(&ip6_flowlabel_net_ops);
}
4 changes: 2 additions & 2 deletions net/ipv6/raw.c
Original file line number Diff line number Diff line change
Expand Up @@ -834,7 +834,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (!flowlabel)
if (IS_ERR(flowlabel))
return -EINVAL;
}
}
Expand Down Expand Up @@ -876,7 +876,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (!flowlabel)
if (IS_ERR(flowlabel))
return -EINVAL;
}
if (!(opt->opt_nflen|opt->opt_flen))
Expand Down
2 changes: 1 addition & 1 deletion net/ipv6/tcp_ipv6.c
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
struct ip6_flowlabel *flowlabel;
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (!flowlabel)
if (IS_ERR(flowlabel))
return -EINVAL;
fl6_sock_release(flowlabel);
}
Expand Down
4 changes: 2 additions & 2 deletions net/ipv6/udp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1319,7 +1319,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (!flowlabel)
if (IS_ERR(flowlabel))
return -EINVAL;
}
}
Expand Down Expand Up @@ -1371,7 +1371,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (!flowlabel)
if (IS_ERR(flowlabel))
return -EINVAL;
}
if (!(opt->opt_nflen|opt->opt_flen))
Expand Down
4 changes: 2 additions & 2 deletions net/l2tp/l2tp_ip6.c
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.flowlabel = lsa->l2tp_flowinfo & IPV6_FLOWINFO_MASK;
if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (flowlabel == NULL)
if (IS_ERR(flowlabel))
return -EINVAL;
}
}
Expand Down Expand Up @@ -577,7 +577,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
if ((fl6.flowlabel & IPV6_FLOWLABEL_MASK) && !flowlabel) {
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (flowlabel == NULL)
if (IS_ERR(flowlabel))
return -EINVAL;
}
if (!(opt->opt_nflen|opt->opt_flen))
Expand Down
2 changes: 1 addition & 1 deletion net/sctp/ipv6.c
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
struct ip6_flowlabel *flowlabel;

flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
if (!flowlabel)
if (IS_ERR(flowlabel))
goto out;
fl6_sock_release(flowlabel);
}
Expand Down

0 comments on commit 59c820b

Please sign in to comment.