Skip to content

Commit

Permalink
ip_tunnel: add collect_md mode to IPIP tunnel
Browse files Browse the repository at this point in the history
Similar to gre, vxlan, geneve tunnels allow IPIP tunnels to
operate in 'collect metadata' mode.
bpf_skb_[gs]et_tunnel_key() helpers can make use of it right away.
ovs can use it as well in the future (once appropriate ovs-vport
abstractions and user apis are added).
Note that just like in other tunnels we cannot cache the dst,
since tunnel_info metadata can be different for every packet.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
4ast authored and davem330 committed Sep 17, 2016
1 parent eb94737 commit cfc7381
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 6 deletions.
2 changes: 2 additions & 0 deletions include/net/ip_tunnels.h
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,8 @@ void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops);

void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *tnl_params, const u8 protocol);
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const u8 proto);
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict);
int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
Expand Down
1 change: 1 addition & 0 deletions include/uapi/linux/if_tunnel.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ enum {
IFLA_IPTUN_ENCAP_FLAGS,
IFLA_IPTUN_ENCAP_SPORT,
IFLA_IPTUN_ENCAP_DPORT,
IFLA_IPTUN_COLLECT_METADATA,
__IFLA_IPTUN_MAX,
};
#define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1)
Expand Down
76 changes: 76 additions & 0 deletions net/ipv4/ip_tunnel.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
#include <net/dst_metadata.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
Expand Down Expand Up @@ -546,6 +547,81 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
return 0;
}

void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
u32 headroom = sizeof(struct iphdr);
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
const struct iphdr *inner_iph;
struct rtable *rt;
struct flowi4 fl4;
__be16 df = 0;
u8 tos, ttl;

tun_info = skb_tunnel_info(skb);
if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
ip_tunnel_info_af(tun_info) != AF_INET))
goto tx_error;
key = &tun_info->key;
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
tos = key->tos;
if (tos == 1) {
if (skb->protocol == htons(ETH_P_IP))
tos = inner_iph->tos;
else if (skb->protocol == htons(ETH_P_IPV6))
tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
}
init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
RT_TOS(tos), tunnel->parms.link);
if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
goto tx_error;
rt = ip_route_output_key(tunnel->net, &fl4);
if (IS_ERR(rt)) {
dev->stats.tx_carrier_errors++;
goto tx_error;
}
if (rt->dst.dev == dev) {
ip_rt_put(rt);
dev->stats.collisions++;
goto tx_error;
}
tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
ttl = key->ttl;
if (ttl == 0) {
if (skb->protocol == htons(ETH_P_IP))
ttl = inner_iph->ttl;
else if (skb->protocol == htons(ETH_P_IPV6))
ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
else
ttl = ip4_dst_hoplimit(&rt->dst);
}
if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
df = htons(IP_DF);
else if (skb->protocol == htons(ETH_P_IP))
df = inner_iph->frag_off & htons(IP_DF);
headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
if (headroom > dev->needed_headroom)
dev->needed_headroom = headroom;

if (skb_cow_head(skb, dev->needed_headroom)) {
ip_rt_put(rt);
goto tx_dropped;
}
iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, key->tos,
key->ttl, df, !net_eq(tunnel->net, dev_net(dev)));
return;
tx_error:
dev->stats.tx_errors++;
goto kfree;
tx_dropped:
dev->stats.tx_dropped++;
kfree:
kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);

void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *tnl_params, u8 protocol)
{
Expand Down
35 changes: 29 additions & 6 deletions net/ipv4/ipip.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/dst_metadata.h>

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
Expand Down Expand Up @@ -193,6 +194,7 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
{
struct net *net = dev_net(skb->dev);
struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
struct metadata_dst *tun_dst = NULL;
struct ip_tunnel *tunnel;
const struct iphdr *iph;

Expand All @@ -216,7 +218,12 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
tpi = &ipip_tpi;
if (iptunnel_pull_header(skb, 0, tpi->proto, false))
goto drop;
return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
if (tunnel->collect_md) {
tun_dst = ip_tun_rx_dst(skb, 0, 0, 0);
if (!tun_dst)
return 0;
}
return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
}

return -1;
Expand Down Expand Up @@ -270,7 +277,10 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,

skb_set_inner_ipproto(skb, ipproto);

ip_tunnel_xmit(skb, dev, tiph, ipproto);
if (tunnel->collect_md)
ip_md_tunnel_xmit(skb, dev, ipproto);
else
ip_tunnel_xmit(skb, dev, tiph, ipproto);
return NETDEV_TX_OK;

tx_error:
Expand Down Expand Up @@ -380,13 +390,14 @@ static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
}

static void ipip_netlink_parms(struct nlattr *data[],
struct ip_tunnel_parm *parms)
struct ip_tunnel_parm *parms, bool *collect_md)
{
memset(parms, 0, sizeof(*parms));

parms->iph.version = 4;
parms->iph.protocol = IPPROTO_IPIP;
parms->iph.ihl = 5;
*collect_md = false;

if (!data)
return;
Expand Down Expand Up @@ -414,6 +425,9 @@ static void ipip_netlink_parms(struct nlattr *data[],

if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
parms->iph.frag_off = htons(IP_DF);

if (data[IFLA_IPTUN_COLLECT_METADATA])
*collect_md = true;
}

/* This function returns true when ENCAP attributes are present in the nl msg */
Expand Down Expand Up @@ -453,18 +467,18 @@ static bool ipip_netlink_encap_parms(struct nlattr *data[],
static int ipip_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;

if (ipip_netlink_encap_parms(data, &ipencap)) {
struct ip_tunnel *t = netdev_priv(dev);
int err = ip_tunnel_encap_setup(t, &ipencap);

if (err < 0)
return err;
}

ipip_netlink_parms(data, &p);
ipip_netlink_parms(data, &p, &t->collect_md);
return ip_tunnel_newlink(dev, tb, &p);
}

Expand All @@ -473,6 +487,7 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
{
struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
bool collect_md;

if (ipip_netlink_encap_parms(data, &ipencap)) {
struct ip_tunnel *t = netdev_priv(dev);
Expand All @@ -482,7 +497,9 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
return err;
}

ipip_netlink_parms(data, &p);
ipip_netlink_parms(data, &p, &collect_md);
if (collect_md)
return -EINVAL;

if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
(!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
Expand Down Expand Up @@ -516,6 +533,8 @@ static size_t ipip_get_size(const struct net_device *dev)
nla_total_size(2) +
/* IFLA_IPTUN_ENCAP_DPORT */
nla_total_size(2) +
/* IFLA_IPTUN_COLLECT_METADATA */
nla_total_size(0) +
0;
}

Expand Down Expand Up @@ -544,6 +563,9 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
tunnel->encap.flags))
goto nla_put_failure;

if (tunnel->collect_md)
if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
goto nla_put_failure;
return 0;

nla_put_failure:
Expand All @@ -562,6 +584,7 @@ static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
[IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
[IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
[IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG },
};

static struct rtnl_link_ops ipip_link_ops __read_mostly = {
Expand Down

0 comments on commit cfc7381

Please sign in to comment.