From 3511494ce2f3d3b77544c79b87511a4ddb61dc89 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 15 Jan 2015 03:53:55 +0100 Subject: vxlan: Group Policy extension Implements support for the Group Policy VXLAN extension [0] to provide a lightweight and simple security label mechanism across network peers based on VXLAN. The security context and associated metadata are mapped to/from skb->mark. This allows further mapping to a SELinux context using SECMARK, to implement ACLs directly with nftables, iptables, OVS, tc, etc. The group membership is defined by the lower 16 bits of skb->mark; the upper 16 bits are used for flags. SELinux allows managing labels to secure local resources. However, distributed applications require ACLs to be implemented across hosts. This is typically achieved by matching on L2-L4 fields to identify the original sending host and process on the receiver. On top of that, netlabel and specifically CIPSO [1] allow mapping security contexts to universal labels. However, netlabel and CIPSO are relatively complex. This patch provides a lightweight alternative for overlay network environments with a trusted underlay. No additional control protocol is required. Host 1: Host 2: Group A Group B Group B Group A +-----+ +-------------+ +-------+ +-----+ | lxc | | SELinux CTX | | httpd | | VM | +--+--+ +--+----------+ +---+---+ +--+--+ \---+---/ \----+---/ | | +---+---+ +---+---+ | vxlan | | vxlan | +---+---+ +---+---+ +------------------------------+ Backwards compatibility: A VXLAN-GBP socket can receive standard VXLAN frames and will assign the default group 0x0000 to such frames. A Linux VXLAN socket will drop VXLAN-GBP frames. The extension is therefore disabled by default and needs to be specifically enabled: ip link add [...] type vxlan [...] gbp In a mixed environment with VXLAN and VXLAN-GBP sockets, the GBP socket must run on a separate port number. 
Examples: iptables: host1# iptables -I OUTPUT -m owner --uid-owner 101 -j MARK --set-mark 0x200 host2# iptables -I INPUT -m mark --mark 0x200 -j DROP OVS: # ovs-ofctl add-flow br0 'in_port=1,actions=load:0x200->NXM_NX_TUN_GBP_ID[],NORMAL' # ovs-ofctl add-flow br0 'in_port=2,tun_gbp_id=0x200,actions=drop' [0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy [1] http://lwn.net/Articles/204905/ Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/vxlan.h | 79 +++++++++++++++++++++++++++++++++++++++++--- include/uapi/linux/if_link.h | 1 + 2 files changed, 75 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 0a7443b49133..f4a3583171bd 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -11,15 +11,76 @@ #define VNI_HASH_BITS 10 #define VNI_HASH_SIZE (1<" +#endif + __be16 policy_id; + __be32 vx_vni; +}; + +#define VXLAN_GBP_USED_BITS (VXLAN_HF_GBP | 0xFFFFFF) + +/* skb->mark mapping + * + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |R|R|R|R|R|R|R|R|R|D|R|R|A|R|R|R| Group Policy ID | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +#define VXLAN_GBP_DONT_LEARN (BIT(6) << 16) +#define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16) +#define VXLAN_GBP_ID_MASK (0xFFFF) + +/* VXLAN protocol header: + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |G|R|R|R|I|R|R|C| Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | VXLAN Network Identifier (VNI) | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * G = 1 Group Policy (VXLAN-GBP) + * I = 1 VXLAN Network Identifier (VNI) present + * C = 1 Remote checksum offload (RCO) + */ struct vxlanhdr { __be32 vx_flags; __be32 vx_vni; }; /* VXLAN header flags. 
*/ -#define VXLAN_HF_VNI 0x08000000 -#define VXLAN_HF_RCO 0x00200000 +#define VXLAN_HF_RCO BIT(24) +#define VXLAN_HF_VNI BIT(27) +#define VXLAN_HF_GBP BIT(31) /* Remote checksum offload header option */ #define VXLAN_RCO_MASK 0x7f /* Last byte of vni field */ @@ -32,8 +93,14 @@ struct vxlanhdr { #define VXLAN_VID_MASK (VXLAN_N_VID - 1) #define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) +struct vxlan_metadata { + __be32 vni; + u32 gbp; +}; + struct vxlan_sock; -typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, __be32 key); +typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, + struct vxlan_metadata *md); /* per UDP socket information */ struct vxlan_sock { @@ -60,6 +127,7 @@ struct vxlan_sock { #define VXLAN_F_UDP_ZERO_CSUM6_RX 0x100 #define VXLAN_F_REMCSUM_TX 0x200 #define VXLAN_F_REMCSUM_RX 0x400 +#define VXLAN_F_GBP 0x800 struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, vxlan_rcv_t *rcv, void *data, @@ -70,7 +138,8 @@ void vxlan_sock_release(struct vxlan_sock *vs); int vxlan_xmit_skb(struct vxlan_sock *vs, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, - __be16 src_port, __be16 dst_port, __be32 vni, bool xnet); + __be16 src_port, __be16 dst_port, struct vxlan_metadata *md, + bool xnet); static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, netdev_features_t features) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index b2723f65846f..2a8380edbb7e 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -372,6 +372,7 @@ enum { IFLA_VXLAN_UDP_ZERO_CSUM6_RX, IFLA_VXLAN_REMCSUM_TX, IFLA_VXLAN_REMCSUM_RX, + IFLA_VXLAN_GBP, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) -- cgit v1.2.3 From ac5132d1a03fe1ebbefb2382b36e829dff056283 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 15 Jan 2015 03:53:56 +0100 Subject: vxlan: Only bind to sockets with compatible 
flags enabled A VXLAN net_device looking for an appropriate socket may only consider a socket which has a matching set of flags/extensions enabled. If incompatible flags are enabled, return a conflict to have the caller create a distinct socket with distinct port. The OVS VXLAN port is kept unaware of extensions at this point. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 29 ++++++++++++++++++----------- include/net/vxlan.h | 3 +++ 2 files changed, 21 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 6dbf8e041922..6b6b45622a0a 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -263,15 +263,19 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb) return list_first_entry(&fdb->remotes, struct vxlan_rdst, list); } -/* Find VXLAN socket based on network namespace, address family and UDP port */ -static struct vxlan_sock *vxlan_find_sock(struct net *net, - sa_family_t family, __be16 port) +/* Find VXLAN socket based on network namespace, address family and UDP port + * and enabled unshareable flags. 
+ */ +static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family, + __be16 port, u32 flags) { struct vxlan_sock *vs; + u32 match_flags = flags & VXLAN_F_UNSHAREABLE; hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) { if (inet_sk(vs->sock->sk)->inet_sport == port && - inet_sk(vs->sock->sk)->sk.sk_family == family) + inet_sk(vs->sock->sk)->sk.sk_family == family && + (vs->flags & VXLAN_F_UNSHAREABLE) == match_flags) return vs; } return NULL; @@ -291,11 +295,12 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id) /* Look up VNI in a per net namespace table */ static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, - sa_family_t family, __be16 port) + sa_family_t family, __be16 port, + u32 flags) { struct vxlan_sock *vs; - vs = vxlan_find_sock(net, family, port); + vs = vxlan_find_sock(net, family, port, flags); if (!vs) return NULL; @@ -1957,7 +1962,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ip_rt_put(rt); dst_vxlan = vxlan_find_vni(vxlan->net, vni, - dst->sa.sa_family, dst_port); + dst->sa.sa_family, dst_port, + vxlan->flags); if (!dst_vxlan) goto tx_error; vxlan_encap_bypass(skb, vxlan, dst_vxlan); @@ -2016,7 +2022,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, dst_release(ndst); dst_vxlan = vxlan_find_vni(vxlan->net, vni, - dst->sa.sa_family, dst_port); + dst->sa.sa_family, dst_port, + vxlan->flags); if (!dst_vxlan) goto tx_error; vxlan_encap_bypass(skb, vxlan, dst_vxlan); @@ -2186,7 +2193,7 @@ static int vxlan_init(struct net_device *dev) spin_lock(&vn->sock_lock); vs = vxlan_find_sock(vxlan->net, ipv6 ? 
AF_INET6 : AF_INET, - vxlan->dst_port); + vxlan->dst_port, vxlan->flags); if (vs && atomic_add_unless(&vs->refcnt, 1, 0)) { /* If we have a socket with same port already, reuse it */ vxlan_vs_add_dev(vs, vxlan); @@ -2593,7 +2600,7 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, return vs; spin_lock(&vn->sock_lock); - vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port); + vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port, flags); if (vs && ((vs->rcv != rcv) || !atomic_add_unless(&vs->refcnt, 1, 0))) vs = ERR_PTR(-EBUSY); @@ -2761,7 +2768,7 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, vxlan->flags |= VXLAN_F_GBP; if (vxlan_find_vni(net, vni, use_ipv6 ? AF_INET6 : AF_INET, - vxlan->dst_port)) { + vxlan->dst_port, vxlan->flags)) { pr_info("duplicate VNI %u\n", vni); return -EEXIST; } diff --git a/include/net/vxlan.h b/include/net/vxlan.h index f4a3583171bd..7be8c342fc95 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -129,6 +129,9 @@ struct vxlan_sock { #define VXLAN_F_REMCSUM_RX 0x400 #define VXLAN_F_GBP 0x800 +/* These flags must match in order for a socket to be shareable */ +#define VXLAN_F_UNSHAREABLE VXLAN_F_GBP + struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, vxlan_rcv_t *rcv, void *data, bool no_share, u32 flags); -- cgit v1.2.3 From 1dd144cf5b4b47e12438c2c6883925ce1a9b499f Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 15 Jan 2015 03:53:59 +0100 Subject: openvswitch: Support VXLAN Group Policy extension Introduces support for the group policy extension to the VXLAN virtual port. The extension is disabled by default and only enabled if the user has provided the respective configuration. ovs-vsctl add-port br0 vxlan0 -- \ set Interface vxlan0 type=vxlan options:exts=gbp The configuration interface to enable the extension is based on a new attribute OVS_VXLAN_EXT_GBP nested inside OVS_TUNNEL_ATTR_EXTENSION which can carry additional extensions as needed in the future. 
The group policy metadata is stored as binary blob (struct ovs_vxlan_opts) internally just like Geneve options but transported as nested Netlink attributes to user space. Renames the existing TUNNEL_OPTIONS_PRESENT to TUNNEL_GENEVE_OPT with the binary value kept intact, a new flag TUNNEL_VXLAN_OPT is introduced. The attributes OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS and existing OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS are implemented mutually exclusive. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 5 +- include/uapi/linux/openvswitch.h | 11 ++++ net/openvswitch/flow_netlink.c | 114 ++++++++++++++++++++++++++++++++++----- net/openvswitch/vport-geneve.c | 15 ++++-- net/openvswitch/vport-vxlan.c | 82 +++++++++++++++++++++++++++- net/openvswitch/vport-vxlan.h | 11 ++++ 6 files changed, 218 insertions(+), 20 deletions(-) create mode 100644 net/openvswitch/vport-vxlan.h (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 25a59eb388a6..ce4db3cc5647 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -97,7 +97,10 @@ struct ip_tunnel { #define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100) #define TUNNEL_OAM __cpu_to_be16(0x0200) #define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400) -#define TUNNEL_OPTIONS_PRESENT __cpu_to_be16(0x0800) +#define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800) +#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000) + +#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT) struct tnl_ptk_info { __be16 flags; diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index f714e8633352..cd8d933963c2 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -252,11 +252,21 @@ enum ovs_vport_attr { #define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1) +enum { + OVS_VXLAN_EXT_UNSPEC, + OVS_VXLAN_EXT_GBP, /* Flag or __u32 */ + __OVS_VXLAN_EXT_MAX, +}; + +#define OVS_VXLAN_EXT_MAX (__OVS_VXLAN_EXT_MAX - 1) + + /* 
OVS_VPORT_ATTR_OPTIONS attributes for tunnels. */ enum { OVS_TUNNEL_ATTR_UNSPEC, OVS_TUNNEL_ATTR_DST_PORT, /* 16-bit UDP port, used by L4 tunnels. */ + OVS_TUNNEL_ATTR_EXTENSION, __OVS_TUNNEL_ATTR_MAX }; @@ -328,6 +338,7 @@ enum ovs_tunnel_key_attr { OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, /* Array of Geneve options. */ OVS_TUNNEL_KEY_ATTR_TP_SRC, /* be16 src Transport Port. */ OVS_TUNNEL_KEY_ATTR_TP_DST, /* be16 dst Transport Port. */ + OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS, /* Nested OVS_VXLAN_EXT_* */ __OVS_TUNNEL_KEY_ATTR_MAX }; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 518941c5bdf1..d210d1be3470 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -49,6 +49,7 @@ #include #include "flow_netlink.h" +#include "vport-vxlan.h" struct ovs_len_tbl { int len; @@ -268,6 +269,9 @@ size_t ovs_tun_key_attr_size(void) + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ + /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with + * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it. + */ + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ } @@ -308,6 +312,7 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] [OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = sizeof(u16) }, [OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 }, [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_NESTED }, + [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED }, }; /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. 
*/ @@ -460,6 +465,41 @@ static int genev_tun_opt_from_nlattr(const struct nlattr *a, return 0; } +static const struct nla_policy vxlan_opt_policy[OVS_VXLAN_EXT_MAX + 1] = { + [OVS_VXLAN_EXT_GBP] = { .type = NLA_U32 }, +}; + +static int vxlan_tun_opt_from_nlattr(const struct nlattr *a, + struct sw_flow_match *match, bool is_mask, + bool log) +{ + struct nlattr *tb[OVS_VXLAN_EXT_MAX+1]; + unsigned long opt_key_offset; + struct ovs_vxlan_opts opts; + int err; + + BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts)); + + err = nla_parse_nested(tb, OVS_VXLAN_EXT_MAX, a, vxlan_opt_policy); + if (err < 0) + return err; + + memset(&opts, 0, sizeof(opts)); + + if (tb[OVS_VXLAN_EXT_GBP]) + opts.gbp = nla_get_u32(tb[OVS_VXLAN_EXT_GBP]); + + if (!is_mask) + SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false); + else + SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); + + opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts)); + SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts), + is_mask); + return 0; +} + static int ipv4_tun_from_nlattr(const struct nlattr *attr, struct sw_flow_match *match, bool is_mask, bool log) @@ -468,6 +508,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, int rem; bool ttl = false; __be16 tun_flags = 0; + int opts_type = 0; nla_for_each_nested(a, attr, rem) { int type = nla_type(a); @@ -527,11 +568,30 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, tun_flags |= TUNNEL_OAM; break; case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: + if (opts_type) { + OVS_NLERR(log, "Multiple metadata blocks provided"); + return -EINVAL; + } + err = genev_tun_opt_from_nlattr(a, match, is_mask, log); if (err) return err; - tun_flags |= TUNNEL_OPTIONS_PRESENT; + tun_flags |= TUNNEL_GENEVE_OPT; + opts_type = type; + break; + case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: + if (opts_type) { + OVS_NLERR(log, "Multiple metadata blocks provided"); + return -EINVAL; + } + + err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log); + if (err) + 
return err; + + tun_flags |= TUNNEL_VXLAN_OPT; + opts_type = type; break; default: OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d", @@ -560,6 +620,23 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, } } + return opts_type; +} + +static int vxlan_opt_to_nlattr(struct sk_buff *skb, + const void *tun_opts, int swkey_tun_opts_len) +{ + const struct ovs_vxlan_opts *opts = tun_opts; + struct nlattr *nla; + + nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS); + if (!nla) + return -EMSGSIZE; + + if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0) + return -EMSGSIZE; + + nla_nest_end(skb, nla); return 0; } @@ -596,10 +673,15 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb, if ((output->tun_flags & TUNNEL_OAM) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) return -EMSGSIZE; - if (tun_opts && - nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, - swkey_tun_opts_len, tun_opts)) - return -EMSGSIZE; + if (tun_opts) { + if (output->tun_flags & TUNNEL_GENEVE_OPT && + nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, + swkey_tun_opts_len, tun_opts)) + return -EMSGSIZE; + else if (output->tun_flags & TUNNEL_VXLAN_OPT && + vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) + return -EMSGSIZE; + } return 0; } @@ -680,7 +762,7 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, } if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, - is_mask, log)) + is_mask, log) < 0) return -EINVAL; *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); } @@ -1578,17 +1660,23 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, struct sw_flow_key key; struct ovs_tunnel_info *tun_info; struct nlattr *a; - int err, start; + int err, start, opts_type; ovs_match_init(&match, &key, NULL); - err = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); - if (err) - return err; + opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); + if (opts_type < 0) + return opts_type; if 
(key.tun_opts_len) { - err = validate_geneve_opts(&key); - if (err < 0) - return err; + switch (opts_type) { + case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: + err = validate_geneve_opts(&key); + if (err < 0) + return err; + break; + case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: + break; + } }; start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log); diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 88a010c98c05..7ca3d454ff3b 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -88,7 +88,7 @@ static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb) opts_len = geneveh->opt_len * 4; - flags = TUNNEL_KEY | TUNNEL_OPTIONS_PRESENT | + flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT | (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) | (geneveh->oam ? TUNNEL_OAM : 0) | (geneveh->critical ? TUNNEL_CRIT_OPT : 0); @@ -178,7 +178,7 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) __be16 sport; struct rtable *rt; struct flowi4 fl; - u8 vni[3]; + u8 vni[3], opts_len, *opts; __be16 df; int err; @@ -200,11 +200,18 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) tunnel_id_to_vni(tun_key->tun_id, vni); skb->ignore_df = 1; + if (tun_key->tun_flags & TUNNEL_GENEVE_OPT) { + opts = (u8 *)tun_info->options; + opts_len = tun_info->options_len; + } else { + opts = NULL; + opts_len = 0; + } + err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr, tun_key->ipv4_dst, tun_key->ipv4_tos, tun_key->ipv4_ttl, df, sport, dport, - tun_key->tun_flags, vni, - tun_info->options_len, (u8 *)tun_info->options, + tun_key->tun_flags, vni, opts_len, opts, false); if (err < 0) ip_rt_put(rt); diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 9919d71c52c3..8a2d54cba9ba 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -40,6 +40,7 @@ #include "datapath.h" #include "vport.h" +#include "vport-vxlan.h" /** * struct vxlan_port - Keeps track of open UDP 
ports @@ -49,6 +50,7 @@ struct vxlan_port { struct vxlan_sock *vs; char name[IFNAMSIZ]; + u32 exts; /* VXLAN_F_* in */ }; static struct vport_ops ovs_vxlan_vport_ops; @@ -63,16 +65,26 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, struct vxlan_metadata *md) { struct ovs_tunnel_info tun_info; + struct vxlan_port *vxlan_port; struct vport *vport = vs->data; struct iphdr *iph; + struct ovs_vxlan_opts opts = { + .gbp = md->gbp, + }; __be64 key; + __be16 flags; + + flags = TUNNEL_KEY; + vxlan_port = vxlan_vport(vport); + if (vxlan_port->exts & VXLAN_F_GBP) + flags |= TUNNEL_VXLAN_OPT; /* Save outer tunnel values */ iph = ip_hdr(skb); key = cpu_to_be64(ntohl(md->vni) >> 8); ovs_flow_tun_info_init(&tun_info, iph, udp_hdr(skb)->source, udp_hdr(skb)->dest, - key, TUNNEL_KEY, NULL, 0); + key, flags, &opts, sizeof(opts)); ovs_vport_receive(vport, skb, &tun_info); } @@ -84,6 +96,21 @@ static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb) if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port))) return -EMSGSIZE; + + if (vxlan_port->exts) { + struct nlattr *exts; + + exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION); + if (!exts) + return -EMSGSIZE; + + if (vxlan_port->exts & VXLAN_F_GBP && + nla_put_flag(skb, OVS_VXLAN_EXT_GBP)) + return -EMSGSIZE; + + nla_nest_end(skb, exts); + } + return 0; } @@ -96,6 +123,31 @@ static void vxlan_tnl_destroy(struct vport *vport) ovs_vport_deferred_free(vport); } +static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX+1] = { + [OVS_VXLAN_EXT_GBP] = { .type = NLA_FLAG, }, +}; + +static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr) +{ + struct nlattr *exts[OVS_VXLAN_EXT_MAX+1]; + struct vxlan_port *vxlan_port; + int err; + + if (nla_len(attr) < sizeof(struct nlattr)) + return -EINVAL; + + err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy); + if (err < 0) + return err; + + vxlan_port = vxlan_vport(vport); + + if (exts[OVS_VXLAN_EXT_GBP]) + 
vxlan_port->exts |= VXLAN_F_GBP; + + return 0; +} + static struct vport *vxlan_tnl_create(const struct vport_parms *parms) { struct net *net = ovs_dp_get_net(parms->dp); @@ -128,7 +180,17 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) vxlan_port = vxlan_vport(vport); strncpy(vxlan_port->name, parms->name, IFNAMSIZ); - vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, 0); + a = nla_find_nested(options, OVS_TUNNEL_ATTR_EXTENSION); + if (a) { + err = vxlan_configure_exts(vport, a); + if (err) { + ovs_vport_free(vport); + goto error; + } + } + + vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, + vxlan_port->exts); if (IS_ERR(vs)) { ovs_vport_free(vport); return (void *)vs; @@ -141,6 +203,21 @@ error: return ERR_PTR(err); } +static int vxlan_ext_gbp(struct sk_buff *skb) +{ + const struct ovs_tunnel_info *tun_info; + const struct ovs_vxlan_opts *opts; + + tun_info = OVS_CB(skb)->egress_tun_info; + opts = tun_info->options; + + if (tun_info->tunnel.tun_flags & TUNNEL_VXLAN_OPT && + tun_info->options_len >= sizeof(*opts)) + return opts->gbp; + else + return 0; +} + static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) { struct net *net = ovs_dp_get_net(vport->dp); @@ -173,6 +250,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) src_port = udp_flow_src_port(net, skb, 0, 0, true); md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8); + md.gbp = vxlan_ext_gbp(skb); err = vxlan_xmit_skb(vxlan_port->vs, rt, skb, fl.saddr, tun_key->ipv4_dst, diff --git a/net/openvswitch/vport-vxlan.h b/net/openvswitch/vport-vxlan.h new file mode 100644 index 000000000000..4b08233e73d5 --- /dev/null +++ b/net/openvswitch/vport-vxlan.h @@ -0,0 +1,11 @@ +#ifndef VPORT_VXLAN_H +#define VPORT_VXLAN_H 1 + +#include +#include + +struct ovs_vxlan_opts { + __u32 gbp; +}; + +#endif -- cgit v1.2.3