From 4d5ec89fc8d14dcdab7214a0c13a1c7321dc6ea9 Mon Sep 17 00:00:00 2001 From: Numan Siddique Date: Tue, 26 Mar 2019 06:13:46 +0530 Subject: net: openvswitch: Add a new action check_pkt_len This patch adds a new action - 'check_pkt_len' which checks the packet length and executes a set of actions if the packet length is greater than the specified length or executes another set of actions if the packet length is lesser or equal to. This action takes below nlattrs * OVS_CHECK_PKT_LEN_ATTR_PKT_LEN - 'pkt_len' to check for * OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER - Nested actions to apply if the packet length is greater than the specified 'pkt_len' * OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL - Nested actions to apply if the packet length is lesser or equal to the specified 'pkt_len'. The main use case for adding this action is to solve the packet drops because of MTU mismatch in OVN virtual networking solution. When a VM (which belongs to a logical switch of OVN) sends a packet destined to go via the gateway router and if the nic which provides external connectivity, has a lesser MTU, OVS drops the packet if the packet length is greater than this MTU. With the help of this action, OVN will check the packet length and if it is greater than the MTU size, it will generate an ICMP packet (type 3, code 4) and includes the next hop mtu in it so that the sender can fragment the packets. Reported-at: https://mail.openvswitch.org/pipermail/ovs-discuss/2018-July/047039.html Suggested-by: Ben Pfaff Signed-off-by: Numan Siddique CC: Gregory Rose CC: Pravin B Shelar Acked-by: Pravin B Shelar Tested-by: Greg Rose Reviewed-by: Greg Rose Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 42 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/uapi/linux/openvswitch.h') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index dbe0cbe4f1b7..dfabacee6903 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -798,6 +798,44 @@ struct ovs_action_push_eth { struct ovs_key_ethernet addresses; }; +/* + * enum ovs_check_pkt_len_attr - Attributes for %OVS_ACTION_ATTR_CHECK_PKT_LEN. + * + * @OVS_CHECK_PKT_LEN_ATTR_PKT_LEN: u16 Packet length to check for. + * @OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER: Nested OVS_ACTION_ATTR_* + * actions to apply if the packer length is greater than the specified + * length in the attr - OVS_CHECK_PKT_LEN_ATTR_PKT_LEN. + * @OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL - Nested OVS_ACTION_ATTR_* + * actions to apply if the packer length is lesser or equal to the specified + * length in the attr - OVS_CHECK_PKT_LEN_ATTR_PKT_LEN. + */ +enum ovs_check_pkt_len_attr { + OVS_CHECK_PKT_LEN_ATTR_UNSPEC, + OVS_CHECK_PKT_LEN_ATTR_PKT_LEN, + OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER, + OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL, + __OVS_CHECK_PKT_LEN_ATTR_MAX, + +#ifdef __KERNEL__ + OVS_CHECK_PKT_LEN_ATTR_ARG /* struct check_pkt_len_arg */ +#endif +}; + +#define OVS_CHECK_PKT_LEN_ATTR_MAX (__OVS_CHECK_PKT_LEN_ATTR_MAX - 1) + +#ifdef __KERNEL__ +struct check_pkt_len_arg { + u16 pkt_len; /* Same value as OVS_CHECK_PKT_LEN_ATTR_PKT_LEN'. */ + bool exec_for_greater; /* When true, actions in IF_GREATER will + * not change flow keys. False otherwise. + */ + bool exec_for_lesser_equal; /* When true, actions in IF_LESS_EQUAL + * will not change flow keys. False + * otherwise. + */ +}; +#endif + /** * enum ovs_action_attr - Action types. * @@ -842,6 +880,9 @@ struct ovs_action_push_eth { * packet, or modify the packet (e.g., change the DSCP field). * @OVS_ACTION_ATTR_CLONE: make a copy of the packet and execute a list of * actions without affecting the original packet and key. + * @OVS_ACTION_ATTR_CHECK_PKT_LEN: Check the packet length and execute a set + * of actions if greater than the specified packet length, else execute + * another set of actions. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -876,6 +917,7 @@ enum ovs_action_attr { OVS_ACTION_ATTR_POP_NSH, /* No argument. */ OVS_ACTION_ATTR_METER, /* u32 meter ID. */ OVS_ACTION_ATTR_CLONE, /* Nested OVS_CLONE_ATTR_*. */ + OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ -- cgit v1.2.3 From 06bd2bdf19d2f3d22731625e1a47fa1dff5ac407 Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Tue, 26 Mar 2019 11:31:14 -0700 Subject: openvswitch: Add timeout support to ct action Add support for fine-grain timeout support to conntrack action. The new OVS_CT_ATTR_TIMEOUT attribute of the conntrack action specifies a timeout to be associated with this connection. If no timeout is specified, it acts as is, that is the default timeout for the connection will be automatically applied. Example usage: $ nfct timeout add timeout_1 inet tcp syn_sent 100 established 200 $ ovs-ofctl add-flow br0 in_port=1,ip,tcp,action=ct(commit,timeout=timeout_1) CC: Pravin Shelar CC: Pablo Neira Ayuso Signed-off-by: Yi-Hung Wei Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 3 +++ net/openvswitch/conntrack.c | 30 +++++++++++++++++++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux/openvswitch.h') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index dfabacee6903..0cac5d802c6a 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -734,6 +734,7 @@ struct ovs_action_hash { * be received on NFNLGRP_CONNTRACK_NEW and NFNLGRP_CONNTRACK_DESTROY groups, * respectively. Remaining bits control the changes for which an event is * delivered on the NFNLGRP_CONNTRACK_UPDATE group. + * @OVS_CT_ATTR_TIMEOUT: Variable length string defining conntrack timeout. */ enum ovs_ct_attr { OVS_CT_ATTR_UNSPEC, @@ -746,6 +747,8 @@ enum ovs_ct_attr { OVS_CT_ATTR_NAT, /* Nested OVS_NAT_ATTR_* */ OVS_CT_ATTR_FORCE_COMMIT, /* No argument */ OVS_CT_ATTR_EVENTMASK, /* u32 mask of IPCT_* events. */ + OVS_CT_ATTR_TIMEOUT, /* Associate timeout with this connection for + * fine-grain timeout tuning. */ __OVS_CT_ATTR_MAX }; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 845b83598e0d..121b01d4a3c0 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -73,6 +74,7 @@ struct ovs_conntrack_info { u32 eventmask; /* Mask of 1 << IPCT_*. */ struct md_mark mark; struct md_labels labels; + char timeout[CTNL_TIMEOUT_NAME_MAX]; #ifdef CONFIG_NF_NAT_NEEDED struct nf_nat_range2 range; /* Only present for SRC NAT and DST NAT. */ #endif @@ -1471,6 +1473,8 @@ static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { #endif [OVS_CT_ATTR_EVENTMASK] = { .minlen = sizeof(u32), .maxlen = sizeof(u32) }, + [OVS_CT_ATTR_TIMEOUT] = { .minlen = 1, + .maxlen = CTNL_TIMEOUT_NAME_MAX }, }; static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, @@ -1556,6 +1560,15 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, info->have_eventmask = true; info->eventmask = nla_get_u32(a); break; +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT + case OVS_CT_ATTR_TIMEOUT: + memcpy(info->timeout, nla_data(a), nla_len(a)); + if (!memchr(info->timeout, '\0', nla_len(a))) { + OVS_NLERR(log, "Invalid conntrack helper"); + return -EINVAL; + } + break; +#endif default: OVS_NLERR(log, "Unknown conntrack attr (%d)", @@ -1637,6 +1650,14 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, OVS_NLERR(log, "Failed to allocate conntrack template"); return -ENOMEM; } + + if (ct_info.timeout[0]) { + if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto, + ct_info.timeout)) + pr_info_ratelimited("Failed to associated timeout " + "policy `%s'\n", ct_info.timeout); + } + if (helper) { err = ovs_ct_add_helper(&ct_info, helper, key, log); if (err) @@ -1757,6 +1778,10 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, if (ct_info->have_eventmask && nla_put_u32(skb, OVS_CT_ATTR_EVENTMASK, ct_info->eventmask)) return -EMSGSIZE; + if (ct_info->timeout[0]) { + if (nla_put_string(skb, OVS_CT_ATTR_TIMEOUT, ct_info->timeout)) + return -EMSGSIZE; + } #ifdef CONFIG_NF_NAT_NEEDED if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb)) @@ -1778,8 +1803,11 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info) { if (ct_info->helper) nf_conntrack_helper_put(ct_info->helper); - if (ct_info->ct) + if (ct_info->ct) { nf_ct_tmpl_free(ct_info->ct); + if (ct_info->timeout[0]) + nf_ct_destroy_timeout(ct_info->ct); + } } #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) -- cgit v1.2.3 From 18b6f717483a835fb98de9f0df6c724df9324e78 Mon Sep 17 00:00:00 2001 From: wenxu Date: Thu, 28 Mar 2019 12:43:23 +0800 Subject: openvswitch: Make metadata_dst tunnel work in IP_TUNNEL_INFO_BRIDGE mode There is currently no support for the multicast/broadcast aspects of VXLAN in ovs. In the datapath flow the tun_dst must specific. But in the IP_TUNNEL_INFO_BRIDGE mode the tun_dst can not be specific. And the packet can forward through the fdb table of vxlan devcice. In this mode the broadcast/multicast packet can be sent through the following ways in ovs. ovs-vsctl add-port br0 vxlan -- set in vxlan type=vxlan \ options:key=1000 options:remote_ip=flow ovs-ofctl add-flow br0 in_port=LOCAL,dl_dst=ff:ff:ff:ff:ff:ff, \ action=output:vxlan bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.1 \ src_vni 1000 vni 1000 self bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.2 \ src_vni 1000 vni 1000 self Signed-off-by: wenxu Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 1 + net/openvswitch/flow_netlink.c | 46 +++++++++++++++++++++++++++++++--------- 2 files changed, 37 insertions(+), 10 deletions(-) (limited to 'include/uapi/linux/openvswitch.h') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 0cac5d802c6a..f271f1ec50ae 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -364,6 +364,7 @@ enum ovs_tunnel_key_attr { OVS_TUNNEL_KEY_ATTR_IPV6_DST, /* struct in6_addr dst IPv6 address. */ OVS_TUNNEL_KEY_ATTR_PAD, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS, /* struct erspan_metadata */ + OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE, /* No argument. IPV4_INFO_BRIDGE mode.*/ __OVS_TUNNEL_KEY_ATTR_MAX }; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index b7543700db87..bd019058fc6f 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -404,6 +404,7 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) }, [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = OVS_ATTR_VARIABLE }, + [OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE] = { .len = 0 }, }; static const struct ovs_len_tbl @@ -667,6 +668,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, bool log) { bool ttl = false, ipv4 = false, ipv6 = false; + bool info_bridge_mode = false; __be16 tun_flags = 0; int opts_type = 0; struct nlattr *a; @@ -783,6 +785,10 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, tun_flags |= TUNNEL_ERSPAN_OPT; opts_type = type; break; + case OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE: + info_bridge_mode = true; + ipv4 = true; + break; default: OVS_NLERR(log, "Unknown IP tunnel attribute %d", type); @@ -813,16 +819,29 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, OVS_NLERR(log, "IP tunnel dst address not specified"); return -EINVAL; } - if (ipv4 && !match->key->tun_key.u.ipv4.dst) { - OVS_NLERR(log, "IPv4 tunnel dst address is zero"); - return -EINVAL; + if (ipv4) { + if (info_bridge_mode) { + if (match->key->tun_key.u.ipv4.src || + match->key->tun_key.u.ipv4.dst || + match->key->tun_key.tp_src || + match->key->tun_key.tp_dst || + match->key->tun_key.ttl || + match->key->tun_key.tos || + tun_flags & ~TUNNEL_KEY) { + OVS_NLERR(log, "IPv4 tun info is not correct"); + return -EINVAL; + } + } else if (!match->key->tun_key.u.ipv4.dst) { + OVS_NLERR(log, "IPv4 tunnel dst address is zero"); + return -EINVAL; + } } if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) { OVS_NLERR(log, "IPv6 tunnel dst address is zero"); return -EINVAL; } - if (!ttl) { + if (!ttl && !info_bridge_mode) { OVS_NLERR(log, "IP tunnel TTL not specified."); return -EINVAL; } @@ -851,12 +870,17 @@ static int vxlan_opt_to_nlattr(struct sk_buff *skb, static int __ip_tun_to_nlattr(struct sk_buff *skb, const struct ip_tunnel_key *output, const void *tun_opts, int swkey_tun_opts_len, - unsigned short tun_proto) + unsigned short tun_proto, u8 mode) { if (output->tun_flags & TUNNEL_KEY && nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id, OVS_TUNNEL_KEY_ATTR_PAD)) return -EMSGSIZE; + + if (mode & IP_TUNNEL_INFO_BRIDGE) + return nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE) + ? -EMSGSIZE : 0; + switch (tun_proto) { case AF_INET: if (output->u.ipv4.src && @@ -919,7 +943,7 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb, static int ip_tun_to_nlattr(struct sk_buff *skb, const struct ip_tunnel_key *output, const void *tun_opts, int swkey_tun_opts_len, - unsigned short tun_proto) + unsigned short tun_proto, u8 mode) { struct nlattr *nla; int err; @@ -929,7 +953,7 @@ static int ip_tun_to_nlattr(struct sk_buff *skb, return -EMSGSIZE; err = __ip_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len, - tun_proto); + tun_proto, mode); if (err) return err; @@ -943,7 +967,7 @@ int ovs_nla_put_tunnel_info(struct sk_buff *skb, return __ip_tun_to_nlattr(skb, &tun_info->key, ip_tunnel_info_opts(tun_info), tun_info->options_len, - ip_tunnel_info_af(tun_info)); + ip_tunnel_info_af(tun_info), tun_info->mode); } static int encode_vlan_from_nlattrs(struct sw_flow_match *match, @@ -1981,7 +2005,7 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len); if (ip_tun_to_nlattr(skb, &output->tun_key, opts, - swkey->tun_opts_len, swkey->tun_proto)) + swkey->tun_opts_len, swkey->tun_proto, 0)) goto nla_put_failure; } @@ -2606,6 +2630,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, tun_info->mode = IP_TUNNEL_INFO_TX; if (key.tun_proto == AF_INET6) tun_info->mode |= IP_TUNNEL_INFO_IPV6; + else if (key.tun_proto == AF_INET && key.tun_key.u.ipv4.dst == 0) + tun_info->mode |= IP_TUNNEL_INFO_BRIDGE; tun_info->key = key.tun_key; /* We need to store the options in the action itself since @@ -3367,7 +3393,7 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) err = ip_tun_to_nlattr(skb, &tun_info->key, ip_tunnel_info_opts(tun_info), tun_info->options_len, - ip_tunnel_info_af(tun_info)); + ip_tunnel_info_af(tun_info), tun_info->mode); if (err) return err; nla_nest_end(skb, start); -- cgit v1.2.3