From 3b15d09f7e6db44065aaba5fd16dc7420035c5ad Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 28 Feb 2019 13:13:26 +0800 Subject: time: Introduce jiffies64_to_msecs() there is a similar helper in net/netfilter/nf_tables_api.c, this maybe become a common request someday, so move it to time.c Signed-off-by: Zhang Yu Signed-off-by: Li RongQing Acked-by: John Stultz Signed-off-by: Pablo Neira Ayuso --- include/linux/jiffies.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index fa928242567d..1b6d31da7cbc 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -297,6 +297,7 @@ static inline u64 jiffies_to_nsecs(const unsigned long j) } extern u64 jiffies64_to_nsecs(u64 j); +extern u64 jiffies64_to_msecs(u64 j); extern unsigned long __msecs_to_jiffies(const unsigned int m); #if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) -- cgit v1.2.3 From 84c0d5e96f3ae20344fb3a79161eab18905dae56 Mon Sep 17 00:00:00 2001 From: Jacky Hu Date: Tue, 26 Mar 2019 18:31:21 +0800 Subject: ipvs: allow tunneling with gue encapsulation ipip packets are blocked in some public cloud environments, this patch allows gue encapsulation with the tunneling method, which would make tunneling working in those environments. Signed-off-by: Jacky Hu Acked-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 5 +++ include/uapi/linux/ip_vs.h | 11 ++++++ net/netfilter/ipvs/ip_vs_ctl.c | 35 ++++++++++++++++- net/netfilter/ipvs/ip_vs_xmit.c | 84 +++++++++++++++++++++++++++++++++++++++-- 4 files changed, 130 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 047f9a5ccaad..2ac40135b576 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -600,6 +600,9 @@ struct ip_vs_dest_user_kern { /* Address family of addr */ u16 af; + + u16 tun_type; /* tunnel type */ + __be16 tun_port; /* tunnel port */ }; @@ -660,6 +663,8 @@ struct ip_vs_dest { atomic_t conn_flags; /* flags to copy to conn */ atomic_t weight; /* server weight */ atomic_t last_weight; /* server latest weight */ + __u16 tun_type; /* tunnel type */ + __be16 tun_port; /* tunnel port */ refcount_t refcnt; /* reference counter */ struct ip_vs_stats stats; /* statistics */ diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h index 1c916b2f89dc..e34f436fc79d 100644 --- a/include/uapi/linux/ip_vs.h +++ b/include/uapi/linux/ip_vs.h @@ -124,6 +124,13 @@ #define IP_VS_PEDATA_MAXLEN 255 +/* Tunnel types */ +enum { + IP_VS_CONN_F_TUNNEL_TYPE_IPIP = 0, /* IPIP */ + IP_VS_CONN_F_TUNNEL_TYPE_GUE, /* GUE */ + IP_VS_CONN_F_TUNNEL_TYPE_MAX, +}; + /* * The struct ip_vs_service_user and struct ip_vs_dest_user are * used to set IPVS rules through setsockopt. @@ -392,6 +399,10 @@ enum { IPVS_DEST_ATTR_STATS64, /* nested attribute for dest stats */ + IPVS_DEST_ATTR_TUN_TYPE, /* tunnel type */ + + IPVS_DEST_ATTR_TUN_PORT, /* tunnel port */ + __IPVS_DEST_ATTR_MAX, }; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 4b933669fd83..ab119a7540db 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -831,6 +831,10 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK; conn_flags |= IP_VS_CONN_F_INACTIVE; + /* set the tunnel info */ + dest->tun_type = udest->tun_type; + dest->tun_port = udest->tun_port; + /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) { conn_flags |= IP_VS_CONN_F_NOOUTPUT; @@ -987,6 +991,13 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) return -ERANGE; } + if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { + if (udest->tun_port == 0) { + pr_err("%s(): tunnel port is zero\n", __func__); + return -EINVAL; + } + } + ip_vs_addr_copy(udest->af, &daddr, &udest->addr); /* We use function that requires RCU lock */ @@ -1051,6 +1062,13 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) return -ERANGE; } + if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { + if (udest->tun_port == 0) { + pr_err("%s(): tunnel port is zero\n", __func__); + return -EINVAL; + } + } + ip_vs_addr_copy(udest->af, &daddr, &udest->addr); /* We use function that requires RCU lock */ @@ -2333,6 +2351,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, udest->u_threshold = udest_compat->u_threshold; udest->l_threshold = udest_compat->l_threshold; udest->af = AF_INET; + udest->tun_type = IP_VS_CONN_F_TUNNEL_TYPE_IPIP; } static int @@ -2890,6 +2909,8 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = { [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 }, [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED }, [IPVS_DEST_ATTR_ADDR_FAMILY] = { .type = NLA_U16 }, + [IPVS_DEST_ATTR_TUN_TYPE] = { .type = NLA_U8 }, + [IPVS_DEST_ATTR_TUN_PORT] = { .type = NLA_U16 }, }; static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, @@ -3193,6 +3214,10 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) IP_VS_CONN_F_FWD_MASK)) || nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight)) || + nla_put_u8(skb, IPVS_DEST_ATTR_TUN_TYPE, + dest->tun_type) || + nla_put_be16(skb, IPVS_DEST_ATTR_TUN_PORT, + dest->tun_port) || nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) || nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) || nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS, @@ -3315,12 +3340,14 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, /* If a full entry was requested, check for the additional fields */ if (full_entry) { struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh, - *nla_l_thresh; + *nla_l_thresh, *nla_tun_type, *nla_tun_port; nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD]; nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT]; nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH]; nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH]; + nla_tun_type = attrs[IPVS_DEST_ATTR_TUN_TYPE]; + nla_tun_port = attrs[IPVS_DEST_ATTR_TUN_PORT]; if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh)) return -EINVAL; @@ -3330,6 +3357,12 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, udest->weight = nla_get_u32(nla_weight); udest->u_threshold = nla_get_u32(nla_u_thresh); udest->l_threshold = nla_get_u32(nla_l_thresh); + + if (nla_tun_type) + udest->tun_type = nla_get_u8(nla_tun_type); + + if (nla_tun_port) + udest->tun_port = nla_get_be16(nla_tun_port); } return 0; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 175349fcf91f..8d6f94b67772 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -32,6 +32,7 @@ #include #include /* for tcphdr */ #include +#include #include /* for csum_tcpudp_magic */ #include #include /* for icmp_send */ @@ -382,6 +383,10 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, mtu = dst_mtu(&rt->dst); } else { mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); + if (!dest) + goto err_put; + if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) + mtu -= sizeof(struct udphdr) + sizeof(struct guehdr); if (mtu < 68) { IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); goto err_put; @@ -533,6 +538,10 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, mtu = dst_mtu(&rt->dst); else { mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); + if (!dest) + goto err_put; + if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) + mtu -= sizeof(struct udphdr) + sizeof(struct guehdr); if (mtu < IPV6_MIN_MTU) { IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, IPV6_MIN_MTU); @@ -989,6 +998,41 @@ static inline int __tun_gso_type_mask(int encaps_af, int orig_af) } } +static int +ipvs_gue_encap(struct net *net, struct sk_buff *skb, + struct ip_vs_conn *cp, __u8 *next_protocol) +{ + __be16 dport; + __be16 sport = udp_flow_src_port(net, skb, 0, 0, false); + struct udphdr *udph; /* Our new UDP header */ + struct guehdr *gueh; /* Our new GUE header */ + + skb_push(skb, sizeof(struct guehdr)); + + gueh = (struct guehdr *)skb->data; + + gueh->control = 0; + gueh->version = 0; + gueh->hlen = 0; + gueh->flags = 0; + gueh->proto_ctype = *next_protocol; + + skb_push(skb, sizeof(struct udphdr)); + skb_reset_transport_header(skb); + + udph = udp_hdr(skb); + + dport = cp->dest->tun_port; + udph->dest = dport; + udph->source = sport; + udph->len = htons(skb->len); + udph->check = 0; + + *next_protocol = IPPROTO_UDP; + + return 0; +} + /* * IP Tunneling transmitter * @@ -1025,6 +1069,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ int ret, local; + int tun_type, gso_type; EnterFunction(10); @@ -1046,6 +1091,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, */ max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr); + tun_type = cp->dest->tun_type; + + if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) + max_headroom += sizeof(struct udphdr) + sizeof(struct guehdr); + /* We only care about the df field if sysctl_pmtu_disc(ipvs) is set */ dfp = sysctl_pmtu_disc(ipvs) ? &df : NULL; skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom, @@ -1054,11 +1104,20 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (IS_ERR(skb)) goto tx_error; - if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET, cp->af))) + gso_type = __tun_gso_type_mask(AF_INET, cp->af); + if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) + gso_type |= SKB_GSO_UDP_TUNNEL; + + if (iptunnel_handle_offloads(skb, gso_type)) goto tx_error; skb->transport_header = skb->network_header; + skb_set_inner_ipproto(skb, next_protocol); + + if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) + ipvs_gue_encap(net, skb, cp, &next_protocol); + skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); @@ -1102,6 +1161,8 @@ int ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { + struct netns_ipvs *ipvs = cp->ipvs; + struct net *net = ipvs->net; struct rt6_info *rt; /* Route to the other host */ struct in6_addr saddr; /* Source for tunnel */ struct net_device *tdev; /* Device to other host */ @@ -1112,10 +1173,11 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ipv6hdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ int ret, local; + int tun_type, gso_type; EnterFunction(10); - local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest, + local = __ip_vs_get_out_rt_v6(ipvs, cp->af, skb, cp->dest, &cp->daddr.in6, &saddr, ipvsh, 1, IP_VS_RT_MODE_LOCAL | @@ -1134,17 +1196,31 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, */ max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr); + tun_type = cp->dest->tun_type; + + if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) + max_headroom += sizeof(struct udphdr) + sizeof(struct guehdr); + skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom, &next_protocol, &payload_len, &dsfield, &ttl, NULL); if (IS_ERR(skb)) goto tx_error; - if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET6, cp->af))) + gso_type = __tun_gso_type_mask(AF_INET6, cp->af); + if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) + gso_type |= SKB_GSO_UDP_TUNNEL; + + if (iptunnel_handle_offloads(skb, gso_type)) goto tx_error; skb->transport_header = skb->network_header; + skb_set_inner_ipproto(skb, next_protocol); + + if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) + ipvs_gue_encap(net, skb, cp, &next_protocol); + skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); @@ -1167,7 +1243,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ret = ip_vs_tunnel_xmit_prepare(skb, cp); if (ret == NF_ACCEPT) - ip6_local_out(cp->ipvs->net, skb->sk, skb); + ip6_local_out(net, skb->sk, skb); else if (ret == NF_DROP) kfree_skb(skb); -- cgit v1.2.3 From 01902f8c85bfde343a4c2b7428d18762442f3a25 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 26 Mar 2019 20:06:20 +0800 Subject: netfilter: optimize nf_inet_addr_cmp optimize nf_inet_addr_cmp by 64bit xor computation similar to ipv6_addr_equal() Signed-off-by: Yuan Linsi Signed-off-by: Li RongQing Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 72cb19c3db6a..4e0145ea033e 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -24,10 +24,17 @@ static inline int NF_DROP_GETERR(int verdict) static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1, const union nf_inet_addr *a2) { +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 + const unsigned long *ul1 = (const unsigned long *)a1; + const unsigned long *ul2 = (const unsigned long *)a2; + + return ((ul1[0] ^ ul2[0]) | (ul1[1] ^ ul2[1])) == 0UL; +#else return a1->all[0] == a2->all[0] && a1->all[1] == a2->all[1] && a1->all[2] == a2->all[2] && a1->all[3] == a2->all[3]; +#endif } static inline void nf_inet_addr_mask(const union nf_inet_addr *a1, -- cgit v1.2.3 From d164385ec572cbe3335a635ac308760e126d4ec0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 27 Mar 2019 09:22:24 +0100 Subject: netfilter: nat: add inet family nat support We need minimal support from the nat core for this, as we do not want to register additional base hooks. When an inet hook is registered, interally register ipv4 and ipv6 hooks for them and unregister those when inet hooks are removed. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_nat.h | 7 +++-- net/netfilter/nf_nat_core.c | 16 +++++------- net/netfilter/nf_nat_proto.c | 43 ++++++++++++++++++++++++++----- net/netfilter/nft_chain_nat.c | 36 ++++++++++++++++++++++++++ net/netfilter/nft_nat.c | 58 ++++++++++++++++++++++++++++++++++++++++-- 5 files changed, 141 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index cf332c4e0b32..423cda2c6542 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -69,9 +69,9 @@ static inline bool nf_nat_oif_changed(unsigned int hooknum, #endif } -int nf_nat_register_fn(struct net *net, const struct nf_hook_ops *ops, +int nf_nat_register_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops, const struct nf_hook_ops *nat_ops, unsigned int ops_count); -void nf_nat_unregister_fn(struct net *net, const struct nf_hook_ops *ops, +void nf_nat_unregister_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops, unsigned int ops_count); unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, @@ -98,6 +98,9 @@ void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops); int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops); void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops); +int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops); +void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops); + unsigned int nf_nat_inet_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index af7dc6537758..a9ec49edd7f4 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -1009,7 +1009,7 @@ static struct nf_ct_helper_expectfn follow_master_nat = { .expectfn = nf_nat_follow_master, }; -int nf_nat_register_fn(struct net *net, const struct nf_hook_ops *ops, +int nf_nat_register_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops, const struct nf_hook_ops *orig_nat_ops, unsigned int ops_count) { struct nat_net *nat_net = net_generic(net, nat_net_id); @@ -1019,14 +1019,12 @@ int nf_nat_register_fn(struct net *net, const struct nf_hook_ops *ops, struct nf_hook_ops *nat_ops; int i, ret; - if (WARN_ON_ONCE(ops->pf >= ARRAY_SIZE(nat_net->nat_proto_net))) + if (WARN_ON_ONCE(pf >= ARRAY_SIZE(nat_net->nat_proto_net))) return -EINVAL; - nat_proto_net = &nat_net->nat_proto_net[ops->pf]; + nat_proto_net = &nat_net->nat_proto_net[pf]; for (i = 0; i < ops_count; i++) { - if (WARN_ON(orig_nat_ops[i].pf != ops->pf)) - return -EINVAL; if (orig_nat_ops[i].hooknum == hooknum) { hooknum = i; break; @@ -1086,8 +1084,8 @@ int nf_nat_register_fn(struct net *net, const struct nf_hook_ops *ops, return ret; } -void nf_nat_unregister_fn(struct net *net, const struct nf_hook_ops *ops, - unsigned int ops_count) +void nf_nat_unregister_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops, + unsigned int ops_count) { struct nat_net *nat_net = net_generic(net, nat_net_id); struct nf_nat_hooks_net *nat_proto_net; @@ -1096,10 +1094,10 @@ void nf_nat_unregister_fn(struct net *net, const struct nf_hook_ops *ops, int hooknum = ops->hooknum; int i; - if (ops->pf >= ARRAY_SIZE(nat_net->nat_proto_net)) + if (pf >= ARRAY_SIZE(nat_net->nat_proto_net)) return; - nat_proto_net = &nat_net->nat_proto_net[ops->pf]; + nat_proto_net = &nat_net->nat_proto_net[pf]; mutex_lock(&nf_nat_proto_mutex); if (WARN_ON(nat_proto_net->users == 0)) diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c index 62743da3004f..606d0a740615 100644 --- a/net/netfilter/nf_nat_proto.c +++ b/net/netfilter/nf_nat_proto.c @@ -725,7 +725,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, return ret; } -static const struct nf_hook_ops nf_nat_ipv4_ops[] = { +const struct nf_hook_ops nf_nat_ipv4_ops[] = { /* Before packet filtering, change destination */ { .hook = nf_nat_ipv4_in, @@ -758,13 +758,14 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = { int nf_nat_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops) { - return nf_nat_register_fn(net, ops, nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); + return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv4_ops, + ARRAY_SIZE(nf_nat_ipv4_ops)); } EXPORT_SYMBOL_GPL(nf_nat_ipv4_register_fn); void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops) { - nf_nat_unregister_fn(net, ops, ARRAY_SIZE(nf_nat_ipv4_ops)); + nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv4_ops)); } EXPORT_SYMBOL_GPL(nf_nat_ipv4_unregister_fn); @@ -977,7 +978,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, return ret; } -static const struct nf_hook_ops nf_nat_ipv6_ops[] = { +const struct nf_hook_ops nf_nat_ipv6_ops[] = { /* Before packet filtering, change destination */ { .hook = nf_nat_ipv6_in, @@ -1010,14 +1011,44 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = { int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops) { - return nf_nat_register_fn(net, ops, nf_nat_ipv6_ops, + return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); } EXPORT_SYMBOL_GPL(nf_nat_ipv6_register_fn); void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops) { - nf_nat_unregister_fn(net, ops, ARRAY_SIZE(nf_nat_ipv6_ops)); + nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv6_ops)); } EXPORT_SYMBOL_GPL(nf_nat_ipv6_unregister_fn); #endif /* CONFIG_IPV6 */ + +#if defined(CONFIG_NF_TABLES_INET) && IS_ENABLED(CONFIG_NFT_NAT) +int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops) +{ + int ret; + + if (WARN_ON_ONCE(ops->pf != NFPROTO_INET)) + return -EINVAL; + + ret = nf_nat_register_fn(net, NFPROTO_IPV6, ops, nf_nat_ipv6_ops, + ARRAY_SIZE(nf_nat_ipv6_ops)); + if (ret) + return ret; + + ret = nf_nat_register_fn(net, NFPROTO_IPV4, ops, nf_nat_ipv4_ops, + ARRAY_SIZE(nf_nat_ipv4_ops)); + if (ret) + nf_nat_ipv6_unregister_fn(net, ops); + + return ret; +} +EXPORT_SYMBOL_GPL(nf_nat_inet_register_fn); + +void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops) +{ + nf_nat_unregister_fn(net, NFPROTO_IPV4, ops, ARRAY_SIZE(nf_nat_ipv4_ops)); + nf_nat_unregister_fn(net, NFPROTO_IPV6, ops, ARRAY_SIZE(nf_nat_ipv6_ops)); +} +EXPORT_SYMBOL_GPL(nf_nat_inet_unregister_fn); +#endif /* NFT INET NAT */ diff --git a/net/netfilter/nft_chain_nat.c b/net/netfilter/nft_chain_nat.c index ee4852088d50..2f89bde3c61c 100644 --- a/net/netfilter/nft_chain_nat.c +++ b/net/netfilter/nft_chain_nat.c @@ -74,6 +74,36 @@ static const struct nft_chain_type nft_chain_nat_ipv6 = { }; #endif +#ifdef CONFIG_NF_TABLES_INET +static int nft_nat_inet_reg(struct net *net, const struct nf_hook_ops *ops) +{ + return nf_nat_inet_register_fn(net, ops); +} + +static void nft_nat_inet_unreg(struct net *net, const struct nf_hook_ops *ops) +{ + nf_nat_inet_unregister_fn(net, ops); +} + +static const struct nft_chain_type nft_chain_nat_inet = { + .name = "nat", + .type = NFT_CHAIN_T_NAT, + .family = NFPROTO_INET, + .hook_mask = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING), + .hooks = { + [NF_INET_PRE_ROUTING] = nft_nat_do_chain, + [NF_INET_LOCAL_IN] = nft_nat_do_chain, + [NF_INET_LOCAL_OUT] = nft_nat_do_chain, + [NF_INET_POST_ROUTING] = nft_nat_do_chain, + }, + .ops_register = nft_nat_inet_reg, + .ops_unregister = nft_nat_inet_unreg, +}; +#endif + static int __init nft_chain_nat_init(void) { #ifdef CONFIG_NF_TABLES_IPV6 @@ -82,6 +112,9 @@ static int __init nft_chain_nat_init(void) #ifdef CONFIG_NF_TABLES_IPV4 nft_register_chain_type(&nft_chain_nat_ipv4); #endif +#ifdef CONFIG_NF_TABLES_INET + nft_register_chain_type(&nft_chain_nat_inet); +#endif return 0; } @@ -94,6 +127,9 @@ static void __exit nft_chain_nat_exit(void) #ifdef CONFIG_NF_TABLES_IPV6 nft_unregister_chain_type(&nft_chain_nat_ipv6); #endif +#ifdef CONFIG_NF_TABLES_INET + nft_unregister_chain_type(&nft_chain_nat_inet); +#endif } module_init(nft_chain_nat_init); diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index e93aed9bda88..d90d421826aa 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -140,7 +140,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY])); - if (family != ctx->family) + if (ctx->family != NFPROTO_INET && ctx->family != family) return -EOPNOTSUPP; switch (family) { @@ -278,13 +278,67 @@ static struct nft_expr_type nft_nat_type __read_mostly = { .owner = THIS_MODULE, }; +#ifdef CONFIG_NF_TABLES_INET +static void nft_nat_inet_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_nat *priv = nft_expr_priv(expr); + + if (priv->family == nft_pf(pkt)) + nft_nat_eval(expr, regs, pkt); +} + +static const struct nft_expr_ops nft_nat_inet_ops = { + .type = &nft_nat_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_nat)), + .eval = nft_nat_inet_eval, + .init = nft_nat_init, + .destroy = nft_nat_destroy, + .dump = nft_nat_dump, + .validate = nft_nat_validate, +}; + +static struct nft_expr_type nft_inet_nat_type __read_mostly = { + .name = "nat", + .family = NFPROTO_INET, + .ops = &nft_nat_inet_ops, + .policy = nft_nat_policy, + .maxattr = NFTA_NAT_MAX, + .owner = THIS_MODULE, +}; + +static int nft_nat_inet_module_init(void) +{ + return nft_register_expr(&nft_inet_nat_type); +} + +static void nft_nat_inet_module_exit(void) +{ + nft_unregister_expr(&nft_inet_nat_type); +} +#else +static int nft_nat_inet_module_init(void) { return 0; } +static void nft_nat_inet_module_exit(void) { } +#endif + static int __init nft_nat_module_init(void) { - return nft_register_expr(&nft_nat_type); + int ret = nft_nat_inet_module_init(); + + if (ret) + return ret; + + ret = nft_register_expr(&nft_nat_type); + if (ret) + nft_nat_inet_module_exit(); + + return ret; } static void __exit nft_nat_module_exit(void) { + nft_nat_inet_module_exit(); nft_unregister_expr(&nft_nat_type); } -- cgit v1.2.3 From c1deb065cf3b5bcd483e3f03479f930edb151b99 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 27 Mar 2019 09:22:25 +0100 Subject: netfilter: nf_tables: merge route type into core very little code, so it really doesn't make sense to have extra modules or even a kconfig knob for this. Merge them and make functionality available unconditionally. The merge makes inet family route support trivial, so add it as well here. Before: text data bss dec hex filename 835 832 0 1667 683 nft_chain_route_ipv4.ko 870 832 0 1702 6a6 nft_chain_route_ipv6.ko 111568 2556 529 114653 1bfdd nf_tables.ko After: text data bss dec hex filename 113133 2556 529 116218 1c5fa nf_tables.ko Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6.h | 15 +++ include/net/netfilter/nf_tables.h | 2 + net/ipv4/netfilter/Kconfig | 8 -- net/ipv4/netfilter/Makefile | 1 - net/ipv4/netfilter/nft_chain_route_ipv4.c | 89 ---------------- net/ipv6/netfilter/Kconfig | 8 -- net/ipv6/netfilter/Makefile | 1 - net/ipv6/netfilter/nft_chain_route_ipv6.c | 91 ---------------- net/netfilter/Makefile | 3 +- net/netfilter/nf_nat_proto.c | 16 +-- net/netfilter/nf_tables_api.c | 2 + net/netfilter/nft_chain_route.c | 169 ++++++++++++++++++++++++++++++ 12 files changed, 191 insertions(+), 214 deletions(-) delete mode 100644 net/ipv4/netfilter/nft_chain_route_ipv4.c delete mode 100644 net/ipv6/netfilter/nft_chain_route_ipv6.c create mode 100644 net/netfilter/nft_chain_route.c (limited to 'include') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 471e9467105b..12113e502656 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -87,6 +87,21 @@ static inline int nf_ip6_route(struct net *net, struct dst_entry **dst, } int ip6_route_me_harder(struct net *net, struct sk_buff *skb); + +static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb) +{ +#if IS_MODULE(CONFIG_IPV6) + const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); + + if (!v6_ops) + return -EHOSTUNREACH; + + return v6_ops->route_me_harder(net, skb); +#else + return ip6_route_me_harder(net, skb); +#endif +} + __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 3e9ab643eedf..55dff3ab44c7 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1411,4 +1411,6 @@ struct nft_trans_flowtable { int __init nft_chain_filter_init(void); void nft_chain_filter_fini(void); +void __init nft_chain_route_init(void); +void nft_chain_route_fini(void); #endif /* _NET_NF_TABLES_H */ diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index c98391d49200..ea688832fc4e 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -27,14 +27,6 @@ config NF_TABLES_IPV4 if NF_TABLES_IPV4 -config NFT_CHAIN_ROUTE_IPV4 - tristate "IPv4 nf_tables route chain support" - help - This option enables the "route" chain for IPv4 in nf_tables. This - chain type is used to force packet re-routing after mangling header - fields such as the source, destination, type of service and - the packet mark. - config NFT_REJECT_IPV4 select NF_REJECT_IPV4 default NFT_REJECT diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index e241f5188ebe..2cfdda7b109f 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -24,7 +24,6 @@ nf_nat_snmp_basic-y := nf_nat_snmp_basic.asn1.o nf_nat_snmp_basic_main.o $(obj)/nf_nat_snmp_basic_main.o: $(obj)/nf_nat_snmp_basic.asn1.h obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o -obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c deleted file mode 100644 index 7d82934c46f4..000000000000 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2008 Patrick McHardy - * Copyright (c) 2012 Pablo Neira Ayuso - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static unsigned int nf_route_table_hook(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - unsigned int ret; - struct nft_pktinfo pkt; - u32 mark; - __be32 saddr, daddr; - u_int8_t tos; - const struct iphdr *iph; - int err; - - nft_set_pktinfo(&pkt, skb, state); - nft_set_pktinfo_ipv4(&pkt, skb); - - mark = skb->mark; - iph = ip_hdr(skb); - saddr = iph->saddr; - daddr = iph->daddr; - tos = iph->tos; - - ret = nft_do_chain(&pkt, priv); - if (ret != NF_DROP && ret != NF_STOLEN) { - iph = ip_hdr(skb); - - if (iph->saddr != saddr || - iph->daddr != daddr || - skb->mark != mark || - iph->tos != tos) { - err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); - if (err < 0) - ret = NF_DROP_ERR(err); - } - } - return ret; -} - -static const struct nft_chain_type nft_chain_route_ipv4 = { - .name = "route", - .type = NFT_CHAIN_T_ROUTE, - .family = NFPROTO_IPV4, - .owner = THIS_MODULE, - .hook_mask = (1 << NF_INET_LOCAL_OUT), - .hooks = { - [NF_INET_LOCAL_OUT] = nf_route_table_hook, - }, -}; - -static int __init nft_chain_route_init(void) -{ - nft_register_chain_type(&nft_chain_route_ipv4); - - return 0; -} - -static void __exit nft_chain_route_exit(void) -{ - nft_unregister_chain_type(&nft_chain_route_ipv4); -} - -module_init(nft_chain_route_init); -module_exit(nft_chain_route_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy "); -MODULE_ALIAS_NFT_CHAIN(AF_INET, "route"); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index ddc99a1653aa..3de3adb1a0c9 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -23,14 +23,6 @@ config NF_TABLES_IPV6 if NF_TABLES_IPV6 -config NFT_CHAIN_ROUTE_IPV6 - tristate "IPv6 nf_tables route chain support" - help - This option enables the "route" chain for IPv6 in nf_tables. This - chain type is used to force packet re-routing after mangling header - fields such as the source, destination, flowlabel, hop-limit and - the packet mark. - config NFT_REJECT_IPV6 select NF_REJECT_IPV6 default NFT_REJECT diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 3853c648ebaa..93aff604b243 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -27,7 +27,6 @@ obj-$(CONFIG_NF_REJECT_IPV6) += nf_reject_ipv6.o obj-$(CONFIG_NF_DUP_IPV6) += nf_dup_ipv6.o # nf_tables -obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c deleted file mode 100644 index da3f1f8cb325..000000000000 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2008 Patrick McHardy - * Copyright (c) 2012 Pablo Neira Ayuso - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static unsigned int nf_route_table_hook(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - unsigned int ret; - struct nft_pktinfo pkt; - struct in6_addr saddr, daddr; - u_int8_t hop_limit; - u32 mark, flowlabel; - int err; - - nft_set_pktinfo(&pkt, skb, state); - nft_set_pktinfo_ipv6(&pkt, skb); - - /* save source/dest address, mark, hoplimit, flowlabel, priority */ - memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); - memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr)); - mark = skb->mark; - hop_limit = ipv6_hdr(skb)->hop_limit; - - /* flowlabel and prio (includes version, which shouldn't change either */ - flowlabel = *((u32 *)ipv6_hdr(skb)); - - ret = nft_do_chain(&pkt, priv); - if (ret != NF_DROP && ret != NF_STOLEN && - (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || - memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || - skb->mark != mark || - ipv6_hdr(skb)->hop_limit != hop_limit || - flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { - err = ip6_route_me_harder(state->net, skb); - if (err < 0) - ret = NF_DROP_ERR(err); - } - - return ret; -} - -static const struct nft_chain_type nft_chain_route_ipv6 = { - .name = "route", - .type = NFT_CHAIN_T_ROUTE, - .family = NFPROTO_IPV6, - .owner = THIS_MODULE, - .hook_mask = (1 << NF_INET_LOCAL_OUT), - .hooks = { - [NF_INET_LOCAL_OUT] = nf_route_table_hook, - }, -}; - -static int __init nft_chain_route_init(void) -{ - nft_register_chain_type(&nft_chain_route_ipv6); - - return 0; -} - -static void __exit nft_chain_route_exit(void) -{ - nft_unregister_chain_type(&nft_chain_route_ipv6); -} - -module_init(nft_chain_route_init); -module_exit(nft_chain_route_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy "); -MODULE_ALIAS_NFT_CHAIN(AF_INET6, "route"); diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 4894a85cdd0b..afbf475e02b2 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -77,7 +77,8 @@ obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \ nf_tables_trace.o nft_immediate.o nft_cmp.o nft_range.o \ nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \ - nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o + nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o \ + nft_chain_route.o nf_tables_set-objs := nf_tables_set_core.o \ nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c index 606d0a740615..84f5c90a7f21 100644 --- a/net/netfilter/nf_nat_proto.c +++ b/net/netfilter/nf_nat_proto.c @@ -926,20 +926,6 @@ nf_nat_ipv6_out(void *priv, struct sk_buff *skb, return ret; } -static int nat_route_me_harder(struct net *net, struct sk_buff *skb) -{ -#ifdef CONFIG_IPV6_MODULE - const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); - - if (!v6_ops) - return -EHOSTUNREACH; - - return v6_ops->route_me_harder(net, skb); -#else - return ip6_route_me_harder(net, skb); -#endif -} - static unsigned int nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -959,7 +945,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, &ct->tuplehash[!dir].tuple.src.u3)) { - err = nat_route_me_harder(state->net, skb); + err = nf_ip6_route_me_harder(state->net, skb); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2d28b138ed18..a2bd439937e6 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7530,6 +7530,7 @@ static int __init nf_tables_module_init(void) if (err < 0) goto err5; + nft_chain_route_init(); return err; err5: rhltable_destroy(&nft_objname_ht); @@ -7549,6 +7550,7 @@ static void __exit nf_tables_module_exit(void) nfnetlink_subsys_unregister(&nf_tables_subsys); unregister_netdevice_notifier(&nf_tables_flowtable_notifier); nft_chain_filter_fini(); + nft_chain_route_fini(); unregister_pernet_subsys(&nf_tables_net_ops); cancel_work_sync(&trans_destroy_work); rcu_barrier(); diff --git a/net/netfilter/nft_chain_route.c b/net/netfilter/nft_chain_route.c new file mode 100644 index 000000000000..8826bbe71136 --- /dev/null +++ b/net/netfilter/nft_chain_route.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_NF_TABLES_IPV4 +static unsigned int nf_route_table_hook4(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + const struct iphdr *iph; + struct nft_pktinfo pkt; + __be32 saddr, daddr; + unsigned int ret; + u32 mark; + int err; + u8 tos; + + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_ipv4(&pkt, skb); + + mark = skb->mark; + iph = ip_hdr(skb); + saddr = iph->saddr; + daddr = iph->daddr; + tos = iph->tos; + + ret = nft_do_chain(&pkt, priv); + if (ret == NF_ACCEPT) { + iph = ip_hdr(skb); + + if (iph->saddr != saddr || + iph->daddr != daddr || + skb->mark != mark || + iph->tos != tos) { + err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); + if (err < 0) + ret = NF_DROP_ERR(err); + } + } + return ret; +} + +static const struct nft_chain_type nft_chain_route_ipv4 = { + .name = "route", + .type = NFT_CHAIN_T_ROUTE, + .family = NFPROTO_IPV4, + .hook_mask = (1 << NF_INET_LOCAL_OUT), + .hooks = { + [NF_INET_LOCAL_OUT] = nf_route_table_hook4, + }, +}; +#endif + +#ifdef CONFIG_NF_TABLES_IPV6 +static unsigned int nf_route_table_hook6(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct in6_addr saddr, daddr; + struct nft_pktinfo pkt; + u32 mark, flowlabel; + unsigned int ret; + u8 hop_limit; + int err; + + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_ipv6(&pkt, skb); + + /* save source/dest address, mark, hoplimit, flowlabel, priority */ + memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); + memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr)); + mark = skb->mark; + hop_limit = ipv6_hdr(skb)->hop_limit; + + /* flowlabel and prio (includes version, which shouldn't change either)*/ + flowlabel = *((u32 *)ipv6_hdr(skb)); + + ret = nft_do_chain(&pkt, priv); + if (ret == NF_ACCEPT && + (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || + memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || + skb->mark != mark || + ipv6_hdr(skb)->hop_limit != hop_limit || + flowlabel != *((u32 *)ipv6_hdr(skb)))) { + err = nf_ip6_route_me_harder(state->net, skb); + if (err < 0) + ret = NF_DROP_ERR(err); + } + + return ret; +} + +static const struct nft_chain_type nft_chain_route_ipv6 = { + .name = "route", + .type = NFT_CHAIN_T_ROUTE, + .family = NFPROTO_IPV6, + .hook_mask = (1 << NF_INET_LOCAL_OUT), + .hooks = { + [NF_INET_LOCAL_OUT] = nf_route_table_hook6, + }, +}; +#endif + +#ifdef CONFIG_NF_TABLES_INET +static unsigned int nf_route_table_inet(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nft_pktinfo pkt; + + switch (state->pf) { + case NFPROTO_IPV4: + return nf_route_table_hook4(priv, skb, state); + case NFPROTO_IPV6: + return nf_route_table_hook6(priv, skb, state); + default: + nft_set_pktinfo(&pkt, skb, state); + break; + } + + return nft_do_chain(&pkt, priv); +} + +static const struct nft_chain_type nft_chain_route_inet = { + .name = "route", + .type = NFT_CHAIN_T_ROUTE, + .family = NFPROTO_INET, + .hook_mask = (1 << NF_INET_LOCAL_OUT), + .hooks = { + [NF_INET_LOCAL_OUT] = nf_route_table_inet, + }, +}; +#endif + +void __init nft_chain_route_init(void) +{ +#ifdef CONFIG_NF_TABLES_IPV6 + nft_register_chain_type(&nft_chain_route_ipv6); +#endif +#ifdef CONFIG_NF_TABLES_IPV4 + nft_register_chain_type(&nft_chain_route_ipv4); +#endif +#ifdef CONFIG_NF_TABLES_INET + nft_register_chain_type(&nft_chain_route_inet); +#endif +} + +void __exit nft_chain_route_fini(void) +{ +#ifdef CONFIG_NF_TABLES_IPV6 + nft_unregister_chain_type(&nft_chain_route_ipv6); +#endif +#ifdef CONFIG_NF_TABLES_IPV4 + nft_unregister_chain_type(&nft_chain_route_ipv4); +#endif +#ifdef CONFIG_NF_TABLES_INET + nft_unregister_chain_type(&nft_chain_route_inet); +#endif +} -- cgit v1.2.3 From 4806e975729f99c7908d1688a143f1e16d464e6c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 27 Mar 2019 09:22:26 +0100 Subject: netfilter: replace NF_NAT_NEEDED with IS_ENABLED(CONFIG_NF_NAT) NF_NAT_NEEDED is true whenever nat support for either ipv4 or ipv6 is enabled. Now that the af-specific nat configuration switches have been removed, IS_ENABLED(CONFIG_NF_NAT) has the same effect. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 2 +- include/net/netfilter/nf_conntrack_expect.h | 2 +- net/netfilter/Kconfig | 5 ----- net/netfilter/nf_conntrack_expect.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 16 ++++++++-------- net/netfilter/nf_conntrack_sip.c | 2 +- net/openvswitch/conntrack.c | 18 +++++++++--------- 7 files changed, 21 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 4e0145ea033e..a7252f3baeb0 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -367,7 +367,7 @@ extern struct nf_nat_hook __rcu *nf_nat_hook; static inline void nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) { -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) struct nf_nat_hook *nat_hook; rcu_read_lock(); diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 006e430d1cdf..93ce6b0daaba 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -48,7 +48,7 @@ struct nf_conntrack_expect { /* Expectation class */ unsigned int class; -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) union nf_inet_addr saved_addr; /* This is the original per-proto part, used to map the * expected connection the way the recipient expects. */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 6548271209a0..f4384c096d0d 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -404,11 +404,6 @@ config NF_NAT forms of full Network Address Port Translation. This can be controlled by iptables, ip6tables or nft. -config NF_NAT_NEEDED - bool - depends on NF_NAT - default y - config NF_NAT_AMANDA tristate depends on NF_CONNTRACK && NF_NAT diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 334d6e5b7762..59c18804a10a 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -336,7 +336,7 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class, exp->tuple.dst.u.all = *dst; -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) memset(&exp->saved_addr, 0, sizeof(exp->saved_addr)); memset(&exp->saved_proto, 0, sizeof(exp->saved_proto)); #endif diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 66c596d287a5..32fe3060375a 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -45,7 +45,7 @@ #include #include #include -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) #include #include #endif @@ -655,7 +655,7 @@ static size_t ctnetlink_nlmsg_size(const struct nf_conn *ct) + nla_total_size(0) /* CTA_HELP */ + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */ + ctnetlink_secctx_size(ct) -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */ + 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */ #endif @@ -1494,7 +1494,7 @@ static int ctnetlink_get_ct_unconfirmed(struct net *net, struct sock *ctnl, return -EOPNOTSUPP; } -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) static int ctnetlink_parse_nat_setup(struct nf_conn *ct, enum nf_nat_manip_type manip, @@ -1586,7 +1586,7 @@ ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[]) static int ctnetlink_setup_nat(struct nf_conn *ct, const struct nlattr * const cda[]) { -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) int ret; if (!cda[CTA_NAT_DST] && !cda[CTA_NAT_SRC]) @@ -2369,7 +2369,7 @@ ctnetlink_glue_build_size(const struct nf_conn *ct) + nla_total_size(0) /* CTA_HELP */ + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */ + ctnetlink_secctx_size(ct) -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */ + 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */ #endif @@ -2699,7 +2699,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, struct nf_conn *master = exp->master; long timeout = ((long)exp->timeout.expires - (long)jiffies) / HZ; struct nf_conn_help *help; -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) struct nlattr *nest_parms; struct nf_conntrack_tuple nat_tuple = {}; #endif @@ -2717,7 +2717,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, CTA_EXPECT_MASTER) < 0) goto nla_put_failure; -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) if (!nf_inet_addr_cmp(&exp->saved_addr, &any_addr) || exp->saved_proto.all) { nest_parms = nla_nest_start(skb, CTA_EXPECT_NAT | NLA_F_NESTED); @@ -3180,7 +3180,7 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr, struct nf_conntrack_expect *exp, u_int8_t u3) { -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) struct nlattr *tb[CTA_EXPECT_NAT_MAX+1]; struct nf_conntrack_tuple nat_tuple = {}; int err; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 39fcc1ed18f3..d5454d1031a3 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -928,7 +928,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, nfct_help(exp->master)->helper != nfct_help(ct)->helper || exp->class != class) break; -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) if (!direct_rtp && (!nf_inet_addr_cmp(&exp->saved_addr, &exp->tuple.dst.u3) || exp->saved_proto.udp.port != exp->tuple.dst.u.udp.port) && diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 0be3ab5bde26..626629944450 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -29,7 +29,7 @@ #include #include -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) #include #endif @@ -75,7 +75,7 @@ struct ovs_conntrack_info { struct md_mark mark; struct md_labels labels; char timeout[CTNL_TIMEOUT_NAME_MAX]; -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) struct nf_nat_range2 range; /* Only present for SRC NAT and DST NAT. */ #endif }; @@ -721,7 +721,7 @@ static bool skb_nfct_cached(struct net *net, return ct_executed; } -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) /* Modelled after nf_nat_ipv[46]_fn(). * range is only used for new, uninitialized NAT state. * Returns either NF_ACCEPT or NF_DROP. @@ -903,7 +903,7 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, return err; } -#else /* !CONFIG_NF_NAT_NEEDED */ +#else /* !CONFIG_NF_NAT */ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, const struct ovs_conntrack_info *info, struct sk_buff *skb, struct nf_conn *ct, @@ -1330,7 +1330,7 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name, return 0; } -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) static int parse_nat(const struct nlattr *attr, struct ovs_conntrack_info *info, bool log) { @@ -1467,7 +1467,7 @@ static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { .maxlen = sizeof(struct md_labels) }, [OVS_CT_ATTR_HELPER] = { .minlen = 1, .maxlen = NF_CT_HELPER_NAME_LEN }, -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) /* NAT length is checked when parsing the nested attributes. */ [OVS_CT_ATTR_NAT] = { .minlen = 0, .maxlen = INT_MAX }, #endif @@ -1547,7 +1547,7 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, return -EINVAL; } break; -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) case OVS_CT_ATTR_NAT: { int err = parse_nat(a, info, log); @@ -1677,7 +1677,7 @@ err_free_ct: return err; } -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info, struct sk_buff *skb) { @@ -1783,7 +1783,7 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, return -EMSGSIZE; } -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb)) return -EMSGSIZE; #endif -- cgit v1.2.3 From 22c7652cdaa8cd33ce78bacceb4e826a3f795873 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Wed, 27 Mar 2019 11:36:26 +0100 Subject: netfilter: nft_osf: Add version option support Add version option support to the nftables "osf" expression. Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink_osf.h | 11 ++++++++--- include/uapi/linux/netfilter/nf_tables.h | 6 ++++++ net/netfilter/nfnetlink_osf.c | 14 +++++++------- net/netfilter/nft_osf.c | 30 +++++++++++++++++++++++++----- 4 files changed, 46 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink_osf.h b/include/linux/netfilter/nfnetlink_osf.h index c6000046c966..788613f36935 100644 --- a/include/linux/netfilter/nfnetlink_osf.h +++ b/include/linux/netfilter/nfnetlink_osf.h @@ -21,13 +21,18 @@ struct nf_osf_finger { struct nf_osf_user_finger finger; }; +struct nf_osf_data { + const char *genre; + const char *version; +}; + bool nf_osf_match(const struct sk_buff *skb, u_int8_t family, int hooknum, struct net_device *in, struct net_device *out, const struct nf_osf_info *info, struct net *net, const struct list_head *nf_osf_fingers); -const char *nf_osf_find(const struct sk_buff *skb, - const struct list_head *nf_osf_fingers, - const int ttl_check); +bool nf_osf_find(const struct sk_buff *skb, + const struct list_head *nf_osf_fingers, + const int ttl_check, struct nf_osf_data *data); #endif /* _NFOSF_H */ diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index a66c8de006cc..061bb3eb20c3 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1522,15 +1522,21 @@ enum nft_flowtable_hook_attributes { * * @NFTA_OSF_DREG: destination register (NLA_U32: nft_registers) * @NFTA_OSF_TTL: Value of the TTL osf option (NLA_U8) + * @NFTA_OSF_FLAGS: flags (NLA_U32) */ enum nft_osf_attributes { NFTA_OSF_UNSPEC, NFTA_OSF_DREG, NFTA_OSF_TTL, + NFTA_OSF_FLAGS, __NFTA_OSF_MAX, }; #define NFTA_OSF_MAX (__NFTA_OSF_MAX - 1) +enum nft_osf_flags { + NFT_OSF_F_VERSION = (1 << 0), +}; + /** * enum nft_device_attributes - nf_tables device netlink attributes * diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index 1f1d90c1716b..7b827bcb412c 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -255,9 +255,9 @@ nf_osf_match(const struct sk_buff *skb, u_int8_t family, } EXPORT_SYMBOL_GPL(nf_osf_match); -const char *nf_osf_find(const struct sk_buff *skb, - const struct list_head *nf_osf_fingers, - const int ttl_check) +bool nf_osf_find(const struct sk_buff *skb, + const struct list_head *nf_osf_fingers, + const int ttl_check, struct nf_osf_data *data) { const struct iphdr *ip = ip_hdr(skb); const struct nf_osf_user_finger *f; @@ -265,24 +265,24 @@ const char *nf_osf_find(const struct sk_buff *skb, const struct nf_osf_finger *kf; struct nf_osf_hdr_ctx ctx; const struct tcphdr *tcp; - const char *genre = NULL; memset(&ctx, 0, sizeof(ctx)); tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts); if (!tcp) - return NULL; + return false; list_for_each_entry_rcu(kf, &nf_osf_fingers[ctx.df], finger_entry) { f = &kf->finger; if (!nf_osf_match_one(skb, f, ttl_check, &ctx)) continue; - genre = f->genre; + data->genre = f->genre; + data->version = f->version; break; } - return genre; + return true; } EXPORT_SYMBOL_GPL(nf_osf_find); diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index b13618c764ec..87b60d6617ef 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -7,11 +7,13 @@ struct nft_osf { enum nft_registers dreg:8; u8 ttl; + u32 flags; }; static const struct nla_policy nft_osf_policy[NFTA_OSF_MAX + 1] = { [NFTA_OSF_DREG] = { .type = NLA_U32 }, [NFTA_OSF_TTL] = { .type = NLA_U8 }, + [NFTA_OSF_FLAGS] = { .type = NLA_U32 }, }; static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs, @@ -20,9 +22,10 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs, struct nft_osf *priv = nft_expr_priv(expr); u32 *dest = ®s->data[priv->dreg]; struct sk_buff *skb = pkt->skb; + char os_match[NFT_OSF_MAXGENRELEN + 1]; const struct tcphdr *tcp; + struct nf_osf_data data; struct tcphdr _tcph; - const char *os_name; tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph); @@ -35,11 +38,17 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs, return; } - os_name = nf_osf_find(skb, nf_osf_fingers, priv->ttl); - if (!os_name) + if (!nf_osf_find(skb, nf_osf_fingers, priv->ttl, &data)) { strncpy((char *)dest, "unknown", NFT_OSF_MAXGENRELEN); - else - strncpy((char *)dest, os_name, NFT_OSF_MAXGENRELEN); + } else { + if (priv->flags & NFT_OSF_F_VERSION) + snprintf(os_match, NFT_OSF_MAXGENRELEN, "%s:%s", + data.genre, data.version); + else + strlcpy(os_match, data.genre, NFT_OSF_MAXGENRELEN); + + strncpy((char *)dest, os_match, NFT_OSF_MAXGENRELEN); + } } static int nft_osf_init(const struct nft_ctx *ctx, @@ -47,6 +56,7 @@ static int nft_osf_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_osf *priv = nft_expr_priv(expr); + u32 flags; int err; u8 ttl; @@ -57,6 +67,13 @@ static int nft_osf_init(const struct nft_ctx *ctx, priv->ttl = ttl; } + if (tb[NFTA_OSF_FLAGS]) { + flags = ntohl(nla_get_be32(tb[NFTA_OSF_FLAGS])); + if (flags != NFT_OSF_F_VERSION) + return -EINVAL; + priv->flags = flags; + } + priv->dreg = nft_parse_register(tb[NFTA_OSF_DREG]); err = nft_validate_register_store(ctx, priv->dreg, NULL, NFT_DATA_VALUE, NFT_OSF_MAXGENRELEN); @@ -73,6 +90,9 @@ static int nft_osf_dump(struct sk_buff *skb, const struct nft_expr *expr) if (nla_put_u8(skb, NFTA_OSF_TTL, priv->ttl)) goto nla_put_failure; + if (nla_put_be32(skb, NFTA_OSF_FLAGS, ntohl(priv->flags))) + goto nla_put_failure; + if (nft_dump_register(skb, NFTA_OSF_DREG, priv->dreg)) goto nla_put_failure; -- cgit v1.2.3 From 3b0a081db1f730373993c7a27936778402a3322c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 4 Apr 2019 10:58:20 +0200 Subject: netfilter: make two functions static They have no external callers anymore. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 1 - include/net/netfilter/nf_tables.h | 2 -- net/netfilter/nf_tables_api.c | 5 ++--- net/netfilter/x_tables.c | 3 +-- 4 files changed, 3 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index bf384b3eedb8..1f852ef7b098 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -317,7 +317,6 @@ struct xt_table_info *xt_replace_table(struct xt_table *table, int *error); struct xt_match *xt_find_match(u8 af, const char *name, u8 revision); -struct xt_target *xt_find_target(u8 af, const char *name, u8 revision); struct xt_match *xt_request_find_match(u8 af, const char *name, u8 revision); struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision); int xt_find_revision(u8 af, const char *name, u8 revision, int target, diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 55dff3ab44c7..2d5a0a1a87b8 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -475,8 +475,6 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, enum nft_trans_phase phase); int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding); -void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding, bool commit); void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set); /** diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a2bd439937e6..e058273c5dde 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3782,8 +3782,8 @@ bind: } EXPORT_SYMBOL_GPL(nf_tables_bind_set); -void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding, bool event) +static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding, bool event) { list_del_rcu(&binding->list); @@ -3794,7 +3794,6 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, GFP_KERNEL); } } -EXPORT_SYMBOL_GPL(nf_tables_unbind_set); void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index e5e5c64df8d1..0a6656ed1534 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -227,7 +227,7 @@ xt_request_find_match(uint8_t nfproto, const char *name, uint8_t revision) EXPORT_SYMBOL_GPL(xt_request_find_match); /* Find target, grabs ref. Returns ERR_PTR() on error. */ -struct xt_target *xt_find_target(u8 af, const char *name, u8 revision) +static struct xt_target *xt_find_target(u8 af, const char *name, u8 revision) { struct xt_target *t; int err = -ENOENT; @@ -255,7 +255,6 @@ struct xt_target *xt_find_target(u8 af, const char *name, u8 revision) return ERR_PTR(err); } -EXPORT_SYMBOL(xt_find_target); struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision) { -- cgit v1.2.3 From bf8981a2aa082d9d64771b47c8a1c9c388d8cd40 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 Apr 2019 10:44:06 +0200 Subject: netfilter: nf_nat: merge ip/ip6 masquerade headers Both are now implemented by nf_nat_masquerade.c, so no need to keep different headers. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv4/nf_nat_masquerade.h | 15 --------------- include/net/netfilter/ipv6/nf_nat_masquerade.h | 11 ----------- include/net/netfilter/nf_nat_masquerade.h | 21 +++++++++++++++++++++ net/ipv4/netfilter/ipt_MASQUERADE.c | 2 +- net/ipv6/netfilter/ip6t_MASQUERADE.c | 2 +- net/netfilter/nf_nat_masquerade.c | 3 +-- net/netfilter/nft_masq.c | 3 +-- 7 files changed, 25 insertions(+), 32 deletions(-) delete mode 100644 include/net/netfilter/ipv4/nf_nat_masquerade.h delete mode 100644 include/net/netfilter/ipv6/nf_nat_masquerade.h create mode 100644 include/net/netfilter/nf_nat_masquerade.h (limited to 'include') diff --git a/include/net/netfilter/ipv4/nf_nat_masquerade.h b/include/net/netfilter/ipv4/nf_nat_masquerade.h deleted file mode 100644 index 13d55206bb9f..000000000000 --- a/include/net/netfilter/ipv4/nf_nat_masquerade.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NF_NAT_MASQUERADE_IPV4_H_ -#define _NF_NAT_MASQUERADE_IPV4_H_ - -#include - -unsigned int -nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, - const struct nf_nat_range2 *range, - const struct net_device *out); - -int nf_nat_masquerade_ipv4_register_notifier(void); -void nf_nat_masquerade_ipv4_unregister_notifier(void); - -#endif /*_NF_NAT_MASQUERADE_IPV4_H_ */ diff --git a/include/net/netfilter/ipv6/nf_nat_masquerade.h b/include/net/netfilter/ipv6/nf_nat_masquerade.h deleted file mode 100644 index 2917bf95c437..000000000000 --- a/include/net/netfilter/ipv6/nf_nat_masquerade.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NF_NAT_MASQUERADE_IPV6_H_ -#define _NF_NAT_MASQUERADE_IPV6_H_ - -unsigned int -nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, - const struct net_device *out); -int nf_nat_masquerade_ipv6_register_notifier(void); -void nf_nat_masquerade_ipv6_unregister_notifier(void); - -#endif /* _NF_NAT_MASQUERADE_IPV6_H_ */ diff --git a/include/net/netfilter/nf_nat_masquerade.h b/include/net/netfilter/nf_nat_masquerade.h new file mode 100644 index 000000000000..cafe71822a53 --- /dev/null +++ b/include/net/netfilter/nf_nat_masquerade.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_NAT_MASQUERADE_H_ +#define _NF_NAT_MASQUERADE_H_ + +#include + +unsigned int +nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, + const struct nf_nat_range2 *range, + const struct net_device *out); + +int nf_nat_masquerade_ipv4_register_notifier(void); +void nf_nat_masquerade_ipv4_unregister_notifier(void); + +unsigned int +nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, + const struct net_device *out); +int nf_nat_masquerade_ipv6_register_notifier(void); +void nf_nat_masquerade_ipv6_unregister_notifier(void); + +#endif /*_NF_NAT_MASQUERADE_H_ */ diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index fd3f9e8a74da..0a2bffb6a0ad 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c index 29c7f1915a96..4a22343ed67a 100644 --- a/net/ipv6/netfilter/ip6t_MASQUERADE.c +++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include static unsigned int masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par) diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c index d85c4d902e7b..10053e70f69d 100644 --- a/net/netfilter/nf_nat_masquerade.c +++ b/net/netfilter/nf_nat_masquerade.c @@ -7,8 +7,7 @@ #include #include -#include -#include +#include static DEFINE_MUTEX(masq_mutex); static unsigned int masq_refcnt4 __read_mostly; diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 35a1794acf4c..0783a3e99bd7 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -14,8 +14,7 @@ #include #include #include -#include -#include +#include struct nft_masq { u32 flags; -- cgit v1.2.3 From 610a43149cabd0c7aa7bed19cbcf05a0249ab32a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 Apr 2019 10:44:08 +0200 Subject: netfilter: nf_nat_masquerade: unify ipv4/6 notifier registration Only reason for having two different register functions was because of ipt_MASQUERADE and ip6t_MASQUERADE being two different modules. Previous patch merged those into xt_MASQUERADE, so we can merge this too. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_nat_masquerade.h | 6 +- net/netfilter/nf_nat_masquerade.c | 101 +++++++++++------------------- net/netfilter/nft_masq.c | 16 +---- net/netfilter/xt_MASQUERADE.c | 16 +---- 4 files changed, 44 insertions(+), 95 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_nat_masquerade.h b/include/net/netfilter/nf_nat_masquerade.h index cafe71822a53..54a14d643c34 100644 --- a/include/net/netfilter/nf_nat_masquerade.h +++ b/include/net/netfilter/nf_nat_masquerade.h @@ -9,13 +9,11 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, const struct nf_nat_range2 *range, const struct net_device *out); -int nf_nat_masquerade_ipv4_register_notifier(void); -void nf_nat_masquerade_ipv4_unregister_notifier(void); +int nf_nat_masquerade_inet_register_notifiers(void); +void nf_nat_masquerade_inet_unregister_notifiers(void); unsigned int nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, const struct net_device *out); -int nf_nat_masquerade_ipv6_register_notifier(void); -void nf_nat_masquerade_ipv6_unregister_notifier(void); #endif /*_NF_NAT_MASQUERADE_H_ */ diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c index 10053e70f69d..8e8a65d46345 100644 --- a/net/netfilter/nf_nat_masquerade.c +++ b/net/netfilter/nf_nat_masquerade.c @@ -10,8 +10,7 @@ #include static DEFINE_MUTEX(masq_mutex); -static unsigned int masq_refcnt4 __read_mostly; -static unsigned int masq_refcnt6 __read_mostly; +static unsigned int masq_refcnt __read_mostly; unsigned int nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, @@ -136,56 +135,6 @@ static struct notifier_block masq_inet_notifier = { .notifier_call = masq_inet_event, }; -int nf_nat_masquerade_ipv4_register_notifier(void) -{ - int ret = 0; - - mutex_lock(&masq_mutex); - if (WARN_ON_ONCE(masq_refcnt4 == UINT_MAX)) { - ret = -EOVERFLOW; - goto out_unlock; - } - - /* check if the notifier was already set */ - if (++masq_refcnt4 > 1) - goto out_unlock; - - /* Register for device down reports */ - ret = register_netdevice_notifier(&masq_dev_notifier); - if (ret) - goto err_dec; - /* Register IP address change reports */ - ret = register_inetaddr_notifier(&masq_inet_notifier); - if (ret) - goto err_unregister; - - mutex_unlock(&masq_mutex); - return ret; - -err_unregister: - unregister_netdevice_notifier(&masq_dev_notifier); -err_dec: - masq_refcnt4--; -out_unlock: - mutex_unlock(&masq_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier); - -void nf_nat_masquerade_ipv4_unregister_notifier(void) -{ - mutex_lock(&masq_mutex); - /* check if the notifier still has clients */ - if (--masq_refcnt4 > 0) - goto out_unlock; - - unregister_netdevice_notifier(&masq_dev_notifier); - unregister_inetaddr_notifier(&masq_inet_notifier); -out_unlock: - mutex_unlock(&masq_mutex); -} -EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier); - #if IS_ENABLED(CONFIG_IPV6) static atomic_t v6_worker_count __read_mostly; @@ -321,44 +270,68 @@ static struct notifier_block masq_inet6_notifier = { .notifier_call = masq_inet6_event, }; -int nf_nat_masquerade_ipv6_register_notifier(void) +static int nf_nat_masquerade_ipv6_register_notifier(void) +{ + return register_inet6addr_notifier(&masq_inet6_notifier); +} +#else +static inline int nf_nat_masquerade_ipv6_register_notifier(void) { return 0; } +#endif + +int nf_nat_masquerade_inet_register_notifiers(void) { int ret = 0; mutex_lock(&masq_mutex); - if (WARN_ON_ONCE(masq_refcnt6 == UINT_MAX)) { + if (WARN_ON_ONCE(masq_refcnt == UINT_MAX)) { ret = -EOVERFLOW; goto out_unlock; } - /* check if the notifier is already set */ - if (++masq_refcnt6 > 1) + /* check if the notifier was already set */ + if (++masq_refcnt > 1) goto out_unlock; - ret = register_inet6addr_notifier(&masq_inet6_notifier); + /* Register for device down reports */ + ret = register_netdevice_notifier(&masq_dev_notifier); if (ret) goto err_dec; + /* Register IP address change reports */ + ret = register_inetaddr_notifier(&masq_inet_notifier); + if (ret) + goto err_unregister; + + ret = nf_nat_masquerade_ipv6_register_notifier(); + if (ret) + goto err_unreg_inet; mutex_unlock(&masq_mutex); return ret; +err_unreg_inet: + unregister_inetaddr_notifier(&masq_inet_notifier); +err_unregister: + unregister_netdevice_notifier(&masq_dev_notifier); err_dec: - masq_refcnt6--; + masq_refcnt--; out_unlock: mutex_unlock(&masq_mutex); return ret; } -EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier); +EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_register_notifiers); -void nf_nat_masquerade_ipv6_unregister_notifier(void) +void nf_nat_masquerade_inet_unregister_notifiers(void) { mutex_lock(&masq_mutex); - /* check if the notifier still has clients */ - if (--masq_refcnt6 > 0) + /* check if the notifiers still have clients */ + if (--masq_refcnt > 0) goto out_unlock; + unregister_netdevice_notifier(&masq_dev_notifier); + unregister_inetaddr_notifier(&masq_inet_notifier); +#if IS_ENABLED(CONFIG_IPV6) unregister_inet6addr_notifier(&masq_inet6_notifier); +#endif out_unlock: mutex_unlock(&masq_mutex); } -EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier); -#endif +EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_unregister_notifiers); diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 0783a3e99bd7..86fd90085eaf 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -195,22 +195,12 @@ static struct nft_expr_type nft_masq_ipv6_type __read_mostly = { static int __init nft_masq_module_init_ipv6(void) { - int ret = nft_register_expr(&nft_masq_ipv6_type); - - if (ret) - return ret; - - ret = nf_nat_masquerade_ipv6_register_notifier(); - if (ret < 0) - nft_unregister_expr(&nft_masq_ipv6_type); - - return ret; + return nft_register_expr(&nft_masq_ipv6_type); } static void nft_masq_module_exit_ipv6(void) { nft_unregister_expr(&nft_masq_ipv6_type); - nf_nat_masquerade_ipv6_unregister_notifier(); } #else static inline int nft_masq_module_init_ipv6(void) { return 0; } @@ -293,7 +283,7 @@ static int __init nft_masq_module_init(void) return ret; } - ret = nf_nat_masquerade_ipv4_register_notifier(); + ret = nf_nat_masquerade_inet_register_notifiers(); if (ret < 0) { nft_masq_module_exit_ipv6(); nft_masq_module_exit_inet(); @@ -309,7 +299,7 @@ static void __exit nft_masq_module_exit(void) nft_masq_module_exit_ipv6(); nft_masq_module_exit_inet(); nft_unregister_expr(&nft_masq_ipv4_type); - nf_nat_masquerade_ipv4_unregister_notifier(); + nf_nat_masquerade_inet_unregister_notifiers(); } module_init(nft_masq_module_init); diff --git a/net/netfilter/xt_MASQUERADE.c b/net/netfilter/xt_MASQUERADE.c index 96d884718749..ece20d832adc 100644 --- a/net/netfilter/xt_MASQUERADE.c +++ b/net/netfilter/xt_MASQUERADE.c @@ -107,32 +107,20 @@ static int __init masquerade_tg_init(void) if (ret) return ret; - ret = nf_nat_masquerade_ipv4_register_notifier(); + ret = nf_nat_masquerade_inet_register_notifiers(); if (ret) { xt_unregister_targets(masquerade_tg_reg, ARRAY_SIZE(masquerade_tg_reg)); return ret; } -#if IS_ENABLED(CONFIG_IPV6) - ret = nf_nat_masquerade_ipv6_register_notifier(); - if (ret) { - xt_unregister_targets(masquerade_tg_reg, - ARRAY_SIZE(masquerade_tg_reg)); - nf_nat_masquerade_ipv4_unregister_notifier(); - return ret; - } -#endif return ret; } static void __exit masquerade_tg_exit(void) { xt_unregister_targets(masquerade_tg_reg, ARRAY_SIZE(masquerade_tg_reg)); - nf_nat_masquerade_ipv4_unregister_notifier(); -#if IS_ENABLED(CONFIG_IPV6) - nf_nat_masquerade_ipv6_unregister_notifier(); -#endif + nf_nat_masquerade_inet_unregister_notifiers(); } module_init(masquerade_tg_init); -- cgit v1.2.3 From 971502d77faa50a37c89bc6d172450294ad9a5fd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Apr 2019 16:36:41 +0200 Subject: bridge: netfilter: unroll NF_HOOK helper in bridge input path Replace NF_HOOK() based invocation of the netfilter hooks with a private copy of nf_hook_slow(). This copy has one difference: it can return the rx handler value expected by the stack, i.e. RX_HANDLER_CONSUMED or RX_HANDLER_PASS. This is needed by the next patch to invoke the ebtables "broute" table via the standard netfilter hooks rather than the custom "br_should_route_hook" indirection that is used now. When the skb is to be "brouted", we must return RX_HANDLER_PASS from the bridge rx input handler, but there is no way to indicate this via NF_HOOK(), unless perhaps by some hack such as exposing bridge_cb in the netfilter core or a percpu flag. text data bss dec filename 3369 56 0 3425 net/bridge/br_input.o.before 3458 40 0 3498 net/bridge/br_input.o.after This allows removal of the "br_should_route_hook" in the next patch. Signed-off-by: Florian Westphal Acked-by: David S. Miller Acked-by: Nikolay Aleksandrov Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_queue.h | 3 +++ net/bridge/br_input.c | 55 +++++++++++++++++++++++++++++++++++++--- net/netfilter/core.c | 1 + net/netfilter/nf_internals.h | 3 --- net/netfilter/nf_queue.c | 1 + 5 files changed, 56 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index a50a69f5334c..7239105d9d2e 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -119,4 +119,7 @@ nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family, return queue; } +int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, + const struct nf_hook_entries *entries, unsigned int index, + unsigned int verdict); #endif /* _NF_QUEUE_H */ diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index e2f93e5c72da..4ac34fb5f943 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -206,6 +207,55 @@ static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_bu return 0; } +static int nf_hook_bridge_pre(struct sk_buff *skb, struct sk_buff **pskb) +{ +#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE + struct nf_hook_entries *e = NULL; + struct nf_hook_state state; + unsigned int verdict, i; + struct net *net; + int ret; + + net = dev_net(skb->dev); +#ifdef HAVE_JUMP_LABEL + if (!static_key_false(&nf_hooks_needed[NFPROTO_BRIDGE][NF_BR_PRE_ROUTING])) + goto frame_finish; +#endif + + e = rcu_dereference(net->nf.hooks_bridge[NF_BR_PRE_ROUTING]); + if (!e) + goto frame_finish; + + nf_hook_state_init(&state, NF_BR_PRE_ROUTING, + NFPROTO_BRIDGE, skb->dev, NULL, NULL, + net, br_handle_frame_finish); + + for (i = 0; i < e->num_hook_entries; i++) { + verdict = nf_hook_entry_hookfn(&e->hooks[i], skb, &state); + switch (verdict & NF_VERDICT_MASK) { + case NF_ACCEPT: + break; + case NF_DROP: + kfree_skb(skb); + return RX_HANDLER_CONSUMED; + case NF_QUEUE: + ret = nf_queue(skb, &state, e, i, verdict); + if (ret == 1) + continue; + return RX_HANDLER_CONSUMED; + default: /* STOLEN */ + return RX_HANDLER_CONSUMED; + } + } +frame_finish: + net = dev_net(skb->dev); + br_handle_frame_finish(net, NULL, skb); +#else + br_handle_frame_finish(dev_net(skb->dev), NULL, skb); +#endif + return RX_HANDLER_CONSUMED; +} + /* * Return NULL if skb is handled * note: already called with rcu_read_lock @@ -304,10 +354,7 @@ forward: if (ether_addr_equal(p->br->dev->dev_addr, dest)) skb->pkt_type = PACKET_HOST; - NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, - dev_net(skb->dev), NULL, skb, skb->dev, NULL, - br_handle_frame_finish); - break; + return nf_hook_bridge_pre(skb, pskb); default: drop: kfree_skb(skb); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 93aaec3a54ec..71f06900473e 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "nf_internals.h" diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index e15779fd58e3..d6c43902ebd7 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -7,9 +7,6 @@ #include /* nf_queue.c */ -int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, - const struct nf_hook_entries *entries, unsigned int index, - unsigned int verdict); void nf_queue_nf_hook_drop(struct net *net); /* nf_log.c */ diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index a36a77bae1d6..9dc1d6e04946 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -240,6 +240,7 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, return 0; } +EXPORT_SYMBOL_GPL(nf_queue); static unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state, -- cgit v1.2.3 From 223fd0adfa8af36d5d9b5d38016e579ee052f367 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Apr 2019 16:36:42 +0200 Subject: bridge: broute: make broute a real ebtables table This makes broute a normal ebtables table, hooking at PREROUTING. The broute hook is removed. It uses skb->cb to signal to bridge rx handler that the skb should be routed instead of being bridged. This change is backwards compatible with ebtables as no userspace visible parts are changed. This means we can also remove the !ops test in ebt_register_table, it was only there for broute table sake. Signed-off-by: Florian Westphal Acked-by: David S. Miller Acked-by: Nikolay Aleksandrov Signed-off-by: Pablo Neira Ayuso --- include/linux/if_bridge.h | 3 -- net/bridge/br_input.c | 18 +++------- net/bridge/br_private.h | 3 ++ net/bridge/netfilter/ebtable_broute.c | 63 ++++++++++++++++++++++++----------- net/bridge/netfilter/ebtables.c | 7 +--- 5 files changed, 52 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 627b788ba0ff..ef0819ced0fc 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -56,9 +56,6 @@ struct br_ip_list { extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); -typedef int br_should_route_hook_t(struct sk_buff *skb); -extern br_should_route_hook_t __rcu *br_should_route_hook; - #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING) int br_multicast_list_adjacent(struct net_device *dev, struct list_head *br_ip_list); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 4ac34fb5f943..e0aacfedcfe1 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -24,10 +24,6 @@ #include "br_private.h" #include "br_private_tunnel.h" -/* Hook for brouter */ -br_should_route_hook_t __rcu *br_should_route_hook __read_mostly; -EXPORT_SYMBOL(br_should_route_hook); - static int br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -234,6 +230,10 @@ static int nf_hook_bridge_pre(struct sk_buff *skb, struct sk_buff **pskb) verdict = nf_hook_entry_hookfn(&e->hooks[i], skb, &state); switch (verdict & NF_VERDICT_MASK) { case NF_ACCEPT: + if (BR_INPUT_SKB_CB(skb)->br_netfilter_broute) { + *pskb = skb; + return RX_HANDLER_PASS; + } break; case NF_DROP: kfree_skb(skb); @@ -265,7 +265,6 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) struct net_bridge_port *p; struct sk_buff *skb = *pskb; const unsigned char *dest = eth_hdr(skb)->h_dest; - br_should_route_hook_t *rhook; if (unlikely(skb->pkt_type == PACKET_LOOPBACK)) return RX_HANDLER_PASS; @@ -341,15 +340,6 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) forward: switch (p->state) { case BR_STATE_FORWARDING: - rhook = rcu_dereference(br_should_route_hook); - if (rhook) { - if ((*rhook)(skb)) { - *pskb = skb; - return RX_HANDLER_PASS; - } - dest = eth_hdr(skb)->h_dest; - } - /* fall through */ case BR_STATE_LEARNING: if (ether_addr_equal(p->br->dev->dev_addr, dest)) skb->pkt_type = PACKET_HOST; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index e7110a6e2b7e..4bea2f11da9b 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -433,6 +433,9 @@ struct br_input_skb_cb { #ifdef CONFIG_BRIDGE_VLAN_FILTERING u8 vlan_filtered:1; #endif +#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE + u8 br_netfilter_broute:1; +#endif #ifdef CONFIG_NET_SWITCHDEV int offload_fwd_mark; diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 276b60262981..ec2652a459da 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -15,6 +15,8 @@ #include #include +#include "../br_private.h" + /* EBT_ACCEPT means the frame will be bridged * EBT_DROP means the frame will be routed */ @@ -48,30 +50,63 @@ static const struct ebt_table broute_table = { .me = THIS_MODULE, }; -static int ebt_broute(struct sk_buff *skb) +static unsigned int ebt_broute(void *priv, struct sk_buff *skb, + const struct nf_hook_state *s) { + struct net_bridge_port *p = br_port_get_rcu(skb->dev); struct nf_hook_state state; + unsigned char *dest; int ret; + if (!p || p->state != BR_STATE_FORWARDING) + return NF_ACCEPT; + nf_hook_state_init(&state, NF_BR_BROUTING, - NFPROTO_BRIDGE, skb->dev, NULL, NULL, - dev_net(skb->dev), NULL); + NFPROTO_BRIDGE, s->in, NULL, NULL, + s->net, NULL); ret = ebt_do_table(skb, &state, state.net->xt.broute_table); - if (ret == NF_DROP) - return 1; /* route it */ - return 0; /* bridge it */ + + if (ret != NF_DROP) + return ret; + + /* DROP in ebtables -t broute means that the + * skb should be routed, not bridged. + * This is awkward, but can't be changed for compatibility + * reasons. + * + * We map DROP to ACCEPT and set the ->br_netfilter_broute flag. + */ + BR_INPUT_SKB_CB(skb)->br_netfilter_broute = 1; + + /* undo PACKET_HOST mangling done in br_input in case the dst + * address matches the logical bridge but not the port. + */ + dest = eth_hdr(skb)->h_dest; + if (skb->pkt_type == PACKET_HOST && + !ether_addr_equal(skb->dev->dev_addr, dest) && + ether_addr_equal(p->br->dev->dev_addr, dest)) + skb->pkt_type = PACKET_OTHERHOST; + + return NF_ACCEPT; } +static const struct nf_hook_ops ebt_ops_broute = { + .hook = ebt_broute, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_PRE_ROUTING, + .priority = NF_BR_PRI_FIRST, +}; + static int __net_init broute_net_init(struct net *net) { - return ebt_register_table(net, &broute_table, NULL, + return ebt_register_table(net, &broute_table, &ebt_ops_broute, &net->xt.broute_table); } static void __net_exit broute_net_exit(struct net *net) { - ebt_unregister_table(net, net->xt.broute_table, NULL); + ebt_unregister_table(net, net->xt.broute_table, &ebt_ops_broute); } static struct pernet_operations broute_net_ops = { @@ -81,21 +116,11 @@ static struct pernet_operations broute_net_ops = { static int __init ebtable_broute_init(void) { - int ret; - - ret = register_pernet_subsys(&broute_net_ops); - if (ret < 0) - return ret; - /* see br_input.c */ - RCU_INIT_POINTER(br_should_route_hook, - (br_should_route_hook_t *)ebt_broute); - return 0; + return register_pernet_subsys(&broute_net_ops); } static void __exit ebtable_broute_fini(void) { - RCU_INIT_POINTER(br_should_route_hook, NULL); - synchronize_net(); unregister_pernet_subsys(&broute_net_ops); } diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index eb15891f8b9f..383f0328ff68 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1221,10 +1221,6 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table, mutex_unlock(&ebt_mutex); WRITE_ONCE(*res, table); - - if (!ops) - return 0; - ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks)); if (ret) { __ebt_unregister_table(net, table); @@ -1248,8 +1244,7 @@ out: void ebt_unregister_table(struct net *net, struct ebt_table *table, const struct nf_hook_ops *ops) { - if (ops) - nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); + nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); __ebt_unregister_table(net, table); } -- cgit v1.2.3