summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- include/net/netfilter/nf_queue.h | 3
-rw-r--r-- net/ipv6/ip6_tunnel.c | 27
-rw-r--r-- net/netfilter/nf_flow_table_ip.c | 243
-rw-r--r-- net/netfilter/nfnetlink_queue.c | 146
-rw-r--r-- net/netfilter/xt_time.c | 8
-rwxr-xr-x tools/testing/selftests/net/netfilter/nft_flowtable.sh | 62
6 files changed, 408 insertions, 81 deletions
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 4aeffddb7586..e6803831d6af 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -6,11 +6,13 @@
#include <linux/ipv6.h>
#include <linux/jhash.h>
#include <linux/netfilter.h>
+#include <linux/rhashtable-types.h>
#include <linux/skbuff.h>
/* Each queued (to userspace) skbuff has one of these. */
struct nf_queue_entry {
struct list_head list;
+ struct rhash_head hash_node;
struct sk_buff *skb;
unsigned int id;
unsigned int hook_index; /* index in hook_entries->hook[] */
@@ -20,6 +22,7 @@ struct nf_queue_entry {
#endif
struct nf_hook_state state;
u16 size; /* sizeof(entry) + saved route keys */
+ u16 queue_num;
/* extra space to store route keys */
};
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index c1f39735a236..f68f6f110a3e 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1828,6 +1828,32 @@ int ip6_tnl_encap_setup(struct ip6_tnl *t,
}
EXPORT_SYMBOL_GPL(ip6_tnl_encap_setup);
+/*
+ * Forward-path callback for ip6tnl devices.
+ *
+ * Routes towards the tunnel's remote address. When the lookup succeeds,
+ * @path is filled in as a tunnel hop (local/remote address, IPv6 as the
+ * tunnel protocol, this device) and @ctx->dev is switched to the device
+ * the route leaves through.
+ *
+ * Returns the error recorded in the looked-up dst entry (0 on success).
+ */
+static int ip6_tnl_fill_forward_path(struct net_device_path_ctx *ctx,
+				     struct net_device_path *path)
+{
+	struct ip6_tnl *tnl = netdev_priv(ctx->dev);
+	struct dst_entry *route;
+	struct flowi6 fl6 = {};
+	int ret;
+
+	fl6.daddr = tnl->parms.raddr;
+	route = ip6_route_output(dev_net(ctx->dev), NULL, &fl6);
+	ret = route->error;
+	if (ret)
+		goto release;
+
+	path->type = DEV_PATH_TUN;
+	path->tun.src_v6 = tnl->parms.laddr;
+	path->tun.dst_v6 = tnl->parms.raddr;
+	path->tun.l3_proto = IPPROTO_IPV6;
+	path->dev = ctx->dev;
+	ctx->dev = route->dev;
+
+release:
+	/* the dst reference is dropped on both the success and error path */
+	dst_release(route);
+
+	return ret;
+}
+
static const struct net_device_ops ip6_tnl_netdev_ops = {
.ndo_init = ip6_tnl_dev_init,
.ndo_uninit = ip6_tnl_dev_uninit,
@@ -1836,6 +1862,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
.ndo_change_mtu = ip6_tnl_change_mtu,
.ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
+ .ndo_fill_forward_path = ip6_tnl_fill_forward_path,
};
#define IPXIPX_FEATURES (NETIF_F_SG | \
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 11da560f38bf..3fdb10d9bf7f 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -14,6 +14,7 @@
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
+#include <net/ip6_tunnel.h>
#include <net/neighbour.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack_acct.h>
@@ -144,12 +145,26 @@ static bool ip_has_options(unsigned int thoff)
return thoff != sizeof(struct iphdr);
}
-static void nf_flow_tuple_encap(struct sk_buff *skb,
+struct nf_flowtable_ctx { /* parsing state handed between the flowtable lookup and forward helpers */
+ const struct net_device *in; /* ingress device; its ifindex is stored in tuple->iifidx */
+ u32 offset; /* bytes of VLAN/PPPoE/IP-tunnel encapsulation skipped, counted from the network header */
+ u32 hdrsize; /* NOTE(review): not touched in the visible hunks - presumably the parsed header span, confirm */
+ struct { /* filled in when an IPIP or IP6IP6 outer header is recognised */
+ /* Tunnel IP header size */
+ u32 hdr_size;
+ /* IP tunnel protocol */
+ u8 proto;
+ } tun;
+};
+
+static void nf_flow_tuple_encap(struct nf_flowtable_ctx *ctx,
+ struct sk_buff *skb,
struct flow_offload_tuple *tuple)
{
__be16 inner_proto = skb->protocol;
struct vlan_ethhdr *veth;
struct pppoe_hdr *phdr;
+ struct ipv6hdr *ip6h;
struct iphdr *iph;
u16 offset = 0;
int i = 0;
@@ -176,22 +191,28 @@ static void nf_flow_tuple_encap(struct sk_buff *skb,
break;
}
- if (inner_proto == htons(ETH_P_IP)) {
+ switch (inner_proto) {
+ case htons(ETH_P_IP):
iph = (struct iphdr *)(skb_network_header(skb) + offset);
- if (iph->protocol == IPPROTO_IPIP) {
+ if (ctx->tun.proto == IPPROTO_IPIP) {
tuple->tun.dst_v4.s_addr = iph->daddr;
tuple->tun.src_v4.s_addr = iph->saddr;
tuple->tun.l3_proto = IPPROTO_IPIP;
}
+ break;
+ case htons(ETH_P_IPV6):
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
+ if (ctx->tun.proto == IPPROTO_IPV6) {
+ tuple->tun.dst_v6 = ip6h->daddr;
+ tuple->tun.src_v6 = ip6h->saddr;
+ tuple->tun.l3_proto = IPPROTO_IPV6;
+ }
+ break;
+ default:
+ break;
}
}
-struct nf_flowtable_ctx {
- const struct net_device *in;
- u32 offset;
- u32 hdrsize;
-};
-
static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
struct flow_offload_tuple *tuple)
{
@@ -259,7 +280,7 @@ static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
tuple->l3proto = AF_INET;
tuple->l4proto = ipproto;
tuple->iifidx = ctx->in->ifindex;
- nf_flow_tuple_encap(skb, tuple);
+ nf_flow_tuple_encap(ctx, skb, tuple);
return 0;
}
@@ -295,15 +316,16 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
return NF_STOLEN;
}
-static bool nf_flow_ip4_tunnel_proto(struct sk_buff *skb, u32 *psize)
+static bool nf_flow_ip4_tunnel_proto(struct nf_flowtable_ctx *ctx,
+ struct sk_buff *skb)
{
struct iphdr *iph;
u16 size;
- if (!pskb_may_pull(skb, sizeof(*iph) + *psize))
+ if (!pskb_may_pull(skb, sizeof(*iph) + ctx->offset))
return false;
- iph = (struct iphdr *)(skb_network_header(skb) + *psize);
+ iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
size = iph->ihl << 2;
if (ip_is_fragment(iph) || unlikely(ip_has_options(size)))
@@ -312,25 +334,62 @@ static bool nf_flow_ip4_tunnel_proto(struct sk_buff *skb, u32 *psize)
if (iph->ttl <= 1)
return false;
- if (iph->protocol == IPPROTO_IPIP)
- *psize += size;
+ if (iph->protocol == IPPROTO_IPIP) {
+ ctx->tun.proto = IPPROTO_IPIP;
+ ctx->tun.hdr_size = size;
+ ctx->offset += size;
+ }
return true;
}
-static void nf_flow_ip4_tunnel_pop(struct sk_buff *skb)
+/*
+ * Inspect the IPv6 header located ctx->offset bytes into the packet and
+ * detect IP6IP6 encapsulation.
+ *
+ * Returns false when the header cannot be read, when the hop limit is
+ * <= 1, when the extension header chain cannot be walked, and always when
+ * IPv6 support is not enabled. Otherwise returns true; if the header that
+ * follows the extension headers is IPv6, the outer header length and the
+ * tunnel protocol are recorded in ctx->tun and ctx->offset is advanced
+ * past the outer header.
+ */
+static bool nf_flow_ip6_tunnel_proto(struct nf_flowtable_ctx *ctx,
+ struct sk_buff *skb)
{
- struct iphdr *iph = (struct iphdr *)skb_network_header(skb);
+#if IS_ENABLED(CONFIG_IPV6)
+ struct ipv6hdr *ip6h, _ip6h;
+ __be16 frag_off;
+ u8 nexthdr;
+ int hdrlen;
+
+ ip6h = skb_header_pointer(skb, ctx->offset, sizeof(*ip6h), &_ip6h);
+ if (!ip6h)
+ return false;
+
+ if (ip6h->hop_limit <= 1)
+ return false;
+
+ nexthdr = ip6h->nexthdr;
+ hdrlen = ipv6_skip_exthdr(skb, sizeof(*ip6h) + ctx->offset, &nexthdr,
+ &frag_off);
+ if (hdrlen < 0)
+ return false;
+
+ if (nexthdr == IPPROTO_IPV6) {
+ /*
+ * The walk above started at ctx->offset + sizeof(*ip6h), so
+ * hdrlen already contains ctx->offset. Record only the length
+ * of the tunnel header itself, like the IPv4 variant does, so
+ * earlier encapsulation is not counted twice below and
+ * nf_flow_ip_tunnel_pop() pulls exactly the outer header.
+ */
+ ctx->tun.hdr_size = hdrlen - ctx->offset;
+ ctx->tun.proto = IPPROTO_IPV6;
+ }
+ /* tun.hdr_size is only written above; no tunnel means nothing is added */
+ ctx->offset += ctx->tun.hdr_size;
+
+ return true;
+#else
+ return false;
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+}
- if (iph->protocol != IPPROTO_IPIP)
+static void nf_flow_ip_tunnel_pop(struct nf_flowtable_ctx *ctx,
+ struct sk_buff *skb)
+{ /* strip the outer tunnel header described by ctx->tun, if one was recorded */
+ if (ctx->tun.proto != IPPROTO_IPIP &&
+ ctx->tun.proto != IPPROTO_IPV6) /* neither IPIP nor IP6IP6: leave the packet alone */
return;
- skb_pull(skb, iph->ihl << 2);
+ skb_pull(skb, ctx->tun.hdr_size); /* size comes from ctx instead of re-reading the header */
skb_reset_network_header(skb); /* what follows the pulled bytes becomes the network header */
}
-static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
- u32 *offset)
+static bool nf_flow_skb_encap_protocol(struct nf_flowtable_ctx *ctx,
+ struct sk_buff *skb, __be16 proto)
{
__be16 inner_proto = skb->protocol;
struct vlan_ethhdr *veth;
@@ -343,7 +402,7 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
veth = (struct vlan_ethhdr *)skb_mac_header(skb);
if (veth->h_vlan_encapsulated_proto == proto) {
- *offset += VLAN_HLEN;
+ ctx->offset += VLAN_HLEN;
inner_proto = proto;
ret = true;
}
@@ -351,19 +410,28 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
case htons(ETH_P_PPP_SES):
if (nf_flow_pppoe_proto(skb, &inner_proto) &&
inner_proto == proto) {
- *offset += PPPOE_SES_HLEN;
+ ctx->offset += PPPOE_SES_HLEN;
ret = true;
}
break;
}
- if (inner_proto == htons(ETH_P_IP))
- ret = nf_flow_ip4_tunnel_proto(skb, offset);
+ switch (inner_proto) {
+ case htons(ETH_P_IP):
+ ret = nf_flow_ip4_tunnel_proto(ctx, skb);
+ break;
+ case htons(ETH_P_IPV6):
+ ret = nf_flow_ip6_tunnel_proto(ctx, skb);
+ break;
+ default:
+ break;
+ }
return ret;
}
-static void nf_flow_encap_pop(struct sk_buff *skb,
+static void nf_flow_encap_pop(struct nf_flowtable_ctx *ctx,
+ struct sk_buff *skb,
struct flow_offload_tuple_rhash *tuplehash)
{
struct vlan_hdr *vlan_hdr;
@@ -389,8 +457,9 @@ static void nf_flow_encap_pop(struct sk_buff *skb,
}
}
- if (skb->protocol == htons(ETH_P_IP))
- nf_flow_ip4_tunnel_pop(skb);
+ if (skb->protocol == htons(ETH_P_IP) ||
+ skb->protocol == htons(ETH_P_IPV6))
+ nf_flow_ip_tunnel_pop(ctx, skb);
}
struct nf_flow_xmit {
@@ -416,7 +485,7 @@ nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx,
{
struct flow_offload_tuple tuple = {};
- if (!nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &ctx->offset))
+ if (!nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IP)))
return NULL;
if (nf_flow_tuple_ip(ctx, skb, &tuple) < 0)
@@ -460,7 +529,7 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
flow_offload_refresh(flow_table, flow, false);
- nf_flow_encap_pop(skb, tuplehash);
+ nf_flow_encap_pop(ctx, skb, tuplehash);
thoff -= ctx->offset;
iph = ip_hdr(skb);
@@ -569,6 +638,97 @@ static int nf_flow_tunnel_v4_push(struct net *net, struct sk_buff *skb,
return 0;
}
+struct ipv6_tel_txoption { /* tx options plus inline storage for one destination-options header */
+ struct ipv6_txoptions ops; /* ops.dst1opt is pointed at dst_opt by nf_flow_tunnel_ip6ip6_push() */
+ __u8 dst_opt[8]; /* raw header bytes: tunnel encapsulation limit TLV followed by PadN */
+};
+
+static int nf_flow_tunnel_ip6ip6_push(struct net *net, struct sk_buff *skb,
+ struct flow_offload_tuple *tuple,
+ struct in6_addr **ip6_daddr,
+ int encap_limit)
+{ /* prepend an outer IPv6 header (plus an encap-limit option when encap_limit > 0); returns 0 or a negative error */
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)skb_network_header(skb);
+ u8 hop_limit = ip6h->hop_limit, proto = IPPROTO_IPV6; /* inner hop limit is reused for the outer header */
+ struct rtable *rt = dst_rtable(tuple->dst_cache); /* NOTE(review): only rt->dst is used below - confirm the cast is fine for an IPv6 dst */
+ __u8 dsfield = ipv6_get_dsfield(ip6h); /* inner traffic class, copied into the outer header */
+ struct flowi6 fl6 = { /* only consumed by ip6_make_flowlabel() below */
+ .daddr = tuple->tun.src_v6, /* the caller passes the reverse-direction tuple, so its tunnel source is our outer destination */
+ .saddr = tuple->tun.dst_v6,
+ .flowi6_proto = proto,
+ };
+ int err, mtu;
+ u32 headroom;
+
+ err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6);
+ if (err)
+ return err;
+
+ skb_set_inner_ipproto(skb, proto);
+ headroom = sizeof(*ip6h) + LL_RESERVED_SPACE(rt->dst.dev) +
+ rt->dst.header_len;
+ if (encap_limit) /* NOTE(review): tests non-zero, while the option itself is only pushed for encap_limit > 0 */
+ headroom += 8; /* room for the 8-byte destination-options header */
+ err = skb_cow_head(skb, headroom);
+ if (err)
+ return err;
+
+ skb_scrub_packet(skb, true);
+ mtu = dst_mtu(&rt->dst) - sizeof(*ip6h); /* path MTU as seen by the inner packet */
+ if (encap_limit)
+ mtu -= 8;
+ mtu = max(mtu, IPV6_MIN_MTU); /* never go below the IPv6 minimum MTU */
+ skb_dst_update_pmtu_no_confirm(skb, mtu);
+
+ if (encap_limit > 0) {
+ struct ipv6_tel_txoption opt = { /* bytes 0-1 stay zero here; byte 0 (next header) is set via hopt below */
+ .dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT,
+ .dst_opt[3] = 1, /* option data length */
+ .dst_opt[4] = encap_limit,
+ .dst_opt[5] = IPV6_TLV_PADN, /* pad the header out to 8 bytes */
+ .dst_opt[6] = 1,
+ };
+ struct ipv6_opt_hdr *hopt;
+
+ opt.ops.dst1opt = (struct ipv6_opt_hdr *)opt.dst_opt;
+ opt.ops.opt_nflen = 8;
+
+ hopt = skb_push(skb, ipv6_optlen(opt.ops.dst1opt)); /* option header sits between outer and inner IPv6 headers */
+ memcpy(hopt, opt.ops.dst1opt, ipv6_optlen(opt.ops.dst1opt));
+ hopt->nexthdr = IPPROTO_IPV6; /* the inner packet follows the option header */
+ proto = NEXTHDR_DEST; /* so the outer header must announce the option header instead */
+ }
+
+ skb_push(skb, sizeof(*ip6h));
+ skb_reset_network_header(skb);
+
+ ip6h = ipv6_hdr(skb); /* from here on ip6h is the new outer header */
+ ip6_flow_hdr(ip6h, dsfield,
+ ip6_make_flowlabel(net, skb, fl6.flowlabel, true, &fl6));
+ ip6h->hop_limit = hop_limit;
+ ip6h->nexthdr = proto;
+ ip6h->daddr = tuple->tun.src_v6;
+ ip6h->saddr = tuple->tun.dst_v6;
+ ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(*ip6h)); /* everything after the outer fixed header */
+ IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
+
+ *ip6_daddr = &tuple->tun.src_v6; /* hand the outer destination back to the caller */
+
+ return 0;
+}
+
+/*
+ * Add the IPv6 tunnel encapsulation for @tuple, if it has one.
+ *
+ * A tuple with tun_num == 0 leaves the packet untouched and yields 0;
+ * otherwise the result of nf_flow_tunnel_ip6ip6_push() is passed back
+ * unchanged.
+ */
+static int nf_flow_tunnel_v6_push(struct net *net, struct sk_buff *skb,
+				  struct flow_offload_tuple *tuple,
+				  struct in6_addr **ip6_daddr,
+				  int encap_limit)
+{
+	if (!tuple->tun_num)
+		return 0;
+
+	return nf_flow_tunnel_ip6ip6_push(net, skb, tuple, ip6_daddr,
+					  encap_limit);
+}
+
static int nf_flow_encap_push(struct sk_buff *skb,
struct flow_offload_tuple *tuple)
{
@@ -838,7 +998,7 @@ static int nf_flow_tuple_ipv6(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
tuple->l3proto = AF_INET6;
tuple->l4proto = nexthdr;
tuple->iifidx = ctx->in->ifindex;
- nf_flow_tuple_encap(skb, tuple);
+ nf_flow_tuple_encap(ctx, skb, tuple);
return 0;
}
@@ -846,7 +1006,7 @@ static int nf_flow_tuple_ipv6(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
struct nf_flowtable *flow_table,
struct flow_offload_tuple_rhash *tuplehash,
- struct sk_buff *skb)
+ struct sk_buff *skb, int encap_limit)
{
enum flow_offload_tuple_dir dir;
struct flow_offload *flow;
@@ -857,6 +1017,12 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
+ if (flow->tuplehash[!dir].tuple.tun_num) {
+ mtu -= sizeof(*ip6h);
+ if (encap_limit > 0)
+ mtu -= 8; /* encap limit option */
+ }
+
if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
return 0;
@@ -875,7 +1041,7 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
flow_offload_refresh(flow_table, flow, false);
- nf_flow_encap_pop(skb, tuplehash);
+ nf_flow_encap_pop(ctx, skb, tuplehash);
ip6h = ipv6_hdr(skb);
nf_flow_nat_ipv6(flow, skb, dir, ip6h);
@@ -896,8 +1062,7 @@ nf_flow_offload_ipv6_lookup(struct nf_flowtable_ctx *ctx,
{
struct flow_offload_tuple tuple = {};
- if (skb->protocol != htons(ETH_P_IPV6) &&
- !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &ctx->offset))
+ if (!nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IPV6)))
return NULL;
if (nf_flow_tuple_ipv6(ctx, skb, &tuple) < 0)
@@ -910,6 +1075,7 @@ unsigned int
nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
+ int encap_limit = IPV6_DEFAULT_TNL_ENCAP_LIMIT;
struct flow_offload_tuple_rhash *tuplehash;
struct nf_flowtable *flow_table = priv;
struct flow_offload_tuple *other_tuple;
@@ -928,7 +1094,8 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
if (tuplehash == NULL)
return NF_ACCEPT;
- ret = nf_flow_offload_ipv6_forward(&ctx, flow_table, tuplehash, skb);
+ ret = nf_flow_offload_ipv6_forward(&ctx, flow_table, tuplehash, skb,
+ encap_limit);
if (ret < 0)
return NF_DROP;
else if (ret == 0)
@@ -947,6 +1114,10 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
other_tuple = &flow->tuplehash[!dir].tuple;
ip6_daddr = &other_tuple->src_v6;
+ if (nf_flow_tunnel_v6_push(state->net, skb, other_tuple,
+ &ip6_daddr, encap_limit) < 0)
+ return NF_DROP;
+
if (nf_flow_encap_push(skb, other_tuple) < 0)
return NF_DROP;
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 8fa0807973c9..671b52c652ef 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -30,6 +30,8 @@
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/list.h>
#include <linux/cgroup-defs.h>
+#include <linux/rhashtable.h>
+#include <linux/jhash.h>
#include <net/gso.h>
#include <net/sock.h>
#include <net/tcp_states.h>
@@ -47,6 +49,8 @@
#endif
#define NFQNL_QMAX_DEFAULT 1024
+#define NFQNL_HASH_MIN 1024
+#define NFQNL_HASH_MAX 1048576
/* We're using struct nlattr which has 16bit nla_len. Note that nla_len
* includes the header length. Thus, the maximum packet length that we
@@ -56,6 +60,26 @@
*/
#define NFQNL_MAX_COPY_RANGE (0xffff - NLA_HDRLEN)
+/* Composite key for packet lookup: (net, queue_num, packet_id); hashed as raw u32 words, so it must be fully zeroed before use */
+struct nfqnl_packet_key {
+ possible_net_t net; /* namespace taken from entry->state.net */
+ u32 packet_id; /* the queued entry's id */
+ u16 queue_num; /* queue number of the owning nfqnl_instance */
+} __aligned(sizeof(u32)); /* jhash2 requires 32-bit alignment */
+
+/* Global rhashtable - one for entire system, all netns */
+static struct rhashtable nfqnl_packet_map __read_mostly;
+
+/* Fill @key with (net, packet_id, queue_num) after clearing every byte of it */
+static inline void nfqnl_init_key(struct nfqnl_packet_key *key,
+ struct net *net, u32 packet_id, u16 queue_num)
+{
+ memset(key, 0, sizeof(*key)); /* the whole struct is fed to jhash2, so any padding must be deterministic */
+ write_pnet(&key->net, net);
+ key->packet_id = packet_id;
+ key->queue_num = queue_num;
+}
+
struct nfqnl_instance {
struct hlist_node hlist; /* global list of queues */
struct rcu_head rcu;
@@ -100,6 +124,39 @@ static inline u_int8_t instance_hashfn(u_int16_t queue_num)
return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS;
}
+/*
+ * rhashtable object hash: rebuild the (net, queue_num, packet_id) key
+ * from a queued entry and hash it as an array of u32 words.
+ * @len is not used; the hashed length is always sizeof(key).
+ */
+static u32 nfqnl_packet_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+	const struct nf_queue_entry *queued = data;
+	struct nfqnl_packet_key k;
+
+	nfqnl_init_key(&k, queued->state.net, queued->id, queued->queue_num);
+
+	return jhash2((u32 *)&k, sizeof(k) / sizeof(u32), seed);
+}
+
+/*
+ * rhashtable compare callback: match a queued entry against a lookup key.
+ * Returns 0 when namespace, queue number and packet id all agree, and 1
+ * as soon as one of them differs.
+ */
+static int nfqnl_packet_obj_cmpfn(struct rhashtable_compare_arg *arg,
+				  const void *obj)
+{
+	const struct nfqnl_packet_key *wanted = arg->key;
+	const struct nf_queue_entry *queued = obj;
+
+	if (!net_eq(queued->state.net, read_pnet(&wanted->net)))
+		return 1;
+
+	if (queued->queue_num != wanted->queue_num)
+		return 1;
+
+	return queued->id != wanted->packet_id;
+}
+
+static const struct rhashtable_params nfqnl_rhashtable_params = { /* parameters of the global nfqnl_packet_map table */
+ .head_offset = offsetof(struct nf_queue_entry, hash_node),
+ .key_len = sizeof(struct nfqnl_packet_key), /* NOTE(review): no .hashfn is set - lookups presumably rely on the default key hash matching the jhash2 in obj_hashfn, confirm */
+ .obj_hashfn = nfqnl_packet_obj_hashfn, /* entries do not embed the key struct, so it is rebuilt from the entry */
+ .obj_cmpfn = nfqnl_packet_obj_cmpfn, /* returns 0 on match */
+ .automatic_shrinking = true,
+ .min_size = NFQNL_HASH_MIN,
+ .max_size = NFQNL_HASH_MAX,
+};
+
static struct nfqnl_instance *
instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num)
{
@@ -188,33 +245,45 @@ instance_destroy(struct nfnl_queue_net *q, struct nfqnl_instance *inst)
spin_unlock(&q->instances_lock);
}
-static inline void
+static int
__enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
{ /* add entry to the global packet hash and to the queue's list; returns 0 or the hash insert error */
- list_add_tail(&entry->list, &queue->queue_list);
- queue->queue_total++;
+ int err;
+
+ entry->queue_num = queue->queue_num; /* part of the hash key, so it must be set before inserting */
+
+ err = rhashtable_insert_fast(&nfqnl_packet_map, &entry->hash_node,
+ nfqnl_rhashtable_params);
+ if (unlikely(err))
+ return err; /* on failure the list and queue_total are left untouched */
+
+ list_add_tail(&entry->list, &queue->queue_list);
+ queue->queue_total++;
+
+ return 0;
}
static void
__dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
{ /* undo __enqueue_entry(): drop entry from the packet hash and from the queue's list */
+ rhashtable_remove_fast(&nfqnl_packet_map, &entry->hash_node, /* return value is not checked here */
+ nfqnl_rhashtable_params);
list_del(&entry->list);
queue->queue_total--;
}
static struct nf_queue_entry *
-find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
+find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id,
+ struct net *net)
{
- struct nf_queue_entry *entry = NULL, *i;
+ struct nfqnl_packet_key key;
+ struct nf_queue_entry *entry;
- spin_lock_bh(&queue->lock);
+ nfqnl_init_key(&key, net, id, queue->queue_num);
- list_for_each_entry(i, &queue->queue_list, list) {
- if (i->id == id) {
- entry = i;
- break;
- }
- }
+ spin_lock_bh(&queue->lock);
+ entry = rhashtable_lookup_fast(&nfqnl_packet_map, &key,
+ nfqnl_rhashtable_params);
if (entry)
__dequeue_entry(queue, entry);
@@ -404,8 +473,7 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
spin_lock_bh(&queue->lock);
list_for_each_entry_safe(entry, next, &queue->queue_list, list) {
if (!cmpfn || cmpfn(entry, data)) {
- list_del(&entry->list);
- queue->queue_total--;
+ __dequeue_entry(queue, entry);
nfqnl_reinject(entry, NF_DROP);
}
}
@@ -885,23 +953,23 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
if (nf_ct_drop_unconfirmed(entry))
goto err_out_free_nskb;
- if (queue->queue_total >= queue->queue_maxlen) {
- if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
- failopen = 1;
- err = 0;
- } else {
- queue->queue_dropped++;
- net_warn_ratelimited("nf_queue: full at %d entries, dropping packets(s)\n",
- queue->queue_total);
- }
- goto err_out_free_nskb;
- }
+ if (queue->queue_total >= queue->queue_maxlen)
+ goto err_out_queue_drop;
+
entry->id = ++queue->id_sequence;
*packet_id_ptr = htonl(entry->id);
+ /* Insert into hash BEFORE unicast. If failure don't send to userspace. */
+ err = __enqueue_entry(queue, entry);
+ if (unlikely(err))
+ goto err_out_queue_drop;
+
/* nfnetlink_unicast will either free the nskb or add it to a socket */
err = nfnetlink_unicast(nskb, net, queue->peer_portid);
if (err < 0) {
+ /* Unicast failed - remove entry we just inserted */
+ __dequeue_entry(queue, entry);
+
if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
failopen = 1;
err = 0;
@@ -911,11 +979,22 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
goto err_out_unlock;
}
- __enqueue_entry(queue, entry);
-
spin_unlock_bh(&queue->lock);
return 0;
+err_out_queue_drop:
+ if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
+ failopen = 1;
+ err = 0;
+ } else {
+ queue->queue_dropped++;
+
+ if (queue->queue_total >= queue->queue_maxlen)
+ net_warn_ratelimited("nf_queue: full at %d entries, dropping packets(s)\n",
+ queue->queue_total);
+ else
+ net_warn_ratelimited("nf_queue: hash insert failed: %d\n", err);
+ }
err_out_free_nskb:
kfree_skb(nskb);
err_out_unlock:
@@ -1427,7 +1506,7 @@ static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info,
verdict = ntohl(vhdr->verdict);
- entry = find_dequeue_entry(queue, ntohl(vhdr->id));
+ entry = find_dequeue_entry(queue, ntohl(vhdr->id), info->net);
if (entry == NULL)
return -ENOENT;
@@ -1774,10 +1853,14 @@ static int __init nfnetlink_queue_init(void)
{
int status;
+ status = rhashtable_init(&nfqnl_packet_map, &nfqnl_rhashtable_params);
+ if (status < 0)
+ return status;
+
status = register_pernet_subsys(&nfnl_queue_net_ops);
if (status < 0) {
pr_err("failed to register pernet ops\n");
- goto out;
+ goto cleanup_rhashtable;
}
netlink_register_notifier(&nfqnl_rtnl_notifier);
@@ -1802,7 +1885,8 @@ cleanup_netlink_subsys:
cleanup_netlink_notifier:
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
unregister_pernet_subsys(&nfnl_queue_net_ops);
-out:
+cleanup_rhashtable:
+ rhashtable_destroy(&nfqnl_packet_map);
return status;
}
@@ -1814,6 +1898,8 @@ static void __exit nfnetlink_queue_fini(void)
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
unregister_pernet_subsys(&nfnl_queue_net_ops);
+ rhashtable_destroy(&nfqnl_packet_map);
+
rcu_barrier(); /* Wait for completion of call_rcu()'s */
}
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index 6aa12d0f54e2..00319d2a54da 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -14,6 +14,7 @@
#include <linux/ktime.h>
#include <linux/module.h>
+#include <linux/rtc.h>
#include <linux/skbuff.h>
#include <linux/types.h>
#include <linux/netfilter/x_tables.h>
@@ -64,11 +65,6 @@ static const u_int16_t days_since_epoch[] = {
3287, 2922, 2557, 2191, 1826, 1461, 1096, 730, 365, 0,
};
-static inline bool is_leap(unsigned int y)
-{
- return y % 4 == 0 && (y % 100 != 0 || y % 400 == 0);
-}
-
/*
* Each network packet has a (nano)seconds-since-the-epoch (SSTE) timestamp.
* Since we match against days and daytime, the SSTE value needs to be
@@ -138,7 +134,7 @@ static void localtime_3(struct xtm *r, time64_t time)
* (A different approach to use would be to subtract a monthlength
* from w repeatedly while counting.)
*/
- if (is_leap(year)) {
+ if (is_leap_year(year)) {
/* use days_since_leapyear[] in a leap year */
for (i = ARRAY_SIZE(days_since_leapyear) - 1;
i > 0 && days_since_leapyear[i] > w; --i)
diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
index a68bc882fa4e..14d7f67715ed 100755
--- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
@@ -592,16 +592,28 @@ ip -net "$nsr1" link set tun0 up
ip -net "$nsr1" addr add 192.168.100.1/24 dev tun0
ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
+ip -net "$nsr1" link add name tun6 type ip6tnl local fee1:2::1 remote fee1:2::2
+ip -net "$nsr1" link set tun6 up
+ip -net "$nsr1" addr add fee1:3::1/64 dev tun6 nodad
+
ip -net "$nsr2" link add name tun0 type ipip local 192.168.10.2 remote 192.168.10.1
ip -net "$nsr2" link set tun0 up
ip -net "$nsr2" addr add 192.168.100.2/24 dev tun0
ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
+ip -net "$nsr2" link add name tun6 type ip6tnl local fee1:2::2 remote fee1:2::1
+ip -net "$nsr2" link set tun6 up
+ip -net "$nsr2" addr add fee1:3::2/64 dev tun6 nodad
+
ip -net "$nsr1" route change default via 192.168.100.2
ip -net "$nsr2" route change default via 192.168.100.1
+ip -6 -net "$nsr1" route change default via fee1:3::2
+ip -6 -net "$nsr2" route change default via fee1:3::1
ip -net "$ns2" route add default via 10.0.2.1
+ip -6 -net "$ns2" route add default via dead:2::1
ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0 accept'
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6 accept'
ip netns exec "$nsr1" nft -a insert rule inet filter forward \
'meta oif "veth0" tcp sport 12345 ct mark set 1 flow add @f1 counter name routed_repl accept'
@@ -611,28 +623,51 @@ if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel"; then
ret=1
fi
+if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
+ echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel"
+else
+ echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel" 1>&2
+ ip netns exec "$nsr1" nft list ruleset
+ ret=1
+fi
+
# Create vlan tagged devices for IPIP traffic.
ip -net "$nsr1" link add link veth1 name veth1.10 type vlan id 10
ip -net "$nsr1" link set veth1.10 up
ip -net "$nsr1" addr add 192.168.20.1/24 dev veth1.10
+ip -net "$nsr1" addr add fee1:4::1/64 dev veth1.10 nodad
ip netns exec "$nsr1" sysctl net.ipv4.conf.veth1/10.forwarding=1 > /dev/null
ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif veth1.10 accept'
-ip -net "$nsr1" link add name tun1 type ipip local 192.168.20.1 remote 192.168.20.2
-ip -net "$nsr1" link set tun1 up
-ip -net "$nsr1" addr add 192.168.200.1/24 dev tun1
+
+ip -net "$nsr1" link add name tun0.10 type ipip local 192.168.20.1 remote 192.168.20.2
+ip -net "$nsr1" link set tun0.10 up
+ip -net "$nsr1" addr add 192.168.200.1/24 dev tun0.10
ip -net "$nsr1" route change default via 192.168.200.2
-ip netns exec "$nsr1" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
-ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun1 accept'
+ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0.10 accept'
+
+ip -net "$nsr1" link add name tun6.10 type ip6tnl local fee1:4::1 remote fee1:4::2
+ip -net "$nsr1" link set tun6.10 up
+ip -net "$nsr1" addr add fee1:5::1/64 dev tun6.10 nodad
+ip -6 -net "$nsr1" route change default via fee1:5::2
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6.10 accept'
ip -net "$nsr2" link add link veth0 name veth0.10 type vlan id 10
ip -net "$nsr2" link set veth0.10 up
ip -net "$nsr2" addr add 192.168.20.2/24 dev veth0.10
+ip -net "$nsr2" addr add fee1:4::2/64 dev veth0.10 nodad
ip netns exec "$nsr2" sysctl net.ipv4.conf.veth0/10.forwarding=1 > /dev/null
-ip -net "$nsr2" link add name tun1 type ipip local 192.168.20.2 remote 192.168.20.1
-ip -net "$nsr2" link set tun1 up
-ip -net "$nsr2" addr add 192.168.200.2/24 dev tun1
+
+ip -net "$nsr2" link add name tun0.10 type ipip local 192.168.20.2 remote 192.168.20.1
+ip -net "$nsr2" link set tun0.10 up
+ip -net "$nsr2" addr add 192.168.200.2/24 dev tun0.10
ip -net "$nsr2" route change default via 192.168.200.1
-ip netns exec "$nsr2" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
+ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null
+
+ip -net "$nsr2" link add name tun6.10 type ip6tnl local fee1:4::2 remote fee1:4::1
+ip -net "$nsr2" link set tun6.10 up
+ip -net "$nsr2" addr add fee1:5::2/64 dev tun6.10 nodad
+ip -6 -net "$nsr2" route change default via fee1:5::1
if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then
echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel over vlan" 1>&2
@@ -640,10 +675,19 @@ if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then
ret=1
fi
+if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
+ echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel over vlan"
+else
+ echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel over vlan" 1>&2
+ ip netns exec "$nsr1" nft list ruleset
+ ret=1
+fi
+
# Restore the previous configuration
ip -net "$nsr1" route change default via 192.168.10.2
ip -net "$nsr2" route change default via 192.168.10.1
ip -net "$ns2" route del default via 10.0.2.1
+ip -6 -net "$ns2" route del default via dead:2::1
}
# Another test: