summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- include/net/netfilter/nf_conntrack_core.h 5
-rw-r--r-- include/net/netfilter/nf_conntrack_expect.h 20
-rw-r--r-- include/uapi/linux/netfilter/nf_conntrack_common.h 4
-rw-r--r-- net/ipv6/netfilter/ip6t_rt.c 4
-rw-r--r-- net/netfilter/nf_conntrack_broadcast.c 8
-rw-r--r-- net/netfilter/nf_conntrack_ecache.c 2
-rw-r--r-- net/netfilter/nf_conntrack_expect.c 39
-rw-r--r-- net/netfilter/nf_conntrack_h323_main.c 12
-rw-r--r-- net/netfilter/nf_conntrack_helper.c 11
-rw-r--r-- net/netfilter/nf_conntrack_netlink.c 75
-rw-r--r-- net/netfilter/nf_conntrack_proto_tcp.c 10
-rw-r--r-- net/netfilter/nf_conntrack_sip.c 18
-rw-r--r-- net/netfilter/nfnetlink_log.c 8
-rw-r--r-- net/netfilter/nft_set_pipapo_avx2.c 20
-rw-r--r-- net/netfilter/nft_set_rbtree.c 92
-rwxr-xr-x tools/testing/selftests/net/netfilter/nft_concat_range.sh 70
16 files changed, 296 insertions, 102 deletions
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 3384859a8921..8883575adcc1 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -83,6 +83,11 @@ void nf_conntrack_lock(spinlock_t *lock);
extern spinlock_t nf_conntrack_expect_lock;
+static inline void lockdep_nfct_expect_lock_held(void)
+{
+ lockdep_assert_held(&nf_conntrack_expect_lock);
+}
+
/* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */
static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout)
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 165e7a03b8e9..e9a8350e7ccf 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -22,10 +22,16 @@ struct nf_conntrack_expect {
/* Hash member */
struct hlist_node hnode;
+ /* Network namespace */
+ possible_net_t net;
+
/* We expect this tuple, with the following mask */
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_mask mask;
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ struct nf_conntrack_zone zone;
+#endif
/* Usage count. */
refcount_t use;
@@ -40,7 +46,7 @@ struct nf_conntrack_expect {
struct nf_conntrack_expect *this);
/* Helper to assign to new connection */
- struct nf_conntrack_helper *helper;
+ struct nf_conntrack_helper __rcu *helper;
/* The conntrack of the master connection */
struct nf_conn *master;
@@ -62,7 +68,17 @@ struct nf_conntrack_expect {
static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp)
{
- return nf_ct_net(exp->master);
+ return read_pnet(&exp->net);
+}
+
+static inline bool nf_ct_exp_zone_equal_any(const struct nf_conntrack_expect *a,
+ const struct nf_conntrack_zone *b)
+{
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ return a->zone.id == b->id;
+#else
+ return true;
+#endif
}
#define NF_CT_EXP_POLICY_NAME_LEN 16
diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
index 26071021e986..56b6b60a814f 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -159,5 +159,9 @@ enum ip_conntrack_expect_events {
#define NF_CT_EXPECT_INACTIVE 0x2
#define NF_CT_EXPECT_USERSPACE 0x4
+#ifdef __KERNEL__
+#define NF_CT_EXPECT_MASK (NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE | \
+ NF_CT_EXPECT_USERSPACE)
+#endif
#endif /* _UAPI_NF_CONNTRACK_COMMON_H */
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 4ad8b2032f1f..5561bd9cea81 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -157,6 +157,10 @@ static int rt_mt6_check(const struct xt_mtchk_param *par)
pr_debug("unknown flags %X\n", rtinfo->invflags);
return -EINVAL;
}
+ if (rtinfo->addrnr > IP6T_RT_HOPS) {
+ pr_debug("too many addresses specified\n");
+ return -EINVAL;
+ }
if ((rtinfo->flags & (IP6T_RT_RES | IP6T_RT_FST_MASK)) &&
(!(rtinfo->flags & IP6T_RT_TYP) ||
(rtinfo->rt_type != 0) ||
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index a7552a46d6ac..4f39bf7c843f 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -21,6 +21,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
unsigned int timeout)
{
const struct nf_conntrack_helper *helper;
+ struct net *net = read_pnet(&ct->ct_net);
struct nf_conntrack_expect *exp;
struct iphdr *iph = ip_hdr(skb);
struct rtable *rt = skb_rtable(skb);
@@ -70,8 +71,11 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
exp->expectfn = NULL;
exp->flags = NF_CT_EXPECT_PERMANENT;
exp->class = NF_CT_EXPECT_CLASS_DEFAULT;
- exp->helper = NULL;
-
+ rcu_assign_pointer(exp->helper, helper);
+ write_pnet(&exp->net, net);
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ exp->zone = ct->zone;
+#endif
nf_ct_expect_related(exp, 0);
nf_ct_expect_put(exp);
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 81baf2082604..9df159448b89 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -247,6 +247,8 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
struct nf_ct_event_notifier *notify;
struct nf_conntrack_ecache *e;
+ lockdep_nfct_expect_lock_held();
+
rcu_read_lock();
notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
if (!notify)
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index cfc2daa3fc7f..24d0576d84b7 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -51,6 +51,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
struct net *net = nf_ct_exp_net(exp);
struct nf_conntrack_net *cnet;
+ lockdep_nfct_expect_lock_held();
WARN_ON(!master_help);
WARN_ON(timer_pending(&exp->timeout));
@@ -112,12 +113,14 @@ nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple,
const struct net *net)
{
return nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
- net_eq(net, nf_ct_net(i->master)) &&
- nf_ct_zone_equal_any(i->master, zone);
+ net_eq(net, read_pnet(&i->net)) &&
+ nf_ct_exp_zone_equal_any(i, zone);
}
bool nf_ct_remove_expect(struct nf_conntrack_expect *exp)
{
+ lockdep_nfct_expect_lock_held();
+
if (timer_delete(&exp->timeout)) {
nf_ct_unlink_expect(exp);
nf_ct_expect_put(exp);
@@ -177,6 +180,8 @@ nf_ct_find_expectation(struct net *net,
struct nf_conntrack_expect *i, *exp = NULL;
unsigned int h;
+ lockdep_nfct_expect_lock_held();
+
if (!cnet->expect_count)
return NULL;
@@ -309,12 +314,20 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
}
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
+/* This function can only be used from packet path, where accessing
+ * master's helper is safe, because the packet holds a reference on
+ * the conntrack object. Never use it from control plane.
+ */
void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
u_int8_t family,
const union nf_inet_addr *saddr,
const union nf_inet_addr *daddr,
u_int8_t proto, const __be16 *src, const __be16 *dst)
{
+ struct nf_conntrack_helper *helper = NULL;
+ struct nf_conn *ct = exp->master;
+ struct net *net = read_pnet(&ct->ct_net);
+ struct nf_conn_help *help;
int len;
if (family == AF_INET)
@@ -325,7 +338,16 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
exp->flags = 0;
exp->class = class;
exp->expectfn = NULL;
- exp->helper = NULL;
+
+ help = nfct_help(ct);
+ if (help)
+ helper = rcu_dereference(help->helper);
+
+ rcu_assign_pointer(exp->helper, helper);
+ write_pnet(&exp->net, net);
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ exp->zone = ct->zone;
+#endif
exp->tuple.src.l3num = family;
exp->tuple.dst.protonum = proto;
@@ -442,6 +464,8 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
unsigned int h;
int ret = 0;
+ lockdep_nfct_expect_lock_held();
+
if (!master_help) {
ret = -ESHUTDOWN;
goto out;
@@ -498,8 +522,9 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
nf_ct_expect_insert(expect);
- spin_unlock_bh(&nf_conntrack_expect_lock);
nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+
return 0;
out:
spin_unlock_bh(&nf_conntrack_expect_lock);
@@ -627,11 +652,15 @@ static int exp_seq_show(struct seq_file *s, void *v)
{
struct nf_conntrack_expect *expect;
struct nf_conntrack_helper *helper;
+ struct net *net = seq_file_net(s);
struct hlist_node *n = v;
char *delim = "";
expect = hlist_entry(n, struct nf_conntrack_expect, hnode);
+ if (!net_eq(nf_ct_exp_net(expect), net))
+ return 0;
+
if (expect->timeout.function)
seq_printf(s, "%ld ", timer_pending(&expect->timeout)
? (long)(expect->timeout.expires - jiffies)/HZ : 0);
@@ -654,7 +683,7 @@ static int exp_seq_show(struct seq_file *s, void *v)
if (expect->flags & NF_CT_EXPECT_USERSPACE)
seq_printf(s, "%sUSERSPACE", delim);
- helper = rcu_dereference(nfct_help(expect->master)->helper);
+ helper = rcu_dereference(expect->helper);
if (helper) {
seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
if (helper->expect_policy[expect->class].name[0])
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index a2a0e22ccee1..3f5c50455b71 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -643,7 +643,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
&ct->tuplehash[!dir].tuple.src.u3,
&ct->tuplehash[!dir].tuple.dst.u3,
IPPROTO_TCP, NULL, &port);
- exp->helper = &nf_conntrack_helper_h245;
+ rcu_assign_pointer(exp->helper, &nf_conntrack_helper_h245);
nathook = rcu_dereference(nfct_h323_nat_hook);
if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
@@ -767,7 +767,7 @@ static int expect_callforwarding(struct sk_buff *skb,
nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
&ct->tuplehash[!dir].tuple.src.u3, &addr,
IPPROTO_TCP, NULL, &port);
- exp->helper = nf_conntrack_helper_q931;
+ rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931);
nathook = rcu_dereference(nfct_h323_nat_hook);
if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
@@ -1234,7 +1234,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
&ct->tuplehash[!dir].tuple.src.u3 : NULL,
&ct->tuplehash[!dir].tuple.dst.u3,
IPPROTO_TCP, NULL, &port);
- exp->helper = nf_conntrack_helper_q931;
+ rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931);
exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple calls */
nathook = rcu_dereference(nfct_h323_nat_hook);
@@ -1306,7 +1306,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
&ct->tuplehash[!dir].tuple.src.u3, &addr,
IPPROTO_UDP, NULL, &port);
- exp->helper = nf_conntrack_helper_ras;
+ rcu_assign_pointer(exp->helper, nf_conntrack_helper_ras);
if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect RAS ");
@@ -1523,7 +1523,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
&ct->tuplehash[!dir].tuple.src.u3, &addr,
IPPROTO_TCP, NULL, &port);
exp->flags = NF_CT_EXPECT_PERMANENT;
- exp->helper = nf_conntrack_helper_q931;
+ rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931);
if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect Q.931 ");
@@ -1577,7 +1577,7 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
&ct->tuplehash[!dir].tuple.src.u3, &addr,
IPPROTO_TCP, NULL, &port);
exp->flags = NF_CT_EXPECT_PERMANENT;
- exp->helper = nf_conntrack_helper_q931;
+ rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931);
if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect Q.931 ");
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index ceb48c3ca0a4..1b330ba6613b 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -395,14 +395,10 @@ EXPORT_SYMBOL_GPL(nf_conntrack_helper_register);
static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data)
{
- struct nf_conn_help *help = nfct_help(exp->master);
const struct nf_conntrack_helper *me = data;
const struct nf_conntrack_helper *this;
- if (exp->helper == me)
- return true;
-
- this = rcu_dereference_protected(help->helper,
+ this = rcu_dereference_protected(exp->helper,
lockdep_is_held(&nf_conntrack_expect_lock));
return this == me;
}
@@ -421,6 +417,11 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
nf_ct_expect_iterate_destroy(expect_iter_me, NULL);
nf_ct_iterate_destroy(unhelp, me);
+
+ /* nf_ct_iterate_destroy() does an unconditional synchronize_rcu() as
+ * last step, this ensures rcu readers of exp->helper are done.
+ * No need for another synchronize_rcu() here.
+ */
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index c156574e1273..3f408f3713bb 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -910,8 +910,8 @@ struct ctnetlink_filter {
};
static const struct nla_policy cta_filter_nla_policy[CTA_FILTER_MAX + 1] = {
- [CTA_FILTER_ORIG_FLAGS] = { .type = NLA_U32 },
- [CTA_FILTER_REPLY_FLAGS] = { .type = NLA_U32 },
+ [CTA_FILTER_ORIG_FLAGS] = NLA_POLICY_MASK(NLA_U32, CTA_FILTER_F_ALL),
+ [CTA_FILTER_REPLY_FLAGS] = NLA_POLICY_MASK(NLA_U32, CTA_FILTER_F_ALL),
};
static int ctnetlink_parse_filter(const struct nlattr *attr,
@@ -925,17 +925,11 @@ static int ctnetlink_parse_filter(const struct nlattr *attr,
if (ret)
return ret;
- if (tb[CTA_FILTER_ORIG_FLAGS]) {
+ if (tb[CTA_FILTER_ORIG_FLAGS])
filter->orig_flags = nla_get_u32(tb[CTA_FILTER_ORIG_FLAGS]);
- if (filter->orig_flags & ~CTA_FILTER_F_ALL)
- return -EOPNOTSUPP;
- }
- if (tb[CTA_FILTER_REPLY_FLAGS]) {
+ if (tb[CTA_FILTER_REPLY_FLAGS])
filter->reply_flags = nla_get_u32(tb[CTA_FILTER_REPLY_FLAGS]);
- if (filter->reply_flags & ~CTA_FILTER_F_ALL)
- return -EOPNOTSUPP;
- }
return 0;
}
@@ -2634,7 +2628,7 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
[CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING,
.len = NF_CT_HELPER_NAME_LEN - 1 },
[CTA_EXPECT_ZONE] = { .type = NLA_U16 },
- [CTA_EXPECT_FLAGS] = { .type = NLA_U32 },
+ [CTA_EXPECT_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NF_CT_EXPECT_MASK),
[CTA_EXPECT_CLASS] = { .type = NLA_U32 },
[CTA_EXPECT_NAT] = { .type = NLA_NESTED },
[CTA_EXPECT_FN] = { .type = NLA_NUL_STRING },
@@ -3012,7 +3006,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
{
struct nf_conn *master = exp->master;
long timeout = ((long)exp->timeout.expires - (long)jiffies) / HZ;
- struct nf_conn_help *help;
+ struct nf_conntrack_helper *helper;
#if IS_ENABLED(CONFIG_NF_NAT)
struct nlattr *nest_parms;
struct nf_conntrack_tuple nat_tuple = {};
@@ -3057,15 +3051,12 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) ||
nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class)))
goto nla_put_failure;
- help = nfct_help(master);
- if (help) {
- struct nf_conntrack_helper *helper;
- helper = rcu_dereference(help->helper);
- if (helper &&
- nla_put_string(skb, CTA_EXPECT_HELP_NAME, helper->name))
- goto nla_put_failure;
- }
+ helper = rcu_dereference(exp->helper);
+ if (helper &&
+ nla_put_string(skb, CTA_EXPECT_HELP_NAME, helper->name))
+ goto nla_put_failure;
+
expfn = nf_ct_helper_expectfn_find_by_symbol(exp->expectfn);
if (expfn != NULL &&
nla_put_string(skb, CTA_EXPECT_FN, expfn->name))
@@ -3358,31 +3349,37 @@ static int ctnetlink_get_expect(struct sk_buff *skb,
if (err < 0)
return err;
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb2)
+ return -ENOMEM;
+
+ spin_lock_bh(&nf_conntrack_expect_lock);
exp = nf_ct_expect_find_get(info->net, &zone, &tuple);
- if (!exp)
+ if (!exp) {
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+ kfree_skb(skb2);
return -ENOENT;
+ }
if (cda[CTA_EXPECT_ID]) {
__be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
if (id != nf_expect_get_id(exp)) {
nf_ct_expect_put(exp);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+ kfree_skb(skb2);
return -ENOENT;
}
}
- skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!skb2) {
- nf_ct_expect_put(exp);
- return -ENOMEM;
- }
-
rcu_read_lock();
err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).portid,
info->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
exp);
rcu_read_unlock();
nf_ct_expect_put(exp);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+
if (err <= 0) {
kfree_skb(skb2);
return -ENOMEM;
@@ -3394,12 +3391,9 @@ static int ctnetlink_get_expect(struct sk_buff *skb,
static bool expect_iter_name(struct nf_conntrack_expect *exp, void *data)
{
struct nf_conntrack_helper *helper;
- const struct nf_conn_help *m_help;
const char *name = data;
- m_help = nfct_help(exp->master);
-
- helper = rcu_dereference(m_help->helper);
+ helper = rcu_dereference(exp->helper);
if (!helper)
return false;
@@ -3432,22 +3426,26 @@ static int ctnetlink_del_expect(struct sk_buff *skb,
if (err < 0)
return err;
+ spin_lock_bh(&nf_conntrack_expect_lock);
+
/* bump usage count to 2 */
exp = nf_ct_expect_find_get(info->net, &zone, &tuple);
- if (!exp)
+ if (!exp) {
+ spin_unlock_bh(&nf_conntrack_expect_lock);
return -ENOENT;
+ }
if (cda[CTA_EXPECT_ID]) {
__be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
if (id != nf_expect_get_id(exp)) {
nf_ct_expect_put(exp);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
return -ENOENT;
}
}
/* after list removal, usage count == 1 */
- spin_lock_bh(&nf_conntrack_expect_lock);
if (timer_delete(&exp->timeout)) {
nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid,
nlmsg_report(info->nlh));
@@ -3534,9 +3532,10 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct,
struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple *mask)
{
- u_int32_t class = 0;
+ struct net *net = read_pnet(&ct->ct_net);
struct nf_conntrack_expect *exp;
struct nf_conn_help *help;
+ u32 class = 0;
int err;
help = nfct_help(ct);
@@ -3573,7 +3572,13 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct,
exp->class = class;
exp->master = ct;
- exp->helper = helper;
+ write_pnet(&exp->net, net);
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ exp->zone = ct->zone;
+#endif
+ if (!helper)
+ helper = rcu_dereference(help->helper);
+ rcu_assign_pointer(exp->helper, helper);
exp->tuple = *tuple;
exp->mask.src.u3 = mask->src.u3;
exp->mask.src.u.all = mask->src.u.all;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 0c1d086e96cb..b67426c2189b 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1385,9 +1385,9 @@ nla_put_failure:
}
static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
- [CTA_PROTOINFO_TCP_STATE] = { .type = NLA_U8 },
- [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
- [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 },
+ [CTA_PROTOINFO_TCP_STATE] = NLA_POLICY_MAX(NLA_U8, TCP_CONNTRACK_SYN_SENT2),
+ [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = NLA_POLICY_MAX(NLA_U8, TCP_MAX_WSCALE),
+ [CTA_PROTOINFO_TCP_WSCALE_REPLY] = NLA_POLICY_MAX(NLA_U8, TCP_MAX_WSCALE),
[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) },
[CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) },
};
@@ -1414,10 +1414,6 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
if (err < 0)
return err;
- if (tb[CTA_PROTOINFO_TCP_STATE] &&
- nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
- return -EINVAL;
-
spin_lock_bh(&ct->lock);
if (tb[CTA_PROTOINFO_TCP_STATE])
ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 4ab5ef71d96d..939502ff7c87 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -924,7 +924,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple);
if (!exp || exp->master == ct ||
- nfct_help(exp->master)->helper != nfct_help(ct)->helper ||
+ exp->helper != nfct_help(ct)->helper ||
exp->class != class)
break;
#if IS_ENABLED(CONFIG_NF_NAT)
@@ -1040,6 +1040,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
unsigned int port;
const struct sdp_media_type *t;
int ret = NF_ACCEPT;
+ bool have_rtp_addr = false;
hooks = rcu_dereference(nf_nat_sip_hooks);
@@ -1056,8 +1057,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
caddr_len = 0;
if (ct_sip_parse_sdp_addr(ct, *dptr, sdpoff, *datalen,
SDP_HDR_CONNECTION, SDP_HDR_MEDIA,
- &matchoff, &matchlen, &caddr) > 0)
+ &matchoff, &matchlen, &caddr) > 0) {
caddr_len = matchlen;
+ memcpy(&rtp_addr, &caddr, sizeof(rtp_addr));
+ have_rtp_addr = true;
+ }
mediaoff = sdpoff;
for (i = 0; i < ARRAY_SIZE(sdp_media_types); ) {
@@ -1091,9 +1095,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
&matchoff, &matchlen, &maddr) > 0) {
maddr_len = matchlen;
memcpy(&rtp_addr, &maddr, sizeof(rtp_addr));
- } else if (caddr_len)
+ have_rtp_addr = true;
+ } else if (caddr_len) {
memcpy(&rtp_addr, &caddr, sizeof(rtp_addr));
- else {
+ have_rtp_addr = true;
+ } else {
nf_ct_helper_log(skb, ct, "cannot parse SDP message");
return NF_DROP;
}
@@ -1125,7 +1131,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
/* Update session connection and owner addresses */
hooks = rcu_dereference(nf_nat_sip_hooks);
- if (hooks && ct->status & IPS_NAT_MASK)
+ if (hooks && ct->status & IPS_NAT_MASK && have_rtp_addr)
ret = hooks->sdp_session(skb, protoff, dataoff,
dptr, datalen, sdpoff,
&rtp_addr);
@@ -1297,7 +1303,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff,
nf_ct_expect_init(exp, SIP_EXPECT_SIGNALLING, nf_ct_l3num(ct),
saddr, &daddr, proto, NULL, &port);
exp->timeout.expires = sip_timeout * HZ;
- exp->helper = helper;
+ rcu_assign_pointer(exp->helper, helper);
exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE;
hooks = rcu_dereference(nf_nat_sip_hooks);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index b35a90955e2e..fcbe54940b2e 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -647,15 +647,11 @@ __build_packet_message(struct nfnl_log_net *log,
if (data_len) {
struct nlattr *nla;
- int size = nla_attr_size(data_len);
- if (skb_tailroom(inst->skb) < nla_total_size(data_len))
+ nla = nla_reserve(inst->skb, NFULA_PAYLOAD, data_len);
+ if (!nla)
goto nla_put_failure;
- nla = skb_put(inst->skb, nla_total_size(data_len));
- nla->nla_type = NFULA_PAYLOAD;
- nla->nla_len = size;
-
if (skb_copy_bits(skb, 0, nla_data(nla), data_len))
BUG();
}
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index 7ff90325c97f..6395982e4d95 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -242,7 +242,7 @@ static int nft_pipapo_avx2_lookup_4b_2(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -319,7 +319,7 @@ static int nft_pipapo_avx2_lookup_4b_4(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -414,7 +414,7 @@ static int nft_pipapo_avx2_lookup_4b_8(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -505,7 +505,7 @@ static int nft_pipapo_avx2_lookup_4b_12(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -641,7 +641,7 @@ static int nft_pipapo_avx2_lookup_4b_32(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -699,7 +699,7 @@ static int nft_pipapo_avx2_lookup_8b_1(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -764,7 +764,7 @@ static int nft_pipapo_avx2_lookup_8b_2(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -839,7 +839,7 @@ static int nft_pipapo_avx2_lookup_8b_4(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -925,7 +925,7 @@ static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -1019,7 +1019,7 @@ static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index fe8bd497d74a..737c339decd0 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -572,14 +572,12 @@ static struct nft_array *nft_array_alloc(u32 max_intervals)
return array;
}
-#define NFT_ARRAY_EXTRA_SIZE 10240
-
/* Similar to nft_rbtree_{u,k}size to hide details to userspace, but consider
* packed representation coming from userspace for anonymous sets too.
*/
static u32 nft_array_elems(const struct nft_set *set)
{
- u32 nelems = atomic_read(&set->nelems);
+ u32 nelems = atomic_read(&set->nelems) - set->ndeact;
/* Adjacent intervals are represented with a single start element in
* anonymous sets, use the current element counter as is.
@@ -595,27 +593,87 @@ static u32 nft_array_elems(const struct nft_set *set)
return (nelems / 2) + 2;
}
-static int nft_array_may_resize(const struct nft_set *set)
+#define NFT_ARRAY_INITIAL_SIZE 1024
+#define NFT_ARRAY_INITIAL_ANON_SIZE 16
+#define NFT_ARRAY_INITIAL_ANON_THRESH (8192U / sizeof(struct nft_array_interval))
+
+static int nft_array_may_resize(const struct nft_set *set, bool flush)
{
- u32 nelems = nft_array_elems(set), new_max_intervals;
+ u32 initial_intervals, max_intervals, new_max_intervals, delta;
+ u32 shrinked_max_intervals, nelems = nft_array_elems(set);
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_array *array;
- if (!priv->array_next) {
- array = nft_array_alloc(nelems + NFT_ARRAY_EXTRA_SIZE);
- if (!array)
- return -ENOMEM;
+ if (nft_set_is_anonymous(set))
+ initial_intervals = NFT_ARRAY_INITIAL_ANON_SIZE;
+ else
+ initial_intervals = NFT_ARRAY_INITIAL_SIZE;
- priv->array_next = array;
+ if (priv->array_next) {
+ max_intervals = priv->array_next->max_intervals;
+ new_max_intervals = priv->array_next->max_intervals;
+ } else {
+ if (priv->array) {
+ max_intervals = priv->array->max_intervals;
+ new_max_intervals = priv->array->max_intervals;
+ } else {
+ max_intervals = 0;
+ new_max_intervals = initial_intervals;
+ }
}
- if (nelems < priv->array_next->max_intervals)
- return 0;
+ if (nft_set_is_anonymous(set))
+ goto maybe_grow;
+
+ if (flush) {
+ /* Set flush just started, nelems still report elements.*/
+ nelems = 0;
+ new_max_intervals = NFT_ARRAY_INITIAL_SIZE;
+ goto realloc_array;
+ }
+
+ if (check_add_overflow(new_max_intervals, new_max_intervals,
+ &shrinked_max_intervals))
+ return -EOVERFLOW;
+
+ shrinked_max_intervals = DIV_ROUND_UP(shrinked_max_intervals, 3);
- new_max_intervals = priv->array_next->max_intervals + NFT_ARRAY_EXTRA_SIZE;
- if (nft_array_intervals_alloc(priv->array_next, new_max_intervals) < 0)
+ if (shrinked_max_intervals > NFT_ARRAY_INITIAL_SIZE &&
+ nelems < shrinked_max_intervals) {
+ new_max_intervals = shrinked_max_intervals;
+ goto realloc_array;
+ }
+maybe_grow:
+ if (nelems > new_max_intervals) {
+ if (nft_set_is_anonymous(set) &&
+ new_max_intervals < NFT_ARRAY_INITIAL_ANON_THRESH) {
+ new_max_intervals <<= 1;
+ } else {
+ delta = new_max_intervals >> 1;
+ if (check_add_overflow(new_max_intervals, delta,
+ &new_max_intervals))
+ return -EOVERFLOW;
+ }
+ }
+
+realloc_array:
+ if (WARN_ON_ONCE(nelems > new_max_intervals))
return -ENOMEM;
+ if (priv->array_next) {
+ if (max_intervals == new_max_intervals)
+ return 0;
+
+ if (nft_array_intervals_alloc(priv->array_next, new_max_intervals) < 0)
+ return -ENOMEM;
+ } else {
+ array = nft_array_alloc(new_max_intervals);
+ if (!array)
+ return -ENOMEM;
+
+ priv->array_next = array;
+ }
+
return 0;
}
@@ -630,7 +688,7 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
nft_rbtree_maybe_reset_start_cookie(priv, tstamp);
- if (nft_array_may_resize(set) < 0)
+ if (nft_array_may_resize(set, false) < 0)
return -ENOMEM;
do {
@@ -741,7 +799,7 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
nft_rbtree_interval_null(set, this))
priv->start_rbe_cookie = 0;
- if (nft_array_may_resize(set) < 0)
+ if (nft_array_may_resize(set, false) < 0)
return NULL;
while (parent != NULL) {
@@ -811,7 +869,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
switch (iter->type) {
case NFT_ITER_UPDATE_CLONE:
- if (nft_array_may_resize(set) < 0) {
+ if (nft_array_may_resize(set, true) < 0) {
iter->err = -ENOMEM;
break;
}
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
index 394166f224a4..ffdc6ccc6511 100755
--- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
@@ -29,7 +29,8 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
net6_port_net6_port net_port_mac_proto_net"
# Reported bugs, also described by TYPE_ variables below
-BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate insert_overlap"
+BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate
+ insert_overlap load_flush_load4 load_flush_load8"
# List of possible paths to pktgen script from kernel tree for performance tests
PKTGEN_SCRIPT_PATHS="
@@ -432,6 +433,30 @@ race_repeat 0
perf_duration 0
"
+TYPE_load_flush_load4="
+display reload with flush, 4bit groups
+type_spec ipv4_addr . ipv4_addr
+chain_spec ip saddr . ip daddr
+dst addr4
+proto icmp
+
+race_repeat 0
+
+perf_duration 0
+"
+
+TYPE_load_flush_load8="
+display reload with flush, 8bit groups
+type_spec ipv4_addr . ipv4_addr
+chain_spec ip saddr . ip daddr
+dst addr4
+proto icmp
+
+race_repeat 0
+
+perf_duration 0
+"
+
# Set template for all tests, types and rules are filled in depending on test
set_template='
flush ruleset
@@ -1997,6 +2022,49 @@ test_bug_insert_overlap()
return 0
}
+test_bug_load_flush_load4()
+{
+ local i
+
+ setup veth send_"${proto}" set || return ${ksft_skip}
+
+ for i in $(seq 0 255); do
+ local addelem="add element inet filter test"
+ local j
+
+ for j in $(seq 0 20); do
+ echo "$addelem { 10.$j.0.$i . 10.$j.1.$i }"
+ echo "$addelem { 10.$j.0.$i . 10.$j.2.$i }"
+ done
+ done > "$tmp"
+
+ nft -f "$tmp" || return 1
+
+ ( echo "flush set inet filter test";cat "$tmp") | nft -f -
+ [ $? -eq 0 ] || return 1
+
+ return 0
+}
+
+test_bug_load_flush_load8()
+{
+ local i
+
+ setup veth send_"${proto}" set || return ${ksft_skip}
+
+ for i in $(seq 1 100); do
+ echo "add element inet filter test { 10.0.0.$i . 10.0.1.$i }"
+ echo "add element inet filter test { 10.0.0.$i . 10.0.2.$i }"
+ done > "$tmp"
+
+ nft -f "$tmp" || return 1
+
+ ( echo "flush set inet filter test";cat "$tmp") | nft -f -
+ [ $? -eq 0 ] || return 1
+
+ return 0
+}
+
test_reported_issues() {
eval test_bug_"${subtest}"
}