summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartin KaFai Lau <martin.lau@kernel.org>2025-10-10 09:47:04 -0700
committerMartin KaFai Lau <martin.lau@kernel.org>2025-10-10 10:43:04 -0700
commit7dc484fe481e5cc5b850fc1aa974a46e0f29ed68 (patch)
treef6bd442ece2caefa9b6f852d1c1976f0cc5ba75a
parent2e36338df42d64955236d9f9b665d1443fd74f8c (diff)
parentbc3eeb42597a514a0b5286f085d91c8b34e4a532 (diff)
Merge branch 'support-non-linear-skbs-for-bpf_prog_test_run'
Paul Chaignon says: ==================== Support non-linear skbs for BPF_PROG_TEST_RUN This patchset adds support for non-linear skbs when running tc programs with BPF_PROG_TEST_RUN. We've had multiple bugs in the past few years in Cilium caused by missing calls to bpf_skb_pull_data(). Daniel suggested to support non linear skb in BPF_PROG_TEST_RUN to uncover these bugs in our BPF tests. Changes in v8: - Fix uninitialized data pointer spotted by Martin. - Error out in test_loader if __linear_size tag is used on unsupported program types. Changes in v7: - Refactor use of 'size' variable as suggested by Martin. - Support copying back the non-linear area to data_out. - Minor code changes for readability, suggested by Martin. Changes in v6: - Disallow non-linear skb in prog_run_skb only for LWT programs instead of all non-L2 program types, on suggestion from Martin. - Reject non-null ctx->data and ctx->data_meta, as suggested by Amery. - Bound linear_size to 'PAGE_SIZE - headroom - tailroom' to be consistent with prog_run_xdp, as suggested by Martin. - Allocate exactly linear_size bytes in bpf_test_init, spotted by Martin. - Fix wrong conflict resolution on double-free fix, spotted by Amery. - Rebased. Changes in v5: - Fix double free on data in first patch. Changes in v4: - Per Martin's suggestion, follow the XDP code pattern and use bpf_test_init only to initialize the linear area. That way data is directly copied to the right areas and we avoid the call to __pskb_pull_tail. - Fixed outdated commit descriptions. - Rebased. Changes in v3: - Dropped BPF_F_TEST_SKB_NON_LINEAR and used the ctx->data_end to determine if the user wants non-linear skb, as suggested by Amery. - Introduced a second commit with a bit of refactoring to allow for the above requested change. - Fix bug found by syzkaller on third commit. - Rebased. Changes in v2: - Made the linear size configurable via ctx->data_end, as suggested by Amery. - Reworked the selftests to allow testing the configurable linear size. - Fix warnings reported by kernel test robot on first commit. - Rebased. ==================== Link: https://patch.msgid.link/cover.1760037899.git.paul.chaignon@gmail.com Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
-rw-r--r--net/bpf/test_run.c143
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c59
-rw-r--r--tools/testing/selftests/bpf/test_loader.c29
4 files changed, 193 insertions, 42 deletions
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index dfb03ee0bb62..05e30ff5b6f9 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -447,7 +447,7 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
static int bpf_test_finish(const union bpf_attr *kattr,
union bpf_attr __user *uattr, const void *data,
- struct skb_shared_info *sinfo, u32 size,
+ struct skb_shared_info *sinfo, u32 size, u32 frag_size,
u32 retval, u32 duration)
{
void __user *data_out = u64_to_user_ptr(kattr->test.data_out);
@@ -464,7 +464,7 @@ static int bpf_test_finish(const union bpf_attr *kattr,
}
if (data_out) {
- int len = sinfo ? copy_size - sinfo->xdp_frags_size : copy_size;
+ int len = sinfo ? copy_size - frag_size : copy_size;
if (len < 0) {
err = -ENOSPC;
@@ -910,6 +910,12 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
/* cb is allowed */
if (!range_is_zero(__skb, offsetofend(struct __sk_buff, cb),
+ offsetof(struct __sk_buff, data_end)))
+ return -EINVAL;
+
+ /* data_end is allowed, but not copied to skb */
+
+ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, data_end),
offsetof(struct __sk_buff, tstamp)))
return -EINVAL;
@@ -984,46 +990,39 @@ static struct proto bpf_dummy_proto = {
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
- bool is_l2 = false, is_direct_pkt_access = false;
+ bool is_l2 = false, is_direct_pkt_access = false, is_lwt = false;
+ u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
struct net *net = current->nsproxy->net_ns;
struct net_device *dev = net->loopback_dev;
- u32 size = kattr->test.data_size_in;
+ u32 headroom = NET_SKB_PAD + NET_IP_ALIGN;
+ u32 linear_sz = kattr->test.data_size_in;
u32 repeat = kattr->test.repeat;
struct __sk_buff *ctx = NULL;
+ struct sk_buff *skb = NULL;
+ struct sock *sk = NULL;
u32 retval, duration;
int hh_len = ETH_HLEN;
- struct sk_buff *skb;
- struct sock *sk;
- void *data;
+ void *data = NULL;
int ret;
if ((kattr->test.flags & ~BPF_F_TEST_SKB_CHECKSUM_COMPLETE) ||
kattr->test.cpu || kattr->test.batch_size)
return -EINVAL;
- if (size < ETH_HLEN)
+ if (kattr->test.data_size_in < ETH_HLEN)
return -EINVAL;
- data = bpf_test_init(kattr, kattr->test.data_size_in,
- size, NET_SKB_PAD + NET_IP_ALIGN,
- SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
- if (IS_ERR(data))
- return PTR_ERR(data);
-
- ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff));
- if (IS_ERR(ctx)) {
- kfree(data);
- return PTR_ERR(ctx);
- }
-
switch (prog->type) {
case BPF_PROG_TYPE_SCHED_CLS:
case BPF_PROG_TYPE_SCHED_ACT:
+ is_direct_pkt_access = true;
is_l2 = true;
- fallthrough;
+ break;
case BPF_PROG_TYPE_LWT_IN:
case BPF_PROG_TYPE_LWT_OUT:
case BPF_PROG_TYPE_LWT_XMIT:
+ is_lwt = true;
+ fallthrough;
case BPF_PROG_TYPE_CGROUP_SKB:
is_direct_pkt_access = true;
break;
@@ -1031,25 +1030,88 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
break;
}
+ ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff));
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ if (ctx) {
+ if (ctx->data_end > kattr->test.data_size_in || ctx->data || ctx->data_meta) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (ctx->data_end) {
+ /* Non-linear LWT test_run is unsupported for now. */
+ if (is_lwt) {
+ ret = -EINVAL;
+ goto out;
+ }
+ linear_sz = max(ETH_HLEN, ctx->data_end);
+ }
+ }
+
+ linear_sz = min_t(u32, linear_sz, PAGE_SIZE - headroom - tailroom);
+
+ data = bpf_test_init(kattr, linear_sz, linear_sz, headroom, tailroom);
+ if (IS_ERR(data)) {
+ ret = PTR_ERR(data);
+ data = NULL;
+ goto out;
+ }
+
sk = sk_alloc(net, AF_UNSPEC, GFP_USER, &bpf_dummy_proto, 1);
if (!sk) {
- kfree(data);
- kfree(ctx);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
sock_init_data(NULL, sk);
skb = slab_build_skb(data);
if (!skb) {
- kfree(data);
- kfree(ctx);
- sk_free(sk);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
skb->sk = sk;
+ data = NULL; /* data released via kfree_skb */
+
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
- __skb_put(skb, size);
+ __skb_put(skb, linear_sz);
+
+ if (unlikely(kattr->test.data_size_in > linear_sz)) {
+ void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
+ struct skb_shared_info *sinfo = skb_shinfo(skb);
+ u32 copied = linear_sz;
+
+ while (copied < kattr->test.data_size_in) {
+ struct page *page;
+ u32 data_len;
+
+ if (sinfo->nr_frags == MAX_SKB_FRAGS) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ data_len = min_t(u32, kattr->test.data_size_in - copied,
+ PAGE_SIZE);
+ skb_fill_page_desc(skb, sinfo->nr_frags, page, 0, data_len);
+
+ if (copy_from_user(page_address(page), data_in + copied,
+ data_len)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ skb->data_len += data_len;
+ skb->truesize += PAGE_SIZE;
+ skb->len += data_len;
+ copied += data_len;
+ }
+ }
if (ctx && ctx->ifindex > 1) {
dev = dev_get_by_index(net, ctx->ifindex);
@@ -1129,12 +1191,11 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
convert_skb_to___skb(skb, ctx);
- size = skb->len;
- /* bpf program can never convert linear skb to non-linear */
- if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
- size = skb_headlen(skb);
- ret = bpf_test_finish(kattr, uattr, skb->data, NULL, size, retval,
- duration);
+ if (skb_is_nonlinear(skb))
+ /* bpf program can never convert linear skb to non-linear */
+ WARN_ON_ONCE(linear_sz == kattr->test.data_size_in);
+ ret = bpf_test_finish(kattr, uattr, skb->data, skb_shinfo(skb), skb->len,
+ skb->data_len, retval, duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, ctx,
sizeof(struct __sk_buff));
@@ -1142,7 +1203,9 @@ out:
if (dev && dev != net->loopback_dev)
dev_put(dev);
kfree_skb(skb);
- sk_free(sk);
+ kfree(data);
+ if (sk)
+ sk_free(sk);
kfree(ctx);
return ret;
}
@@ -1340,7 +1403,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
goto out;
size = xdp.data_end - xdp.data_meta + sinfo->xdp_frags_size;
- ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size,
+ ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size, sinfo->xdp_frags_size,
retval, duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, ctx,
@@ -1431,7 +1494,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
goto out;
ret = bpf_test_finish(kattr, uattr, &flow_keys, NULL,
- sizeof(flow_keys), retval, duration);
+ sizeof(flow_keys), 0, retval, duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, user_ctx,
sizeof(struct bpf_flow_keys));
@@ -1532,7 +1595,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
}
- ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration);
+ ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, 0, retval, duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));
@@ -1732,7 +1795,7 @@ int bpf_prog_test_run_nf(struct bpf_prog *prog,
if (ret)
goto out;
- ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration);
+ ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, 0, retval, duration);
out:
kfree(user_ctx);
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index a7a1a684eed1..c9bfbe1bafc1 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -126,6 +126,9 @@
* Several __arch_* annotations could be specified at once.
* When test case is not run on current arch it is marked as skipped.
* __caps_unpriv Specify the capabilities that should be set when running the test.
+ *
+ * __linear_size Specify the size of the linear area of non-linear skbs, or
+ * 0 for linear skbs.
*/
#define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" XSTR(__COUNTER__) "=" msg)))
#define __not_msg(msg) __attribute__((btf_decl_tag("comment:test_expect_not_msg=" XSTR(__COUNTER__) "=" msg)))
@@ -159,6 +162,7 @@
#define __stderr_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stderr_unpriv=" XSTR(__COUNTER__) "=" msg)))
#define __stdout(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout=" XSTR(__COUNTER__) "=" msg)))
#define __stdout_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout_unpriv=" XSTR(__COUNTER__) "=" msg)))
+#define __linear_size(sz) __attribute__((btf_decl_tag("comment:test_linear_size=" XSTR(sz))))
/* Define common capabilities tested using __caps_unpriv */
#define CAP_NET_ADMIN 12
diff --git a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
index 28b602ac9cbe..911caa8fd1b7 100644
--- a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Converted from tools/testing/selftests/bpf/verifier/direct_packet_access.c */
+#include <linux/if_ether.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
@@ -800,4 +801,62 @@ l0_%=: /* exit(0) */ \
: __clobber_all);
}
+#define access_test_non_linear(name, type, desc, retval, linear_sz, off) \
+ SEC(type) \
+ __description("direct packet access: " #name " (non-linear, " type ", " desc ")") \
+ __success __retval(retval) \
+ __linear_size(linear_sz) \
+ __naked void access_non_linear_##name(void) \
+ { \
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[skb_data]); \
+ r3 = *(u32*)(r1 + %[skb_data_end]); \
+ r0 = r2; \
+ r0 += %[offset]; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r0 - 1); \
+ r0 = 0; \
+ exit; \
+ l0_%=: r0 = 1; \
+ exit; \
+ " : \
+ : __imm_const(skb_data, offsetof(struct __sk_buff, data)), \
+ __imm_const(skb_data_end, offsetof(struct __sk_buff, data_end)), \
+ __imm_const(offset, off) \
+ : __clobber_all); \
+ }
+
+access_test_non_linear(test31, "tc", "too short eth", 1, ETH_HLEN, 22);
+access_test_non_linear(test32, "tc", "too short 1", 1, 1, 22);
+access_test_non_linear(test33, "tc", "long enough", 0, 22, 22);
+access_test_non_linear(test34, "cgroup_skb/ingress", "too short eth", 1, ETH_HLEN, 8);
+access_test_non_linear(test35, "cgroup_skb/ingress", "too short 1", 1, 1, 8);
+access_test_non_linear(test36, "cgroup_skb/ingress", "long enough", 0, 22, 8);
+
+SEC("tc")
+__description("direct packet access: test37 (non-linear, linearized)")
+__success __retval(0)
+__linear_size(ETH_HLEN)
+__naked void access_non_linear_linearized(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r2 = 22; \
+ call %[bpf_skb_pull_data]; \
+ r2 = *(u32*)(r6 + %[skb_data]); \
+ r3 = *(u32*)(r6 + %[skb_data_end]); \
+ r0 = r2; \
+ r0 += 22; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r0 - 1); \
+ exit; \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_skb_pull_data),
+ __imm_const(skb_data, offsetof(struct __sk_buff, data)),
+ __imm_const(skb_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index 74ecc281bb8c..338c035c3688 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -43,6 +43,7 @@
#define TEST_TAG_EXPECT_STDERR_PFX_UNPRIV "comment:test_expect_stderr_unpriv="
#define TEST_TAG_EXPECT_STDOUT_PFX "comment:test_expect_stdout="
#define TEST_TAG_EXPECT_STDOUT_PFX_UNPRIV "comment:test_expect_stdout_unpriv="
+#define TEST_TAG_LINEAR_SIZE "comment:test_linear_size="
/* Warning: duplicated in bpf_misc.h */
#define POINTER_VALUE 0xbadcafe
@@ -89,6 +90,7 @@ struct test_spec {
int mode_mask;
int arch_mask;
int load_mask;
+ int linear_sz;
bool auxiliary;
bool valid;
};
@@ -633,6 +635,21 @@ static int parse_test_spec(struct test_loader *tester,
&spec->unpriv.stdout);
if (err)
goto cleanup;
+ } else if (str_has_pfx(s, TEST_TAG_LINEAR_SIZE)) {
+ switch (bpf_program__type(prog)) {
+ case BPF_PROG_TYPE_SCHED_ACT:
+ case BPF_PROG_TYPE_SCHED_CLS:
+ case BPF_PROG_TYPE_CGROUP_SKB:
+ val = s + sizeof(TEST_TAG_LINEAR_SIZE) - 1;
+ err = parse_int(val, &spec->linear_sz, "test linear size");
+ if (err)
+ goto cleanup;
+ break;
+ default:
+ PRINT_FAIL("__linear_size for unsupported program type");
+ err = -EINVAL;
+ goto cleanup;
+ }
}
}
@@ -1007,10 +1024,11 @@ static bool is_unpriv_capable_map(struct bpf_map *map)
}
}
-static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts)
+static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts, int linear_sz)
{
__u8 tmp_out[TEST_DATA_LEN << 2] = {};
__u8 tmp_in[TEST_DATA_LEN] = {};
+ struct __sk_buff ctx = {};
int err, saved_errno;
LIBBPF_OPTS(bpf_test_run_opts, topts,
.data_in = tmp_in,
@@ -1020,6 +1038,12 @@ static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts)
.repeat = 1,
);
+ if (linear_sz) {
+ ctx.data_end = linear_sz;
+ topts.ctx_in = &ctx;
+ topts.ctx_size_in = sizeof(ctx);
+ }
+
if (empty_opts) {
memset(&topts, 0, sizeof(struct bpf_test_run_opts));
topts.sz = sizeof(struct bpf_test_run_opts);
@@ -1269,7 +1293,8 @@ void run_subtest(struct test_loader *tester,
}
err = do_prog_test_run(bpf_program__fd(tprog), &retval,
- bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false);
+ bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false,
+ spec->linear_sz);
if (!err && retval != subspec->retval && subspec->retval != POINTER_VALUE) {
PRINT_FAIL("Unexpected retval: %d != %d\n", retval, subspec->retval);
goto tobj_cleanup;